@@ -154,6 +154,7 @@ enum event_type_t {
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -2577,6 +2578,56 @@ unlock:
 	}
 }
 
+void perf_sched_cb_dec(struct pmu *pmu)
+{
+	this_cpu_dec(perf_sched_cb_usages);
+}
+
+void perf_sched_cb_inc(struct pmu *pmu)
+{
+	this_cpu_inc(perf_sched_cb_usages);
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+				struct task_struct *next,
+				bool sched_in)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	if (prev == next)
+		return;
+
+	local_irq_save(flags);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		if (pmu->sched_task) {
+			cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+			perf_pmu_disable(pmu);
+
+			pmu->sched_task(cpuctx->task_ctx, sched_in);
+
+			perf_pmu_enable(pmu);
+
+			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+		}
+	}
+
+	rcu_read_unlock();
+
+	local_irq_restore(flags);
+}
+
 #define for_each_task_context_nr(ctxn)				\
 	for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
 
@@ -2596,6 +2647,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
 	int ctxn;
 
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(task, next, false);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_context_sched_out(task, ctxn, next);
 
@@ -2847,6 +2901,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	/* check for system-wide branch_stack events */
 	if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
 		perf_branch_stack_sched_in(prev, task);
+
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
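
Note: the sketch below is not part of the patch. It only illustrates, under assumed names (example_pmu, example_event_added/deleted) and assuming the matching declarations of ->sched_task, perf_sched_cb_inc() and perf_sched_cb_dec() in <linux/perf_event.h> from the rest of this series, how a PMU driver would consume the new interface: it fills in the ->sched_task() hook on its struct pmu and bumps the per-CPU counter while an event that needs per-task state switching is active, so that perf_pmu_sched_task() above invokes the hook on every context switch on that CPU.

#include <linux/perf_event.h>

/*
 * Hypothetical driver-side hook: save or restore per-task PMU state
 * across a context switch.  Invoked by perf_pmu_sched_task() above
 * whenever the per-CPU perf_sched_cb_usages count is non-zero.
 */
static void example_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	if (sched_in) {
		/* restore hardware state for the incoming task */
	} else {
		/* save/flush hardware state of the outgoing task */
	}
}

/*
 * Called from the driver's event add/del paths (function names assumed).
 * While the count is non-zero, the core calls ->sched_task() on every
 * task switch on this CPU.
 */
static void example_event_added(struct perf_event *event)
{
	perf_sched_cb_inc(event->pmu);
}

static void example_event_deleted(struct perf_event *event)
{
	perf_sched_cb_dec(event->pmu);
}

static struct pmu example_pmu = {
	.sched_task	= example_pmu_sched_task,
	/* .event_init, .add, .del, .start, .stop, .read, ... as usual */
};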