@@ -380,7 +380,6 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
 	/*
@@ -390,31 +389,21 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 	 */
 	task_ctx = ctx ? ctx->task_ctx_data : NULL;
 	if (task_ctx) {
-		if (sched_in) {
+		if (sched_in)
 			__intel_pmu_lbr_restore(task_ctx);
-			cpuc->lbr_context = ctx;
-		} else {
+		else
 			__intel_pmu_lbr_save(task_ctx);
-		}
 		return;
 	}
 
 	/*
-	 * When sampling the branck stack in system-wide, it may be
-	 * necessary to flush the stack on context switch. This happens
-	 * when the branch stack does not tag its entries with the pid
-	 * of the current task. Otherwise it becomes impossible to
-	 * associate a branch entry with a task. This ambiguity is more
-	 * likely to appear when the branch stack supports priv level
-	 * filtering and the user sets it to monitor only at the user
-	 * level (which could be a useful measurement in system-wide
-	 * mode). In that case, the risk is high of having a branch
-	 * stack with branch from multiple tasks.
-	 */
-	if (sched_in) {
+	 * Since a context switch can flip the address space and LBR entries
+	 * are not tagged with an identifier, we need to wipe the LBR, even for
+	 * per-cpu events. You simply cannot resolve the branches from the old
+	 * address space.
+	 */
+	if (sched_in)
 		intel_pmu_lbr_reset();
-		cpuc->lbr_context = ctx;
-	}
 }
 
 static inline bool branch_user_callstack(unsigned br_sel)
@@ -430,14 +419,6 @@ void intel_pmu_lbr_add(struct perf_event *event)
 	if (!x86_pmu.lbr_nr)
 		return;
 
-	/*
-	 * Reset the LBR stack if we changed task context to
-	 * avoid data leaks.
-	 */
-	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
-		intel_pmu_lbr_reset();
-		cpuc->lbr_context = event->ctx;
-	}
 	cpuc->br_sel = event->hw.branch_reg.reg;
 
 	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
@@ -445,8 +426,28 @@ void intel_pmu_lbr_add(struct perf_event *event)
 		task_ctx->lbr_callstack_users++;
 	}
 
-	cpuc->lbr_users++;
+	/*
+	 * Request pmu::sched_task() callback, which will fire inside the
+	 * regular perf event scheduling, so that call will:
+	 *
+	 *  - restore or wipe; when LBR-callstack,
+	 *  - wipe; otherwise,
+	 *
+	 * when this is from __perf_event_task_sched_in().
+	 *
+	 * However, if this is from perf_install_in_context(), no such callback
+	 * will follow and we'll need to reset the LBR here if this is the
+	 * first LBR event.
+	 *
+	 * The problem is, we cannot tell these cases apart... but we can
+	 * exclude the biggest chunk of cases by looking at
+	 * event->total_time_running. An event that has accrued runtime cannot
+	 * be 'new'. Conversely, a new event can get installed through the
+	 * context switch path for the first time.
+	 */
 	perf_sched_cb_inc(event->ctx->pmu);
+	if (!cpuc->lbr_users++ && !event->total_time_running)
+		intel_pmu_lbr_reset();
 }
 
 void intel_pmu_lbr_del(struct perf_event *event)