@@ -2249,7 +2249,7 @@ static int __perf_install_in_context(void *info)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
-	bool activate = true;
+	bool reprogram = true;
 	int ret = 0;
 
 	raw_spin_lock(&cpuctx->ctx.lock);
@@ -2257,27 +2257,26 @@ static int __perf_install_in_context(void *info)
 		raw_spin_lock(&ctx->lock);
 		task_ctx = ctx;
 
-		/* If we're on the wrong CPU, try again */
-		if (task_cpu(ctx->task) != smp_processor_id()) {
-			ret = -ESRCH;
-			goto unlock;
-		}
+		reprogram = (ctx->task == current);
 
 		/*
-		 * If we're on the right CPU, see if the task we target is
-		 * current, if not we don't have to activate the ctx, a future
-		 * context switch will do that for us.
+		 * If the task is running, it must be running on this CPU,
+		 * otherwise we cannot reprogram things.
+		 *
+		 * If its not running, we don't care, ctx->lock will
+		 * serialize against it becoming runnable.
 		 */
-		if (ctx->task != current)
-			activate = false;
-		else
-			WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
+		if (task_curr(ctx->task) && !reprogram) {
+			ret = -ESRCH;
+			goto unlock;
+		}
 
+		WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
 	} else if (task_ctx) {
 		raw_spin_lock(&task_ctx->lock);
 	}
 
-	if (activate) {
+	if (reprogram) {
 		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 		add_event_to_ctx(event, ctx);
 		ctx_resched(cpuctx, task_ctx);
@@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx,
 	/*
 	 * Installing events is tricky because we cannot rely on ctx->is_active
 	 * to be set in case this is the nr_events 0 -> 1 transition.
+	 *
+	 * Instead we use task_curr(), which tells us if the task is running.
+	 * However, since we use task_curr() outside of rq::lock, we can race
+	 * against the actual state. This means the result can be wrong.
+	 *
+	 * If we get a false positive, we retry, this is harmless.
+	 *
+	 * If we get a false negative, things are complicated. If we are after
+	 * perf_event_context_sched_in() ctx::lock will serialize us, and the
+	 * value must be correct. If we're before, it doesn't matter since
+	 * perf_event_context_sched_in() will program the counter.
+	 *
+	 * However, this hinges on the remote context switch having observed
+	 * our task->perf_event_ctxp[] store, such that it will in fact take
+	 * ctx::lock in perf_event_context_sched_in().
+	 *
+	 * We do this by task_function_call(), if the IPI fails to hit the task
+	 * we know any future context switch of task must see the
+	 * perf_event_ctpx[] store.
 	 */
-again:
+
 	/*
-	 * Cannot use task_function_call() because we need to run on the task's
-	 * CPU regardless of whether its current or not.
+	 * This smp_mb() orders the task->perf_event_ctxp[] store with the
+	 * task_cpu() load, such that if the IPI then does not find the task
+	 * running, a future context switch of that task must observe the
+	 * store.
 	 */
-	if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+	smp_mb();
+again:
+	if (!task_function_call(task, __perf_install_in_context, event))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -2348,12 +2370,16 @@ again:
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
-	raw_spin_unlock_irq(&ctx->lock);
 	/*
-	 * Since !ctx->is_active doesn't mean anything, we must IPI
-	 * unconditionally.
+	 * If the task is not running, ctx->lock will avoid it becoming so,
+	 * thus we can safely install the event.
 	 */
-	goto again;
+	if (task_curr(task)) {
+		raw_spin_unlock_irq(&ctx->lock);
+		goto again;
+	}
+	add_event_to_ctx(event, ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*
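
The ordering argument spelled out in the new comments is the classic store-buffer pattern: perf_install_in_context() publishes the context (the task->perf_event_ctxp[] store) and then checks whether the task is running, while a remote context switch marks the task running and then looks for a published context. With a full barrier on each side, it is impossible for both parties to miss the other's store. The standalone C11 sketch below models only that pattern; it is illustrative and not kernel code. The names installer, context_switch, ctx_published and task_running are invented for the example, and atomic_thread_fence(memory_order_seq_cst) stands in for smp_mb() on one side and for the ordering provided by the scheduler taking rq::lock on the other.

/* Store-buffer sketch of the ordering perf_install_in_context() relies on. */
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int ctx_published;	/* models the task->perf_event_ctxp[] store */
static atomic_int task_running;		/* models the task becoming current */

static int installer_saw_running;
static int sched_saw_ctx;

/* Models perf_install_in_context(): publish the ctx, smp_mb(), then check. */
static void *installer(void *arg)
{
	(void)arg;
	atomic_store_explicit(&ctx_published, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* stands in for smp_mb() */
	installer_saw_running =
		atomic_load_explicit(&task_running, memory_order_relaxed);
	return NULL;
}

/* Models the remote context switch: mark the task running, then look for a ctx. */
static void *context_switch(void *arg)
{
	(void)arg;
	atomic_store_explicit(&task_running, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* stands in for rq::lock ordering */
	sched_saw_ctx =
		atomic_load_explicit(&ctx_published, memory_order_relaxed);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, installer, NULL);
	pthread_create(&b, NULL, context_switch, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/*
	 * The forbidden outcome is both sides missing the other's store:
	 * either the installer sees the task running (and IPIs / retries),
	 * or the context switch sees the published context (and programs
	 * the event itself).
	 */
	assert(installer_saw_running || sched_saw_ctx);
	printf("installer_saw_running=%d sched_saw_ctx=%d\n",
	       installer_saw_running, sched_saw_ctx);
	return 0;
}

Mapped back to the patch: if the installer observes the task running, task_function_call() (or the task_curr() retry in perf_install_in_context()) takes care of reprogramming; if it does not, any later context switch of that task is guaranteed to observe the published perf_event_ctxp[] pointer and will take ctx::lock in perf_event_context_sched_in(), which is exactly what the new comments describe.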