@@ -2249,7 +2249,7 @@ static int __perf_install_in_context(void *info)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
-	bool activate = true;
+	bool reprogram = true;
 	int ret = 0;
 
 	raw_spin_lock(&cpuctx->ctx.lock);
@@ -2257,27 +2257,26 @@ static int __perf_install_in_context(void *info)
 		raw_spin_lock(&ctx->lock);
 		task_ctx = ctx;
 
-		/* If we're on the wrong CPU, try again */
-		if (task_cpu(ctx->task) != smp_processor_id()) {
-			ret = -ESRCH;
-			goto unlock;
-		}
+		reprogram = (ctx->task == current);
 
 		/*
-		 * If we're on the right CPU, see if the task we target is
-		 * current, if not we don't have to activate the ctx, a future
-		 * context switch will do that for us.
+		 * If the task is running, it must be running on this CPU,
+		 * otherwise we cannot reprogram things.
+		 *
+		 * If its not running, we don't care, ctx->lock will
+		 * serialize against it becoming runnable.
 		 */
-		if (ctx->task != current)
-			activate = false;
-		else
-			WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
+		if (task_curr(ctx->task) && !reprogram) {
+			ret = -ESRCH;
+			goto unlock;
+		}
 
+		WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
 	} else if (task_ctx) {
 		raw_spin_lock(&task_ctx->lock);
 	}
 
-	if (activate) {
+	if (reprogram) {
 		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 		add_event_to_ctx(event, ctx);
 		ctx_resched(cpuctx, task_ctx);
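
For reference (this helper is not part of the patch), task_curr() in
kernel/sched/core.c is roughly:

	inline int task_curr(const struct task_struct *p)
	{
		return cpu_curr(task_cpu(p)) == p;
	}

It is an unlocked peek at which task a CPU is currently running, so by itself
it can race with the scheduler; the new comment above and the retry logic in
perf_install_in_context() below are what make that race harmless.
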
@@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx,
 	/*
 	 * Installing events is tricky because we cannot rely on ctx->is_active
 	 * to be set in case this is the nr_events 0 -> 1 transition.
+	 *
+	 * Instead we use task_curr(), which tells us if the task is running.
+	 * However, since we use task_curr() outside of rq::lock, we can race
+	 * against the actual state. This means the result can be wrong.
+	 *
+	 * If we get a false positive, we retry, this is harmless.
+	 *
+	 * If we get a false negative, things are complicated. If we are after
+	 * perf_event_context_sched_in() ctx::lock will serialize us, and the
+	 * value must be correct. If we're before, it doesn't matter since
+	 * perf_event_context_sched_in() will program the counter.
+	 *
+	 * However, this hinges on the remote context switch having observed
+	 * our task->perf_event_ctxp[] store, such that it will in fact take
+	 * ctx::lock in perf_event_context_sched_in().
+	 *
+	 * We do this by task_function_call(), if the IPI fails to hit the task
+	 * we know any future context switch of task must see the
+	 * perf_event_ctpx[] store.
 	 */
-again:
+
 	/*
-	 * Cannot use task_function_call() because we need to run on the task's
-	 * CPU regardless of whether its current or not.
+	 * This smp_mb() orders the task->perf_event_ctxp[] store with the
+	 * task_cpu() load, such that if the IPI then does not find the task
+	 * running, a future context switch of that task must observe the
+	 * store.
 	 */
-	if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+	smp_mb();
+again:
+	if (!task_function_call(task, __perf_install_in_context, event))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -2348,12 +2370,16 @@ again:
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
-	raw_spin_unlock_irq(&ctx->lock);
 	/*
-	 * Since !ctx->is_active doesn't mean anything, we must IPI
-	 * unconditionally.
+	 * If the task is not running, ctx->lock will avoid it becoming so,
+	 * thus we can safely install the event.
 	 */
-	goto again;
+	if (task_curr(task)) {
+		raw_spin_unlock_irq(&ctx->lock);
+		goto again;
+	}
+	add_event_to_ctx(event, ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*
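
The ordering argument in the two new comments is the classic store-buffering
pattern: one side publishes the context pointer and then checks whether the
task is running, the other side starts running the task and then looks for the
context pointer, and a full barrier on each side guarantees that at most one
of the two can miss the other's store. A minimal userspace sketch of that
pattern, with made-up names standing in for the kernel state (an illustration,
not kernel code):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int ctxp;      /* stands in for task->perf_event_ctxp[] */
	static atomic_int running;   /* stands in for "task is current on some CPU" */
	static int saw_running, saw_ctxp;

	/* perf_install_in_context() side: publish ctxp, then check the task. */
	static void *installer(void *arg)
	{
		(void)arg;
		atomic_store_explicit(&ctxp, 1, memory_order_relaxed);
		atomic_thread_fence(memory_order_seq_cst);	/* the smp_mb() */
		saw_running = atomic_load_explicit(&running, memory_order_relaxed);
		return NULL;
	}

	/* context-switch side: mark the task running, then look for ctxp. */
	static void *context_switch(void *arg)
	{
		(void)arg;
		atomic_store_explicit(&running, 1, memory_order_relaxed);
		atomic_thread_fence(memory_order_seq_cst);	/* scheduler barrier */
		saw_ctxp = atomic_load_explicit(&ctxp, memory_order_relaxed);
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, installer, NULL);
		pthread_create(&b, NULL, context_switch, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		/* The two fences forbid saw_running == 0 && saw_ctxp == 0. */
		printf("installer saw running=%d, switch saw ctxp=%d\n",
		       saw_running, saw_ctxp);
		return 0;
	}

If the installer misses the "running" store, the eventual context switch is
guaranteed to see the context pointer and will take ctx->lock; if it does see
the task running, it simply retries via the IPI.
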
@@ -7034,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static int
+__perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7080,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event,
 		perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
+	return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = __perf_event_account_interrupt(event, throttle);
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
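
The point of splitting out and exporting perf_event_account_interrupt() is
that a PMU driver which has to discard a hardware record can still feed that
interrupt into the throttling bookkeeping. A hypothetical driver-side fragment
(example_handle_bad_record() and example_pmu_stop() are made up for
illustration):

	static void example_handle_bad_record(struct perf_event *event)
	{
		/*
		 * No usable sample, so __perf_event_overflow() is never
		 * reached, but the interrupt still counts toward throttling.
		 * A non-zero return means the event got throttled and should
		 * be stopped by the caller.
		 */
		if (perf_event_account_interrupt(event))
			example_pmu_stop(event);
	}
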
@@ -9503,6 +9544,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
 	return 0;
 }
 
+/*
+ * Variation on perf_event_ctx_lock_nested(), except we take two context
+ * mutexes.
+ */
+static struct perf_event_context *
+__perf_event_ctx_lock_double(struct perf_event *group_leader,
+			     struct perf_event_context *ctx)
+{
+	struct perf_event_context *gctx;
+
+again:
+	rcu_read_lock();
+	gctx = READ_ONCE(group_leader->ctx);
+	if (!atomic_inc_not_zero(&gctx->refcount)) {
+		rcu_read_unlock();
+		goto again;
+	}
+	rcu_read_unlock();
+
+	mutex_lock_double(&gctx->mutex, &ctx->mutex);
+
+	if (group_leader->ctx != gctx) {
+		mutex_unlock(&ctx->mutex);
+		mutex_unlock(&gctx->mutex);
+		put_ctx(gctx);
+		goto again;
+	}
+
+	return gctx;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
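
__perf_event_ctx_lock_double() pins gctx with atomic_inc_not_zero() before
taking both mutexes, which is why the later hunks switch the unlock side from
a bare mutex_unlock(&gctx->mutex) to perf_event_ctx_unlock(). That helper
already exists in core.c and is roughly:

	static void perf_event_ctx_unlock(struct perf_event *event,
					  struct perf_event_context *ctx)
	{
		mutex_unlock(&ctx->mutex);
		put_ctx(ctx);
	}

so it drops the mutex and the reference taken by the double-lock together.
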
@@ -9746,12 +9818,31 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (move_group) {
-		gctx = group_leader->ctx;
-		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+		gctx = __perf_event_ctx_lock_double(group_leader, ctx);
+
 		if (gctx->task == TASK_TOMBSTONE) {
 			err = -ESRCH;
 			goto err_locked;
 		}
+
+		/*
+		 * Check if we raced against another sys_perf_event_open() call
+		 * moving the software group underneath us.
+		 */
+		if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
+			/*
+			 * If someone moved the group out from under us, check
+			 * if this new event wound up on the same ctx, if so
+			 * its the regular !move_group case, otherwise fail.
+			 */
+			if (gctx != ctx) {
+				err = -EINVAL;
+				goto err_locked;
+			} else {
+				perf_event_ctx_unlock(group_leader, gctx);
+				move_group = 0;
+			}
+		}
 	} else {
 		mutex_lock(&ctx->mutex);
 	}
@@ -9853,7 +9944,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_unpin_context(ctx);
 
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 
 	if (task) {
@@ -9879,7 +9970,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
 err_locked:
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 /* err_file: */
 	fput(event_file);