@@ -872,22 +872,32 @@ void perf_pmu_enable(struct pmu *pmu)
 		pmu->pmu_enable(pmu);
 }
 
-static DEFINE_PER_CPU(struct list_head, rotation_list);
+static DEFINE_PER_CPU(struct list_head, active_ctx_list);
 
 /*
- * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
- * because they're strictly cpu affine and rotate_start is called with IRQs
- * disabled, while rotate_context is called from IRQ context.
+ * perf_event_ctx_activate(), perf_event_ctx_deactivate(), and
+ * perf_event_task_tick() are fully serialized because they're strictly cpu
+ * affine and perf_event_ctx{activate,deactivate} are called with IRQs
+ * disabled, while perf_event_task_tick is called from IRQ context.
  */
-static void perf_pmu_rotate_start(struct pmu *pmu)
+static void perf_event_ctx_activate(struct perf_event_context *ctx)
 {
-	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-	struct list_head *head = this_cpu_ptr(&rotation_list);
+	struct list_head *head = this_cpu_ptr(&active_ctx_list);
 
 	WARN_ON(!irqs_disabled());
 
-	if (list_empty(&cpuctx->rotation_list))
-		list_add(&cpuctx->rotation_list, head);
+	WARN_ON(!list_empty(&ctx->active_ctx_list));
+
+	list_add(&ctx->active_ctx_list, head);
+}
+
+static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
+{
+	WARN_ON(!irqs_disabled());
+
+	WARN_ON(list_empty(&ctx->active_ctx_list));
+
+	list_del_init(&ctx->active_ctx_list);
 }
 
 static void get_ctx(struct perf_event_context *ctx)
@@ -906,6 +916,84 @@ static void put_ctx(struct perf_event_context *ctx)
 	}
 }
 
+/*
+ * Because of perf_event::ctx migration in sys_perf_event_open::move_group and
+ * perf_pmu_migrate_context() we need some magic.
+ *
+ * Those places that change perf_event::ctx will hold both
+ * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
+ *
+ * Lock ordering is by mutex address. There is one other site where
+ * perf_event_context::mutex nests and that is put_event(). But remember that
+ * that is a parent<->child context relation, and migration does not affect
+ * children, therefore these two orderings should not interact.
+ *
+ * The change in perf_event::ctx does not affect children (as claimed above)
+ * because the sys_perf_event_open() case will install a new event and break
+ * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
+ * concerned with cpuctx and that doesn't have children.
+ *
+ * The places that change perf_event::ctx will issue:
+ *
+ *   perf_remove_from_context();
+ *   synchronize_rcu();
+ *   perf_install_in_context();
+ *
+ * to affect the change. The remove_from_context() + synchronize_rcu() should
+ * quiesce the event, after which we can install it in the new location. This
+ * means that only external vectors (perf_fops, prctl) can perturb the event
+ * while in transit. Therefore all such accessors should also acquire
+ * perf_event_context::mutex to serialize against this.
+ *
+ * However; because event->ctx can change while we're waiting to acquire
+ * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
+ * function.
+ *
+ * Lock order:
+ *	task_struct::perf_event_mutex
+ *	  perf_event_context::mutex
+ *	    perf_event_context::lock
+ *	    perf_event::child_mutex;
+ *	    perf_event::mmap_mutex
+ *	    mmap_sem
+ */
+static struct perf_event_context *
+perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
+{
+	struct perf_event_context *ctx;
+
+again:
+	rcu_read_lock();
+	ctx = ACCESS_ONCE(event->ctx);
+	if (!atomic_inc_not_zero(&ctx->refcount)) {
+		rcu_read_unlock();
+		goto again;
+	}
+	rcu_read_unlock();
+
+	mutex_lock_nested(&ctx->mutex, nesting);
+	if (event->ctx != ctx) {
+		mutex_unlock(&ctx->mutex);
+		put_ctx(ctx);
+		goto again;
+	}
+
+	return ctx;
+}
+
+static inline struct perf_event_context *
+perf_event_ctx_lock(struct perf_event *event)
+{
+	return perf_event_ctx_lock_nested(event, 0);
+}
+
+static void perf_event_ctx_unlock(struct perf_event *event,
+				  struct perf_event_context *ctx)
+{
+	mutex_unlock(&ctx->mutex);
+	put_ctx(ctx);
+}
+
 /*
  * This must be done under the ctx->lock, such as to serialize against
  * context_equiv(), therefore we cannot call put_ctx() since that might end up
@@ -1155,8 +1243,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 		ctx->nr_branch_stack++;
 
 	list_add_rcu(&event->event_entry, &ctx->event_list);
-	if (!ctx->nr_events)
-		perf_pmu_rotate_start(ctx->pmu);
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat++;
@@ -1275,6 +1361,8 @@ static void perf_group_attach(struct perf_event *event)
 	if (group_leader == event)
 		return;
 
+	WARN_ON_ONCE(group_leader->ctx != event->ctx);
+
 	if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
 	    !is_software_event(event))
 		group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
@@ -1296,6 +1384,10 @@ static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
 	struct perf_cpu_context *cpuctx;
+
+	WARN_ON_ONCE(event->ctx != ctx);
+	lockdep_assert_held(&ctx->lock);
+
 	/*
 	 * We can have double detach due to exit/hot-unplug + close.
 	 */
@@ -1380,6 +1472,8 @@ static void perf_group_detach(struct perf_event *event)
 
 		/* Inherit group flags from the previous leader */
 		sibling->group_flags = event->group_flags;
+
+		WARN_ON_ONCE(sibling->ctx != event->ctx);
 	}
 
 out:
@@ -1442,6 +1536,10 @@ event_sched_out(struct perf_event *event,
 {
 	u64 tstamp = perf_event_time(event);
 	u64 delta;
+
+	WARN_ON_ONCE(event->ctx != ctx);
+	lockdep_assert_held(&ctx->lock);
+
 	/*
 	 * An event which could not be activated because of
 	 * filter mismatch still needs to have its timings
@@ -1471,7 +1569,8 @@ event_sched_out(struct perf_event *event,
 
 	if (!is_software_event(event))
 		cpuctx->active_oncpu--;
-	ctx->nr_active--;
+	if (!--ctx->nr_active)
+		perf_event_ctx_deactivate(ctx);
 	if (event->attr.freq && event->attr.sample_freq)
 		ctx->nr_freq--;
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
@@ -1654,7 +1753,7 @@ int __perf_event_disable(void *info)
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
  */
-void perf_event_disable(struct perf_event *event)
+static void _perf_event_disable(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
@@ -1695,6 +1794,19 @@ retry:
 	}
 	raw_spin_unlock_irq(&ctx->lock);
 }
+
+/*
+ * Strictly speaking kernel users cannot create groups and therefore this
+ * interface does not need the perf_event_ctx_lock() magic.
+ */
+void perf_event_disable(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+
+	ctx = perf_event_ctx_lock(event);
+	_perf_event_disable(event);
+	perf_event_ctx_unlock(event, ctx);
+}
 EXPORT_SYMBOL_GPL(perf_event_disable);
 
 static void perf_set_shadow_time(struct perf_event *event,
@@ -1782,7 +1894,8 @@ event_sched_in(struct perf_event *event,
 
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
-	ctx->nr_active++;
+	if (!ctx->nr_active++)
+		perf_event_ctx_activate(ctx);
 	if (event->attr.freq && event->attr.sample_freq)
 		ctx->nr_freq++;
 
@@ -2158,7 +2271,7 @@ unlock:
  * perf_event_for_each_child or perf_event_for_each as described
  * for perf_event_disable.
  */
-void perf_event_enable(struct perf_event *event)
+static void _perf_event_enable(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
@@ -2214,9 +2327,21 @@ retry:
 out:
 	raw_spin_unlock_irq(&ctx->lock);
 }
+
+/*
+ * See perf_event_disable();
+ */
+void perf_event_enable(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+
+	ctx = perf_event_ctx_lock(event);
+	_perf_event_enable(event);
+	perf_event_ctx_unlock(event, ctx);
+}
 EXPORT_SYMBOL_GPL(perf_event_enable);
 
-int perf_event_refresh(struct perf_event *event, int refresh)
+static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
 	/*
 	 * not supported on inherited events
@@ -2225,10 +2350,25 @@ int perf_event_refresh(struct perf_event *event, int refresh)
 		return -EINVAL;
 
 	atomic_add(refresh, &event->event_limit);
-	perf_event_enable(event);
+	_perf_event_enable(event);
 
 	return 0;
 }
+
+/*
+ * See perf_event_disable()
+ */
+int perf_event_refresh(struct perf_event *event, int refresh)
+{
+	struct perf_event_context *ctx;
+	int ret;
+
+	ctx = perf_event_ctx_lock(event);
+	ret = _perf_event_refresh(event, refresh);
+	perf_event_ctx_unlock(event, ctx);
+
+	return ret;
+}
 EXPORT_SYMBOL_GPL(perf_event_refresh);
 
 static void ctx_sched_out(struct perf_event_context *ctx,
@@ -2612,12 +2752,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 
 	perf_pmu_enable(ctx->pmu);
 	perf_ctx_unlock(cpuctx, ctx);
-
-	/*
-	 * Since these rotations are per-cpu, we need to ensure the
-	 * cpu-context we got scheduled on is actually rotating.
-	 */
-	perf_pmu_rotate_start(ctx->pmu);
 }
 
 /*
@@ -2905,25 +3039,18 @@ static void rotate_ctx(struct perf_event_context *ctx)
 		list_rotate_left(&ctx->flexible_groups);
 }
 
-/*
- * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
- * because they're strictly cpu affine and rotate_start is called with IRQs
- * disabled, while rotate_context is called from IRQ context.
- */
 static int perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1;
+	int rotate = 0;
 
 	if (cpuctx->ctx.nr_events) {
-		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
 	if (ctx && ctx->nr_events) {
-		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
 	}
@@ -2947,8 +3074,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 done:
-	if (remove)
-		list_del_init(&cpuctx->rotation_list);
 
 	return rotate;
 }
@@ -2966,9 +3091,8 @@ bool perf_event_can_stop_tick(void)
 
 void perf_event_task_tick(void)
 {
-	struct list_head *head = this_cpu_ptr(&rotation_list);
-	struct perf_cpu_context *cpuctx, *tmp;
-	struct perf_event_context *ctx;
+	struct list_head *head = this_cpu_ptr(&active_ctx_list);
+	struct perf_event_context *ctx, *tmp;
 	int throttled;
 
 	WARN_ON(!irqs_disabled());
@@ -2976,14 +3100,8 @@ void perf_event_task_tick(void)
 	__this_cpu_inc(perf_throttled_seq);
 	throttled = __this_cpu_xchg(perf_throttled_count, 0);
 
-	list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
-		ctx = &cpuctx->ctx;
+	list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
 		perf_adjust_freq_unthr_context(ctx, throttled);
-
-		ctx = cpuctx->task_ctx;
-		if (ctx)
-			perf_adjust_freq_unthr_context(ctx, throttled);
-	}
 }
 
 static int event_enable_on_exec(struct perf_event *event,
@@ -3142,6 +3260,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 {
 	raw_spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
+	INIT_LIST_HEAD(&ctx->active_ctx_list);
 	INIT_LIST_HEAD(&ctx->pinned_groups);
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
@@ -3421,7 +3540,16 @@ static void perf_remove_from_owner(struct perf_event *event)
 	rcu_read_unlock();
 
 	if (owner) {
-		mutex_lock(&owner->perf_event_mutex);
+		/*
+		 * If we're here through perf_event_exit_task() we're already
+		 * holding ctx->mutex which would be an inversion wrt. the
+		 * normal lock order.
+		 *
+		 * However we can safely take this lock because its the child
+		 * ctx->mutex.
+		 */
+		mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
+
 		/*
 		 * We have to re-check the event->owner field, if it is cleared
 		 * we raced with perf_event_exit_task(), acquiring the mutex
@@ -3440,7 +3568,7 @@ static void perf_remove_from_owner(struct perf_event *event)
  */
 static void put_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx = event->ctx;
+	struct perf_event_context *ctx;
 
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
@@ -3448,7 +3576,6 @@ static void put_event(struct perf_event *event)
 	if (!is_kernel_event(event))
 		perf_remove_from_owner(event);
 
-	WARN_ON_ONCE(ctx->parent_ctx);
 	/*
 	 * There are two ways this annotation is useful:
 	 *
@@ -3461,7 +3588,8 @@ static void put_event(struct perf_event *event)
 	 * the last filedesc died, so there is no possibility
 	 * to trigger the AB-BA case.
 	 */
-	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+	ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
+	WARN_ON_ONCE(ctx->parent_ctx);
 	perf_remove_from_context(event, true);
 	mutex_unlock(&ctx->mutex);
 
@@ -3547,12 +3675,13 @@ static int perf_event_read_group(struct perf_event *event,
 				   u64 read_format, char __user *buf)
 {
 	struct perf_event *leader = event->group_leader, *sub;
-	int n = 0, size = 0, ret = -EFAULT;
 	struct perf_event_context *ctx = leader->ctx;
-	u64 values[5];
+	int n = 0, size = 0, ret;
 	u64 count, enabled, running;
+	u64 values[5];
+
+	lockdep_assert_held(&ctx->mutex);
 
-	mutex_lock(&ctx->mutex);
 	count = perf_event_read_value(leader, &enabled, &running);
 
 	values[n++] = 1 + leader->nr_siblings;
@@ -3567,7 +3696,7 @@ static int perf_event_read_group(struct perf_event *event,
 	size = n * sizeof(u64);
 
 	if (copy_to_user(buf, values, size))
-		goto unlock;
+		return -EFAULT;
 
 	ret = size;
 
@@ -3581,14 +3710,11 @@ static int perf_event_read_group(struct perf_event *event,
 		size = n * sizeof(u64);
 
 		if (copy_to_user(buf + ret, values, size)) {
-			ret = -EFAULT;
-			goto unlock;
+			return -EFAULT;
 		}
 
 		ret += size;
 	}
-unlock:
-	mutex_unlock(&ctx->mutex);
 
 	return ret;
 }
@@ -3660,8 +3786,14 @@ static ssize_t
 perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct perf_event *event = file->private_data;
+	struct perf_event_context *ctx;
+	int ret;
 
-	return perf_read_hw(event, buf, count);
+	ctx = perf_event_ctx_lock(event);
+	ret = perf_read_hw(event, buf, count);
+	perf_event_ctx_unlock(event, ctx);
+
+	return ret;
 }
 
 static unsigned int perf_poll(struct file *file, poll_table *wait)
@@ -3687,7 +3819,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static void perf_event_reset(struct perf_event *event)
+static void _perf_event_reset(struct perf_event *event)
 {
 	(void)perf_event_read(event);
 	local64_set(&event->count, 0);
@@ -3706,6 +3838,7 @@ static void perf_event_for_each_child(struct perf_event *event,
 	struct perf_event *child;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
+
 	mutex_lock(&event->child_mutex);
 	func(event);
 	list_for_each_entry(child, &event->child_list, child_list)
@@ -3719,14 +3852,13 @@ static void perf_event_for_each(struct perf_event *event,
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *sibling;
 
-	WARN_ON_ONCE(ctx->parent_ctx);
-	mutex_lock(&ctx->mutex);
+	lockdep_assert_held(&ctx->mutex);
+
 	event = event->group_leader;
 
 	perf_event_for_each_child(event, func);
 	list_for_each_entry(sibling, &event->sibling_list, group_entry)
 		perf_event_for_each_child(sibling, func);
-	mutex_unlock(&ctx->mutex);
 }
 
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
@@ -3796,25 +3928,24 @@ static int perf_event_set_output(struct perf_event *event,
 				 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 
-static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
 {
-	struct perf_event *event = file->private_data;
 	void (*func)(struct perf_event *);
 	u32 flags = arg;
 
 	switch (cmd) {
 	case PERF_EVENT_IOC_ENABLE:
-		func = perf_event_enable;
+		func = _perf_event_enable;
 		break;
 	case PERF_EVENT_IOC_DISABLE:
-		func = perf_event_disable;
+		func = _perf_event_disable;
 		break;
 	case PERF_EVENT_IOC_RESET:
-		func = perf_event_reset;
+		func = _perf_event_reset;
 		break;
 
 	case PERF_EVENT_IOC_REFRESH:
-		return perf_event_refresh(event, arg);
+		return _perf_event_refresh(event, arg);
 
 	case PERF_EVENT_IOC_PERIOD:
 		return perf_event_period(event, (u64 __user *)arg);
@@ -3861,6 +3992,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return 0;
 }
 
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct perf_event *event = file->private_data;
+	struct perf_event_context *ctx;
+	long ret;
+
+	ctx = perf_event_ctx_lock(event);
+	ret = _perf_ioctl(event, cmd, arg);
+	perf_event_ctx_unlock(event, ctx);
+
+	return ret;
+}
+
 #ifdef CONFIG_COMPAT
 static long perf_compat_ioctl(struct file *file, unsigned int cmd,
 			      unsigned long arg)
@@ -3883,11 +4027,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
 
 int perf_event_task_enable(void)
 {
+	struct perf_event_context *ctx;
 	struct perf_event *event;
 
 	mutex_lock(&current->perf_event_mutex);
-	list_for_each_entry(event, &current->perf_event_list, owner_entry)
-		perf_event_for_each_child(event, perf_event_enable);
+	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+		ctx = perf_event_ctx_lock(event);
+		perf_event_for_each_child(event, _perf_event_enable);
+		perf_event_ctx_unlock(event, ctx);
+	}
 	mutex_unlock(&current->perf_event_mutex);
 
 	return 0;
@@ -3895,11 +4043,15 @@ int perf_event_task_enable(void)
 
 int perf_event_task_disable(void)
 {
+	struct perf_event_context *ctx;
 	struct perf_event *event;
 
 	mutex_lock(&current->perf_event_mutex);
-	list_for_each_entry(event, &current->perf_event_list, owner_entry)
-		perf_event_for_each_child(event, perf_event_disable);
+	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+		ctx = perf_event_ctx_lock(event);
+		perf_event_for_each_child(event, _perf_event_disable);
+		perf_event_ctx_unlock(event, ctx);
+	}
 	mutex_unlock(&current->perf_event_mutex);
 
 	return 0;
@@ -5889,6 +6041,8 @@ end:
 	rcu_read_unlock();
 }
 
+DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
+
 int perf_swevent_get_recursion_context(void)
 {
 	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
@@ -5904,21 +6058,30 @@ inline void perf_swevent_put_recursion_context(int rctx)
 	put_recursion_context(swhash->recursion, rctx);
 }
 
-void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
+void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 {
 	struct perf_sample_data data;
-	int rctx;
 
-	preempt_disable_notrace();
-	rctx = perf_swevent_get_recursion_context();
-	if (rctx < 0)
+	if (WARN_ON_ONCE(!regs))
 		return;
 
 	perf_sample_data_init(&data, addr, 0);
-
 	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
+}
+
+void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
+{
+	int rctx;
+
+	preempt_disable_notrace();
+	rctx = perf_swevent_get_recursion_context();
+	if (unlikely(rctx < 0))
+		goto fail;
+
+	___perf_sw_event(event_id, nr, regs, addr);
 
 	perf_swevent_put_recursion_context(rctx);
+fail:
 	preempt_enable_notrace();
 }
 
@@ -6780,7 +6943,6 @@ skip_type:
 
 		__perf_cpu_hrtimer_init(cpuctx, cpu);
 
-		INIT_LIST_HEAD(&cpuctx->rotation_list);
 		cpuctx->unique_pmu = pmu;
 	}
 
@@ -6853,6 +7015,20 @@ void perf_pmu_unregister(struct pmu *pmu)
 }
 EXPORT_SYMBOL_GPL(perf_pmu_unregister);
 
+static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
+{
+	int ret;
+
+	if (!try_module_get(pmu->module))
+		return -ENODEV;
+	event->pmu = pmu;
+	ret = pmu->event_init(event);
+	if (ret)
+		module_put(pmu->module);
+
+	return ret;
+}
+
 struct pmu *perf_init_event(struct perf_event *event)
 {
 	struct pmu *pmu = NULL;
@@ -6865,24 +7041,14 @@ struct pmu *perf_init_event(struct perf_event *event)
 	pmu = idr_find(&pmu_idr, event->attr.type);
 	rcu_read_unlock();
 	if (pmu) {
-		if (!try_module_get(pmu->module)) {
-			pmu = ERR_PTR(-ENODEV);
-			goto unlock;
-		}
-		event->pmu = pmu;
-		ret = pmu->event_init(event);
+		ret = perf_try_init_event(pmu, event);
 		if (ret)
 			pmu = ERR_PTR(ret);
 		goto unlock;
 	}
 
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		if (!try_module_get(pmu->module)) {
-			pmu = ERR_PTR(-ENODEV);
-			goto unlock;
-		}
-		event->pmu = pmu;
-		ret = pmu->event_init(event);
+		ret = perf_try_init_event(pmu, event);
 		if (!ret)
 			goto unlock;
 
@@ -7246,6 +7412,15 @@ out:
 	return ret;
 }
 
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+	if (b < a)
+		swap(a, b);
+
+	mutex_lock(a);
+	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -7261,7 +7436,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	struct perf_event *group_leader = NULL, *output_event = NULL;
 	struct perf_event *event, *sibling;
 	struct perf_event_attr attr;
-	struct perf_event_context *ctx;
+	struct perf_event_context *ctx, *uninitialized_var(gctx);
 	struct file *event_file = NULL;
 	struct fd group = {NULL, 0};
 	struct task_struct *task = NULL;
@@ -7459,43 +7634,68 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (move_group) {
-		struct perf_event_context *gctx = group_leader->ctx;
-
-		mutex_lock(&gctx->mutex);
-		perf_remove_from_context(group_leader, false);
+		gctx = group_leader->ctx;
 
 		/*
-		 * Removing from the context ends up with disabled
-		 * event. What we want here is event in the initial
-		 * startup state, ready to be add into new context.
+		 * See perf_event_ctx_lock() for comments on the details
+		 * of swizzling perf_event::ctx.
 		 */
-		perf_event__state_init(group_leader);
+		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+
+		perf_remove_from_context(group_leader, false);
+
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
 			perf_remove_from_context(sibling, false);
-			perf_event__state_init(sibling);
 			put_ctx(gctx);
 		}
-		mutex_unlock(&gctx->mutex);
-		put_ctx(gctx);
+	} else {
+		mutex_lock(&ctx->mutex);
 	}
 
 	WARN_ON_ONCE(ctx->parent_ctx);
-	mutex_lock(&ctx->mutex);
 
 	if (move_group) {
+		/*
+		 * Wait for everybody to stop referencing the events through
+		 * the old lists, before installing it on new lists.
+		 */
 		synchronize_rcu();
-		perf_install_in_context(ctx, group_leader, group_leader->cpu);
-		get_ctx(ctx);
+
+		/*
+		 * Install the group siblings before the group leader.
+		 *
+		 * Because a group leader will try and install the entire group
+		 * (through the sibling list, which is still in-tact), we can
+		 * end up with siblings installed in the wrong context.
+		 *
+		 * By installing siblings first we NO-OP because they're not
+		 * reachable through the group lists.
+		 */
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
+			perf_event__state_init(sibling);
 			perf_install_in_context(ctx, sibling, sibling->cpu);
 			get_ctx(ctx);
 		}
+
+		/*
+		 * Removing from the context ends up with disabled
+		 * event. What we want here is event in the initial
+		 * startup state, ready to be add into new context.
+		 */
+		perf_event__state_init(group_leader);
+		perf_install_in_context(ctx, group_leader, group_leader->cpu);
+		get_ctx(ctx);
 	}
 
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
+
+	if (move_group) {
+		mutex_unlock(&gctx->mutex);
+		put_ctx(gctx);
+	}
 	mutex_unlock(&ctx->mutex);
 
 	put_online_cpus();
@@ -7603,7 +7803,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 	src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
 	dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
 
-	mutex_lock(&src_ctx->mutex);
+	/*
+	 * See perf_event_ctx_lock() for comments on the details
+	 * of swizzling perf_event::ctx.
+	 */
+	mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
 	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
 				 event_entry) {
 		perf_remove_from_context(event, false);
@@ -7611,11 +7815,36 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 		put_ctx(src_ctx);
 		list_add(&event->migrate_entry, &events);
 	}
-	mutex_unlock(&src_ctx->mutex);
 
+	/*
+	 * Wait for the events to quiesce before re-instating them.
+	 */
 	synchronize_rcu();
 
-	mutex_lock(&dst_ctx->mutex);
+	/*
+	 * Re-instate events in 2 passes.
+	 *
+	 * Skip over group leaders and only install siblings on this first
+	 * pass, siblings will not get enabled without a leader, however a
+	 * leader will enable its siblings, even if those are still on the old
+	 * context.
+	 */
+	list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+		if (event->group_leader == event)
+			continue;
+
+		list_del(&event->migrate_entry);
+		if (event->state >= PERF_EVENT_STATE_OFF)
+			event->state = PERF_EVENT_STATE_INACTIVE;
+		account_event_cpu(event, dst_cpu);
+		perf_install_in_context(dst_ctx, event, dst_cpu);
+		get_ctx(dst_ctx);
+	}
+
+	/*
+	 * Once all the siblings are setup properly, install the group leaders
+	 * to make it go.
+	 */
 	list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
 		list_del(&event->migrate_entry);
 		if (event->state >= PERF_EVENT_STATE_OFF)
@@ -7625,6 +7854,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 		get_ctx(dst_ctx);
 	}
 	mutex_unlock(&dst_ctx->mutex);
+	mutex_unlock(&src_ctx->mutex);
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
 
@@ -7811,14 +8041,19 @@ static void perf_free_event(struct perf_event *event,
 
 	put_event(parent);
 
+	raw_spin_lock_irq(&ctx->lock);
 	perf_group_detach(event);
 	list_del_event(event, ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 	free_event(event);
 }
 
 /*
- * free an unexposed, unused context as created by inheritance by
+ * Free an unexposed, unused context as created by inheritance by
  * perf_event_init_task below, used by fork() in case of fail.
+ *
+ * Not all locks are strictly required, but take them anyway to be nice and
+ * help out with the lockdep assertions.
  */
 void perf_event_free_task(struct task_struct *task)
 {
@@ -8137,7 +8372,7 @@ static void __init perf_event_init_all_cpus(void)
 	for_each_possible_cpu(cpu) {
 		swhash = &per_cpu(swevent_htable, cpu);
 		mutex_init(&swhash->hlist_mutex);
-		INIT_LIST_HEAD(&per_cpu(rotation_list, cpu));
+		INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
 	}
 }
 
@@ -8158,22 +8393,11 @@ static void perf_event_init_cpu(int cpu)
 }
 
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
-static void perf_pmu_rotate_stop(struct pmu *pmu)
-{
-	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-	WARN_ON(!irqs_disabled());
-
-	list_del_init(&cpuctx->rotation_list);
-}
-
 static void __perf_event_exit_context(void *__info)
 {
 	struct remove_event re = { .detach_group = true };
 	struct perf_event_context *ctx = __info;
 
-	perf_pmu_rotate_stop(ctx->pmu);
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
 		__perf_remove_from_context(&re);