@@ -1925,8 +1925,13 @@ event_sched_in(struct perf_event *event,
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
-	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = smp_processor_id();
+	WRITE_ONCE(event->oncpu, smp_processor_id());
+	/*
+	 * Order event::oncpu write to happen before the ACTIVE state
+	 * is visible.
+	 */
+	smp_wmb();
+	WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
 
 	/*
 	 * Unthrottle events, since we scheduled we might have missed several
@@ -2358,6 +2363,29 @@ void perf_event_enable(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_enable);
 
+static int __perf_event_stop(void *info)
+{
+	struct perf_event *event = info;
+
+	/* for AUX events, our job is done if the event is already inactive */
+	if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+		return 0;
+
+	/* matches smp_wmb() in event_sched_in() */
+	smp_rmb();
+
+	/*
+	 * There is a window with interrupts enabled before we get here,
+	 * so we need to check again lest we try to stop another CPU's event.
+	 */
+	if (READ_ONCE(event->oncpu) != smp_processor_id())
+		return -EAGAIN;
+
+	event->pmu->stop(event, PERF_EF_UPDATE);
+
+	return 0;
+}
+
 static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
 	/*
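
The two hunks above form a publish/observe pair: event_sched_in() publishes
event->oncpu before the ACTIVE state becomes visible, and __perf_event_stop()
only reads event->oncpu after it has observed the ACTIVE state, so the CPU
number it acts on cannot predate the activation it saw. Below is a minimal
userspace sketch of that pairing, using C11 fences as stand-ins for
smp_wmb()/smp_rmb(); the names here (sched_in, try_stop, STATE_*) are invented
for illustration and are not part of the kernel API.

#include <stdatomic.h>
#include <stdio.h>

enum { STATE_OFF, STATE_ACTIVE };

static _Atomic int state = STATE_OFF;
static _Atomic int oncpu = -1;

/* writer side, cf. event_sched_in(): publish oncpu, then the ACTIVE state */
static void sched_in(int cpu)
{
	atomic_store_explicit(&oncpu, cpu, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* cf. smp_wmb() */
	atomic_store_explicit(&state, STATE_ACTIVE, memory_order_relaxed);
}

/* reader side, cf. __perf_event_stop(): check ACTIVE before using oncpu */
static int try_stop(int this_cpu)
{
	if (atomic_load_explicit(&state, memory_order_relaxed) != STATE_ACTIVE)
		return 0;				/* nothing to stop */
	atomic_thread_fence(memory_order_acquire);	/* cf. smp_rmb() */
	if (atomic_load_explicit(&oncpu, memory_order_relaxed) != this_cpu)
		return -1;			/* caller should retry, cf. -EAGAIN */
	return 0;				/* would call pmu->stop() here */
}

int main(void)
{
	sched_in(3);
	printf("stop on cpu 3 -> %d, on cpu 0 -> %d\n", try_stop(3), try_stop(0));
	return 0;
}
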
@@ -4667,6 +4695,8 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 		event->pmu->event_mapped(event);
 }
 
+static void perf_pmu_output_stop(struct perf_event *event);
+
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
  * event, or through other events by use of perf_event_set_output().
@@ -4694,10 +4724,22 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 */
 	if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
 	    atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+		/*
+		 * Stop all AUX events that are writing to this buffer,
+		 * so that we can free its AUX pages and corresponding PMU
+		 * data. Note that after rb::aux_mmap_count dropped to zero,
+		 * they won't start any more (see perf_aux_output_begin()).
+		 */
+		perf_pmu_output_stop(event);
+
+		/* now it's safe to free the pages */
 		atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
 		vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
 
+		/* this has to be the last one */
 		rb_free_aux(rb);
+		WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
+
 		mutex_unlock(&event->mmap_mutex);
 	}
 
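
The ordering in this hunk is what makes the page freeing safe: the last
unmapper first brings rb->aux_mmap_count to zero, which keeps new writers from
starting (per the comment above, perf_aux_output_begin() checks that count),
then stops any writer that is still active, and only then releases the pages.
Below is a minimal single-threaded userspace sketch of that ordering; every
name in it (aux_mmap_count, aux_output_begin, aux_unmap, ...) is invented for
the illustration, and the real kernel paths are of course more involved.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_int aux_mmap_count = 1;	/* one mapping of the AUX area */
static bool writer_active;		/* a writer is mid-transaction */
static char *aux_buffer;

/* cf. perf_aux_output_begin(): refuse to start once the count hit zero */
static bool aux_output_begin(void)
{
	if (atomic_load(&aux_mmap_count) == 0)
		return false;
	writer_active = true;
	return true;
}

/* cf. perf_pmu_output_stop(): force any remaining writer out of the buffer */
static void output_stop(void)
{
	writer_active = false;
}

/* cf. the AUX branch of perf_mmap_close() */
static void aux_unmap(void)
{
	if (atomic_fetch_sub(&aux_mmap_count, 1) != 1)
		return;			/* not the last mapping */
	output_stop();			/* no writer may touch the pages now */
	free(aux_buffer);		/* only then give the pages back */
	aux_buffer = NULL;
}

int main(void)
{
	aux_buffer = malloc(4096);
	printf("writer before unmap: %s\n", aux_output_begin() ? "ok" : "refused");
	aux_unmap();
	printf("writer after unmap:  %s\n", aux_output_begin() ? "ok" : "refused");
	return 0;
}
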
@@ -5768,6 +5810,80 @@ next:
 	rcu_read_unlock();
 }
 
+struct remote_output {
+	struct ring_buffer	*rb;
+	int			err;
+};
+
+static void __perf_event_output_stop(struct perf_event *event, void *data)
+{
+	struct perf_event *parent = event->parent;
+	struct remote_output *ro = data;
+	struct ring_buffer *rb = ro->rb;
+
+	if (!has_aux(event))
+		return;
+
+	if (!parent)
+		parent = event;
+
+	/*
+	 * In case of inheritance, it will be the parent that links to the
+	 * ring-buffer, but it will be the child that's actually using it:
+	 */
+	if (rcu_dereference(parent->rb) == rb)
+		ro->err = __perf_event_stop(event);
+}
+
+static int __perf_pmu_output_stop(void *info)
+{
+	struct perf_event *event = info;
+	struct pmu *pmu = event->pmu;
+	struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+	struct remote_output ro = {
+		.rb	= event->rb,
+	};
+
+	rcu_read_lock();
+	perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro);
+	if (cpuctx->task_ctx)
+		perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+				   &ro);
+	rcu_read_unlock();
+
+	return ro.err;
+}
+
+static void perf_pmu_output_stop(struct perf_event *event)
+{
+	struct perf_event *iter;
+	int err, cpu;
+
+restart:
+	rcu_read_lock();
+	list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
+		/*
+		 * For per-CPU events, we need to make sure that neither they
+		 * nor their children are running; for cpu==-1 events it's
+		 * sufficient to stop the event itself if it's active, since
+		 * it can't have children.
+		 */
+		cpu = iter->cpu;
+		if (cpu == -1)
+			cpu = READ_ONCE(iter->oncpu);
+
+		if (cpu == -1)
+			continue;
+
+		err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
+		if (err == -EAGAIN) {
+			rcu_read_unlock();
+			goto restart;
+		}
+	}
+	rcu_read_unlock();
+}
+
 /*
  * task tracking -- fork/exit
  *