
sched/core: Fix trace_sched_switch()

Since __trace_sched_switch_state() is the last remaining PREEMPT_ACTIVE
user, move trace_sched_switch() from prepare_task_switch() to
__schedule() and propagate the @preempt argument.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Peter Zijlstra, 10 years ago
Commit c73464b1c8
5 changed files, 14 insertions(+), 17 deletions(-)
  1. include/trace/events/sched.h (+9 −13)
  2. kernel/sched/core.c (+1 −1)
  3. kernel/trace/ftrace.c (+1 −1)
  4. kernel/trace/trace_sched_switch.c (+2 −1)
  5. kernel/trace/trace_sched_wakeup.c (+1 −1)
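
The signature change is visible to every probe attached to the sched_switch
tracepoint, in-tree or out. As a minimal sketch of what an external user of
this tracepoint would now look like (the module and the names example_probe,
example_init and example_exit are illustrative, not part of this commit):

#include <linux/module.h>
#include <trace/events/sched.h>

/* Probe matching the new signature: `bool preempt` is now the first
 * argument after the private data pointer. */
static void example_probe(void *data, bool preempt,
			  struct task_struct *prev, struct task_struct *next)
{
	/* preempt == true: prev was preempted and is still runnable */
}

static int __init example_init(void)
{
	return register_trace_sched_switch(example_probe, NULL);
}

static void __exit example_exit(void)
{
	unregister_trace_sched_switch(example_probe, NULL);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");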

+ 9 - 13
include/trace/events/sched.h

@@ -104,22 +104,17 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
 	     TP_ARGS(p));
 
 #ifdef CREATE_TRACE_POINTS
-static inline long __trace_sched_switch_state(struct task_struct *p)
+static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
 {
-	long state = p->state;
-
-#ifdef CONFIG_PREEMPT
 #ifdef CONFIG_SCHED_DEBUG
 	BUG_ON(p != current);
 #endif /* CONFIG_SCHED_DEBUG */
+
 	/*
-	 * For all intents and purposes a preempted task is a running task.
+	 * Preemption ignores task state, therefore preempted tasks are always
+	 * RUNNING (we will not have dequeued if state != RUNNING).
 	 */
-	if (preempt_count() & PREEMPT_ACTIVE)
-		state = TASK_RUNNING | TASK_STATE_MAX;
-#endif /* CONFIG_PREEMPT */
-
-	return state;
+	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
 }
 #endif /* CREATE_TRACE_POINTS */
 
@@ -128,10 +123,11 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
  */
 TRACE_EVENT(sched_switch,
 
-	TP_PROTO(struct task_struct *prev,
+	TP_PROTO(bool preempt,
+		 struct task_struct *prev,
 		 struct task_struct *next),
 
-	TP_ARGS(prev, next),
+	TP_ARGS(preempt, prev, next),
 
 	TP_STRUCT__entry(
 		__array(	char,	prev_comm,	TASK_COMM_LEN	)
@@ -147,7 +143,7 @@ TRACE_EVENT(sched_switch,
 		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
 		__entry->prev_pid	= prev->pid;
 		__entry->prev_prio	= prev->prio;
-		__entry->prev_state	= __trace_sched_switch_state(prev);
+		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
 		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 		__entry->next_pid	= next->pid;
 		__entry->next_prio	= next->prio;
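
The TASK_RUNNING | TASK_STATE_MAX value recorded for a preempted task is what
the event's print format renders with a trailing "+" (i.e. "R+") in trace
output. A hypothetical consumer of the recorded prev_state field could decode
it along these lines (decode_prev_state is illustrative only, not part of this
commit):

/* Illustrative helper: classify the prev_state value recorded by
 * the sched_switch event. */
static const char *decode_prev_state(long state)
{
	if (state & TASK_STATE_MAX)
		return "R+";	/* runnable, involuntarily preempted */
	if (state == TASK_RUNNING)
		return "R";	/* runnable, switched out voluntarily */
	return "blocked";	/* S, D, T, ... per the event's format */
}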

+ 1 - 1
kernel/sched/core.c

@@ -2470,7 +2470,6 @@ static inline void
 prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
-	trace_sched_switch(prev, next);
 	sched_info_switch(rq, prev, next);
 	perf_event_task_sched_out(prev, next);
 	fire_sched_out_preempt_notifiers(prev, next);
@@ -3132,6 +3131,7 @@ static void __sched __schedule(bool preempt)
 		rq->curr = next;
 		++*switch_count;
 
+		trace_sched_switch(preempt, prev, next);
 		rq = context_switch(rq, prev, next); /* unlocks the rq */
 		cpu = cpu_of(rq);
 	} else {

+ 1 - 1
kernel/trace/ftrace.c

@@ -5697,7 +5697,7 @@ free:
 }
 
 static void
-ftrace_graph_probe_sched_switch(void *ignore,
+ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
 			struct task_struct *prev, struct task_struct *next)
 {
 	unsigned long long timestamp;

+ 2 - 1
kernel/trace/trace_sched_switch.c

@@ -16,7 +16,8 @@ static int			sched_ref;
 static DEFINE_MUTEX(sched_register_mutex);
 
 static void
-probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
+probe_sched_switch(void *ignore, bool preempt,
+		   struct task_struct *prev, struct task_struct *next)
 {
 	if (unlikely(!sched_ref))
 		return;

+ 1 - 1
kernel/trace/trace_sched_wakeup.c

@@ -420,7 +420,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 }
 
 static void notrace
-probe_wakeup_sched_switch(void *ignore,
+probe_wakeup_sched_switch(void *ignore, bool preempt,
 			  struct task_struct *prev, struct task_struct *next)
 {
 	struct trace_array_cpu *data;