@@ -1000,7 +1000,11 @@ inline int task_curr(const struct task_struct *p)
 }
 
 /*
- * Can drop rq->lock because from sched_class::switched_from() methods drop it.
+ * switched_from, switched_to and prio_changed must _NOT_ drop rq->lock,
+ * use the balance_callback list if you want balancing.
+ *
+ * this means any call to check_class_changed() must be followed by a call to
+ * balance_callback().
  */
 static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 				       const struct sched_class *prev_class,
@@ -1009,7 +1013,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 	if (prev_class != p->sched_class) {
 		if (prev_class->switched_from)
 			prev_class->switched_from(rq, p);
-		/* Possble rq->lock 'hole'. */
+
 		p->sched_class->switched_to(rq, p);
 	} else if (oldprio != p->prio || dl_task(p))
 		p->sched_class->prio_changed(rq, p, oldprio);
@@ -1041,6 +1045,177 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
+/*
+ * This is how migration works:
+ *
+ * 1) we invoke migration_cpu_stop() on the target CPU using
+ *    stop_one_cpu().
+ * 2) stopper starts to run (implicitly forcing the migrated thread
+ *    off the CPU)
+ * 3) it checks whether the migrated task is still in the wrong runqueue.
+ * 4) if it's in the wrong runqueue then the migration thread removes
+ *    it and puts it into the right queue.
+ * 5) stopper completes and stop_one_cpu() returns and the migration
+ *    is done.
+ */
+
+/*
+ * move_queued_task - move a queued task to new rq.
+ *
+ * Returns (locked) new rq. Old rq's lock is released.
+ */
+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new_cpu)
+{
+	lockdep_assert_held(&rq->lock);
+
+	dequeue_task(rq, p, 0);
+	p->on_rq = TASK_ON_RQ_MIGRATING;
+	set_task_cpu(p, new_cpu);
+	raw_spin_unlock(&rq->lock);
+
+	rq = cpu_rq(new_cpu);
+
+	raw_spin_lock(&rq->lock);
+	BUG_ON(task_cpu(p) != new_cpu);
+	p->on_rq = TASK_ON_RQ_QUEUED;
+	enqueue_task(rq, p, 0);
+	check_preempt_curr(rq, p, 0);
+
+	return rq;
+}
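Illustrative sketch (not part of the patch, and not kernel API): move_queued_task() enters with the old runqueue's lock held and returns with the new runqueue's lock held, which is an easy contract to misuse. The minimal userspace model below shows only that lock hand-off, using pthread mutexes; the queue/item/slot names are invented for illustration.

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

enum { QUEUED, MIGRATING };

struct item { int state; };

struct queue {
	pthread_mutex_t lock;
	struct item *slot;	/* at most one item, to keep the model tiny */
};

/*
 * Enter with src->lock held; return with dst->lock held and src->lock
 * released -- the same contract move_queued_task() documents above.
 */
static struct queue *move_item(struct queue *src, struct queue *dst, struct item *it)
{
	it->state = MIGRATING;	/* marks the window where neither lock is held */
	src->slot = NULL;
	pthread_mutex_unlock(&src->lock);

	pthread_mutex_lock(&dst->lock);
	dst->slot = it;
	it->state = QUEUED;
	return dst;
}

int main(void)
{
	struct queue a = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct queue b = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct item it = { QUEUED };
	struct queue *locked;

	a.slot = &it;
	pthread_mutex_lock(&a.lock);
	locked = move_item(&a, &b, &it);	/* comes back holding b.lock */
	assert(locked == &b && b.slot == &it);
	pthread_mutex_unlock(&locked->lock);
	printf("moved; destination lock returned held\n");
	return 0;
}

The MIGRATING state plays the role of TASK_ON_RQ_MIGRATING above: it marks the task while neither runqueue lock is held, so concurrent code can tell the task is in transit.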
+
+struct migration_arg {
+	struct task_struct *task;
+	int dest_cpu;
+};
+
+/*
+ * Move (not current) task off this cpu, onto dest cpu. We're doing
+ * this because either it can't run here any more (set_cpus_allowed()
+ * away from this CPU, or CPU going down), or because we're
+ * attempting to rebalance this task on exec (sched_exec).
+ *
+ * So we race with normal scheduler movements, but that's OK, as long
+ * as the task is no longer on this CPU.
+ */
+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_cpu)
+{
+	if (unlikely(!cpu_active(dest_cpu)))
+		return rq;
+
+	/* Affinity changed (again). */
+	if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
+		return rq;
+
+	rq = move_queued_task(rq, p, dest_cpu);
+
+	return rq;
+}
+
+/*
+ * migration_cpu_stop - this will be executed by a highprio stopper thread
+ * and performs thread migration by bumping thread off CPU then
+ * 'pushing' onto another runqueue.
+ */
+static int migration_cpu_stop(void *data)
+{
+	struct migration_arg *arg = data;
+	struct task_struct *p = arg->task;
+	struct rq *rq = this_rq();
+
+	/*
+	 * The original target cpu might have gone down and we might
+	 * be on another cpu but it doesn't matter.
+	 */
+	local_irq_disable();
+	/*
+	 * We need to explicitly wake pending tasks before running
+	 * __migrate_task() such that we will not miss enforcing cpus_allowed
+	 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
+	 */
+	sched_ttwu_pending();
+
+	raw_spin_lock(&p->pi_lock);
+	raw_spin_lock(&rq->lock);
+	/*
+	 * If task_rq(p) != rq, it cannot be migrated here, because we're
+	 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
+	 * we're holding p->pi_lock.
+	 */
+	if (task_rq(p) == rq && task_on_rq_queued(p))
+		rq = __migrate_task(rq, p, arg->dest_cpu);
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock(&p->pi_lock);
+
+	local_irq_enable();
+	return 0;
+}
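Illustrative sketch (not part of the patch): the "how migration works" steps in the comment above amount to handing a function to a per-CPU stopper and waiting for it to finish. Below is a toy userspace model of that submit-and-wait flow; the real stop_one_cpu() additionally preempts the target CPU with a highest-priority stopper thread, and every name here is invented for illustration.

#include <pthread.h>
#include <stdio.h>

struct stopper {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int (*fn)(void *);	/* pending work, NULL while idle */
	void *arg;
	int done;
};

static void *stopper_thread(void *data)	/* stands in for the stopper kthread */
{
	struct stopper *s = data;

	pthread_mutex_lock(&s->lock);
	while (!s->fn)
		pthread_cond_wait(&s->cond, &s->lock);
	s->fn(s->arg);			/* steps 2-4: "migration_cpu_stop()" runs here */
	s->done = 1;
	pthread_cond_broadcast(&s->cond);
	pthread_mutex_unlock(&s->lock);
	return NULL;
}

static void toy_stop_one_cpu(struct stopper *s, int (*fn)(void *), void *arg)
{
	pthread_mutex_lock(&s->lock);
	s->fn = fn;			/* step 1: queue the work */
	s->arg = arg;
	pthread_cond_broadcast(&s->cond);
	while (!s->done)		/* step 5: wait for completion */
		pthread_cond_wait(&s->cond, &s->lock);
	pthread_mutex_unlock(&s->lock);
}

static int toy_migration_stop(void *arg)
{
	printf("stopper: would re-check and move task %s here\n", (char *)arg);
	return 0;
}

int main(void)
{
	struct stopper s = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };
	pthread_t thr;

	pthread_create(&thr, NULL, stopper_thread, &s);
	toy_stop_one_cpu(&s, toy_migration_stop, "p");
	pthread_join(thr, NULL);
	return 0;
}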
+
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+	if (p->sched_class->set_cpus_allowed)
+		p->sched_class->set_cpus_allowed(p, new_mask);
+
+	cpumask_copy(&p->cpus_allowed, new_mask);
+	p->nr_cpus_allowed = cpumask_weight(new_mask);
+}
+
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+{
+	unsigned long flags;
+	struct rq *rq;
+	unsigned int dest_cpu;
+	int ret = 0;
+
+	rq = task_rq_lock(p, &flags);
+
+	if (cpumask_equal(&p->cpus_allowed, new_mask))
+		goto out;
+
+	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	do_set_cpus_allowed(p, new_mask);
+
+	/* Can the task run on the task's current CPU? If so, we're done */
+	if (cpumask_test_cpu(task_cpu(p), new_mask))
+		goto out;
+
+	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	if (task_running(rq, p) || p->state == TASK_WAKING) {
+		struct migration_arg arg = { p, dest_cpu };
+		/* Need help from migration thread: drop lock and wait. */
+		task_rq_unlock(rq, p, &flags);
+		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+		tlb_migrate_finish(p->mm);
+		return 0;
+	} else if (task_on_rq_queued(p)) {
+		/*
+		 * OK, since we're going to drop the lock immediately
+		 * afterwards anyway.
+		 */
+		lockdep_unpin_lock(&rq->lock);
+		rq = move_queued_task(rq, p, dest_cpu);
+		lockdep_pin_lock(&rq->lock);
+	}
+out:
+	task_rq_unlock(rq, p, &flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
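For reference, the userspace-visible path into this function is the affinity syscall: sched_setaffinity() ends up calling set_cpus_allowed_ptr() on the target task. A minimal, runnable userspace example (standard glibc API, nothing specific to this patch):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);	/* allow CPU 0 only */

	/* pid 0 == calling thread; the kernel side lands in set_cpus_allowed_ptr(). */
	if (sched_setaffinity(0, sizeof(mask), &mask))
		perror("sched_setaffinity");
	else
		printf("restricted to CPU 0; the scheduler migrates us if needed\n");
	return 0;
}

Since the calling thread is running, this corresponds to the task_running() branch above: the stopper thread is asked to push it onto an allowed CPU.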
+
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 #ifdef CONFIG_SCHED_DEBUG
@@ -1181,13 +1356,6 @@ out:
 	return ret;
 }
 
-struct migration_arg {
-	struct task_struct *task;
-	int dest_cpu;
-};
-
-static int migration_cpu_stop(void *data);
-
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -1320,9 +1488,7 @@ void kick_process(struct task_struct *p)
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kick_process);
-#endif /* CONFIG_SMP */
 
-#ifdef CONFIG_SMP
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
  */
@@ -1402,6 +1568,8 @@ out:
 static inline
 int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 {
+	lockdep_assert_held(&p->pi_lock);
+
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
 
@@ -1427,7 +1595,7 @@ static void update_avg(u64 *avg, u64 sample)
 	s64 diff = sample - *avg;
 	*avg += diff >> 3;
 }
-#endif
+#endif /* CONFIG_SMP */
 
 static void
 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
@@ -1490,8 +1658,15 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_woken)
+	if (p->sched_class->task_woken) {
+		/*
+		 * Our task @p is fully woken up and running; so it's safe to
+		 * drop the rq->lock, hereafter rq is only used for statistics.
+		 */
+		lockdep_unpin_lock(&rq->lock);
 		p->sched_class->task_woken(rq, p);
+		lockdep_pin_lock(&rq->lock);
+	}
 
 	if (rq->idle_stamp) {
 		u64 delta = rq_clock(rq) - rq->idle_stamp;
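The lockdep_pin_lock()/lockdep_unpin_lock() pairs introduced throughout this patch assert that rq->lock is not dropped behind the scheduler's back; callbacks such as task_woken() that may legitimately drop it are bracketed by an explicit unpin/pin. A minimal sketch of the invariant being enforced (this only models the idea; the real implementation lives in the lockdep core and works on the lock's dep_map):

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct pinned_lock {
	pthread_mutex_t mutex;
	int pins;		/* outstanding "pins" on the held lock */
};

static void pl_lock(struct pinned_lock *l)  { pthread_mutex_lock(&l->mutex); }
static void pl_pin(struct pinned_lock *l)   { l->pins++; }
static void pl_unpin(struct pinned_lock *l) { assert(l->pins > 0); l->pins--; }

static void pl_unlock(struct pinned_lock *l)
{
	assert(l->pins == 0);	/* dropping a pinned lock is the bug this catches */
	pthread_mutex_unlock(&l->mutex);
}

int main(void)
{
	struct pinned_lock rq_lock = { PTHREAD_MUTEX_INITIALIZER, 0 };

	pl_lock(&rq_lock);
	pl_pin(&rq_lock);
	/* ... work that must not lose the lock ... */
	pl_unpin(&rq_lock);	/* explicit unpin, as around task_woken() above */
	pl_unlock(&rq_lock);
	printf("ok\n");
	return 0;
}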
@@ -1510,6 +1685,8 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 static void
 ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
 {
+	lockdep_assert_held(&rq->lock);
+
 #ifdef CONFIG_SMP
 	if (p->sched_contributes_to_load)
 		rq->nr_uninterruptible--;
@@ -1554,6 +1731,7 @@ void sched_ttwu_pending(void)
 		return;
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
+	lockdep_pin_lock(&rq->lock);
 
 	while (llist) {
 		p = llist_entry(llist, struct task_struct, wake_entry);
@@ -1561,6 +1739,7 @@ void sched_ttwu_pending(void)
 		ttwu_do_activate(rq, p, 0);
 	}
 
+	lockdep_unpin_lock(&rq->lock);
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
@@ -1657,7 +1836,9 @@ static void ttwu_queue(struct task_struct *p, int cpu)
 #endif
 
 	raw_spin_lock(&rq->lock);
+	lockdep_pin_lock(&rq->lock);
 	ttwu_do_activate(rq, p, 0);
+	lockdep_unpin_lock(&rq->lock);
 	raw_spin_unlock(&rq->lock);
 }
 
@@ -1752,9 +1933,17 @@ static void try_to_wake_up_local(struct task_struct *p)
 	lockdep_assert_held(&rq->lock);
 
 	if (!raw_spin_trylock(&p->pi_lock)) {
+		/*
+		 * This is OK, because current is on_cpu, which avoids it being
+		 * picked for load-balance and preemption/IRQs are still
+		 * disabled avoiding further scheduler activity on it and we've
+		 * not yet picked a replacement task.
+		 */
+		lockdep_unpin_lock(&rq->lock);
 		raw_spin_unlock(&rq->lock);
 		raw_spin_lock(&p->pi_lock);
 		raw_spin_lock(&rq->lock);
+		lockdep_pin_lock(&rq->lock);
 	}
 
 	if (!(p->state & TASK_NORMAL))
@@ -2294,23 +2483,35 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 #ifdef CONFIG_SMP
 
 /* rq->lock is NOT held, but preemption is disabled */
-static inline void post_schedule(struct rq *rq)
+static void __balance_callback(struct rq *rq)
 {
-	if (rq->post_schedule) {
-		unsigned long flags;
+	struct callback_head *head, *next;
+	void (*func)(struct rq *rq);
+	unsigned long flags;
 
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->curr->sched_class->post_schedule)
-			rq->curr->sched_class->post_schedule(rq);
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	head = rq->balance_callback;
+	rq->balance_callback = NULL;
+	while (head) {
+		func = (void (*)(struct rq *))head->func;
+		next = head->next;
+		head->next = NULL;
+		head = next;
 
-		rq->post_schedule = 0;
+		func(rq);
 	}
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static inline void balance_callback(struct rq *rq)
+{
+	if (unlikely(rq->balance_callback))
+		__balance_callback(rq);
 }
 
 #else
 
-static inline void post_schedule(struct rq *rq)
+static inline void balance_callback(struct rq *rq)
 {
 }
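The list drained by __balance_callback() above replaces the old rq->post_schedule flag: a sched class queues a callback_head on its runqueue while holding rq->lock, and the callback runs later via balance_callback() once it is safe to balance, per the comment at the top of this patch. A self-contained, single-threaded model of that splice-and-run pattern (locking and the struct rq plumbing omitted; names invented for illustration):

#include <stdio.h>

struct cb {
	struct cb *next;
	void (*func)(void);
};

static struct cb *pending;	/* plays the role of rq->balance_callback */

static void queue_cb(struct cb *c)	/* done while "rq->lock" is held */
{
	c->next = pending;
	pending = c;
}

static void run_callbacks(void)		/* the __balance_callback() pattern */
{
	struct cb *head = pending, *next;
	void (*func)(void);

	pending = NULL;			/* detach the whole list, then walk it */
	while (head) {
		func = head->func;
		next = head->next;
		head->next = NULL;	/* node may be re-queued by its owner */
		head = next;
		func();
	}
}

static void push_tasks(void) { printf("push/pull balancing would run here\n"); }

int main(void)
{
	struct cb work = { NULL, push_tasks };

	queue_cb(&work);	/* e.g. queued from a switched_to() method */
	run_callbacks();	/* balance_callback() after check_class_changed() */
	return 0;
}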
 
@@ -2328,7 +2529,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 	/* finish_task_switch() drops rq->lock and enables preemtion */
 	preempt_disable();
 	rq = finish_task_switch(prev);
-	post_schedule(rq);
+	balance_callback(rq);
 	preempt_enable();
 
 	if (current->set_child_tid)
@@ -2372,6 +2573,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * of the scheduler it's an obvious special-case), so we
 	 * do an early lockdep release here:
 	 */
+	lockdep_unpin_lock(&rq->lock);
 	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 
 	/* Here we just switch the register state and the stack. */
@@ -2794,6 +2996,7 @@ static void __sched __schedule(void)
 	 */
 	smp_mb__before_spinlock();
 	raw_spin_lock_irq(&rq->lock);
+	lockdep_pin_lock(&rq->lock);
 
 	rq->clock_skip_update <<= 1; /* promote REQ to ACT */
 
@@ -2836,10 +3039,12 @@ static void __sched __schedule(void)
 
 		rq = context_switch(rq, prev, next); /* unlocks the rq */
 		cpu = cpu_of(rq);
-	} else
+	} else {
+		lockdep_unpin_lock(&rq->lock);
 		raw_spin_unlock_irq(&rq->lock);
+	}
 
-	post_schedule(rq);
+	balance_callback(rq);
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -3103,7 +3308,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
+	preempt_disable(); /* avoid rq from going away on us */
 	__task_rq_unlock(rq);
+
+	balance_callback(rq);
+	preempt_enable();
 }
 #endif
 
@@ -3441,7 +3650,7 @@ static bool dl_param_changed(struct task_struct *p,
 
 static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
-				bool user)
+				bool user, bool pi)
 {
 	int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
 		      MAX_RT_PRIO - 1 - attr->sched_priority;
@@ -3627,18 +3836,20 @@ change:
 	p->sched_reset_on_fork = reset_on_fork;
 	oldprio = p->prio;
 
-	/*
-	 * Take priority boosted tasks into account. If the new
-	 * effective priority is unchanged, we just store the new
-	 * normal parameters and do not touch the scheduler class and
-	 * the runqueue. This will be done when the task deboost
-	 * itself.
-	 */
-	new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
-	if (new_effective_prio == oldprio) {
-		__setscheduler_params(p, attr);
-		task_rq_unlock(rq, p, &flags);
-		return 0;
+	if (pi) {
+		/*
+		 * Take priority boosted tasks into account. If the new
+		 * effective priority is unchanged, we just store the new
+		 * normal parameters and do not touch the scheduler class and
+		 * the runqueue. This will be done when the task deboost
+		 * itself.
+		 */
+		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+		if (new_effective_prio == oldprio) {
+			__setscheduler_params(p, attr);
+			task_rq_unlock(rq, p, &flags);
+			return 0;
+		}
 	}
 
 	queued = task_on_rq_queued(p);
@@ -3649,7 +3860,7 @@ change:
 		put_prev_task(rq, p);
 
 	prev_class = p->sched_class;
-	__setscheduler(rq, p, attr, true);
+	__setscheduler(rq, p, attr, pi);
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
@@ -3662,9 +3873,17 @@ change:
 	}
 
 	check_class_changed(rq, p, prev_class, oldprio);
+	preempt_disable(); /* avoid rq from going away on us */
 	task_rq_unlock(rq, p, &flags);
 
-	rt_mutex_adjust_pi(p);
+	if (pi)
+		rt_mutex_adjust_pi(p);
+
+	/*
+	 * Run balance callbacks after we've adjusted the PI chain.
+	 */
+	balance_callback(rq);
+	preempt_enable();
 
 	return 0;
 }
@@ -3685,7 +3904,7 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
 		attr.sched_policy = policy;
 	}
 
-	return __sched_setscheduler(p, &attr, check);
+	return __sched_setscheduler(p, &attr, check, true);
 }
 /**
  * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
@@ -3706,7 +3925,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
 
 int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
 {
-	return __sched_setscheduler(p, attr, true);
+	return __sched_setscheduler(p, attr, true, true);
 }
 EXPORT_SYMBOL_GPL(sched_setattr);
 
@@ -4754,149 +4973,6 @@ out:
 }
 
 #ifdef CONFIG_SMP
-/*
- * move_queued_task - move a queued task to new rq.
- *
- * Returns (locked) new rq. Old rq's lock is released.
- */
-static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
-{
-	struct rq *rq = task_rq(p);
-
-	lockdep_assert_held(&rq->lock);
-
-	dequeue_task(rq, p, 0);
-	p->on_rq = TASK_ON_RQ_MIGRATING;
-	set_task_cpu(p, new_cpu);
-	raw_spin_unlock(&rq->lock);
-
-	rq = cpu_rq(new_cpu);
-
-	raw_spin_lock(&rq->lock);
-	BUG_ON(task_cpu(p) != new_cpu);
-	p->on_rq = TASK_ON_RQ_QUEUED;
-	enqueue_task(rq, p, 0);
-	check_preempt_curr(rq, p, 0);
-
-	return rq;
-}
-
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
-{
-	if (p->sched_class->set_cpus_allowed)
-		p->sched_class->set_cpus_allowed(p, new_mask);
-
-	cpumask_copy(&p->cpus_allowed, new_mask);
-	p->nr_cpus_allowed = cpumask_weight(new_mask);
-}
-
-/*
- * This is how migration works:
- *
- * 1) we invoke migration_cpu_stop() on the target CPU using
- *    stop_one_cpu().
- * 2) stopper starts to run (implicitly forcing the migrated thread
- *    off the CPU)
- * 3) it checks whether the migrated task is still in the wrong runqueue.
- * 4) if it's in the wrong runqueue then the migration thread removes
- *    it and puts it into the right queue.
- * 5) stopper completes and stop_one_cpu() returns and the migration
- *    is done.
- */
-
-/*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
- */
-int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
-{
-	unsigned long flags;
-	struct rq *rq;
-	unsigned int dest_cpu;
-	int ret = 0;
-
-	rq = task_rq_lock(p, &flags);
-
-	if (cpumask_equal(&p->cpus_allowed, new_mask))
-		goto out;
-
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	do_set_cpus_allowed(p, new_mask);
-
-	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpumask_test_cpu(task_cpu(p), new_mask))
-		goto out;
-
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-	if (task_running(rq, p) || p->state == TASK_WAKING) {
-		struct migration_arg arg = { p, dest_cpu };
-		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, p, &flags);
-		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
-		tlb_migrate_finish(p->mm);
-		return 0;
-	} else if (task_on_rq_queued(p))
-		rq = move_queued_task(p, dest_cpu);
-out:
-	task_rq_unlock(rq, p, &flags);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
-
-/*
- * Move (not current) task off this cpu, onto dest cpu. We're doing
- * this because either it can't run here any more (set_cpus_allowed()
- * away from this CPU, or CPU going down), or because we're
- * attempting to rebalance this task on exec (sched_exec).
- *
- * So we race with normal scheduler movements, but that's OK, as long
- * as the task is no longer on this CPU.
- *
- * Returns non-zero if task was successfully migrated.
- */
-static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
-{
-	struct rq *rq;
-	int ret = 0;
-
-	if (unlikely(!cpu_active(dest_cpu)))
-		return ret;
-
-	rq = cpu_rq(src_cpu);
-
-	raw_spin_lock(&p->pi_lock);
-	raw_spin_lock(&rq->lock);
-	/* Already moved. */
-	if (task_cpu(p) != src_cpu)
-		goto done;
-
-	/* Affinity changed (again). */
-	if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
-		goto fail;
-
-	/*
-	 * If we're not on a rq, the next wake-up will ensure we're
-	 * placed properly.
-	 */
-	if (task_on_rq_queued(p))
-		rq = move_queued_task(p, dest_cpu);
-done:
-	ret = 1;
-fail:
-	raw_spin_unlock(&rq->lock);
-	raw_spin_unlock(&p->pi_lock);
-	return ret;
-}
 
 #ifdef CONFIG_NUMA_BALANCING
 /* Migrate current task p to target_cpu */
@@ -4944,35 +5020,9 @@ void sched_setnuma(struct task_struct *p, int nid)
 	enqueue_task(rq, p, 0);
 	task_rq_unlock(rq, p, &flags);
 }
-#endif
-
-/*
- * migration_cpu_stop - this will be executed by a highprio stopper thread
- * and performs thread migration by bumping thread off CPU then
- * 'pushing' onto another runqueue.
- */
-static int migration_cpu_stop(void *data)
-{
-	struct migration_arg *arg = data;
-
-	/*
-	 * The original target cpu might have gone down and we might
-	 * be on another cpu but it doesn't matter.
-	 */
-	local_irq_disable();
-	/*
-	 * We need to explicitly wake pending tasks before running
-	 * __migrate_task() such that we will not miss enforcing cpus_allowed
-	 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
-	 */
-	sched_ttwu_pending();
-	__migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
-	local_irq_enable();
-	return 0;
-}
+#endif /* CONFIG_NUMA_BALANCING */
 
 #ifdef CONFIG_HOTPLUG_CPU
-
 /*
  * Ensures that the idle task is using init_mm right before its cpu goes
  * offline.
@@ -5028,9 +5078,9 @@ static struct task_struct fake_task = {
  * there's no concurrency possible, we hold the required locks anyway
  * because of lock validation efforts.
  */
-static void migrate_tasks(unsigned int dead_cpu)
+static void migrate_tasks(struct rq *dead_rq)
 {
-	struct rq *rq = cpu_rq(dead_cpu);
+	struct rq *rq = dead_rq;
 	struct task_struct *next, *stop = rq->stop;
 	int dest_cpu;
 
@@ -5052,7 +5102,7 @@ static void migrate_tasks(unsigned int dead_cpu)
 	 */
 	update_rq_clock(rq);
 
-	for ( ; ; ) {
+	for (;;) {
 		/*
 		 * There's this thread running, bail when that's the only
 		 * remaining thread.
@@ -5060,22 +5110,29 @@ static void migrate_tasks(unsigned int dead_cpu)
 		if (rq->nr_running == 1)
 			break;
 
+		/*
+		 * Ensure rq->lock covers the entire task selection
+		 * until the migration.
+		 */
+		lockdep_pin_lock(&rq->lock);
 		next = pick_next_task(rq, &fake_task);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
 
 		/* Find suitable destination for @next, with force if needed. */
-		dest_cpu = select_fallback_rq(dead_cpu, next);
-		raw_spin_unlock(&rq->lock);
-
-		__migrate_task(next, dead_cpu, dest_cpu);
-
-		raw_spin_lock(&rq->lock);
+		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
+
+		lockdep_unpin_lock(&rq->lock);
+		rq = __migrate_task(rq, next, dest_cpu);
+		if (rq != dead_rq) {
+			raw_spin_unlock(&rq->lock);
+			rq = dead_rq;
+			raw_spin_lock(&rq->lock);
+		}
 	}
 
 	rq->stop = stop;
 }
-
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -5254,7 +5311,7 @@ static void register_sched_domain_sysctl(void)
 static void unregister_sched_domain_sysctl(void)
 {
 }
-#endif
+#endif /* CONFIG_SCHED_DEBUG && CONFIG_SYSCTL */
 
 static void set_rq_online(struct rq *rq)
 {
@@ -5323,7 +5380,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 			set_rq_offline(rq);
 		}
-		migrate_tasks(cpu);
+		migrate_tasks(rq);
 		BUG_ON(rq->nr_running != 1); /* the migration thread */
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
 		break;
@@ -5401,9 +5458,6 @@ static int __init migration_init(void)
 	return 0;
 }
 early_initcall(migration_init);
-#endif
-
-#ifdef CONFIG_SMP
 
 static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
 
@@ -6629,7 +6683,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 		struct sched_group *sg;
 		struct sched_group_capacity *sgc;
 
-		sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
+		sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(j));
 		if (!sd)
 			return -ENOMEM;
@@ -7235,7 +7289,7 @@ void __init sched_init(void)
 		rq->sd = NULL;
 		rq->rd = NULL;
 		rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
-		rq->post_schedule = 0;
+		rq->balance_callback = NULL;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
 		rq->push_cpu = 0;
@@ -7365,32 +7419,12 @@ EXPORT_SYMBOL(___might_sleep);
 #endif
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static void normalize_task(struct rq *rq, struct task_struct *p)
+void normalize_rt_tasks(void)
 {
-	const struct sched_class *prev_class = p->sched_class;
+	struct task_struct *g, *p;
 	struct sched_attr attr = {
 		.sched_policy = SCHED_NORMAL,
 	};
-	int old_prio = p->prio;
-	int queued;
-
-	queued = task_on_rq_queued(p);
-	if (queued)
-		dequeue_task(rq, p, 0);
-	__setscheduler(rq, p, &attr, false);
-	if (queued) {
-		enqueue_task(rq, p, 0);
-		resched_curr(rq);
-	}
-
-	check_class_changed(rq, p, prev_class, old_prio);
-}
-
-void normalize_rt_tasks(void)
-{
-	struct task_struct *g, *p;
-	unsigned long flags;
-	struct rq *rq;
 
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, p) {
@@ -7417,9 +7451,7 @@ void normalize_rt_tasks(void)
 			continue;
 		}
 
-		rq = task_rq_lock(p, &flags);
-		normalize_task(rq, p);
-		task_rq_unlock(rq, p, &flags);
+		__sched_setscheduler(p, &attr, false, false);
 	}
 	read_unlock(&tasklist_lock);
 }