@@ -738,12 +738,56 @@ static void update_curr_fair(struct rq *rq)
 	update_curr(cfs_rq_of(&rq->curr->se));
 }
 
+#ifdef CONFIG_SCHEDSTATS
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	u64 wait_start = rq_clock(rq_of(cfs_rq));
+
+	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
+	    likely(wait_start > se->statistics.wait_start))
+		wait_start -= se->statistics.wait_start;
+
+	se->statistics.wait_start = wait_start;
+}
+
+static void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct task_struct *p;
+	u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+
+	if (entity_is_task(se)) {
+		p = task_of(se);
+		if (task_on_rq_migrating(p)) {
+			/*
+			 * Preserve migrating task's wait time so wait_start
+			 * time stamp can be adjusted to accumulate wait time
+			 * prior to migration.
+			 */
+			se->statistics.wait_start = delta;
+			return;
+		}
+		trace_sched_stat_wait(p, delta);
+	}
+
+	se->statistics.wait_max = max(se->statistics.wait_max, delta);
+	se->statistics.wait_count++;
+	se->statistics.wait_sum += delta;
+	se->statistics.wait_start = 0;
+}
+#else
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	schedstat_set(se->statistics.wait_start, rq_clock(rq_of(cfs_rq)));
 }
 
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+#endif
+
 /*
  * Task is being enqueued - update stats:
  */
@@ -757,23 +801,6 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		update_stats_wait_start(cfs_rq, se);
 }
 
-static void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
-			rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start));
-	schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
-	schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
-			rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
-#ifdef CONFIG_SCHEDSTATS
-	if (entity_is_task(se)) {
-		trace_sched_stat_wait(task_of(se),
-			rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
-	}
-#endif
-	schedstat_set(se->statistics.wait_start, 0);
-}
-
 static inline void
 update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
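
The two hunks above move the wait-time bookkeeping under CONFIG_SCHEDSTATS and teach it to carry wait time across a migration: update_stats_wait_end() on the source rq stores the accumulated delta in se->statistics.wait_start instead of clearing it, and update_stats_wait_start() on the destination rq rebases that delta against the new rq clock. A minimal userspace sketch of the arithmetic (invented clock values, not the kernel code itself):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t src_enqueue = 1000;	/* src rq clock when the task started waiting */
	uint64_t src_dequeue = 1400;	/* src rq clock when it is dequeued to migrate */
	uint64_t dst_enqueue = 9000;	/* unrelated clock of the destination rq */
	uint64_t dst_picked  = 9100;	/* destination rq clock when it finally runs */

	/* wait_end on a migrating task: keep the delta rather than a time stamp */
	uint64_t wait_start = src_dequeue - src_enqueue;		/* 400 */

	/* wait_start on the new rq: rebase the stamp by that saved delta */
	wait_start = dst_enqueue - wait_start;				/* 8600 */

	/* the next wait_end sees the wait accumulated on both CPUs */
	printf("total wait: %llu\n",
	       (unsigned long long)(dst_picked - wait_start));		/* 500 */
	return 0;
}
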
@@ -2155,6 +2182,7 @@ void task_numa_work(struct callback_head *work)
 	unsigned long migrate, next_scan, now = jiffies;
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
+	u64 runtime = p->se.sum_exec_runtime;
 	struct vm_area_struct *vma;
 	unsigned long start, end;
 	unsigned long nr_pte_updates = 0;
@@ -2277,6 +2305,17 @@ out:
 	else
 		reset_ptenuma_scan(p);
 	up_read(&mm->mmap_sem);
+
+	/*
+	 * Make sure tasks use at least 32x as much time to run other code
+	 * than they used here, to limit NUMA PTE scanning overhead to 3% max.
+	 * Usually update_task_scan_period slows down scanning enough; on an
+	 * overloaded system we need to limit overhead on a per task basis.
+	 */
+	if (unlikely(p->se.sum_exec_runtime != runtime)) {
+		u64 diff = p->se.sum_exec_runtime - runtime;
+		p->node_stamp += 32 * diff;
+	}
 }
 
 /*
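
The 32x charge added above bounds the scanning overhead arithmetically: if task_numa_work() consumed diff ns of the task's runtime, pushing p->node_stamp forward by 32 * diff means at least that much other execution must be accounted before the next scan becomes due, so scanning stays below 1/33, roughly the 3% the comment mentions. A quick standalone check with a made-up scan time:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t diff = 2000000;	/* pretend 2ms were spent in task_numa_work() */
	uint64_t charged = 32 * diff;	/* amount added to p->node_stamp by the hunk above */

	/* worst case: the next scan fires as soon as node_stamp catches up */
	printf("max scan overhead: %.2f%%\n",
	       100.0 * (double)diff / (double)(diff + charged));	/* ~3.03% */
	return 0;
}
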
@@ -2670,12 +2709,64 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 {
 	long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib;
 
+	/*
+	 * No need to update load_avg for root_task_group as it is not used.
+	 */
+	if (cfs_rq->tg == &root_task_group)
+		return;
+
 	if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) {
 		atomic_long_add(delta, &cfs_rq->tg->load_avg);
 		cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg;
 	}
 }
 
+/*
+ * Called within set_task_rq() right before setting a task's cpu. The
+ * caller only guarantees p->pi_lock is held; no other assumptions,
+ * including the state of rq->lock, should be made.
+ */
+void set_task_rq_fair(struct sched_entity *se,
+		      struct cfs_rq *prev, struct cfs_rq *next)
+{
+	if (!sched_feat(ATTACH_AGE_LOAD))
+		return;
+
+	/*
+	 * We are supposed to update the task to "current" time, so that it is
+	 * up to date and ready to go to the new CPU/cfs_rq. But we have
+	 * difficulty getting what the current time is, so simply throw away
+	 * the out-of-date time. This results in the wakee task being less
+	 * decayed, but giving the wakee more load sounds not bad.
+	 */
+	if (se->avg.last_update_time && prev) {
+		u64 p_last_update_time;
+		u64 n_last_update_time;
+
+#ifndef CONFIG_64BIT
+		u64 p_last_update_time_copy;
+		u64 n_last_update_time_copy;
+
+		do {
+			p_last_update_time_copy = prev->load_last_update_time_copy;
+			n_last_update_time_copy = next->load_last_update_time_copy;
+
+			smp_rmb();
+
+			p_last_update_time = prev->avg.last_update_time;
+			n_last_update_time = next->avg.last_update_time;
+
+		} while (p_last_update_time != p_last_update_time_copy ||
+			 n_last_update_time != n_last_update_time_copy);
+#else
+		p_last_update_time = prev->avg.last_update_time;
+		n_last_update_time = next->avg.last_update_time;
+#endif
+		__update_load_avg(p_last_update_time, cpu_of(rq_of(prev)),
+				  &se->avg, 0, 0, NULL);
+		se->avg.last_update_time = n_last_update_time;
+	}
+}
 #else /* CONFIG_FAIR_GROUP_SCHED */
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
 #endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -2809,48 +2900,48 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
 }
 
-/*
- * Task first catches up with cfs_rq, and then subtract
- * itself from the cfs_rq (task must be off the queue now).
- */
-void remove_entity_load_avg(struct sched_entity *se)
-{
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	u64 last_update_time;
-
 #ifndef CONFIG_64BIT
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
 	u64 last_update_time_copy;
+	u64 last_update_time;
 
 	do {
 		last_update_time_copy = cfs_rq->load_last_update_time_copy;
 		smp_rmb();
 		last_update_time = cfs_rq->avg.last_update_time;
 	} while (last_update_time != last_update_time_copy);
-#else
-	last_update_time = cfs_rq->avg.last_update_time;
-#endif
 
-	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
-	atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
-	atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
+	return last_update_time;
 }
-
-/*
- * Update the rq's load with the elapsed running time before entering
- * idle. if the last scheduled task is not a CFS task, idle_enter will
- * be the only way to update the runnable statistic.
- */
-void idle_enter_fair(struct rq *this_rq)
+#else
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
 {
+	return cfs_rq->avg.last_update_time;
 }
+#endif
 
 /*
- * Update the rq's load with the elapsed idle time before a task is
- * scheduled. if the newly scheduled task is not a CFS task, idle_exit will
- * be the only way to update the runnable statistic.
+ * Task first catches up with cfs_rq, and then subtract
+ * itself from the cfs_rq (task must be off the queue now).
  */
-void idle_exit_fair(struct rq *this_rq)
+void remove_entity_load_avg(struct sched_entity *se)
 {
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u64 last_update_time;
+
+	/*
+	 * Newly created task or never used group entity should not be removed
+	 * from its (source) cfs_rq
+	 */
+	if (se->avg.last_update_time == 0)
+		return;
+
+	last_update_time = cfs_rq_last_update_time(cfs_rq);
+
+	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+	atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
+	atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
 }
 
 static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
@@ -4240,42 +4331,37 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
  */
 
 /*
- * The exact cpuload at various idx values, calculated at every tick would be
- * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ * The exact cpuload calculated at every tick would be:
+ *
+ *   load' = (1 - 1/2^i) * load + (1/2^i) * cur_load
  *
- * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
- * on nth tick when cpu may be busy, then we have:
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ * If a cpu misses updates for n ticks (as it was idle) and update gets
+ * called on the n+1-th tick when cpu may be busy, then we have:
+ *
+ *   load_n   = (1 - 1/2^i)^n * load_0
+ *   load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load
  *
  * decay_load_missed() below does efficient calculation of
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ *   load' = (1 - 1/2^i)^n * load
+ *
+ * Because x^(n+m) := x^n * x^m we can decompose any x^n in power-of-2 factors.
+ * This allows us to precompute the above in said factors, thereby allowing the
+ * reduction of an arbitrary n in O(log_2 n) steps. (See also
+ * fixed_power_int())
  *
  * The calculation is approximated on a 128 point scale.
- * degrade_zero_ticks is the number of ticks after which load at any
- * particular idx is approximated to be zero.
- * degrade_factor is a precomputed table, a row for each load idx.
- * Each column corresponds to degradation factor for a power of two ticks,
- * based on 128 point scale.
- * Example:
- * row 2, col 3 (=12) says that the degradation at load idx 2 after
- * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
- *
- * With this power of 2 load factors, we can degrade the load n times
- * by looking at 1 bits in n and doing as many mult/shift instead of
- * n mult/shifts needed by the exact degradation.
  */
 #define DEGRADE_SHIFT		7
-static const unsigned char
-		degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
-static const unsigned char
-		degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
-					{0, 0, 0, 0, 0, 0, 0, 0},
-					{64, 32, 8, 0, 0, 0, 0, 0},
-					{96, 72, 40, 12, 1, 0, 0},
-					{112, 98, 75, 43, 15, 1, 0},
-					{120, 112, 98, 76, 45, 16, 2} };
+
+static const u8 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const u8 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+	{   0,   0,  0,  0,  0,  0, 0, 0 },
+	{  64,  32,  8,  0,  0,  0, 0, 0 },
+	{  96,  72, 40, 12,  1,  0, 0, 0 },
+	{ 112,  98, 75, 43, 15,  1, 0, 0 },
+	{ 120, 112, 98, 76, 45, 16, 2, 0 }
+};
 
 /*
  * Update cpu_load for any missed ticks, due to tickless idle. The backlog
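
The degrade_factor[] rows above hold (1 - 1/2^i)^(2^j) on a 128-point scale, one column per power-of-two tick count. That is what lets decay_load_missed() (unchanged by this diff; it also short-circuits via degrade_zero_ticks[] and uses a plain shift for index 1) apply n missed ticks in O(log2 n) multiply-and-shift steps by walking the set bits of n. A standalone sketch of that bit-walk using the same table values:

#include <stdio.h>

#define DEGRADE_SHIFT	7

/* same values as the new degrade_factor[][] above, on a 128 point scale */
static const unsigned char degrade_factor[5][DEGRADE_SHIFT + 1] = {
	{   0,   0,  0,  0,  0,  0, 0, 0 },
	{  64,  32,  8,  0,  0,  0, 0, 0 },
	{  96,  72, 40, 12,  1,  0, 0, 0 },
	{ 112,  98, 75, 43, 15,  1, 0, 0 },
	{ 120, 112, 98, 76, 45, 16, 2, 0 },
};

/* apply load *= (1 - 1/2^idx) for each of n missed ticks, via the set bits of n */
static unsigned long decay(unsigned long load, unsigned long n, int idx)
{
	int j = 0;

	while (n) {
		if (n & 1)	/* fold in the precomputed factor for 2^j ticks */
			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
		n >>= 1;
		j++;
	}
	return load;
}

int main(void)
{
	/* idx 2: one tick decays 1024 to 768 (96/128), eight ticks to 96 (12/128) */
	printf("%lu %lu\n", decay(1024, 1, 2), decay(1024, 8, 2));
	return 0;
}
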
@@ -4306,14 +4392,46 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
 	return load;
 }
 
-/*
+/**
+ * __update_cpu_load - update the rq->cpu_load[] statistics
+ * @this_rq: The rq to update statistics for
+ * @this_load: The current load
+ * @pending_updates: The number of missed updates
+ * @active: !0 for NOHZ_FULL
+ *
  * Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC). With tickless idle this will not be called
- * every tick. We fix it up based on jiffies.
+ * scheduler tick (TICK_NSEC).
+ *
+ * This function computes a decaying average:
+ *
+ *   load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load
+ *
+ * Because of NOHZ it might not get called on every tick which gives need for
+ * the @pending_updates argument.
+ *
+ *   load[i]_n = (1 - 1/2^i) * load[i]_n-1 + (1/2^i) * load_n-1
+ *             = A * load[i]_n-1 + B ; A := (1 - 1/2^i), B := (1/2^i) * load
+ *             = A * (A * load[i]_n-2 + B) + B
+ *             = A * (A * (A * load[i]_n-3 + B) + B) + B
+ *             = A^3 * load[i]_n-3 + (A^2 + A + 1) * B
+ *             = A^n * load[i]_0 + (A^(n-1) + A^(n-2) + ... + 1) * B
+ *             = A^n * load[i]_0 + ((1 - A^n) / (1 - A)) * B
+ *             = (1 - 1/2^i)^n * (load[i]_0 - load) + load
+ *
+ * In the above we've assumed load_n := load, which is true for NOHZ_FULL as
+ * any change in load would have resulted in the tick being turned back on.
+ *
+ * For regular NOHZ, this reduces to:
+ *
+ *   load[i]_n = (1 - 1/2^i)^n * load[i]_0
+ *
+ * see decay_load_missed(). For NOHZ_FULL we get to subtract and add the extra
+ * term. See the @active parameter.
  */
 static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
-			      unsigned long pending_updates)
+			      unsigned long pending_updates, int active)
 {
+	unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0;
 	int i, scale;
 
 	this_rq->nr_load_updates++;
@@ -4325,8 +4443,9 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
 
 		/* scale is effectively 1 << i now, and >> i divides by scale */
 
-		old_load = this_rq->cpu_load[i];
+		old_load = this_rq->cpu_load[i] - tickless_load;
 		old_load = decay_load_missed(old_load, pending_updates - 1, i);
+		old_load += tickless_load;
 		new_load = this_load;
 		/*
 		 * Round up the averaging division if load is increasing. This
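
The subtract-then-add in the hunk above is the closed form derived in the new __update_cpu_load() comment: with the load pinned at the current value across the missed ticks (the NOHZ_FULL assumption), decaying load[i] - load and adding load back equals iterating the per-tick average n times. A small floating-point check of that identity (the kernel does the decay with the integer table instead):

#include <stdio.h>

/* one exact tick for index i: load' = (1 - 1/2^i) * load + (1/2^i) * cur */
static double tick(double load, double cur, int i)
{
	double f = 1.0 / (double)(1 << i);
	return (1.0 - f) * load + f * cur;
}

int main(void)
{
	int i = 2, n = 7;
	double load0 = 900.0, cur = 300.0;	/* made-up loads */
	double iter = load0, decayed = 1.0;

	for (int k = 0; k < n; k++) {
		iter = tick(iter, cur, i);	/* n missed ticks, load stuck at cur */
		decayed *= 1.0 - 1.0 / (double)(1 << i);
	}

	/* closed form used above: decay (load[i] - load), then add load back */
	printf("iterative=%.6f closed=%.6f\n", iter, decayed * (load0 - cur) + cur);
	return 0;
}
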
@@ -4381,16 +4500,17 @@ static void update_idle_cpu_load(struct rq *this_rq)
 	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
 	this_rq->last_load_update_tick = curr_jiffies;
 
-	__update_cpu_load(this_rq, load, pending_updates);
+	__update_cpu_load(this_rq, load, pending_updates, 0);
 }
 
 /*
  * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
  */
-void update_cpu_load_nohz(void)
+void update_cpu_load_nohz(int active)
 {
 	struct rq *this_rq = this_rq();
 	unsigned long curr_jiffies = READ_ONCE(jiffies);
+	unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
 	unsigned long pending_updates;
 
 	if (curr_jiffies == this_rq->last_load_update_tick)
@@ -4401,10 +4521,11 @@ void update_cpu_load_nohz(void)
 	if (pending_updates) {
 		this_rq->last_load_update_tick = curr_jiffies;
 		/*
-		 * We were idle, this means load 0, the current load might be
-		 * !0 due to remote wakeups and the sort.
+		 * In the regular NOHZ case, we were idle, this means load 0.
+		 * In the NOHZ_FULL case, we were non-idle, we should consider
+		 * its weighted load.
 		 */
-		__update_cpu_load(this_rq, 0, pending_updates);
+		__update_cpu_load(this_rq, load, pending_updates, active);
 	}
 	raw_spin_unlock(&this_rq->lock);
 }
@@ -4420,7 +4541,7 @@ void update_cpu_load_active(struct rq *this_rq)
 	 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
 	 */
 	this_rq->last_load_update_tick = jiffies;
-	__update_cpu_load(this_rq, load, 1);
+	__update_cpu_load(this_rq, load, 1, 1);
 }
 
 /*
@@ -5007,8 +5128,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 /*
  * Called immediately before a task is migrated to a new cpu; task_cpu(p) and
  * cfs_rq_of(p) references at time of call are still valid and identify the
- * previous cpu. However, the caller only guarantees p->pi_lock is held; no
- * other assumptions, including the state of rq->lock, should be made.
+ * previous cpu. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
  */
 static void migrate_task_rq_fair(struct task_struct *p)
 {
@@ -5721,8 +5841,8 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 {
 	lockdep_assert_held(&env->src_rq->lock);
 
-	deactivate_task(env->src_rq, p, 0);
 	p->on_rq = TASK_ON_RQ_MIGRATING;
+	deactivate_task(env->src_rq, p, 0);
 	set_task_cpu(p, env->dst_cpu);
 }
 
@@ -5855,8 +5975,8 @@ static void attach_task(struct rq *rq, struct task_struct *p)
 	lockdep_assert_held(&rq->lock);
 
 	BUG_ON(task_rq(p) != rq);
-	p->on_rq = TASK_ON_RQ_QUEUED;
 	activate_task(rq, p, 0);
+	p->on_rq = TASK_ON_RQ_QUEUED;
 	check_preempt_curr(rq, p, 0);
 }
 
@@ -6302,7 +6422,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 			bool *overload)
 {
 	unsigned long load;
-	int i;
+	int i, nr_running;
 
 	memset(sgs, 0, sizeof(*sgs));
 
@@ -6319,7 +6439,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		sgs->group_util += cpu_util(i);
 		sgs->sum_nr_running += rq->cfs.h_nr_running;
 
-		if (rq->nr_running > 1)
+		nr_running = rq->nr_running;
+		if (nr_running > 1)
 			*overload = true;
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -6327,7 +6448,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		sgs->nr_preferred_running += rq->nr_preferred_running;
 #endif
 		sgs->sum_weighted_load += weighted_cpuload(i);
-		if (idle_cpu(i))
+		/*
+		 * No need to call idle_cpu() if nr_running is not 0
+		 */
+		if (!nr_running && idle_cpu(i))
 			sgs->idle_cpus++;
 	}
 
@@ -7248,8 +7372,6 @@ static int idle_balance(struct rq *this_rq)
 	int pulled_task = 0;
 	u64 curr_cost = 0;
 
-	idle_enter_fair(this_rq);
-
 	/*
 	 * We must set idle_stamp _before_ calling idle_balance(), such that we
 	 * measure the duration of idle_balance() as idle time.
@@ -7330,10 +7452,8 @@ out:
 	if (this_rq->nr_running != this_rq->cfs.h_nr_running)
 		pulled_task = -1;
 
-	if (pulled_task) {
-		idle_exit_fair(this_rq);
+	if (pulled_task)
 		this_rq->idle_stamp = 0;
-	}
 
 	return pulled_task;
 }