7 years ago · 46e0d28bdb
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1766,6 +1766,17 @@
 
				 
			
 
				 			nohz
			
 
				 			  Disable the tick when a single task runs.
			
 
				+
			
 
				+			  A residual 1Hz tick is offloaded to workqueues, which you
			
 
				+			  need to affine to housekeeping through the global
			
 
				+			  workqueue's affinity configured via the
			
 
				+			  /sys/devices/virtual/workqueue/cpumask sysfs file, or
			
 
				+			  by using the 'domain' flag described below.
			
 
				+
			
 
				+			  NOTE: by default the global workqueue runs on all CPUs,
			
 
				+			  so to protect individual CPUs the 'cpumask' file has to
			
 
				+			  be configured manually after bootup.
			
 
				+
			
 
				 			domain
			
 
				 			  Isolate from the general SMP balancing and scheduling
			
 
				 			  algorithms. Note that performing domain isolation this way
			
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -93,7 +93,6 @@ struct task_group;
 
				 
			
 
				 /* Convenience macros for the sake of wake_up(): */
			
 
				 #define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
			
 
				-#define TASK_ALL			(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
			
 
				 
			
 
				 /* get_task_state(): */
			
 
				 #define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
			
@@ -275,6 +274,34 @@ struct load_weight {
 
				 	u32				inv_weight;
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+ * struct util_est - Estimated utilization of FAIR tasks
			
 
				+ * @enqueued: instantaneous estimated utilization of a task/cpu
			
 
				+ * @ewma:     the Exponential Weighted Moving Average (EWMA)
			
 
				+ *            utilization of a task
			
 
				+ *
			
 
				+ * Support data structure to track an Exponential Weighted Moving Average
			
 
				+ * (EWMA) of a FAIR task's utilization. New samples are added to the moving
			
 
				+ * average each time a task completes an activation. Sample's weight is chosen
			
 
				+ * so that the EWMA will be relatively insensitive to transient changes to the
			
 
				+ * task's workload.
			
 
				+ *
			
 
				+ * The enqueued attribute has a slightly different meaning for tasks and cpus:
			
 
				+ * - task:   the task's util_avg at last task dequeue time
			
 
				+ * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU
			
 
				+ * Thus, the util_est.enqueued of a task represents the contribution to the
			
 
				+ * estimated utilization of the CPU where that task is currently enqueued.
			
 
				+ *
			
 
				+ * Only for tasks we track a moving average of the past instantaneous
			
 
				+ * estimated utilization. This allows absorbing sporadic drops in utilization
			
 
				+ * of an otherwise almost periodic task.
			
 
				+ */
			
 
				+struct util_est {
			
 
				+	unsigned int			enqueued;
			
 
				+	unsigned int			ewma;
			
 
				+#define UTIL_EST_WEIGHT_SHIFT		2
			
 
				+};
			
 
				+
			
 
				 /*
			
 
				  * The load_avg/util_avg accumulates an infinite geometric series
			
 
				  * (see __update_load_avg() in kernel/sched/fair.c).
			
@@ -336,6 +363,7 @@ struct sched_avg {
 
				 	unsigned long			load_avg;
			
 
				 	unsigned long			runnable_load_avg;
			
 
				 	unsigned long			util_avg;
			
 
				+	struct util_est			util_est;
			
 
				 };
			
 
				 
			
 
				 struct sched_statistics {
			
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -8,9 +8,8 @@
 
				  * Interface between cpufreq drivers and the scheduler:
			
 
				  */
			
 
				 
			
 
				-#define SCHED_CPUFREQ_RT	(1U << 0)
			
 
				-#define SCHED_CPUFREQ_DL	(1U << 1)
			
 
				-#define SCHED_CPUFREQ_IOWAIT	(1U << 2)
			
 
				+#define SCHED_CPUFREQ_IOWAIT	(1U << 0)
			
 
				+#define SCHED_CPUFREQ_MIGRATION	(1U << 1)
			
 
				 
			
 
				 #ifdef CONFIG_CPU_FREQ
			
 
				 struct update_util_data {
			
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -1,8 +1,4 @@
 
				 /* SPDX-License-Identifier: GPL-2.0 */
			
 
				-#ifndef _LINUX_SCHED_DEADLINE_H
			
 
				-#define _LINUX_SCHED_DEADLINE_H
			
 
				-
			
 
				-#include <linux/sched.h>
			
 
				 
			
 
				 /*
			
 
				  * SCHED_DEADLINE tasks has negative priorities, reflecting
			
@@ -28,5 +24,3 @@ static inline bool dl_time_before(u64 a, u64 b)
 
				 {
			
 
				 	return (s64)(a - b) < 0;
			
 
				 }
			
 
				-
			
 
				-#endif /* _LINUX_SCHED_DEADLINE_H */
			
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -12,6 +12,7 @@ enum hk_flags {
 
				 	HK_FLAG_SCHED		= (1 << 3),
			
 
				 	HK_FLAG_TICK		= (1 << 4),
			
 
				 	HK_FLAG_DOMAIN		= (1 << 5),
			
 
				+	HK_FLAG_WQ		= (1 << 6),
			
 
				 };
			
 
				 
			
 
				 #ifdef CONFIG_CPU_ISOLATION
			
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -16,11 +16,9 @@ static inline void cpu_load_update_nohz_stop(void) { }
 
				 
			
 
				 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
			
 
				 extern void nohz_balance_enter_idle(int cpu);
			
 
				-extern void set_cpu_sd_state_idle(void);
			
 
				 extern int get_nohz_timer_target(void);
			
 
				 #else
			
 
				 static inline void nohz_balance_enter_idle(int cpu) { }
			
 
				-static inline void set_cpu_sd_state_idle(void) { }
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_NO_HZ_COMMON
			
@@ -37,8 +35,4 @@ extern void wake_up_nohz_cpu(int cpu);
 
				 static inline void wake_up_nohz_cpu(int cpu) { }
			
 
				 #endif
			
 
				 
			
 
				-#ifdef CONFIG_NO_HZ_FULL
			
 
				-extern u64 scheduler_tick_max_deferment(void);
			
 
				-#endif
			
 
				-
			
 
				 #endif /* _LINUX_SCHED_NOHZ_H */
			
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -113,7 +113,8 @@ enum tick_dep_bits {
 
				 
			
 
				 #ifdef CONFIG_NO_HZ_COMMON
			
 
				 extern bool tick_nohz_enabled;
			
 
				-extern int tick_nohz_tick_stopped(void);
			
 
				+extern bool tick_nohz_tick_stopped(void);
			
 
				+extern bool tick_nohz_tick_stopped_cpu(int cpu);
			
 
				 extern void tick_nohz_idle_enter(void);
			
 
				 extern void tick_nohz_idle_exit(void);
			
 
				 extern void tick_nohz_irq_exit(void);
			
@@ -125,6 +126,7 @@ extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 
				 #else /* !CONFIG_NO_HZ_COMMON */
			
 
				 #define tick_nohz_enabled (0)
			
 
				 static inline int tick_nohz_tick_stopped(void) { return 0; }
			
 
				+static inline bool tick_nohz_tick_stopped_cpu(int cpu) { return false; }
			
 
				 static inline void tick_nohz_idle_enter(void) { }
			
 
				 static inline void tick_nohz_idle_exit(void) { }
			
 
				 
			
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -17,8 +17,9 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 
				 endif
			
 
				 
			
 
				 obj-y += core.o loadavg.o clock.o cputime.o
			
 
				-obj-y += idle_task.o fair.o rt.o deadline.o
			
 
				-obj-y += wait.o wait_bit.o swait.o completion.o idle.o
			
 
				+obj-y += idle.o fair.o rt.o deadline.o
			
 
				+obj-y += wait.o wait_bit.o swait.o completion.o
			
 
				+
			
 
				 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
			
 
				 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
			
 
				 obj-$(CONFIG_SCHEDSTATS) += stats.o
			
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -1,10 +1,7 @@
 
				 // SPDX-License-Identifier: GPL-2.0
			
 
				-#include <linux/proc_fs.h>
			
 
				-#include <linux/seq_file.h>
			
 
				-#include <linux/utsname.h>
			
 
				-#include <linux/security.h>
			
 
				-#include <linux/export.h>
			
 
				-
			
 
				+/*
			
 
				+ * Auto-group scheduling implementation:
			
 
				+ */
			
 
				 #include "sched.h"
			
 
				 
			
 
				 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
			
@@ -168,18 +165,19 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 
				 	autogroup_kref_put(prev);
			
 
				 }
			
 
				 
			
 
				-/* Allocates GFP_KERNEL, cannot be called under any spinlock */
			
 
				+/* Allocates GFP_KERNEL, cannot be called under any spinlock: */
			
 
				 void sched_autogroup_create_attach(struct task_struct *p)
			
 
				 {
			
 
				 	struct autogroup *ag = autogroup_create();
			
 
				 
			
 
				 	autogroup_move_group(p, ag);
			
 
				-	/* drop extra reference added by autogroup_create() */
			
 
				+
			
 
				+	/* Drop extra reference added by autogroup_create(): */
			
 
				 	autogroup_kref_put(ag);
			
 
				 }
			
 
				 EXPORT_SYMBOL(sched_autogroup_create_attach);
			
 
				 
			
 
				-/* Cannot be called under siglock.  Currently has no users */
			
 
				+/* Cannot be called under siglock. Currently has no users: */
			
 
				 void sched_autogroup_detach(struct task_struct *p)
			
 
				 {
			
 
				 	autogroup_move_group(p, &autogroup_default);
			
@@ -202,7 +200,6 @@ static int __init setup_autogroup(char *str)
 
				 
			
 
				 	return 1;
			
 
				 }
			
 
				-
			
 
				 __setup("noautogroup", setup_autogroup);
			
 
				 
			
 
				 #ifdef CONFIG_PROC_FS
			
@@ -224,7 +221,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 
				 	if (nice < 0 && !can_nice(current, nice))
			
 
				 		return -EPERM;
			
 
				 
			
 
				-	/* this is a heavy operation taking global locks.. */
			
 
				+	/* This is a heavy operation, taking global locks.. */
			
 
				 	if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
			
 
				 		return -EAGAIN;
			
 
				 
			
@@ -267,4 +264,4 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen)
 
				 
			
 
				 	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
			
 
				 }
			
 
				-#endif /* CONFIG_SCHED_DEBUG */
			
 
				+#endif
			
--- a/kernel/sched/autogroup.h
+++ b/kernel/sched/autogroup.h
@@ -1,15 +1,11 @@
 
				 /* SPDX-License-Identifier: GPL-2.0 */
			
 
				 #ifdef CONFIG_SCHED_AUTOGROUP
			
 
				 
			
 
				-#include <linux/kref.h>
			
 
				-#include <linux/rwsem.h>
			
 
				-#include <linux/sched/autogroup.h>
			
 
				-
			
 
				 struct autogroup {
			
 
				 	/*
			
 
				-	 * reference doesn't mean how many thread attach to this
			
 
				-	 * autogroup now. It just stands for the number of task
			
 
				-	 * could use this autogroup.
			
 
				+	 * Reference doesn't mean how many threads attach to this
			
 
				+	 * autogroup now. It just stands for the number of tasks
			
 
				+	 * which could use this autogroup.
			
 
				 	 */
			
 
				 	struct kref		kref;
			
 
				 	struct task_group	*tg;
			
@@ -56,11 +52,9 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg)
 
				 	return tg;
			
 
				 }
			
 
				 
			
 
				-#ifdef CONFIG_SCHED_DEBUG
			
 
				 static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
			
 
				 {
			
 
				 	return 0;
			
 
				 }
			
 
				-#endif
			
 
				 
			
 
				 #endif /* CONFIG_SCHED_AUTOGROUP */
			
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * sched_clock for unstable cpu clocks
			
 
				+ * sched_clock() for unstable CPU clocks
			
 
				  *
			
 
				  *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
			
 
				  *
			
@@ -11,7 +11,7 @@
 
				  *   Guillaume Chazarain <guichaz@gmail.com>
			
 
				  *
			
 
				  *
			
 
				- * What:
			
 
				+ * What this file implements:
			
 
				  *
			
 
				  * cpu_clock(i) provides a fast (execution time) high resolution
			
 
				  * clock with bounded drift between CPUs. The value of cpu_clock(i)
			
@@ -26,11 +26,11 @@
 
				  * at 0 on boot (but people really shouldn't rely on that).
			
 
				  *
			
 
				  * cpu_clock(i)       -- can be used from any context, including NMI.
			
 
				- * local_clock()      -- is cpu_clock() on the current cpu.
			
 
				+ * local_clock()      -- is cpu_clock() on the current CPU.
			
 
				  *
			
 
				  * sched_clock_cpu(i)
			
 
				  *
			
 
				- * How:
			
 
				+ * How it is implemented:
			
 
				  *
			
 
				  * The implementation either uses sched_clock() when
			
 
				  * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
			
@@ -52,19 +52,7 @@
 
				  * that is otherwise invisible (TSC gets stopped).
			
 
				  *
			
 
				  */
			
 
				-#include <linux/spinlock.h>
			
 
				-#include <linux/hardirq.h>
			
 
				-#include <linux/export.h>
			
 
				-#include <linux/percpu.h>
			
 
				-#include <linux/ktime.h>
			
 
				-#include <linux/sched.h>
			
 
				-#include <linux/nmi.h>
			
 
				-#include <linux/sched/clock.h>
			
 
				-#include <linux/static_key.h>
			
 
				-#include <linux/workqueue.h>
			
 
				-#include <linux/compiler.h>
			
 
				-#include <linux/tick.h>
			
 
				-#include <linux/init.h>
			
 
				+#include "sched.h"
			
 
				 
			
 
				 /*
			
 
				  * Scheduler clock - returns current time in nanosec units.
			
@@ -302,21 +290,21 @@ again:
 
				 	 * cmpxchg64 below only protects one readout.
			
 
				 	 *
			
 
				 	 * We must reread via sched_clock_local() in the retry case on
			
 
				-	 * 32bit as an NMI could use sched_clock_local() via the
			
 
				+	 * 32-bit kernels as an NMI could use sched_clock_local() via the
			
 
				 	 * tracer and hit between the readout of
			
 
				-	 * the low32bit and the high 32bit portion.
			
 
				+	 * the low 32-bit and the high 32-bit portion.
			
 
				 	 */
			
 
				 	this_clock = sched_clock_local(my_scd);
			
 
				 	/*
			
 
				-	 * We must enforce atomic readout on 32bit, otherwise the
			
 
				-	 * update on the remote cpu can hit inbetween the readout of
			
 
				-	 * the low32bit and the high 32bit portion.
			
 
				+	 * We must enforce atomic readout on 32-bit, otherwise the
			
 
				+	 * update on the remote CPU can hit in between the readout of
			
 
				+	 * the low 32-bit and the high 32-bit portion.
			
 
				 	 */
			
 
				 	remote_clock = cmpxchg64(&scd->clock, 0, 0);
			
 
				 #else
			
 
				 	/*
			
 
				-	 * On 64bit the read of [my]scd->clock is atomic versus the
			
 
				-	 * update, so we can avoid the above 32bit dance.
			
 
				+	 * On 64-bit kernels the read of [my]scd->clock is atomic versus the
			
 
				+	 * update, so we can avoid the above 32-bit dance.
			
 
				 	 */
			
 
				 	sched_clock_local(my_scd);
			
 
				 again:
			
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -11,10 +11,7 @@
 
				  * typically be used for exclusion which gives rise to priority inversion.
			
 
				  * Waiting for completion is a typically sync point, but not an exclusion point.
			
 
				  */
			
 
				-
			
 
				-#include <linux/sched/signal.h>
			
 
				-#include <linux/sched/debug.h>
			
 
				-#include <linux/completion.h>
			
 
				+#include "sched.h"
			
 
				 
			
 
				 /**
			
 
				  * complete: - signals a single thread waiting on this completion
			
@@ -283,7 +280,7 @@ EXPORT_SYMBOL(wait_for_completion_killable_timeout);
 
				 bool try_wait_for_completion(struct completion *x)
			
 
				 {
			
 
				 	unsigned long flags;
			
 
				-	int ret = 1;
			
 
				+	bool ret = true;
			
 
				 
			
 
				 	/*
			
 
				 	 * Since x->done will need to be locked only
			
@@ -292,11 +289,11 @@ bool try_wait_for_completion(struct completion *x)
 
				 	 * return early in the blocking case.
			
 
				 	 */
			
 
				 	if (!READ_ONCE(x->done))
			
 
				-		return 0;
			
 
				+		return false;
			
 
				 
			
 
				 	spin_lock_irqsave(&x->wait.lock, flags);
			
 
				 	if (!x->done)
			
 
				-		ret = 0;
			
 
				+		ret = false;
			
 
				 	else if (x->done != UINT_MAX)
			
 
				 		x->done--;
			
 
				 	spin_unlock_irqrestore(&x->wait.lock, flags);
			
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5,37 +5,11 @@
 
				  *
			
 
				  *  Copyright (C) 1991-2002  Linus Torvalds
			
 
				  */
			
 
				-#include <linux/sched.h>
			
 
				-#include <linux/sched/clock.h>
			
 
				-#include <uapi/linux/sched/types.h>
			
 
				-#include <linux/sched/loadavg.h>
			
 
				-#include <linux/sched/hotplug.h>
			
 
				-#include <linux/wait_bit.h>
			
 
				-#include <linux/cpuset.h>
			
 
				-#include <linux/delayacct.h>
			
 
				-#include <linux/init_task.h>
			
 
				-#include <linux/context_tracking.h>
			
 
				-#include <linux/rcupdate_wait.h>
			
 
				-#include <linux/compat.h>
			
 
				-
			
 
				-#include <linux/blkdev.h>
			
 
				-#include <linux/kprobes.h>
			
 
				-#include <linux/mmu_context.h>
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/nmi.h>
			
 
				-#include <linux/prefetch.h>
			
 
				-#include <linux/profile.h>
			
 
				-#include <linux/security.h>
			
 
				-#include <linux/syscalls.h>
			
 
				-#include <linux/sched/isolation.h>
			
 
				+#include "sched.h"
			
 
				 
			
 
				 #include <asm/switch_to.h>
			
 
				 #include <asm/tlb.h>
			
 
				-#ifdef CONFIG_PARAVIRT
			
 
				-#include <asm/paravirt.h>
			
 
				-#endif
			
 
				 
			
 
				-#include "sched.h"
			
 
				 #include "../workqueue_internal.h"
			
 
				 #include "../smpboot.h"
			
 
				 
			
@@ -135,7 +109,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 
				 		 *					[L] ->on_rq
			
 
				 		 *	RELEASE (rq->lock)
			
 
				 		 *
			
 
				-		 * If we observe the old cpu in task_rq_lock, the acquire of
			
 
				+		 * If we observe the old CPU in task_rq_lock, the acquire of
			
 
				 		 * the old rq->lock will fully serialize against the stores.
			
 
				 		 *
			
 
				 		 * If we observe the new CPU in task_rq_lock, the acquire will
			
@@ -333,7 +307,7 @@ void hrtick_start(struct rq *rq, u64 delay)
 
				 }
			
 
				 #endif /* CONFIG_SMP */
			
 
				 
			
 
				-static void init_rq_hrtick(struct rq *rq)
			
 
				+static void hrtick_rq_init(struct rq *rq)
			
 
				 {
			
 
				 #ifdef CONFIG_SMP
			
 
				 	rq->hrtick_csd_pending = 0;
			
@@ -351,7 +325,7 @@ static inline void hrtick_clear(struct rq *rq)
 
				 {
			
 
				 }
			
 
				 
			
 
				-static inline void init_rq_hrtick(struct rq *rq)
			
 
				+static inline void hrtick_rq_init(struct rq *rq)
			
 
				 {
			
 
				 }
			
 
				 #endif	/* CONFIG_SCHED_HRTICK */
			
@@ -609,7 +583,7 @@ static inline bool got_nohz_idle_kick(void)
 
				 {
			
 
				 	int cpu = smp_processor_id();
			
 
				 
			
 
				-	if (!test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
			
 
				+	if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK))
			
 
				 		return false;
			
 
				 
			
 
				 	if (idle_cpu(cpu) && !need_resched())
			
@@ -619,7 +593,7 @@ static inline bool got_nohz_idle_kick(void)
 
				 	 * We can't run Idle Load Balance on this CPU for this time so we
			
 
				 	 * cancel it and clear NOHZ_BALANCE_KICK
			
 
				 	 */
			
 
				-	clear_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
			
 
				+	atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
			
 
				 	return false;
			
 
				 }
			
 
				 
			
@@ -1457,7 +1431,7 @@ EXPORT_SYMBOL_GPL(kick_process);
 
				  *
			
 
				  *  - cpu_active must be a subset of cpu_online
			
 
				  *
			
 
				- *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
			
 
				+ *  - on CPU-up we allow per-CPU kthreads on the online && !active CPU,
			
 
				  *    see __set_cpus_allowed_ptr(). At this point the newly online
			
 
				  *    CPU isn't yet part of the sched domains, and balancing will not
			
 
				  *    see it.
			
@@ -2488,17 +2462,17 @@ void wake_up_new_task(struct task_struct *p)
 
				 
			
 
				 #ifdef CONFIG_PREEMPT_NOTIFIERS
			
 
				 
			
 
				-static struct static_key preempt_notifier_key = STATIC_KEY_INIT_FALSE;
			
 
				+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key);
			
 
				 
			
 
				 void preempt_notifier_inc(void)
			
 
				 {
			
 
				-	static_key_slow_inc(&preempt_notifier_key);
			
 
				+	static_branch_inc(&preempt_notifier_key);
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(preempt_notifier_inc);
			
 
				 
			
 
				 void preempt_notifier_dec(void)
			
 
				 {
			
 
				-	static_key_slow_dec(&preempt_notifier_key);
			
 
				+	static_branch_dec(&preempt_notifier_key);
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(preempt_notifier_dec);
			
 
				 
			
@@ -2508,7 +2482,7 @@ EXPORT_SYMBOL_GPL(preempt_notifier_dec);
 
				  */
			
 
				 void preempt_notifier_register(struct preempt_notifier *notifier)
			
 
				 {
			
 
				-	if (!static_key_false(&preempt_notifier_key))
			
 
				+	if (!static_branch_unlikely(&preempt_notifier_key))
			
 
				 		WARN(1, "registering preempt_notifier while notifiers disabled\n");
			
 
				 
			
 
				 	hlist_add_head(&notifier->link, &current->preempt_notifiers);
			
@@ -2537,7 +2511,7 @@ static void __fire_sched_in_preempt_notifiers(struct task_struct *curr)
 
				 
			
 
				 static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr)
			
 
				 {
			
 
				-	if (static_key_false(&preempt_notifier_key))
			
 
				+	if (static_branch_unlikely(&preempt_notifier_key))
			
 
				 		__fire_sched_in_preempt_notifiers(curr);
			
 
				 }
			
 
				 
			
@@ -2555,7 +2529,7 @@ static __always_inline void
 
				 fire_sched_out_preempt_notifiers(struct task_struct *curr,
			
 
				 				 struct task_struct *next)
			
 
				 {
			
 
				-	if (static_key_false(&preempt_notifier_key))
			
 
				+	if (static_branch_unlikely(&preempt_notifier_key))
			
 
				 		__fire_sched_out_preempt_notifiers(curr, next);
			
 
				 }
			
 
				 
			
@@ -2629,6 +2603,18 @@ static inline void finish_lock_switch(struct rq *rq)
 
				 	raw_spin_unlock_irq(&rq->lock);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * NOP if the arch has not defined these:
			
 
				+ */
			
 
				+
			
 
				+#ifndef prepare_arch_switch
			
 
				+# define prepare_arch_switch(next)	do { } while (0)
			
 
				+#endif
			
 
				+
			
 
				+#ifndef finish_arch_post_lock_switch
			
 
				+# define finish_arch_post_lock_switch()	do { } while (0)
			
 
				+#endif
			
 
				+
			
 
				 /**
			
 
				  * prepare_task_switch - prepare to switch tasks
			
 
				  * @rq: the runqueue preparing to switch
			
@@ -3037,7 +3023,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 
				 
			
 
				 #if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
			
 
				 	/*
			
 
				-	 * 64-bit doesn't need locks to atomically read a 64bit value.
			
 
				+	 * 64-bit doesn't need locks to atomically read a 64-bit value.
			
 
				 	 * So we have a optimization chance when the task's delta_exec is 0.
			
 
				 	 * Reading ->on_cpu is racy, but this is ok.
			
 
				 	 *
			
@@ -3096,35 +3082,99 @@ void scheduler_tick(void)
 
				 	rq->idle_balance = idle_cpu(cpu);
			
 
				 	trigger_load_balance(rq);
			
 
				 #endif
			
 
				-	rq_last_tick_reset(rq);
			
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_NO_HZ_FULL
			
 
				-/**
			
 
				- * scheduler_tick_max_deferment
			
 
				- *
			
 
				- * Keep at least one tick per second when a single
			
 
				- * active task is running because the scheduler doesn't
			
 
				- * yet completely support full dynticks environment.
			
 
				- *
			
 
				- * This makes sure that uptime, CFS vruntime, load
			
 
				- * balancing, etc... continue to move forward, even
			
 
				- * with a very low granularity.
			
 
				- *
			
 
				- * Return: Maximum deferment in nanoseconds.
			
 
				- */
			
 
				-u64 scheduler_tick_max_deferment(void)
			
 
				+
			
 
				+struct tick_work {
			
 
				+	int			cpu;
			
 
				+	struct delayed_work	work;
			
 
				+};
			
 
				+
			
 
				+static struct tick_work __percpu *tick_work_cpu;
			
 
				+
			
 
				+static void sched_tick_remote(struct work_struct *work)
			
 
				 {
			
 
				-	struct rq *rq = this_rq();
			
 
				-	unsigned long next, now = READ_ONCE(jiffies);
			
 
				+	struct delayed_work *dwork = to_delayed_work(work);
			
 
				+	struct tick_work *twork = container_of(dwork, struct tick_work, work);
			
 
				+	int cpu = twork->cpu;
			
 
				+	struct rq *rq = cpu_rq(cpu);
			
 
				+	struct rq_flags rf;
			
 
				 
			
 
				-	next = rq->last_sched_tick + HZ;
			
 
				+	/*
			
 
				+	 * Handle the tick only if it appears the remote CPU is running in full
			
 
				+	 * dynticks mode. The check is racy by nature, but missing a tick or
			
 
				+	 * having one too many is no big deal because the scheduler tick updates
			
 
				+	 * statistics and checks timeslices in a time-independent way, regardless
			
 
				+	 * of when exactly it is running.
			
 
				+	 */
			
 
				+	if (!idle_cpu(cpu) && tick_nohz_tick_stopped_cpu(cpu)) {
			
 
				+		struct task_struct *curr;
			
 
				+		u64 delta;
			
 
				 
			
 
				-	if (time_before_eq(next, now))
			
 
				-		return 0;
			
 
				+		rq_lock_irq(rq, &rf);
			
 
				+		update_rq_clock(rq);
			
 
				+		curr = rq->curr;
			
 
				+		delta = rq_clock_task(rq) - curr->se.exec_start;
			
 
				 
			
 
				-	return jiffies_to_nsecs(next - now);
			
 
				+		/*
			
 
				+		 * Make sure the next tick runs within a reasonable
			
 
				+		 * amount of time.
			
 
				+		 */
			
 
				+		WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
			
 
				+		curr->sched_class->task_tick(rq, curr, 0);
			
 
				+		rq_unlock_irq(rq, &rf);
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Run the remote tick once per second (1Hz). This arbitrary
			
 
				+	 * frequency is low enough to avoid overload but high enough
			
 
				+	 * to keep scheduler internal stats reasonably up to date.
			
 
				+	 */
			
 
				+	queue_delayed_work(system_unbound_wq, dwork, HZ);
			
 
				 }
			
 
				+
			
 
				+static void sched_tick_start(int cpu)
			
 
				+{
			
 
				+	struct tick_work *twork;
			
 
				+
			
 
				+	if (housekeeping_cpu(cpu, HK_FLAG_TICK))
			
 
				+		return;
			
 
				+
			
 
				+	WARN_ON_ONCE(!tick_work_cpu);
			
 
				+
			
 
				+	twork = per_cpu_ptr(tick_work_cpu, cpu);
			
 
				+	twork->cpu = cpu;
			
 
				+	INIT_DELAYED_WORK(&twork->work, sched_tick_remote);
			
 
				+	queue_delayed_work(system_unbound_wq, &twork->work, HZ);
			
 
				+}
			
 
				+
			
 
				+#ifdef CONFIG_HOTPLUG_CPU
			
 
				+static void sched_tick_stop(int cpu)
			
 
				+{
			
 
				+	struct tick_work *twork;
			
 
				+
			
 
				+	if (housekeeping_cpu(cpu, HK_FLAG_TICK))
			
 
				+		return;
			
 
				+
			
 
				+	WARN_ON_ONCE(!tick_work_cpu);
			
 
				+
			
 
				+	twork = per_cpu_ptr(tick_work_cpu, cpu);
			
 
				+	cancel_delayed_work_sync(&twork->work);
			
 
				+}
			
 
				+#endif /* CONFIG_HOTPLUG_CPU */
			
 
				+
			
 
				+int __init sched_tick_offload_init(void)
			
 
				+{
			
 
				+	tick_work_cpu = alloc_percpu(struct tick_work);
			
 
				+	BUG_ON(!tick_work_cpu);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+#else /* !CONFIG_NO_HZ_FULL */
			
 
				+static inline void sched_tick_start(int cpu) { }
			
 
				+static inline void sched_tick_stop(int cpu) { }
			
 
				 #endif
			
 
				 
			
 
				 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
			
@@ -5786,6 +5836,7 @@ int sched_cpu_starting(unsigned int cpu)
 
				 {
			
 
				 	set_cpu_rq_start_time(cpu);
			
 
				 	sched_rq_cpu_starting(cpu);
			
 
				+	sched_tick_start(cpu);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -5797,6 +5848,7 @@ int sched_cpu_dying(unsigned int cpu)
 
				 
			
 
				 	/* Handle pending wakeups and then migrate everything off */
			
 
				 	sched_ttwu_pending();
			
 
				+	sched_tick_stop(cpu);
			
 
				 
			
 
				 	rq_lock_irqsave(rq, &rf);
			
 
				 	if (rq->rd) {
			
@@ -5809,7 +5861,7 @@ int sched_cpu_dying(unsigned int cpu)
 
				 
			
 
				 	calc_load_migrate(rq);
			
 
				 	update_max_interval();
			
 
				-	nohz_balance_exit_idle(cpu);
			
 
				+	nohz_balance_exit_idle(rq);
			
 
				 	hrtick_clear(rq);
			
 
				 	return 0;
			
 
				 }
			
@@ -6022,13 +6074,11 @@ void __init sched_init(void)
 
				 		rq_attach_root(rq, &def_root_domain);
			
 
				 #ifdef CONFIG_NO_HZ_COMMON
			
 
				 		rq->last_load_update_tick = jiffies;
			
 
				-		rq->nohz_flags = 0;
			
 
				-#endif
			
 
				-#ifdef CONFIG_NO_HZ_FULL
			
 
				-		rq->last_sched_tick = 0;
			
 
				+		rq->last_blocked_load_update_tick = jiffies;
			
 
				+		atomic_set(&rq->nohz_flags, 0);
			
 
				 #endif
			
 
				 #endif /* CONFIG_SMP */
			
 
				-		init_rq_hrtick(rq);
			
 
				+		hrtick_rq_init(rq);
			
 
				 		atomic_set(&rq->nr_iowait, 0);
			
 
				 	}
			
 
				 
			
@@ -7027,3 +7077,5 @@ const u32 sched_prio_to_wmult[40] = {
 
				  /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
			
 
				  /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
			
 
				 };
			
 
				+
			
 
				+#undef CREATE_TRACE_POINTS
			
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -1,24 +1,13 @@
 
				 // SPDX-License-Identifier: GPL-2.0
			
 
				-#include <linux/cgroup.h>
			
 
				-#include <linux/slab.h>
			
 
				-#include <linux/percpu.h>
			
 
				-#include <linux/spinlock.h>
			
 
				-#include <linux/cpumask.h>
			
 
				-#include <linux/seq_file.h>
			
 
				-#include <linux/rcupdate.h>
			
 
				-#include <linux/kernel_stat.h>
			
 
				-#include <linux/err.h>
			
 
				-
			
 
				-#include "sched.h"
			
 
				-
			
 
				 /*
			
 
				  * CPU accounting code for task groups.
			
 
				  *
			
 
				  * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
			
 
				  * (balbir@in.ibm.com).
			
 
				  */
			
 
				+#include "sched.h"
			
 
				 
			
 
				-/* Time spent by the tasks of the cpu accounting group executing in ... */
			
 
				+/* Time spent by the tasks of the CPU accounting group executing in ... */
			
 
				 enum cpuacct_stat_index {
			
 
				 	CPUACCT_STAT_USER,	/* ... user mode */
			
 
				 	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */
			
@@ -35,12 +24,12 @@ struct cpuacct_usage {
 
				 	u64	usages[CPUACCT_STAT_NSTATS];
			
 
				 };
			
 
				 
			
 
				-/* track cpu usage of a group of tasks and its child groups */
			
 
				+/* Track CPU usage of a group of tasks and its child groups */
			
 
				 struct cpuacct {
			
 
				-	struct cgroup_subsys_state css;
			
 
				-	/* cpuusage holds pointer to a u64-type object on every cpu */
			
 
				-	struct cpuacct_usage __percpu *cpuusage;
			
 
				-	struct kernel_cpustat __percpu *cpustat;
			
 
				+	struct cgroup_subsys_state	css;
			
 
				+	/* cpuusage holds pointer to a u64-type object on every CPU */
			
 
				+	struct cpuacct_usage __percpu	*cpuusage;
			
 
				+	struct kernel_cpustat __percpu	*cpustat;
			
 
				 };
			
 
				 
			
 
				 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
			
@@ -48,7 +37,7 @@ static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
 
				 	return css ? container_of(css, struct cpuacct, css) : NULL;
			
 
				 }
			
 
				 
			
 
				-/* return cpu accounting group to which this task belongs */
			
 
				+/* Return CPU accounting group to which this task belongs */
			
 
				 static inline struct cpuacct *task_ca(struct task_struct *tsk)
			
 
				 {
			
 
				 	return css_ca(task_css(tsk, cpuacct_cgrp_id));
			
@@ -65,7 +54,7 @@ static struct cpuacct root_cpuacct = {
 
				 	.cpuusage	= &root_cpuacct_cpuusage,
			
 
				 };
			
 
				 
			
 
				-/* create a new cpu accounting group */
			
 
				+/* Create a new CPU accounting group */
			
 
				 static struct cgroup_subsys_state *
			
 
				 cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
			
 
				 {
			
@@ -96,7 +85,7 @@ out:
 
				 	return ERR_PTR(-ENOMEM);
			
 
				 }
			
 
				 
			
 
				-/* destroy an existing cpu accounting group */
			
 
				+/* Destroy an existing CPU accounting group */
			
 
				 static void cpuacct_css_free(struct cgroup_subsys_state *css)
			
 
				 {
			
 
				 	struct cpuacct *ca = css_ca(css);
			
@@ -162,7 +151,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-/* return total cpu usage (in nanoseconds) of a group */
			
 
				+/* Return total CPU usage (in nanoseconds) of a group */
			
 
				 static u64 __cpuusage_read(struct cgroup_subsys_state *css,
			
 
				 			   enum cpuacct_stat_index index)
			
 
				 {
			
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -10,11 +10,7 @@
 
				  *  as published by the Free Software Foundation; version 2
			
 
				  *  of the License.
			
 
				  */
			
 
				-
			
 
				-#include <linux/gfp.h>
			
 
				-#include <linux/kernel.h>
			
 
				-#include <linux/slab.h>
			
 
				-#include "cpudeadline.h"
			
 
				+#include "sched.h"
			
 
				 
			
 
				 static inline int parent(int i)
			
 
				 {
			
@@ -42,8 +38,9 @@ static void cpudl_heapify_down(struct cpudl *cp, int idx)
 
				 		return;
			
 
				 
			
 
				 	/* adapted from lib/prio_heap.c */
			
 
				-	while(1) {
			
 
				+	while (1) {
			
 
				 		u64 largest_dl;
			
 
				+
			
 
				 		l = left_child(idx);
			
 
				 		r = right_child(idx);
			
 
				 		largest = idx;
			
@@ -131,6 +128,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 
				 		return 1;
			
 
				 	} else {
			
 
				 		int best_cpu = cpudl_maximum(cp);
			
 
				+
			
 
				 		WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
			
 
				 
			
 
				 		if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
			
@@ -145,9 +143,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * cpudl_clear - remove a cpu from the cpudl max-heap
			
 
				+ * cpudl_clear - remove a CPU from the cpudl max-heap
			
 
				  * @cp: the cpudl max-heap context
			
 
				- * @cpu: the target cpu
			
 
				+ * @cpu: the target CPU
			
 
				  *
			
 
				  * Notes: assumes cpu_rq(cpu)->lock is locked
			
 
				  *
			
@@ -186,8 +184,8 @@ void cpudl_clear(struct cpudl *cp, int cpu)
 
				 /*
			
 
				  * cpudl_set - update the cpudl max-heap
			
 
				  * @cp: the cpudl max-heap context
			
 
				- * @cpu: the target cpu
			
 
				- * @dl: the new earliest deadline for this cpu
			
 
				+ * @cpu: the target CPU
			
 
				+ * @dl: the new earliest deadline for this CPU
			
 
				  *
			
 
				  * Notes: assumes cpu_rq(cpu)->lock is locked
			
 
				  *
			
@@ -205,6 +203,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
 
				 	old_idx = cp->elements[cpu].idx;
			
 
				 	if (old_idx == IDX_INVALID) {
			
 
				 		int new_idx = cp->size++;
			
 
				+
			
 
				 		cp->elements[new_idx].dl = dl;
			
 
				 		cp->elements[new_idx].cpu = cpu;
			
 
				 		cp->elements[cpu].idx = new_idx;
			
@@ -221,7 +220,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
 
				 /*
			
 
				  * cpudl_set_freecpu - Set the cpudl.free_cpus
			
 
				  * @cp: the cpudl max-heap context
			
 
				- * @cpu: rd attached cpu
			
 
				+ * @cpu: rd attached CPU
			
 
				  */
			
 
				 void cpudl_set_freecpu(struct cpudl *cp, int cpu)
			
 
				 {
			
@@ -231,7 +230,7 @@ void cpudl_set_freecpu(struct cpudl *cp, int cpu)
 
				 /*
			
 
				  * cpudl_clear_freecpu - Clear the cpudl.free_cpus
			
 
				  * @cp: the cpudl max-heap context
			
 
				- * @cpu: rd attached cpu
			
 
				+ * @cpu: rd attached CPU
			
 
				  */
			
 
				 void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
			
 
				 {
			
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -1,35 +1,26 @@
 
				 /* SPDX-License-Identifier: GPL-2.0 */
			
 
				-#ifndef _LINUX_CPUDL_H
			
 
				-#define _LINUX_CPUDL_H
			
 
				 
			
 
				-#include <linux/sched.h>
			
 
				-#include <linux/sched/deadline.h>
			
 
				-
			
 
				-#define IDX_INVALID     -1
			
 
				+#define IDX_INVALID		-1
			
 
				 
			
 
				 struct cpudl_item {
			
 
				-	u64 dl;
			
 
				-	int cpu;
			
 
				-	int idx;
			
 
				+	u64			dl;
			
 
				+	int			cpu;
			
 
				+	int			idx;
			
 
				 };
			
 
				 
			
 
				 struct cpudl {
			
 
				-	raw_spinlock_t lock;
			
 
				-	int size;
			
 
				-	cpumask_var_t free_cpus;
			
 
				-	struct cpudl_item *elements;
			
 
				+	raw_spinlock_t		lock;
			
 
				+	int			size;
			
 
				+	cpumask_var_t		free_cpus;
			
 
				+	struct cpudl_item	*elements;
			
 
				 };
			
 
				 
			
 
				-
			
 
				 #ifdef CONFIG_SMP
			
 
				-int cpudl_find(struct cpudl *cp, struct task_struct *p,
			
 
				-	       struct cpumask *later_mask);
			
 
				+int  cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask);
			
 
				 void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
			
 
				 void cpudl_clear(struct cpudl *cp, int cpu);
			
 
				-int cpudl_init(struct cpudl *cp);
			
 
				+int  cpudl_init(struct cpudl *cp);
			
 
				 void cpudl_set_freecpu(struct cpudl *cp, int cpu);
			
 
				 void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
			
 
				 void cpudl_cleanup(struct cpudl *cp);
			
 
				 #endif /* CONFIG_SMP */
			
 
				-
			
 
				-#endif /* _LINUX_CPUDL_H */
			
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -8,7 +8,6 @@
 
				  * it under the terms of the GNU General Public License version 2 as
			
 
				  * published by the Free Software Foundation.
			
 
				  */
			
 
				-
			
 
				 #include "sched.h"
			
 
				 
			
 
				 DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
			
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -11,61 +11,56 @@
 
				 
			
 
				 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
			
 
				 
			
 
				-#include <linux/cpufreq.h>
			
 
				-#include <linux/kthread.h>
			
 
				-#include <uapi/linux/sched/types.h>
			
 
				-#include <linux/slab.h>
			
 
				-#include <trace/events/power.h>
			
 
				-
			
 
				 #include "sched.h"
			
 
				 
			
 
				+#include <trace/events/power.h>
			
 
				+
			
 
				 struct sugov_tunables {
			
 
				-	struct gov_attr_set attr_set;
			
 
				-	unsigned int rate_limit_us;
			
 
				+	struct gov_attr_set	attr_set;
			
 
				+	unsigned int		rate_limit_us;
			
 
				 };
			
 
				 
			
 
				 struct sugov_policy {
			
 
				-	struct cpufreq_policy *policy;
			
 
				-
			
 
				-	struct sugov_tunables *tunables;
			
 
				-	struct list_head tunables_hook;
			
 
				-
			
 
				-	raw_spinlock_t update_lock;  /* For shared policies */
			
 
				-	u64 last_freq_update_time;
			
 
				-	s64 freq_update_delay_ns;
			
 
				-	unsigned int next_freq;
			
 
				-	unsigned int cached_raw_freq;
			
 
				-
			
 
				-	/* The next fields are only needed if fast switch cannot be used. */
			
 
				-	struct irq_work irq_work;
			
 
				-	struct kthread_work work;
			
 
				-	struct mutex work_lock;
			
 
				-	struct kthread_worker worker;
			
 
				-	struct task_struct *thread;
			
 
				-	bool work_in_progress;
			
 
				-
			
 
				-	bool need_freq_update;
			
 
				+	struct cpufreq_policy	*policy;
			
 
				+
			
 
				+	struct sugov_tunables	*tunables;
			
 
				+	struct list_head	tunables_hook;
			
 
				+
			
 
				+	raw_spinlock_t		update_lock;	/* For shared policies */
			
 
				+	u64			last_freq_update_time;
			
 
				+	s64			freq_update_delay_ns;
			
 
				+	unsigned int		next_freq;
			
 
				+	unsigned int		cached_raw_freq;
			
 
				+
			
 
				+	/* The next fields are only needed if fast switch cannot be used: */
			
 
				+	struct			irq_work irq_work;
			
 
				+	struct			kthread_work work;
			
 
				+	struct			mutex work_lock;
			
 
				+	struct			kthread_worker worker;
			
 
				+	struct task_struct	*thread;
			
 
				+	bool			work_in_progress;
			
 
				+
			
 
				+	bool			need_freq_update;
			
 
				 };
			
 
				 
			
 
				 struct sugov_cpu {
			
 
				-	struct update_util_data update_util;
			
 
				-	struct sugov_policy *sg_policy;
			
 
				-	unsigned int cpu;
			
 
				+	struct update_util_data	update_util;
			
 
				+	struct sugov_policy	*sg_policy;
			
 
				+	unsigned int		cpu;
			
 
				 
			
 
				-	bool iowait_boost_pending;
			
 
				-	unsigned int iowait_boost;
			
 
				-	unsigned int iowait_boost_max;
			
 
				+	bool			iowait_boost_pending;
			
 
				+	unsigned int		iowait_boost;
			
 
				+	unsigned int		iowait_boost_max;
			
 
				 	u64 last_update;
			
 
				 
			
 
				-	/* The fields below are only needed when sharing a policy. */
			
 
				-	unsigned long util_cfs;
			
 
				-	unsigned long util_dl;
			
 
				-	unsigned long max;
			
 
				-	unsigned int flags;
			
 
				+	/* The fields below are only needed when sharing a policy: */
			
 
				+	unsigned long		util_cfs;
			
 
				+	unsigned long		util_dl;
			
 
				+	unsigned long		max;
			
 
				 
			
 
				-	/* The field below is for single-CPU policies only. */
			
 
				+	/* The field below is for single-CPU policies only: */
			
 
				 #ifdef CONFIG_NO_HZ_COMMON
			
 
				-	unsigned long saved_idle_calls;
			
 
				+	unsigned long		saved_idle_calls;
			
 
				 #endif
			
 
				 };
			
 
				 
			
@@ -79,9 +74,9 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 
				 
			
 
				 	/*
			
 
				 	 * Since cpufreq_update_util() is called with rq->lock held for
			
 
				-	 * the @target_cpu, our per-cpu data is fully serialized.
			
 
				+	 * the @target_cpu, our per-CPU data is fully serialized.
			
 
				 	 *
			
 
				-	 * However, drivers cannot in general deal with cross-cpu
			
 
				+	 * However, drivers cannot in general deal with cross-CPU
			
 
				 	 * requests, so while get_next_freq() will work, our
			
 
				 	 * sugov_update_commit() call may not for the fast switching platforms.
			
 
				 	 *
			
@@ -111,6 +106,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 
				 	}
			
 
				 
			
 
				 	delta_ns = time - sg_policy->last_freq_update_time;
			
 
				+
			
 
				 	return delta_ns >= sg_policy->freq_update_delay_ns;
			
 
				 }
			
 
				 
			
@@ -186,17 +182,28 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
 
				 
			
 
				 static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
			
 
				 {
			
 
				+	struct rq *rq = cpu_rq(sg_cpu->cpu);
			
 
				+	unsigned long util;
			
 
				+
			
 
				+	if (rq->rt.rt_nr_running) {
			
 
				+		util = sg_cpu->max;
			
 
				+	} else {
			
 
				+		util = sg_cpu->util_dl;
			
 
				+		if (rq->cfs.h_nr_running)
			
 
				+			util += sg_cpu->util_cfs;
			
 
				+	}
			
 
				+
			
 
				 	/*
			
 
				 	 * Ideally we would like to set util_dl as min/guaranteed freq and
			
 
				 	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
			
 
				 	 * ready for such an interface. So, we only do the latter for now.
			
 
				 	 */
			
 
				-	return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
			
 
				+	return min(util, sg_cpu->max);
			
 
				 }
			
 
				 
			
 
				-static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
			
 
				+static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
			
 
				 {
			
 
				-	if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
			
 
				+	if (flags & SCHED_CPUFREQ_IOWAIT) {
			
 
				 		if (sg_cpu->iowait_boost_pending)
			
 
				 			return;
			
 
				 
			
@@ -260,43 +267,51 @@ static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
 
				 static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
			
 
				 #endif /* CONFIG_NO_HZ_COMMON */
			
 
				 
			
 
				+/*
			
 
				+ * Make sugov_should_update_freq() ignore the rate limit when DL
			
 
				+ * has increased the utilization.
			
 
				+ */
			
 
				+static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
			
 
				+{
			
 
				+	if (cpu_util_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->util_dl)
			
 
				+		sg_policy->need_freq_update = true;
			
 
				+}
			
 
				+
			
 
				 static void sugov_update_single(struct update_util_data *hook, u64 time,
			
 
				 				unsigned int flags)
			
 
				 {
			
 
				 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
			
 
				 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
			
 
				-	struct cpufreq_policy *policy = sg_policy->policy;
			
 
				 	unsigned long util, max;
			
 
				 	unsigned int next_f;
			
 
				 	bool busy;
			
 
				 
			
 
				-	sugov_set_iowait_boost(sg_cpu, time);
			
 
				+	sugov_set_iowait_boost(sg_cpu, time, flags);
			
 
				 	sg_cpu->last_update = time;
			
 
				 
			
 
				+	ignore_dl_rate_limit(sg_cpu, sg_policy);
			
 
				+
			
 
				 	if (!sugov_should_update_freq(sg_policy, time))
			
 
				 		return;
			
 
				 
			
 
				 	busy = sugov_cpu_is_busy(sg_cpu);
			
 
				 
			
 
				-	if (flags & SCHED_CPUFREQ_RT) {
			
 
				-		next_f = policy->cpuinfo.max_freq;
			
 
				-	} else {
			
 
				-		sugov_get_util(sg_cpu);
			
 
				-		max = sg_cpu->max;
			
 
				-		util = sugov_aggregate_util(sg_cpu);
			
 
				-		sugov_iowait_boost(sg_cpu, &util, &max);
			
 
				-		next_f = get_next_freq(sg_policy, util, max);
			
 
				-		/*
			
 
				-		 * Do not reduce the frequency if the CPU has not been idle
			
 
				-		 * recently, as the reduction is likely to be premature then.
			
 
				-		 */
			
 
				-		if (busy && next_f < sg_policy->next_freq) {
			
 
				-			next_f = sg_policy->next_freq;
			
 
				+	sugov_get_util(sg_cpu);
			
 
				+	max = sg_cpu->max;
			
 
				+	util = sugov_aggregate_util(sg_cpu);
			
 
				+	sugov_iowait_boost(sg_cpu, &util, &max);
			
 
				+	next_f = get_next_freq(sg_policy, util, max);
			
 
				+	/*
			
 
				+	 * Do not reduce the frequency if the CPU has not been idle
			
 
				+	 * recently, as the reduction is likely to be premature then.
			
 
				+	 */
			
 
				+	if (busy && next_f < sg_policy->next_freq) {
			
 
				+		next_f = sg_policy->next_freq;
			
 
				 
			
 
				-			/* Reset cached freq as next_freq has changed */
			
 
				-			sg_policy->cached_raw_freq = 0;
			
 
				-		}
			
 
				+		/* Reset cached freq as next_freq has changed */
			
 
				+		sg_policy->cached_raw_freq = 0;
			
 
				 	}
			
 
				+
			
 
				 	sugov_update_commit(sg_policy, time, next_f);
			
 
				 }
			
 
				 
			
@@ -312,6 +327,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 
				 		unsigned long j_util, j_max;
			
 
				 		s64 delta_ns;
			
 
				 
			
 
				+		sugov_get_util(j_sg_cpu);
			
 
				+
			
 
				 		/*
			
 
				 		 * If the CFS CPU utilization was last updated before the
			
 
				 		 * previous frequency update and the time elapsed between the
			
@@ -325,28 +342,22 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 
				 		if (delta_ns > TICK_NSEC) {
			
 
				 			j_sg_cpu->iowait_boost = 0;
			
 
				 			j_sg_cpu->iowait_boost_pending = false;
			
 
				-			j_sg_cpu->util_cfs = 0;
			
 
				-			if (j_sg_cpu->util_dl == 0)
			
 
				-				continue;
			
 
				 		}
			
 
				-		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
			
 
				-			return policy->cpuinfo.max_freq;
			
 
				 
			
 
				 		j_max = j_sg_cpu->max;
			
 
				 		j_util = sugov_aggregate_util(j_sg_cpu);
			
 
				+		sugov_iowait_boost(j_sg_cpu, &j_util, &j_max);
			
 
				 		if (j_util * max > j_max * util) {
			
 
				 			util = j_util;
			
 
				 			max = j_max;
			
 
				 		}
			
 
				-
			
 
				-		sugov_iowait_boost(j_sg_cpu, &util, &max);
			
 
				 	}
			
 
				 
			
 
				 	return get_next_freq(sg_policy, util, max);
			
 
				 }
			
 
				 
			
 
				-static void sugov_update_shared(struct update_util_data *hook, u64 time,
			
 
				-				unsigned int flags)
			
 
				+static void
			
 
				+sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
			
 
				 {
			
 
				 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
			
 
				 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
			
@@ -354,18 +365,13 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 
				 
			
 
				 	raw_spin_lock(&sg_policy->update_lock);
			
 
				 
			
 
				-	sugov_get_util(sg_cpu);
			
 
				-	sg_cpu->flags = flags;
			
 
				-
			
 
				-	sugov_set_iowait_boost(sg_cpu, time);
			
 
				+	sugov_set_iowait_boost(sg_cpu, time, flags);
			
 
				 	sg_cpu->last_update = time;
			
 
				 
			
 
				-	if (sugov_should_update_freq(sg_policy, time)) {
			
 
				-		if (flags & SCHED_CPUFREQ_RT)
			
 
				-			next_f = sg_policy->policy->cpuinfo.max_freq;
			
 
				-		else
			
 
				-			next_f = sugov_next_freq_shared(sg_cpu, time);
			
 
				+	ignore_dl_rate_limit(sg_cpu, sg_policy);
			
 
				 
			
 
				+	if (sugov_should_update_freq(sg_policy, time)) {
			
 
				+		next_f = sugov_next_freq_shared(sg_cpu, time);
			
 
				 		sugov_update_commit(sg_policy, time, next_f);
			
 
				 	}
			
 
				 
			
@@ -423,8 +429,8 @@ static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
 
				 	return sprintf(buf, "%u\n", tunables->rate_limit_us);
			
 
				 }
			
 
				 
			
 
				-static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
			
 
				-				   size_t count)
			
 
				+static ssize_t
			
 
				+rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
			
 
				 {
			
 
				 	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
			
 
				 	struct sugov_policy *sg_policy;
			
@@ -479,11 +485,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
 
				 {
			
 
				 	struct task_struct *thread;
			
 
				 	struct sched_attr attr = {
			
 
				-		.size = sizeof(struct sched_attr),
			
 
				-		.sched_policy = SCHED_DEADLINE,
			
 
				-		.sched_flags = SCHED_FLAG_SUGOV,
			
 
				-		.sched_nice = 0,
			
 
				-		.sched_priority = 0,
			
 
				+		.size		= sizeof(struct sched_attr),
			
 
				+		.sched_policy	= SCHED_DEADLINE,
			
 
				+		.sched_flags	= SCHED_FLAG_SUGOV,
			
 
				+		.sched_nice	= 0,
			
 
				+		.sched_priority	= 0,
			
 
				 		/*
			
 
				 		 * Fake (unused) bandwidth; workaround to "fix"
			
 
				 		 * priority inheritance.
			
@@ -663,21 +669,20 @@ static int sugov_start(struct cpufreq_policy *policy)
 
				 	struct sugov_policy *sg_policy = policy->governor_data;
			
 
				 	unsigned int cpu;
			
 
				 
			
 
				-	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
			
 
				-	sg_policy->last_freq_update_time = 0;
			
 
				-	sg_policy->next_freq = UINT_MAX;
			
 
				-	sg_policy->work_in_progress = false;
			
 
				-	sg_policy->need_freq_update = false;
			
 
				-	sg_policy->cached_raw_freq = 0;
			
 
				+	sg_policy->freq_update_delay_ns	= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
			
 
				+	sg_policy->last_freq_update_time	= 0;
			
 
				+	sg_policy->next_freq			= UINT_MAX;
			
 
				+	sg_policy->work_in_progress		= false;
			
 
				+	sg_policy->need_freq_update		= false;
			
 
				+	sg_policy->cached_raw_freq		= 0;
			
 
				 
			
 
				 	for_each_cpu(cpu, policy->cpus) {
			
 
				 		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
			
 
				 
			
 
				 		memset(sg_cpu, 0, sizeof(*sg_cpu));
			
 
				-		sg_cpu->cpu = cpu;
			
 
				-		sg_cpu->sg_policy = sg_policy;
			
 
				-		sg_cpu->flags = 0;
			
 
				-		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
			
 
				+		sg_cpu->cpu			= cpu;
			
 
				+		sg_cpu->sg_policy		= sg_policy;
			
 
				+		sg_cpu->iowait_boost_max	= policy->cpuinfo.max_freq;
			
 
				 	}
			
 
				 
			
 
				 	for_each_cpu(cpu, policy->cpus) {
			
@@ -721,14 +726,14 @@ static void sugov_limits(struct cpufreq_policy *policy)
 
				 }
			
 
				 
			
 
				 static struct cpufreq_governor schedutil_gov = {
			
 
				-	.name = "schedutil",
			
 
				-	.owner = THIS_MODULE,
			
 
				-	.dynamic_switching = true,
			
 
				-	.init = sugov_init,
			
 
				-	.exit = sugov_exit,
			
 
				-	.start = sugov_start,
			
 
				-	.stop = sugov_stop,
			
 
				-	.limits = sugov_limits,
			
 
				+	.name			= "schedutil",
			
 
				+	.owner			= THIS_MODULE,
			
 
				+	.dynamic_switching	= true,
			
 
				+	.init			= sugov_init,
			
 
				+	.exit			= sugov_exit,
			
 
				+	.start			= sugov_start,
			
 
				+	.stop			= sugov_stop,
			
 
				+	.limits			= sugov_limits,
			
 
				 };
			
 
				 
			
 
				 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
			
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -14,7 +14,7 @@
 
				  *
			
 
				  *  going from the lowest priority to the highest.  CPUs in the INVALID state
			
 
				  *  are not eligible for routing.  The system maintains this state with
			
 
				- *  a 2 dimensional bitmap (the first for priority class, the second for cpus
			
 
				+ *  a 2 dimensional bitmap (the first for priority class, the second for CPUs
			
 
				  *  in that class).  Therefore a typical application without affinity
			
 
				  *  restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
			
 
				  *  searches).  For tasks with affinity restrictions, the algorithm has a
			
@@ -26,12 +26,7 @@
 
				  *  as published by the Free Software Foundation; version 2
			
 
				  *  of the License.
			
 
				  */
			
 
				-
			
 
				-#include <linux/gfp.h>
			
 
				-#include <linux/sched.h>
			
 
				-#include <linux/sched/rt.h>
			
 
				-#include <linux/slab.h>
			
 
				-#include "cpupri.h"
			
 
				+#include "sched.h"
			
 
				 
			
 
				 /* Convert between a 140 based task->prio, and our 102 based cpupri */
			
 
				 static int convert_prio(int prio)
			
@@ -128,9 +123,9 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 
				 }
			
 
				 
			
 
				 /**
			
 
				- * cpupri_set - update the cpu priority setting
			
 
				+ * cpupri_set - update the CPU priority setting
			
 
				  * @cp: The cpupri context
			
 
				- * @cpu: The target cpu
			
 
				+ * @cpu: The target CPU
			
 
				  * @newpri: The priority (INVALID-RT99) to assign to this CPU
			
 
				  *
			
 
				  * Note: Assumes cpu_rq(cpu)->lock is locked
			
@@ -151,7 +146,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
				 		return;
			
 
				 
			
 
				 	/*
			
 
				-	 * If the cpu was currently mapped to a different value, we
			
 
				+	 * If the CPU was currently mapped to a different value, we
			
 
				 	 * need to map it to the new value then remove the old value.
			
 
				 	 * Note, we must add the new value first, otherwise we risk the
			
 
				 	 * cpu being missed by the priority loop in cpupri_find.
			
--- a/kernel/sched/cpupri.h
+++ b/kernel/sched/cpupri.h
@@ -1,32 +1,25 @@
 
				 /* SPDX-License-Identifier: GPL-2.0 */
			
 
				-#ifndef _LINUX_CPUPRI_H
			
 
				-#define _LINUX_CPUPRI_H
			
 
				-
			
 
				-#include <linux/sched.h>
			
 
				 
			
 
				 #define CPUPRI_NR_PRIORITIES	(MAX_RT_PRIO + 2)
			
 
				 
			
 
				-#define CPUPRI_INVALID -1
			
 
				-#define CPUPRI_IDLE     0
			
 
				-#define CPUPRI_NORMAL   1
			
 
				+#define CPUPRI_INVALID		-1
			
 
				+#define CPUPRI_IDLE		 0
			
 
				+#define CPUPRI_NORMAL		 1
			
 
				 /* values 2-101 are RT priorities 0-99 */
			
 
				 
			
 
				 struct cpupri_vec {
			
 
				-	atomic_t	count;
			
 
				-	cpumask_var_t	mask;
			
 
				+	atomic_t		count;
			
 
				+	cpumask_var_t		mask;
			
 
				 };
			
 
				 
			
 
				 struct cpupri {
			
 
				-	struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
			
 
				-	int *cpu_to_pri;
			
 
				+	struct cpupri_vec	pri_to_cpu[CPUPRI_NR_PRIORITIES];
			
 
				+	int			*cpu_to_pri;
			
 
				 };
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				-int  cpupri_find(struct cpupri *cp,
			
 
				-		 struct task_struct *p, struct cpumask *lowest_mask);
			
 
				+int  cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask);
			
 
				 void cpupri_set(struct cpupri *cp, int cpu, int pri);
			
 
				-int cpupri_init(struct cpupri *cp);
			
 
				+int  cpupri_init(struct cpupri *cp);
			
 
				 void cpupri_cleanup(struct cpupri *cp);
			
 
				 #endif
			
 
				-
			
 
				-#endif /* _LINUX_CPUPRI_H */
			
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -1,10 +1,6 @@
 
				-#include <linux/export.h>
			
 
				-#include <linux/sched.h>
			
 
				-#include <linux/tsacct_kern.h>
			
 
				-#include <linux/kernel_stat.h>
			
 
				-#include <linux/static_key.h>
			
 
				-#include <linux/context_tracking.h>
			
 
				-#include <linux/sched/cputime.h>
			
 
				+/*
			
 
				+ * CPU time accounting (user, system, guest, steal and idle time)
			
 
				+ */
			
 
				 #include "sched.h"
			
 
				 
			
 
				 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
			
@@ -113,9 +109,9 @@ static inline void task_group_account_field(struct task_struct *p, int index,
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Account user cpu time to a process.
			
 
				- * @p: the process that the cpu time gets accounted to
			
 
				- * @cputime: the cpu time spent in user space since the last update
			
 
				+ * Account user CPU time to a process.
			
 
				+ * @p: the process that the CPU time gets accounted to
			
 
				+ * @cputime: the CPU time spent in user space since the last update
			
 
				  */
			
 
				 void account_user_time(struct task_struct *p, u64 cputime)
			
 
				 {
			
@@ -135,9 +131,9 @@ void account_user_time(struct task_struct *p, u64 cputime)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Account guest cpu time to a process.
			
 
				- * @p: the process that the cpu time gets accounted to
			
 
				- * @cputime: the cpu time spent in virtual machine since the last update
			
 
				+ * Account guest CPU time to a process.
			
 
				+ * @p: the process that the CPU time gets accounted to
			
 
				+ * @cputime: the CPU time spent in virtual machine since the last update
			
 
				  */
			
 
				 void account_guest_time(struct task_struct *p, u64 cputime)
			
 
				 {
			
@@ -159,9 +155,9 @@ void account_guest_time(struct task_struct *p, u64 cputime)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Account system cpu time to a process and desired cpustat field
			
 
				- * @p: the process that the cpu time gets accounted to
			
 
				- * @cputime: the cpu time spent in kernel space since the last update
			
 
				+ * Account system CPU time to a process and desired cpustat field
			
 
				+ * @p: the process that the CPU time gets accounted to
			
 
				+ * @cputime: the CPU time spent in kernel space since the last update
			
 
				  * @index: pointer to cpustat field that has to be updated
			
 
				  */
			
 
				 void account_system_index_time(struct task_struct *p,
			
@@ -179,10 +175,10 @@ void account_system_index_time(struct task_struct *p,
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Account system cpu time to a process.
			
 
				- * @p: the process that the cpu time gets accounted to
			
 
				+ * Account system CPU time to a process.
			
 
				+ * @p: the process that the CPU time gets accounted to
			
 
				  * @hardirq_offset: the offset to subtract from hardirq_count()
			
 
				- * @cputime: the cpu time spent in kernel space since the last update
			
 
				+ * @cputime: the CPU time spent in kernel space since the last update
			
 
				  */
			
 
				 void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
			
 
				 {
			
@@ -205,7 +201,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
 
				 
			
 
				 /*
			
 
				  * Account for involuntary wait time.
			
 
				- * @cputime: the cpu time spent in involuntary wait
			
 
				+ * @cputime: the CPU time spent in involuntary wait
			
 
				  */
			
 
				 void account_steal_time(u64 cputime)
			
 
				 {
			
@@ -216,7 +212,7 @@ void account_steal_time(u64 cputime)
 
				 
			
 
				 /*
			
 
				  * Account for idle time.
			
 
				- * @cputime: the cpu time spent in idle wait
			
 
				+ * @cputime: the CPU time spent in idle wait
			
 
				  */
			
 
				 void account_idle_time(u64 cputime)
			
 
				 {
			
@@ -338,7 +334,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
				 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
			
 
				 /*
			
 
				  * Account a tick to a process and cpustat
			
 
				- * @p: the process that the cpu time gets accounted to
			
 
				+ * @p: the process that the CPU time gets accounted to
			
 
				  * @user_tick: is the tick from userspace
			
 
				  * @rq: the pointer to rq
			
 
				  *
			
@@ -400,17 +396,16 @@ static void irqtime_account_idle_ticks(int ticks)
 
				 	irqtime_account_process_tick(current, 0, rq, ticks);
			
 
				 }
			
 
				 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
			
 
				-static inline void irqtime_account_idle_ticks(int ticks) {}
			
 
				+static inline void irqtime_account_idle_ticks(int ticks) { }
			
 
				 static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
			
 
				-						struct rq *rq, int nr_ticks) {}
			
 
				+						struct rq *rq, int nr_ticks) { }
			
 
				 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
			
 
				 
			
 
				 /*
			
 
				  * Use precise platform statistics if available:
			
 
				  */
			
 
				 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
			
 
				-
			
 
				-#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
			
 
				+# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
			
 
				 void vtime_common_task_switch(struct task_struct *prev)
			
 
				 {
			
 
				 	if (is_idle_task(prev))
			
@@ -421,8 +416,7 @@ void vtime_common_task_switch(struct task_struct *prev)
 
				 	vtime_flush(prev);
			
 
				 	arch_vtime_task_switch(prev);
			
 
				 }
			
 
				-#endif
			
 
				-
			
 
				+# endif
			
 
				 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
			
 
				 
			
 
				 
			
@@ -469,10 +463,12 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 
				 	*ut = cputime.utime;
			
 
				 	*st = cputime.stime;
			
 
				 }
			
 
				-#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
			
 
				+
			
 
				+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */
			
 
				+
			
 
				 /*
			
 
				- * Account a single tick of cpu time.
			
 
				- * @p: the process that the cpu time gets accounted to
			
 
				+ * Account a single tick of CPU time.
			
 
				+ * @p: the process that the CPU time gets accounted to
			
 
				  * @user_tick: indicates if the tick is a user or a system tick
			
 
				  */
			
 
				 void account_process_tick(struct task_struct *p, int user_tick)
			
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -17,9 +17,6 @@
 
				  */
			
 
				 #include "sched.h"
			
 
				 
			
 
				-#include <linux/slab.h>
			
 
				-#include <uapi/linux/sched/types.h>
			
 
				-
			
 
				 struct dl_bandwidth def_dl_bandwidth;
			
 
				 
			
 
				 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
			
@@ -87,7 +84,7 @@ void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 
				 	SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
			
 
				 	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
			
 
				 	/* kick cpufreq (see the comment in kernel/sched/sched.h). */
			
 
				-	cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
			
 
				+	cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
			
 
				 }
			
 
				 
			
 
				 static inline
			
@@ -101,7 +98,7 @@ void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 
				 	if (dl_rq->running_bw > old)
			
 
				 		dl_rq->running_bw = 0;
			
 
				 	/* kick cpufreq (see the comment in kernel/sched/sched.h). */
			
 
				-	cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
			
 
				+	cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
			
 
				 }
			
 
				 
			
 
				 static inline
			
@@ -514,7 +511,7 @@ static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
 
				 static void push_dl_tasks(struct rq *);
			
 
				 static void pull_dl_task(struct rq *);
			
 
				 
			
 
				-static inline void queue_push_tasks(struct rq *rq)
			
 
				+static inline void deadline_queue_push_tasks(struct rq *rq)
			
 
				 {
			
 
				 	if (!has_pushable_dl_tasks(rq))
			
 
				 		return;
			
@@ -522,7 +519,7 @@ static inline void queue_push_tasks(struct rq *rq)
 
				 	queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
			
 
				 }
			
 
				 
			
 
				-static inline void queue_pull_task(struct rq *rq)
			
 
				+static inline void deadline_queue_pull_task(struct rq *rq)
			
 
				 {
			
 
				 	queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
			
 
				 }
			
@@ -539,12 +536,12 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
 
				 
			
 
				 		/*
			
 
				 		 * If we cannot preempt any rq, fall back to pick any
			
 
				-		 * online cpu.
			
 
				+		 * online CPU:
			
 
				 		 */
			
 
				 		cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
			
 
				 		if (cpu >= nr_cpu_ids) {
			
 
				 			/*
			
 
				-			 * Fail to find any suitable cpu.
			
 
				+			 * Failed to find any suitable CPU.
			
 
				 			 * The task will never come back!
			
 
				 			 */
			
 
				 			BUG_ON(dl_bandwidth_enabled());
			
@@ -597,19 +594,18 @@ static inline void pull_dl_task(struct rq *rq)
 
				 {
			
 
				 }
			
 
				 
			
 
				-static inline void queue_push_tasks(struct rq *rq)
			
 
				+static inline void deadline_queue_push_tasks(struct rq *rq)
			
 
				 {
			
 
				 }
			
 
				 
			
 
				-static inline void queue_pull_task(struct rq *rq)
			
 
				+static inline void deadline_queue_pull_task(struct rq *rq)
			
 
				 {
			
 
				 }
			
 
				 #endif /* CONFIG_SMP */
			
 
				 
			
 
				 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
			
 
				 static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
			
 
				-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
			
 
				-				  int flags);
			
 
				+static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
			
 
				 
			
 
				 /*
			
 
				  * We are being explicitly informed that a new instance is starting,
			
@@ -1763,7 +1759,7 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
				 	if (hrtick_enabled(rq))
			
 
				 		start_hrtick_dl(rq, p);
			
 
				 
			
 
				-	queue_push_tasks(rq);
			
 
				+	deadline_queue_push_tasks(rq);
			
 
				 
			
 
				 	return p;
			
 
				 }
			
@@ -1776,6 +1772,14 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
 
				 		enqueue_pushable_dl_task(rq, p);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * scheduler tick hitting a task of our scheduling class.
			
 
				+ *
			
 
				+ * NOTE: This function can be called remotely by the tick offload that
			
 
				+ * goes along full dynticks. Therefore no local assumption can be made
			
 
				+ * and everything must be accessed through the @rq and @curr passed in
			
 
				+ * parameters.
			
 
				+ */
			
 
				 static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
			
 
				 {
			
 
				 	update_curr_dl(rq);
			
@@ -1865,7 +1869,7 @@ static int find_later_rq(struct task_struct *task)
 
				 
			
 
				 	/*
			
 
				 	 * We have to consider system topology and task affinity
			
 
				-	 * first, then we can look for a suitable cpu.
			
 
				+	 * first, then we can look for a suitable CPU.
			
 
				 	 */
			
 
				 	if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
			
 
				 		return -1;
			
@@ -1879,7 +1883,7 @@ static int find_later_rq(struct task_struct *task)
 
				 	 * Now we check how well this matches with task's
			
 
				 	 * affinity and system topology.
			
 
				 	 *
			
 
				-	 * The last cpu where the task run is our first
			
 
				+	 * The last CPU where the task run is our first
			
 
				 	 * guess, since it is most likely cache-hot there.
			
 
				 	 */
			
 
				 	if (cpumask_test_cpu(cpu, later_mask))
			
@@ -1909,9 +1913,9 @@ static int find_later_rq(struct task_struct *task)
 
				 			best_cpu = cpumask_first_and(later_mask,
			
 
				 							sched_domain_span(sd));
			
 
				 			/*
			
 
				-			 * Last chance: if a cpu being in both later_mask
			
 
				+			 * Last chance: if a CPU being in both later_mask
			
 
				 			 * and current sd span is valid, that becomes our
			
 
				-			 * choice. Of course, the latest possible cpu is
			
 
				+			 * choice. Of course, the latest possible CPU is
			
 
				 			 * already under consideration through later_mask.
			
 
				 			 */
			
 
				 			if (best_cpu < nr_cpu_ids) {
			
@@ -2067,7 +2071,7 @@ retry:
 
				 		if (task == next_task) {
			
 
				 			/*
			
 
				 			 * The task is still there. We don't try
			
 
				-			 * again, some other cpu will pull it when ready.
			
 
				+			 * again, some other CPU will pull it when ready.
			
 
				 			 */
			
 
				 			goto out;
			
 
				 		}
			
@@ -2300,12 +2304,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
 
				 	/*
			
 
				 	 * Since this might be the only -deadline task on the rq,
			
 
				 	 * this is the right place to try to pull some other one
			
 
				-	 * from an overloaded cpu, if any.
			
 
				+	 * from an overloaded CPU, if any.
			
 
				 	 */
			
 
				 	if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
			
 
				 		return;
			
 
				 
			
 
				-	queue_pull_task(rq);
			
 
				+	deadline_queue_pull_task(rq);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -2327,7 +2331,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 
				 	if (rq->curr != p) {
			
 
				 #ifdef CONFIG_SMP
			
 
				 		if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
			
 
				-			queue_push_tasks(rq);
			
 
				+			deadline_queue_push_tasks(rq);
			
 
				 #endif
			
 
				 		if (dl_task(rq->curr))
			
 
				 			check_preempt_curr_dl(rq, p, 0);
			
@@ -2352,7 +2356,7 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
 
				 		 * or lowering its prio, so...
			
 
				 		 */
			
 
				 		if (!rq->dl.overloaded)
			
 
				-			queue_pull_task(rq);
			
 
				+			deadline_queue_pull_task(rq);
			
 
				 
			
 
				 		/*
			
 
				 		 * If we now have a earlier deadline task than p,
			
@@ -2626,17 +2630,17 @@ void __dl_clear_params(struct task_struct *p)
 
				 {
			
 
				 	struct sched_dl_entity *dl_se = &p->dl;
			
 
				 
			
 
				-	dl_se->dl_runtime = 0;
			
 
				-	dl_se->dl_deadline = 0;
			
 
				-	dl_se->dl_period = 0;
			
 
				-	dl_se->flags = 0;
			
 
				-	dl_se->dl_bw = 0;
			
 
				-	dl_se->dl_density = 0;
			
 
				+	dl_se->dl_runtime		= 0;
			
 
				+	dl_se->dl_deadline		= 0;
			
 
				+	dl_se->dl_period		= 0;
			
 
				+	dl_se->flags			= 0;
			
 
				+	dl_se->dl_bw			= 0;
			
 
				+	dl_se->dl_density		= 0;
			
 
				 
			
 
				-	dl_se->dl_throttled = 0;
			
 
				-	dl_se->dl_yielded = 0;
			
 
				-	dl_se->dl_non_contending = 0;
			
 
				-	dl_se->dl_overrun = 0;
			
 
				+	dl_se->dl_throttled		= 0;
			
 
				+	dl_se->dl_yielded		= 0;
			
 
				+	dl_se->dl_non_contending	= 0;
			
 
				+	dl_se->dl_overrun		= 0;
			
 
				 }
			
 
				 
			
 
				 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
			
@@ -2655,21 +2659,22 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
 
				 #ifdef CONFIG_SMP
			
 
				 int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
			
 
				 {
			
 
				-	unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
			
 
				-							cs_cpus_allowed);
			
 
				+	unsigned int dest_cpu;
			
 
				 	struct dl_bw *dl_b;
			
 
				 	bool overflow;
			
 
				 	int cpus, ret;
			
 
				 	unsigned long flags;
			
 
				 
			
 
				+	dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
			
 
				+
			
 
				 	rcu_read_lock_sched();
			
 
				 	dl_b = dl_bw_of(dest_cpu);
			
 
				 	raw_spin_lock_irqsave(&dl_b->lock, flags);
			
 
				 	cpus = dl_bw_cpus(dest_cpu);
			
 
				 	overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
			
 
				-	if (overflow)
			
 
				+	if (overflow) {
			
 
				 		ret = -EBUSY;
			
 
				-	else {
			
 
				+	} else {
			
 
				 		/*
			
 
				 		 * We reserve space for this task in the destination
			
 
				 		 * root_domain, as we can't fail after this point.
			
@@ -2681,6 +2686,7 @@ int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allo
 
				 	}
			
 
				 	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
			
 
				 	rcu_read_unlock_sched();
			
 
				+
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -2701,6 +2707,7 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
 
				 		ret = 0;
			
 
				 	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
			
 
				 	rcu_read_unlock_sched();
			
 
				+
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -2718,6 +2725,7 @@ bool dl_cpu_busy(unsigned int cpu)
 
				 	overflow = __dl_overflow(dl_b, cpus, 0, 0);
			
 
				 	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
			
 
				 	rcu_read_unlock_sched();
			
 
				+
			
 
				 	return overflow;
			
 
				 }
			
 
				 #endif
			
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1,7 +1,7 @@
 
				 /*
			
 
				  * kernel/sched/debug.c
			
 
				  *
			
 
				- * Print the CFS rbtree
			
 
				+ * Print the CFS rbtree and other debugging details
			
 
				  *
			
 
				  * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
			
 
				  *
			
@@ -9,16 +9,6 @@
 
				  * it under the terms of the GNU General Public License version 2 as
			
 
				  * published by the Free Software Foundation.
			
 
				  */
			
 
				-
			
 
				-#include <linux/proc_fs.h>
			
 
				-#include <linux/sched/mm.h>
			
 
				-#include <linux/sched/task.h>
			
 
				-#include <linux/seq_file.h>
			
 
				-#include <linux/kallsyms.h>
			
 
				-#include <linux/utsname.h>
			
 
				-#include <linux/mempolicy.h>
			
 
				-#include <linux/debugfs.h>
			
 
				-
			
 
				 #include "sched.h"
			
 
				 
			
 
				 static DEFINE_SPINLOCK(sched_debug_lock);
			
@@ -274,34 +264,19 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
 
				 	if (table == NULL)
			
 
				 		return NULL;
			
 
				 
			
 
				-	set_table_entry(&table[0], "min_interval", &sd->min_interval,
			
 
				-		sizeof(long), 0644, proc_doulongvec_minmax, false);
			
 
				-	set_table_entry(&table[1], "max_interval", &sd->max_interval,
			
 
				-		sizeof(long), 0644, proc_doulongvec_minmax, false);
			
 
				-	set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
			
 
				-		sizeof(int), 0644, proc_dointvec_minmax, true);
			
 
				-	set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
			
 
				-		sizeof(int), 0644, proc_dointvec_minmax, true);
			
 
				-	set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
			
 
				-		sizeof(int), 0644, proc_dointvec_minmax, true);
			
 
				-	set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
			
 
				-		sizeof(int), 0644, proc_dointvec_minmax, true);
			
 
				-	set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
			
 
				-		sizeof(int), 0644, proc_dointvec_minmax, true);
			
 
				-	set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
			
 
				-		sizeof(int), 0644, proc_dointvec_minmax, false);
			
 
				-	set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
			
 
				-		sizeof(int), 0644, proc_dointvec_minmax, false);
			
 
				-	set_table_entry(&table[9], "cache_nice_tries",
			
 
				-		&sd->cache_nice_tries,
			
 
				-		sizeof(int), 0644, proc_dointvec_minmax, false);
			
 
				-	set_table_entry(&table[10], "flags", &sd->flags,
			
 
				-		sizeof(int), 0644, proc_dointvec_minmax, false);
			
 
				-	set_table_entry(&table[11], "max_newidle_lb_cost",
			
 
				-		&sd->max_newidle_lb_cost,
			
 
				-		sizeof(long), 0644, proc_doulongvec_minmax, false);
			
 
				-	set_table_entry(&table[12], "name", sd->name,
			
 
				-		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
			
 
				+	set_table_entry(&table[0] , "min_interval",	   &sd->min_interval,	     sizeof(long), 0644, proc_doulongvec_minmax, false);
			
 
				+	set_table_entry(&table[1] , "max_interval",	   &sd->max_interval,	     sizeof(long), 0644, proc_doulongvec_minmax, false);
			
 
				+	set_table_entry(&table[2] , "busy_idx",		   &sd->busy_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true );
			
 
				+	set_table_entry(&table[3] , "idle_idx",		   &sd->idle_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true );
			
 
				+	set_table_entry(&table[4] , "newidle_idx",	   &sd->newidle_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true );
			
 
				+	set_table_entry(&table[5] , "wake_idx",		   &sd->wake_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true );
			
 
				+	set_table_entry(&table[6] , "forkexec_idx",	   &sd->forkexec_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true );
			
 
				+	set_table_entry(&table[7] , "busy_factor",	   &sd->busy_factor,	     sizeof(int) , 0644, proc_dointvec_minmax,   false);
			
 
				+	set_table_entry(&table[8] , "imbalance_pct",	   &sd->imbalance_pct,	     sizeof(int) , 0644, proc_dointvec_minmax,   false);
			
 
				+	set_table_entry(&table[9] , "cache_nice_tries",	   &sd->cache_nice_tries,    sizeof(int) , 0644, proc_dointvec_minmax,   false);
			
 
				+	set_table_entry(&table[10], "flags",		   &sd->flags,		     sizeof(int) , 0644, proc_dointvec_minmax,   false);
			
 
				+	set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false);
			
 
				+	set_table_entry(&table[12], "name",		   sd->name,		CORENAME_MAX_SIZE, 0444, proc_dostring,		 false);
			
 
				 	/* &table[13] is terminator */
			
 
				 
			
 
				 	return table;
			
@@ -332,8 +307,8 @@ static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 
				 	return table;
			
 
				 }
			
 
				 
			
 
				-static cpumask_var_t sd_sysctl_cpus;
			
 
				-static struct ctl_table_header *sd_sysctl_header;
			
 
				+static cpumask_var_t		sd_sysctl_cpus;
			
 
				+static struct ctl_table_header	*sd_sysctl_header;
			
 
				 
			
 
				 void register_sched_domain_sysctl(void)
			
 
				 {
			
@@ -413,14 +388,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 
				 {
			
 
				 	struct sched_entity *se = tg->se[cpu];
			
 
				 
			
 
				-#define P(F) \
			
 
				-	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
			
 
				-#define P_SCHEDSTAT(F) \
			
 
				-	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)schedstat_val(F))
			
 
				-#define PN(F) \
			
 
				-	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
			
 
				-#define PN_SCHEDSTAT(F) \
			
 
				-	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
			
 
				+#define P(F)		SEQ_printf(m, "  .%-30s: %lld\n",	#F, (long long)F)
			
 
				+#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n",	#F, (long long)schedstat_val(F))
			
 
				+#define PN(F)		SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
			
 
				+#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
			
 
				 
			
 
				 	if (!se)
			
 
				 		return;
			
@@ -428,6 +399,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 
				 	PN(se->exec_start);
			
 
				 	PN(se->vruntime);
			
 
				 	PN(se->sum_exec_runtime);
			
 
				+
			
 
				 	if (schedstat_enabled()) {
			
 
				 		PN_SCHEDSTAT(se->statistics.wait_start);
			
 
				 		PN_SCHEDSTAT(se->statistics.sleep_start);
			
@@ -440,6 +412,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 
				 		PN_SCHEDSTAT(se->statistics.wait_sum);
			
 
				 		P_SCHEDSTAT(se->statistics.wait_count);
			
 
				 	}
			
 
				+
			
 
				 	P(se->load.weight);
			
 
				 	P(se->runnable_weight);
			
 
				 #ifdef CONFIG_SMP
			
@@ -464,6 +437,7 @@ static char *task_group_path(struct task_group *tg)
 
				 		return group_path;
			
 
				 
			
 
				 	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
			
 
				+
			
 
				 	return group_path;
			
 
				 }
			
 
				 #endif
			
@@ -569,6 +543,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
				 			cfs_rq->avg.runnable_load_avg);
			
 
				 	SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
			
 
				 			cfs_rq->avg.util_avg);
			
 
				+	SEQ_printf(m, "  .%-30s: %u\n", "util_est_enqueued",
			
 
				+			cfs_rq->avg.util_est.enqueued);
			
 
				 	SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg",
			
 
				 			cfs_rq->removed.load_avg);
			
 
				 	SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg",
			
@@ -804,9 +780,9 @@ void sysrq_sched_debug_show(void)
 
				 /*
			
 
				  * This itererator needs some explanation.
			
 
				  * It returns 1 for the header position.
			
 
				- * This means 2 is cpu 0.
			
 
				- * In a hotplugged system some cpus, including cpu 0, may be missing so we have
			
 
				- * to use cpumask_* to iterate over the cpus.
			
 
				+ * This means 2 is CPU 0.
			
 
				+ * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
			
 
				+ * to use cpumask_* to iterate over the CPUs.
			
 
				  */
			
 
				 static void *sched_debug_start(struct seq_file *file, loff_t *offset)
			
 
				 {
			
@@ -826,6 +802,7 @@ static void *sched_debug_start(struct seq_file *file, loff_t *offset)
 
				 
			
 
				 	if (n < nr_cpu_ids)
			
 
				 		return (void *)(unsigned long)(n + 2);
			
 
				+
			
 
				 	return NULL;
			
 
				 }
			
 
				 
			
@@ -840,10 +817,10 @@ static void sched_debug_stop(struct seq_file *file, void *data)
 
				 }
			
 
				 
			
 
				 static const struct seq_operations sched_debug_sops = {
			
 
				-	.start = sched_debug_start,
			
 
				-	.next = sched_debug_next,
			
 
				-	.stop = sched_debug_stop,
			
 
				-	.show = sched_debug_show,
			
 
				+	.start		= sched_debug_start,
			
 
				+	.next		= sched_debug_next,
			
 
				+	.stop		= sched_debug_stop,
			
 
				+	.show		= sched_debug_show,
			
 
				 };
			
 
				 
			
 
				 static int sched_debug_release(struct inode *inode, struct file *file)
			
@@ -881,14 +858,10 @@ static int __init init_sched_debug_procfs(void)
 
				 
			
 
				 __initcall(init_sched_debug_procfs);
			
 
				 
			
 
				-#define __P(F) \
			
 
				-	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
			
 
				-#define P(F) \
			
 
				-	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
			
 
				-#define __PN(F) \
			
 
				-	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
			
 
				-#define PN(F) \
			
 
				-	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
			
 
				+#define __P(F)	SEQ_printf(m, "%-45s:%21Ld\n",	     #F, (long long)F)
			
 
				+#define   P(F)	SEQ_printf(m, "%-45s:%21Ld\n",	     #F, (long long)p->F)
			
 
				+#define __PN(F)	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
			
 
				+#define   PN(F)	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
			
 
				 
			
 
				 
			
 
				 #ifdef CONFIG_NUMA_BALANCING
			
@@ -1023,6 +996,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 
				 	P(se.avg.runnable_load_avg);
			
 
				 	P(se.avg.util_avg);
			
 
				 	P(se.avg.last_update_time);
			
 
				+	P(se.avg.util_est.ewma);
			
 
				+	P(se.avg.util_est.enqueued);
			
 
				 #endif
			
 
				 	P(policy);
			
 
				 	P(prio);
			
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -20,25 +20,10 @@
 
				  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
			
 
				  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
			
 
				  */
			
 
				-
			
 
				-#include <linux/sched/mm.h>
			
 
				-#include <linux/sched/topology.h>
			
 
				-
			
 
				-#include <linux/latencytop.h>
			
 
				-#include <linux/cpumask.h>
			
 
				-#include <linux/cpuidle.h>
			
 
				-#include <linux/slab.h>
			
 
				-#include <linux/profile.h>
			
 
				-#include <linux/interrupt.h>
			
 
				-#include <linux/mempolicy.h>
			
 
				-#include <linux/migrate.h>
			
 
				-#include <linux/task_work.h>
			
 
				-#include <linux/sched/isolation.h>
			
 
				+#include "sched.h"
			
 
				 
			
 
				 #include <trace/events/sched.h>
			
 
				 
			
 
				-#include "sched.h"
			
 
				-
			
 
				 /*
			
 
				  * Targeted preemption latency for CPU-bound tasks:
			
 
				  *
			
@@ -103,7 +88,7 @@ const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				 /*
			
 
				- * For asym packing, by default the lower numbered cpu has higher priority.
			
 
				+ * For asym packing, by default the lower numbered CPU has higher priority.
			
 
				  */
			
 
				 int __weak arch_asym_cpu_priority(int cpu)
			
 
				 {
			
@@ -787,7 +772,7 @@ void post_init_entity_util_avg(struct sched_entity *se)
 
				 			 * For !fair tasks do:
			
 
				 			 *
			
 
				 			update_cfs_rq_load_avg(now, cfs_rq);
			
 
				-			attach_entity_load_avg(cfs_rq, se);
			
 
				+			attach_entity_load_avg(cfs_rq, se, 0);
			
 
				 			switched_from_fair(rq, p);
			
 
				 			 *
			
 
				 			 * such that the next switched_to_fair() has the
			
@@ -1181,7 +1166,7 @@ pid_t task_numa_group_id(struct task_struct *p)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * The averaged statistics, shared & private, memory & cpu,
			
 
				+ * The averaged statistics, shared & private, memory & CPU,
			
 
				  * occupy the first half of the array. The second half of the
			
 
				  * array is for current counters, which are averaged into the
			
 
				  * first set by task_numa_placement.
			
@@ -1587,7 +1572,7 @@ static void task_numa_compare(struct task_numa_env *env,
 
				 	 * be incurred if the tasks were swapped.
			
 
				 	 */
			
 
				 	if (cur) {
			
 
				-		/* Skip this swap candidate if cannot move to the source cpu */
			
 
				+		/* Skip this swap candidate if cannot move to the source CPU: */
			
 
				 		if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
			
 
				 			goto unlock;
			
 
				 
			
@@ -1631,7 +1616,7 @@ static void task_numa_compare(struct task_numa_env *env,
 
				 		goto balance;
			
 
				 	}
			
 
				 
			
 
				-	/* Balance doesn't matter much if we're running a task per cpu */
			
 
				+	/* Balance doesn't matter much if we're running a task per CPU: */
			
 
				 	if (imp > env->best_imp && src_rq->nr_running == 1 &&
			
 
				 			dst_rq->nr_running == 1)
			
 
				 		goto assign;
			
@@ -1676,7 +1661,7 @@ balance:
 
				 	 */
			
 
				 	if (!cur) {
			
 
				 		/*
			
 
				-		 * select_idle_siblings() uses an per-cpu cpumask that
			
 
				+		 * select_idle_siblings() uses a per-CPU cpumask that
			
 
				 		 * can be used from IRQ context.
			
 
				 		 */
			
 
				 		local_irq_disable();
			
@@ -1869,6 +1854,7 @@ static int task_numa_migrate(struct task_struct *p)
 
				 static void numa_migrate_preferred(struct task_struct *p)
			
 
				 {
			
 
				 	unsigned long interval = HZ;
			
 
				+	unsigned long numa_migrate_retry;
			
 
				 
			
 
				 	/* This task has no NUMA fault statistics yet */
			
 
				 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
			
@@ -1876,7 +1862,18 @@ static void numa_migrate_preferred(struct task_struct *p)
 
				 
			
 
				 	/* Periodically retry migrating the task to the preferred node */
			
 
				 	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
			
 
				-	p->numa_migrate_retry = jiffies + interval;
			
 
				+	numa_migrate_retry = jiffies + interval;
			
 
				+
			
 
				+	/*
			
 
				+	 * Check that the new retry threshold is after the current one. If
			
 
				+	 * the retry is in the future, it implies that wake_affine has
			
 
				+	 * temporarily asked NUMA balancing to backoff from placement.
			
 
				+	 */
			
 
				+	if (numa_migrate_retry > p->numa_migrate_retry)
			
 
				+		return;
			
 
				+
			
 
				+	/* Safe to try placing the task on the preferred node */
			
 
				+	p->numa_migrate_retry = numa_migrate_retry;
			
 
				 
			
 
				 	/* Success if task is already running on preferred CPU */
			
 
				 	if (task_node(p) == p->numa_preferred_nid)
			
@@ -2823,7 +2820,7 @@ void reweight_task(struct task_struct *p, int prio)
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				-# ifdef CONFIG_SMP
			
 
				+#ifdef CONFIG_SMP
			
 
				 /*
			
 
				  * All this does is approximate the hierarchical proportion which includes that
			
 
				  * global sum we all love to hate.
			
@@ -2974,7 +2971,7 @@ static long calc_group_runnable(struct cfs_rq *cfs_rq, long shares)
 
				 
			
 
				 	return clamp_t(long, runnable, MIN_SHARES, shares);
			
 
				 }
			
 
				-# endif /* CONFIG_SMP */
			
 
				+#endif /* CONFIG_SMP */
			
 
				 
			
 
				 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
			
 
				 
			
@@ -3012,11 +3009,11 @@ static inline void update_cfs_group(struct sched_entity *se)
 
				 }
			
 
				 #endif /* CONFIG_FAIR_GROUP_SCHED */
			
 
				 
			
 
				-static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
			
 
				+static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
			
 
				 {
			
 
				 	struct rq *rq = rq_of(cfs_rq);
			
 
				 
			
 
				-	if (&rq->cfs == cfs_rq) {
			
 
				+	if (&rq->cfs == cfs_rq || (flags & SCHED_CPUFREQ_MIGRATION)) {
			
 
				 		/*
			
 
				 		 * There are a few boundary cases this might miss but it should
			
 
				 		 * get called often enough that that should (hopefully) not be
			
@@ -3031,7 +3028,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 
				 		 *
			
 
				 		 * See cpu_util().
			
 
				 		 */
			
 
				-		cpufreq_update_util(rq, 0);
			
 
				+		cpufreq_update_util(rq, flags);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -3245,6 +3242,32 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna
 
				 	sa->util_avg = sa->util_sum / divider;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * When a task is dequeued, its estimated utilization should not be updated if
			
 
				+ * its util_avg has not been updated at least once.
			
 
				+ * This flag is used to synchronize util_avg updates with util_est updates.
			
 
				+ * We map this information into the LSB bit of the utilization saved at
			
 
				+ * dequeue time (i.e. util_est.enqueued).
			
 
				+ */
			
 
				+#define UTIL_AVG_UNCHANGED 0x1
			
 
				+
			
 
				+static inline void cfs_se_util_change(struct sched_avg *avg)
			
 
				+{
			
 
				+	unsigned int enqueued;
			
 
				+
			
 
				+	if (!sched_feat(UTIL_EST))
			
 
				+		return;
			
 
				+
			
 
				+	/* Avoid store if the flag has been already set */
			
 
				+	enqueued = avg->util_est.enqueued;
			
 
				+	if (!(enqueued & UTIL_AVG_UNCHANGED))
			
 
				+		return;
			
 
				+
			
 
				+	/* Reset flag to report util_avg has been updated */
			
 
				+	enqueued &= ~UTIL_AVG_UNCHANGED;
			
 
				+	WRITE_ONCE(avg->util_est.enqueued, enqueued);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * sched_entity:
			
 
				  *
			
@@ -3296,6 +3319,7 @@ __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entit
 
				 				cfs_rq->curr == se)) {
			
 
				 
			
 
				 		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
			
 
				+		cfs_se_util_change(&se->avg);
			
 
				 		return 1;
			
 
				 	}
			
 
				 
			
@@ -3350,7 +3374,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Called within set_task_rq() right before setting a task's cpu. The
			
 
				+ * Called within set_task_rq() right before setting a task's CPU. The
			
 
				  * caller only guarantees p->pi_lock is held; no other assumptions,
			
 
				  * including the state of rq->lock, should be made.
			
 
				  */
			
@@ -3529,7 +3553,7 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
 
				 
			
 
				 	/*
			
 
				 	 * runnable_sum can't be lower than running_sum
			
 
				-	 * As running sum is scale with cpu capacity wehreas the runnable sum
			
 
				+	 * As running sum is scaled with CPU capacity whereas the runnable sum
			
 
				 	 * is not we rescale running_sum 1st
			
 
				 	 */
			
 
				 	running_sum = se->avg.util_sum /
			
@@ -3689,7 +3713,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 
				 #endif
			
 
				 
			
 
				 	if (decayed)
			
 
				-		cfs_rq_util_change(cfs_rq);
			
 
				+		cfs_rq_util_change(cfs_rq, 0);
			
 
				 
			
 
				 	return decayed;
			
 
				 }
			
@@ -3702,7 +3726,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 
				  * Must call update_cfs_rq_load_avg() before this, since we rely on
			
 
				  * cfs_rq->avg.last_update_time being current.
			
 
				  */
			
 
				-static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
			
 
				+static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
			
 
				 {
			
 
				 	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
			
 
				 
			
@@ -3738,7 +3762,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 
				 
			
 
				 	add_tg_cfs_propagate(cfs_rq, se->avg.load_sum);
			
 
				 
			
 
				-	cfs_rq_util_change(cfs_rq);
			
 
				+	cfs_rq_util_change(cfs_rq, flags);
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -3757,7 +3781,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 
				 
			
 
				 	add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
			
 
				 
			
 
				-	cfs_rq_util_change(cfs_rq);
			
 
				+	cfs_rq_util_change(cfs_rq, 0);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -3787,7 +3811,14 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 
				 
			
 
				 	if (!se->avg.last_update_time && (flags & DO_ATTACH)) {
			
 
				 
			
 
				-		attach_entity_load_avg(cfs_rq, se);
			
 
				+		/*
			
 
				+		 * DO_ATTACH means we're here from enqueue_entity().
			
 
				+		 * !last_update_time means we've passed through
			
 
				+		 * migrate_task_rq_fair() indicating we migrated.
			
 
				+		 *
			
 
				+		 * IOW we're enqueueing a task on a new CPU.
			
 
				+		 */
			
 
				+		attach_entity_load_avg(cfs_rq, se, SCHED_CPUFREQ_MIGRATION);
			
 
				 		update_tg_load_avg(cfs_rq, 0);
			
 
				 
			
 
				 	} else if (decayed && (flags & UPDATE_TG))
			
@@ -3869,6 +3900,120 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
 
				 
			
 
				 static int idle_balance(struct rq *this_rq, struct rq_flags *rf);
			
 
				 
			
 
				+static inline unsigned long task_util(struct task_struct *p)
			
 
				+{
			
 
				+	return READ_ONCE(p->se.avg.util_avg);
			
 
				+}
			
 
				+
			
 
				+static inline unsigned long _task_util_est(struct task_struct *p)
			
 
				+{
			
 
				+	struct util_est ue = READ_ONCE(p->se.avg.util_est);
			
 
				+
			
 
				+	return max(ue.ewma, ue.enqueued);
			
 
				+}
			
 
				+
			
 
				+static inline unsigned long task_util_est(struct task_struct *p)
			
 
				+{
			
 
				+	return max(task_util(p), _task_util_est(p));
			
 
				+}
			
 
				+
			
 
				+static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
			
 
				+				    struct task_struct *p)
			
 
				+{
			
 
				+	unsigned int enqueued;
			
 
				+
			
 
				+	if (!sched_feat(UTIL_EST))
			
 
				+		return;
			
 
				+
			
 
				+	/* Update root cfs_rq's estimated utilization */
			
 
				+	enqueued  = cfs_rq->avg.util_est.enqueued;
			
 
				+	enqueued += (_task_util_est(p) | UTIL_AVG_UNCHANGED);
			
 
				+	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Check if a (signed) value is within a specified (unsigned) margin,
			
 
				+ * based on the observation that:
			
 
				+ *
			
 
				+ *     abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1)
			
 
				+ *
			
 
				+ * NOTE: this only works when value + margin < INT_MAX.
			
 
				+ */
			
 
				+static inline bool within_margin(int value, int margin)
			
 
				+{
			
 
				+	return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
			
 
				+{
			
 
				+	long last_ewma_diff;
			
 
				+	struct util_est ue;
			
 
				+
			
 
				+	if (!sched_feat(UTIL_EST))
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * Update root cfs_rq's estimated utilization
			
 
				+	 *
			
 
				+	 * If *p is the last task then the root cfs_rq's estimated utilization
			
 
				+	 * of a CPU is 0 by definition.
			
 
				+	 */
			
 
				+	ue.enqueued = 0;
			
 
				+	if (cfs_rq->nr_running) {
			
 
				+		ue.enqueued  = cfs_rq->avg.util_est.enqueued;
			
 
				+		ue.enqueued -= min_t(unsigned int, ue.enqueued,
			
 
				+				     (_task_util_est(p) | UTIL_AVG_UNCHANGED));
			
 
				+	}
			
 
				+	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
			
 
				+
			
 
				+	/*
			
 
				+	 * Skip update of task's estimated utilization when the task has not
			
 
				+	 * yet completed an activation, e.g. being migrated.
			
 
				+	 */
			
 
				+	if (!task_sleep)
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * If the PELT values haven't changed since enqueue time,
			
 
				+	 * skip the util_est update.
			
 
				+	 */
			
 
				+	ue = p->se.avg.util_est;
			
 
				+	if (ue.enqueued & UTIL_AVG_UNCHANGED)
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * Skip update of task's estimated utilization when its EWMA is
			
 
				+	 * already ~1% close to its last activation value.
			
 
				+	 */
			
 
				+	ue.enqueued = (task_util(p) | UTIL_AVG_UNCHANGED);
			
 
				+	last_ewma_diff = ue.enqueued - ue.ewma;
			
 
				+	if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100)))
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * Update Task's estimated utilization
			
 
				+	 *
			
 
				+	 * When *p completes an activation we can consolidate another sample
			
 
				+	 * of the task size. This is done by storing the current PELT value
			
 
				+	 * as ue.enqueued and by using this value to update the Exponential
			
 
				+	 * Weighted Moving Average (EWMA):
			
 
				+	 *
			
 
				+	 *  ewma(t) = w *  task_util(p) + (1-w) * ewma(t-1)
			
 
				+	 *          = w *  task_util(p) +         ewma(t-1)  - w * ewma(t-1)
			
 
				+	 *          = w * (task_util(p) -         ewma(t-1)) +     ewma(t-1)
			
 
				+	 *          = w * (      last_ewma_diff            ) +     ewma(t-1)
			
 
				+	 *          = w * (last_ewma_diff  +  ewma(t-1) / w)
			
 
				+	 *
			
 
				+	 * Where 'w' is the weight of new samples, which is configured to be
			
 
				+	 * 0.25, thus making w=1/4 ( >>= UTIL_EST_WEIGHT_SHIFT)
			
 
				+	 */
			
 
				+	ue.ewma <<= UTIL_EST_WEIGHT_SHIFT;
			
 
				+	ue.ewma  += last_ewma_diff;
			
 
				+	ue.ewma >>= UTIL_EST_WEIGHT_SHIFT;
			
 
				+	WRITE_ONCE(p->se.avg.util_est, ue);
			
 
				+}
			
 
				+
			
 
				 #else /* CONFIG_SMP */
			
 
				 
			
 
				 static inline int
			
@@ -3883,13 +4028,13 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 
				 
			
 
				 static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int not_used1)
			
 
				 {
			
 
				-	cfs_rq_util_change(cfs_rq);
			
 
				+	cfs_rq_util_change(cfs_rq, 0);
			
 
				 }
			
 
				 
			
 
				 static inline void remove_entity_load_avg(struct sched_entity *se) {}
			
 
				 
			
 
				 static inline void
			
 
				-attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
			
 
				+attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) {}
			
 
				 static inline void
			
 
				 detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
			
 
				 
			
@@ -3898,6 +4043,13 @@ static inline int idle_balance(struct rq *rq, struct rq_flags *rf)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static inline void
			
 
				+util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
			
 
				+
			
 
				+static inline void
			
 
				+util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p,
			
 
				+		 bool task_sleep) {}
			
 
				+
			
 
				 #endif /* CONFIG_SMP */
			
 
				 
			
 
				 static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
			
@@ -4676,7 +4828,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
				 	if (!se)
			
 
				 		add_nr_running(rq, task_delta);
			
 
				 
			
 
				-	/* determine whether we need to wake up potentially idle cpu */
			
 
				+	/* Determine whether we need to wake up potentially idle CPU: */
			
 
				 	if (rq->curr == rq->idle && rq->cfs.nr_running)
			
 
				 		resched_curr(rq);
			
 
				 }
			
@@ -5041,7 +5193,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Both these cpu hotplug callbacks race against unregister_fair_sched_group()
			
 
				+ * Both these CPU hotplug callbacks race against unregister_fair_sched_group()
			
 
				  *
			
 
				  * The race is harmless, since modifying bandwidth settings of unhooked group
			
 
				  * bits doesn't do much.
			
@@ -5086,7 +5238,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 
				 		 */
			
 
				 		cfs_rq->runtime_remaining = 1;
			
 
				 		/*
			
 
				-		 * Offline rq is schedulable till cpu is completely disabled
			
 
				+		 * Offline rq is schedulable till CPU is completely disabled
			
 
				 		 * in take_cpu_down(), so we prevent new cfs throttling here.
			
 
				 		 */
			
 
				 		cfs_rq->runtime_enabled = 0;
			
@@ -5245,6 +5397,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
				 	if (!se)
			
 
				 		add_nr_running(rq, 1);
			
 
				 
			
 
				+	util_est_enqueue(&rq->cfs, p);
			
 
				 	hrtick_update(rq);
			
 
				 }
			
 
				 
			
@@ -5304,6 +5457,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
				 	if (!se)
			
 
				 		sub_nr_running(rq, 1);
			
 
				 
			
 
				+	util_est_dequeue(&rq->cfs, p, task_sleep);
			
 
				 	hrtick_update(rq);
			
 
				 }
			
 
				 
			
@@ -5323,8 +5477,8 @@ DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
 
				  *
			
 
				  *   load' = (1 - 1/2^i) * load + (1/2^i) * cur_load
			
 
				  *
			
 
				- * If a cpu misses updates for n ticks (as it was idle) and update gets
			
 
				- * called on the n+1-th tick when cpu may be busy, then we have:
			
 
				+ * If a CPU misses updates for n ticks (as it was idle) and update gets
			
 
				+ * called on the n+1-th tick when CPU may be busy, then we have:
			
 
				  *
			
 
				  *   load_n   = (1 - 1/2^i)^n * load_0
			
 
				  *   load_n+1 = (1 - 1/2^i)   * load_n + (1/2^i) * cur_load
			
@@ -5379,6 +5533,15 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
 
				 	}
			
 
				 	return load;
			
 
				 }
			
 
				+
			
 
				+static struct {
			
 
				+	cpumask_var_t idle_cpus_mask;
			
 
				+	atomic_t nr_cpus;
			
 
				+	int has_blocked;		/* Idle CPUs have blocked load */
			
 
				+	unsigned long next_balance;     /* in jiffy units */
			
 
				+	unsigned long next_blocked;	/* Next update of blocked load in jiffies */
			
 
				+} nohz ____cacheline_aligned;
			
 
				+
			
 
				 #endif /* CONFIG_NO_HZ_COMMON */
			
 
				 
			
 
				 /**
			
@@ -5468,7 +5631,7 @@ static unsigned long weighted_cpuload(struct rq *rq)
 
				 #ifdef CONFIG_NO_HZ_COMMON
			
 
				 /*
			
 
				  * There is no sane way to deal with nohz on smp when using jiffies because the
			
 
				- * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
			
 
				+ * CPU doing the jiffies update might drift wrt the CPU doing the jiffy reading
			
 
				  * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
			
 
				  *
			
 
				  * Therefore we need to avoid the delta approach from the regular tick when
			
@@ -5579,7 +5742,7 @@ void cpu_load_update_active(struct rq *this_rq)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Return a low guess at the load of a migration-source cpu weighted
			
 
				+ * Return a low guess at the load of a migration-source CPU weighted
			
 
				  * according to the scheduling class and "nice" value.
			
 
				  *
			
 
				  * We want to under-estimate the load of migration sources, to
			
@@ -5597,7 +5760,7 @@ static unsigned long source_load(int cpu, int type)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Return a high guess at the load of a migration-target cpu weighted
			
 
				+ * Return a high guess at the load of a migration-target CPU weighted
			
 
				  * according to the scheduling class and "nice" value.
			
 
				  */
			
 
				 static unsigned long target_load(int cpu, int type)
			
@@ -5724,7 +5887,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 
				 	unsigned long task_load;
			
 
				 
			
 
				 	this_eff_load = target_load(this_cpu, sd->wake_idx);
			
 
				-	prev_eff_load = source_load(prev_cpu, sd->wake_idx);
			
 
				 
			
 
				 	if (sync) {
			
 
				 		unsigned long current_load = task_h_load(current);
			
@@ -5742,18 +5904,69 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 
				 		this_eff_load *= 100;
			
 
				 	this_eff_load *= capacity_of(prev_cpu);
			
 
				 
			
 
				+	prev_eff_load = source_load(prev_cpu, sd->wake_idx);
			
 
				 	prev_eff_load -= task_load;
			
 
				 	if (sched_feat(WA_BIAS))
			
 
				 		prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
			
 
				 	prev_eff_load *= capacity_of(this_cpu);
			
 
				 
			
 
				-	return this_eff_load <= prev_eff_load ? this_cpu : nr_cpumask_bits;
			
 
				+	/*
			
 
				+	 * If sync, adjust the weight of prev_eff_load such that if
			
 
				+	 * prev_eff == this_eff that select_idle_sibling() will consider
			
 
				+	 * stacking the wakee on top of the waker if no other CPU is
			
 
				+	 * idle.
			
 
				+	 */
			
 
				+	if (sync)
			
 
				+		prev_eff_load += 1;
			
 
				+
			
 
				+	return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
			
 
				+}
			
 
				+
			
 
				+#ifdef CONFIG_NUMA_BALANCING
			
 
				+static void
			
 
				+update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
			
 
				+{
			
 
				+	unsigned long interval;
			
 
				+
			
 
				+	if (!static_branch_likely(&sched_numa_balancing))
			
 
				+		return;
			
 
				+
			
 
				+	/* If balancing has no preference then continue gathering data */
			
 
				+	if (p->numa_preferred_nid == -1)
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * If the wakeup is not affecting locality then it is neutral from
			
 
				+	 * the perspective of NUMA balancing so continue gathering data.
			
 
				+	 */
			
 
				+	if (cpu_to_node(prev_cpu) == cpu_to_node(target))
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * Temporarily prevent NUMA balancing trying to place waker/wakee after
			
 
				+	 * wakee has been moved by wake_affine. This will potentially allow
			
 
				+	 * related tasks to converge and update their data placement. The
			
 
				+	 * 4 * numa_scan_period is to allow the two-pass filter to migrate
			
 
				+	 * hot data to the waker's node.
			
 
				+	 */
			
 
				+	interval = max(sysctl_numa_balancing_scan_delay,
			
 
				+			 p->numa_scan_period << 2);
			
 
				+	p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
			
 
				+
			
 
				+	interval = max(sysctl_numa_balancing_scan_delay,
			
 
				+			 current->numa_scan_period << 2);
			
 
				+	current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
			
 
				+}
			
 
				+#else
			
 
				+static void
			
 
				+update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
			
 
				+{
			
 
				 }
			
 
				+#endif
			
 
				 
			
 
				 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
			
 
				-		       int prev_cpu, int sync)
			
 
				+		       int this_cpu, int prev_cpu, int sync)
			
 
				 {
			
 
				-	int this_cpu = smp_processor_id();
			
 
				 	int target = nr_cpumask_bits;
			
 
				 
			
 
				 	if (sched_feat(WA_IDLE))
			
@@ -5766,12 +5979,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 
				 	if (target == nr_cpumask_bits)
			
 
				 		return prev_cpu;
			
 
				 
			
 
				+	update_wa_numa_placement(p, prev_cpu, target);
			
 
				 	schedstat_inc(sd->ttwu_move_affine);
			
 
				 	schedstat_inc(p->se.statistics.nr_wakeups_affine);
			
 
				 	return target;
			
 
				 }
			
 
				 
			
 
				-static inline unsigned long task_util(struct task_struct *p);
			
 
				 static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
			
 
				 
			
 
				 static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
			
@@ -5826,7 +6039,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
				 		max_spare_cap = 0;
			
 
				 
			
 
				 		for_each_cpu(i, sched_group_span(group)) {
			
 
				-			/* Bias balancing toward cpus of our domain */
			
 
				+			/* Bias balancing toward CPUs of our domain */
			
 
				 			if (local_group)
			
 
				 				load = source_load(i, load_idx);
			
 
				 			else
			
@@ -5856,7 +6069,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
				 			if (min_runnable_load > (runnable_load + imbalance)) {
			
 
				 				/*
			
 
				 				 * The runnable load is significantly smaller
			
 
				-				 * so we can pick this new cpu
			
 
				+				 * so we can pick this new CPU:
			
 
				 				 */
			
 
				 				min_runnable_load = runnable_load;
			
 
				 				min_avg_load = avg_load;
			
@@ -5865,7 +6078,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
				 				   (100*min_avg_load > imbalance_scale*avg_load)) {
			
 
				 				/*
			
 
				 				 * The runnable loads are close so take the
			
 
				-				 * blocked load into account through avg_load.
			
 
				+				 * blocked load into account through avg_load:
			
 
				 				 */
			
 
				 				min_avg_load = avg_load;
			
 
				 				idlest = group;
			
@@ -5903,6 +6116,18 @@ skip_spare:
 
				 	if (!idlest)
			
 
				 		return NULL;
			
 
				 
			
 
				+	/*
			
 
				+	 * When comparing groups across NUMA domains, it's possible for the
			
 
				+	 * local domain to be very lightly loaded relative to the remote
			
 
				+	 * domains but "imbalance" skews the comparison making remote CPUs
			
 
				+	 * look much more favourable. When considering cross-domain, add
			
 
				+	 * imbalance to the runnable load on the remote node and consider
			
 
				+	 * staying local.
			
 
				+	 */
			
 
				+	if ((sd->flags & SD_NUMA) &&
			
 
				+	    min_runnable_load + imbalance >= this_runnable_load)
			
 
				+		return NULL;
			
 
				+
			
 
				 	if (min_runnable_load > (this_runnable_load + imbalance))
			
 
				 		return NULL;
			
 
				 
			
@@ -5914,7 +6139,7 @@ skip_spare:
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * find_idlest_group_cpu - find the idlest cpu among the cpus in group.
			
 
				+ * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group.
			
 
				  */
			
 
				 static int
			
 
				 find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
			
@@ -5992,12 +6217,12 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 
				 
			
 
				 		new_cpu = find_idlest_group_cpu(group, p, cpu);
			
 
				 		if (new_cpu == cpu) {
			
 
				-			/* Now try balancing at a lower domain level of cpu */
			
 
				+			/* Now try balancing at a lower domain level of 'cpu': */
			
 
				 			sd = sd->child;
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-		/* Now try balancing at a lower domain level of new_cpu */
			
 
				+		/* Now try balancing at a lower domain level of 'new_cpu': */
			
 
				 		cpu = new_cpu;
			
 
				 		weight = sd->span_weight;
			
 
				 		sd = NULL;
			
@@ -6007,7 +6232,6 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 
				 			if (tmp->flags & sd_flag)
			
 
				 				sd = tmp;
			
 
				 		}
			
 
				-		/* while loop will break here if sd == NULL */
			
 
				 	}
			
 
				 
			
 
				 	return new_cpu;
			
@@ -6203,12 +6427,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 
				 		return target;
			
 
				 
			
 
				 	/*
			
 
				-	 * If the previous cpu is cache affine and idle, don't be stupid.
			
 
				+	 * If the previous CPU is cache affine and idle, don't be stupid:
			
 
				 	 */
			
 
				 	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
			
 
				 		return prev;
			
 
				 
			
 
				-	/* Check a recently used CPU as a potential idle candidate */
			
 
				+	/* Check a recently used CPU as a potential idle candidate: */
			
 
				 	recent_used_cpu = p->recent_used_cpu;
			
 
				 	if (recent_used_cpu != prev &&
			
 
				 	    recent_used_cpu != target &&
			
@@ -6217,7 +6441,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 
				 	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
			
 
				 		/*
			
 
				 		 * Replace recent_used_cpu with prev as it is a potential
			
 
				-		 * candidate for the next wake.
			
 
				+		 * candidate for the next wake:
			
 
				 		 */
			
 
				 		p->recent_used_cpu = prev;
			
 
				 		return recent_used_cpu;
			
@@ -6242,11 +6466,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 
				 	return target;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * cpu_util returns the amount of capacity of a CPU that is used by CFS
			
 
				- * tasks. The unit of the return value must be the one of capacity so we can
			
 
				- * compare the utilization with the capacity of the CPU that is available for
			
 
				- * CFS task (ie cpu_capacity).
			
 
				+/**
			
 
				+ * Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
			
 
				+ * @cpu: the CPU to get the utilization of
			
 
				+ *
			
 
				+ * The unit of the return value must be the one of capacity so we can compare
			
 
				+ * the utilization with the capacity of the CPU that is available for CFS task
			
 
				+ * (ie cpu_capacity).
			
 
				  *
			
 
				  * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
			
 
				  * recent utilization of currently non-runnable tasks on a CPU. It represents
			
@@ -6257,6 +6483,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 
				  * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
			
 
				  * the running time on this CPU scaled by capacity_curr.
			
 
				  *
			
 
				+ * The estimated utilization of a CPU is defined to be the maximum between its
			
 
				+ * cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
			
 
				+ * currently RUNNABLE on that CPU.
			
 
				+ * This allows to properly represent the expected utilization of a CPU which
			
 
				+ * has just got a big task running after a long sleep period. At the same time
			
 
				+ * however it preserves the benefits of the "blocked utilization" in
			
 
				+ * describing the potential for other tasks waking up on the same CPU.
			
 
				+ *
			
 
				  * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
			
 
				  * higher than capacity_orig because of unfortunate rounding in
			
 
				  * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
			
@@ -6267,36 +6501,77 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 
				  * available capacity. We allow utilization to overshoot capacity_curr (but not
			
 
				  * capacity_orig) as it useful for predicting the capacity required after task
			
 
				  * migrations (scheduler-driven DVFS).
			
 
				+ *
			
 
				+ * Return: the (estimated) utilization for the specified CPU
			
 
				  */
			
 
				-static unsigned long cpu_util(int cpu)
			
 
				+static inline unsigned long cpu_util(int cpu)
			
 
				 {
			
 
				-	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
			
 
				-	unsigned long capacity = capacity_orig_of(cpu);
			
 
				+	struct cfs_rq *cfs_rq;
			
 
				+	unsigned int util;
			
 
				 
			
 
				-	return (util >= capacity) ? capacity : util;
			
 
				-}
			
 
				+	cfs_rq = &cpu_rq(cpu)->cfs;
			
 
				+	util = READ_ONCE(cfs_rq->avg.util_avg);
			
 
				 
			
 
				-static inline unsigned long task_util(struct task_struct *p)
			
 
				-{
			
 
				-	return p->se.avg.util_avg;
			
 
				+	if (sched_feat(UTIL_EST))
			
 
				+		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
			
 
				+
			
 
				+	return min_t(unsigned long, util, capacity_orig_of(cpu));
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * cpu_util_wake: Compute cpu utilization with any contributions from
			
 
				+ * cpu_util_wake: Compute CPU utilization with any contributions from
			
 
				  * the waking task p removed.
			
 
				  */
			
 
				 static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
			
 
				 {
			
 
				-	unsigned long util, capacity;
			
 
				+	struct cfs_rq *cfs_rq;
			
 
				+	unsigned int util;
			
 
				 
			
 
				 	/* Task has no contribution or is new */
			
 
				-	if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
			
 
				+	if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
			
 
				 		return cpu_util(cpu);
			
 
				 
			
 
				-	capacity = capacity_orig_of(cpu);
			
 
				-	util = max_t(long, cpu_rq(cpu)->cfs.avg.util_avg - task_util(p), 0);
			
 
				+	cfs_rq = &cpu_rq(cpu)->cfs;
			
 
				+	util = READ_ONCE(cfs_rq->avg.util_avg);
			
 
				+
			
 
				+	/* Discount task's blocked util from CPU's util */
			
 
				+	util -= min_t(unsigned int, util, task_util(p));
			
 
				+
			
 
				+	/*
			
 
				+	 * Covered cases:
			
 
				+	 *
			
 
				+	 * a) if *p is the only task sleeping on this CPU, then:
			
 
				+	 *      cpu_util (== task_util) > util_est (== 0)
			
 
				+	 *    and thus we return:
			
 
				+	 *      cpu_util_wake = (cpu_util - task_util) = 0
			
 
				+	 *
			
 
				+	 * b) if other tasks are SLEEPING on this CPU, which is now exiting
			
 
				+	 *    IDLE, then:
			
 
				+	 *      cpu_util >= task_util
			
 
				+	 *      cpu_util > util_est (== 0)
			
 
				+	 *    and thus we discount *p's blocked utilization to return:
			
 
				+	 *      cpu_util_wake = (cpu_util - task_util) >= 0
			
 
				+	 *
			
 
				+	 * c) if other tasks are RUNNABLE on that CPU and
			
 
				+	 *      util_est > cpu_util
			
 
				+	 *    then we use util_est since it returns a more restrictive
			
 
				+	 *    estimation of the spare capacity on that CPU, by just
			
 
				+	 *    considering the expected utilization of tasks already
			
 
				+	 *    runnable on that CPU.
			
 
				+	 *
			
 
				+	 * Cases a) and b) are covered by the above code, while case c) is
			
 
				+	 * covered by the following code when estimated utilization is
			
 
				+	 * enabled.
			
 
				+	 */
			
 
				+	if (sched_feat(UTIL_EST))
			
 
				+		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
			
 
				 
			
 
				-	return (util >= capacity) ? capacity : util;
			
 
				+	/*
			
 
				+	 * Utilization (estimated) can exceed the CPU capacity, thus let's
			
 
				+	 * clamp to the maximum CPU capacity to ensure consistency with
			
 
				+	 * the cpu_util call.
			
 
				+	 */
			
 
				+	return min_t(unsigned long, util, capacity_orig_of(cpu));
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -6328,10 +6603,10 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 
				  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
			
 
				  * SD_BALANCE_FORK, or SD_BALANCE_EXEC.
			
 
				  *
			
 
				- * Balances load by selecting the idlest cpu in the idlest group, or under
			
 
				- * certain conditions an idle sibling cpu if the domain has SD_WAKE_AFFINE set.
			
 
				+ * Balances load by selecting the idlest CPU in the idlest group, or under
			
 
				+ * certain conditions an idle sibling CPU if the domain has SD_WAKE_AFFINE set.
			
 
				  *
			
 
				- * Returns the target cpu number.
			
 
				+ * Returns the target CPU number.
			
 
				  *
			
 
				  * preempt must be disabled.
			
 
				  */
			
@@ -6342,7 +6617,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
				 	int cpu = smp_processor_id();
			
 
				 	int new_cpu = prev_cpu;
			
 
				 	int want_affine = 0;
			
 
				-	int sync = wake_flags & WF_SYNC;
			
 
				+	int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
			
 
				 
			
 
				 	if (sd_flag & SD_BALANCE_WAKE) {
			
 
				 		record_wakee(p);
			
@@ -6356,7 +6631,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
				 			break;
			
 
				 
			
 
				 		/*
			
 
				-		 * If both cpu and prev_cpu are part of this domain,
			
 
				+		 * If both 'cpu' and 'prev_cpu' are part of this domain,
			
 
				 		 * cpu is a valid SD_WAKE_AFFINE target.
			
 
				 		 */
			
 
				 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
			
@@ -6376,7 +6651,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
				 		if (cpu == prev_cpu)
			
 
				 			goto pick_cpu;
			
 
				 
			
 
				-		new_cpu = wake_affine(affine_sd, p, prev_cpu, sync);
			
 
				+		new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);
			
 
				 	}
			
 
				 
			
 
				 	if (sd && !(sd_flag & SD_BALANCE_FORK)) {
			
@@ -6407,9 +6682,9 @@ pick_cpu:
 
				 static void detach_entity_cfs_rq(struct sched_entity *se);
			
 
				 
			
 
				 /*
			
 
				- * Called immediately before a task is migrated to a new cpu; task_cpu(p) and
			
 
				+ * Called immediately before a task is migrated to a new CPU; task_cpu(p) and
			
 
				  * cfs_rq_of(p) references at time of call are still valid and identify the
			
 
				- * previous cpu. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
			
 
				+ * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
			
 
				  */
			
 
				 static void migrate_task_rq_fair(struct task_struct *p)
			
 
				 {
			
@@ -6738,7 +7013,7 @@ simple:
 
				 
			
 
				 	p = task_of(se);
			
 
				 
			
 
				-done: __maybe_unused
			
 
				+done: __maybe_unused;
			
 
				 #ifdef CONFIG_SMP
			
 
				 	/*
			
 
				 	 * Move the next running task to the front of
			
@@ -6843,17 +7118,17 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 
				  * BASICS
			
 
				  *
			
 
				  * The purpose of load-balancing is to achieve the same basic fairness the
			
 
				- * per-cpu scheduler provides, namely provide a proportional amount of compute
			
 
				+ * per-CPU scheduler provides, namely provide a proportional amount of compute
			
 
				  * time to each task. This is expressed in the following equation:
			
 
				  *
			
 
				  *   W_i,n/P_i == W_j,n/P_j for all i,j                               (1)
			
 
				  *
			
 
				- * Where W_i,n is the n-th weight average for cpu i. The instantaneous weight
			
 
				+ * Where W_i,n is the n-th weight average for CPU i. The instantaneous weight
			
 
				  * W_i,0 is defined as:
			
 
				  *
			
 
				  *   W_i,0 = \Sum_j w_i,j                                             (2)
			
 
				  *
			
 
				- * Where w_i,j is the weight of the j-th runnable task on cpu i. This weight
			
 
				+ * Where w_i,j is the weight of the j-th runnable task on CPU i. This weight
			
 
				  * is derived from the nice value as per sched_prio_to_weight[].
			
 
				  *
			
 
				  * The weight average is an exponential decay average of the instantaneous
			
@@ -6861,7 +7136,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 
				  *
			
 
				  *   W'_i,n = (2^n - 1) / 2^n * W_i,n + 1 / 2^n * W_i,0               (3)
			
 
				  *
			
 
				- * C_i is the compute capacity of cpu i, typically it is the
			
 
				+ * C_i is the compute capacity of CPU i, typically it is the
			
 
				  * fraction of 'recent' time available for SCHED_OTHER task execution. But it
			
 
				  * can also include other factors [XXX].
			
 
				  *
			
@@ -6882,11 +7157,11 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 
				  * SCHED DOMAINS
			
 
				  *
			
 
				  * In order to solve the imbalance equation (4), and avoid the obvious O(n^2)
			
 
				- * for all i,j solution, we create a tree of cpus that follows the hardware
			
 
				+ * for all i,j solution, we create a tree of CPUs that follows the hardware
			
 
				  * topology where each level pairs two lower groups (or better). This results
			
 
				- * in O(log n) layers. Furthermore we reduce the number of cpus going up the
			
 
				+ * in O(log n) layers. Furthermore we reduce the number of CPUs going up the
			
 
				  * tree to only the first of the previous level and we decrease the frequency
			
 
				- * of load-balance at each level inv. proportional to the number of cpus in
			
 
				+ * of load-balance at each level inv. proportional to the number of CPUs in
			
 
				  * the groups.
			
 
				  *
			
 
				  * This yields:
			
@@ -6895,7 +7170,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 
				  *   \Sum       { --- * --- * 2^i } = O(n)                            (5)
			
 
				  *     i = 0      2^i   2^i
			
 
				  *                               `- size of each group
			
 
				- *         |         |     `- number of cpus doing load-balance
			
 
				+ *         |         |     `- number of CPUs doing load-balance
			
 
				  *         |         `- freq
			
 
				  *         `- sum over all levels
			
 
				  *
			
@@ -6903,7 +7178,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 
				  * this makes (5) the runtime complexity of the balancer.
			
 
				  *
			
 
				  * An important property here is that each CPU is still (indirectly) connected
			
 
				- * to every other cpu in at most O(log n) steps:
			
 
				+ * to every other CPU in at most O(log n) steps:
			
 
				  *
			
 
				  * The adjacency matrix of the resulting graph is given by:
			
 
				  *
			
@@ -6915,7 +7190,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 
				  *
			
 
				  *   A^(log_2 n)_i,j != 0  for all i,j                                (7)
			
 
				  *
			
 
				- * Showing there's indeed a path between every cpu in at most O(log n) steps.
			
 
				+ * Showing there's indeed a path between every CPU in at most O(log n) steps.
			
 
				  * The task movement gives a factor of O(m), giving a convergence complexity
			
 
				  * of:
			
 
				  *
			
@@ -6925,7 +7200,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 
				  * WORK CONSERVING
			
 
				  *
			
 
				  * In order to avoid CPUs going idle while there's still work to do, new idle
			
 
				- * balancing is more aggressive and has the newly idle cpu iterate up the domain
			
 
				+ * balancing is more aggressive and has the newly idle CPU iterate up the domain
			
 
				  * tree itself instead of relying on other CPUs to bring it work.
			
 
				  *
			
 
				  * This adds some complexity to both (5) and (8) but it reduces the total idle
			
@@ -6946,7 +7221,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 
				  *
			
 
				  *   s_k,i = \Sum_j w_i,j,k  and  S_k = \Sum_i s_k,i                 (10)
			
 
				  *
			
 
				- * w_i,j,k is the weight of the j-th runnable task in the k-th cgroup on cpu i.
			
 
				+ * w_i,j,k is the weight of the j-th runnable task in the k-th cgroup on CPU i.
			
 
				  *
			
 
				  * The big problem is S_k, its a global sum needed to compute a local (W_i)
			
 
				  * property.
			
@@ -6963,6 +7238,8 @@ enum fbq_type { regular, remote, all };
 
				 #define LBF_NEED_BREAK	0x02
			
 
				 #define LBF_DST_PINNED  0x04
			
 
				 #define LBF_SOME_PINNED	0x08
			
 
				+#define LBF_NOHZ_STATS	0x10
			
 
				+#define LBF_NOHZ_AGAIN	0x20
			
 
				 
			
 
				 struct lb_env {
			
 
				 	struct sched_domain	*sd;
			
@@ -7110,7 +7387,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 
				 		env->flags |= LBF_SOME_PINNED;
			
 
				 
			
 
				 		/*
			
 
				-		 * Remember if this task can be migrated to any other cpu in
			
 
				+		 * Remember if this task can be migrated to any other CPU in
			
 
				 		 * our sched_group. We may want to revisit it if we couldn't
			
 
				 		 * meet load balance goals by pulling other tasks on src_cpu.
			
 
				 		 *
			
@@ -7120,7 +7397,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 
				 		if (env->idle == CPU_NEWLY_IDLE || (env->flags & LBF_DST_PINNED))
			
 
				 			return 0;
			
 
				 
			
 
				-		/* Prevent to re-select dst_cpu via env's cpus */
			
 
				+		/* Prevent to re-select dst_cpu via env's CPUs: */
			
 
				 		for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
			
 
				 			if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
			
 
				 				env->flags |= LBF_DST_PINNED;
			
@@ -7347,6 +7624,17 @@ static void attach_tasks(struct lb_env *env)
 
				 	rq_unlock(env->dst_rq, &rf);
			
 
				 }
			
 
				 
			
 
				+static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
			
 
				+{
			
 
				+	if (cfs_rq->avg.load_avg)
			
 
				+		return true;
			
 
				+
			
 
				+	if (cfs_rq->avg.util_avg)
			
 
				+		return true;
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				 
			
 
				 static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
			
@@ -7371,6 +7659,7 @@ static void update_blocked_averages(int cpu)
 
				 	struct rq *rq = cpu_rq(cpu);
			
 
				 	struct cfs_rq *cfs_rq, *pos;
			
 
				 	struct rq_flags rf;
			
 
				+	bool done = true;
			
 
				 
			
 
				 	rq_lock_irqsave(rq, &rf);
			
 
				 	update_rq_clock(rq);
			
@@ -7400,7 +7689,17 @@ static void update_blocked_averages(int cpu)
 
				 		 */
			
 
				 		if (cfs_rq_is_decayed(cfs_rq))
			
 
				 			list_del_leaf_cfs_rq(cfs_rq);
			
 
				+
			
 
				+		/* Don't need periodic decay once load/util_avg are null */
			
 
				+		if (cfs_rq_has_blocked(cfs_rq))
			
 
				+			done = false;
			
 
				 	}
			
 
				+
			
 
				+#ifdef CONFIG_NO_HZ_COMMON
			
 
				+	rq->last_blocked_load_update_tick = jiffies;
			
 
				+	if (done)
			
 
				+		rq->has_blocked_load = 0;
			
 
				+#endif
			
 
				 	rq_unlock_irqrestore(rq, &rf);
			
 
				 }
			
 
				 
			
@@ -7460,6 +7759,11 @@ static inline void update_blocked_averages(int cpu)
 
				 	rq_lock_irqsave(rq, &rf);
			
 
				 	update_rq_clock(rq);
			
 
				 	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
			
 
				+#ifdef CONFIG_NO_HZ_COMMON
			
 
				+	rq->last_blocked_load_update_tick = jiffies;
			
 
				+	if (!cfs_rq_has_blocked(cfs_rq))
			
 
				+		rq->has_blocked_load = 0;
			
 
				+#endif
			
 
				 	rq_unlock_irqrestore(rq, &rf);
			
 
				 }
			
 
				 
			
@@ -7694,8 +7998,8 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 
				  * Group imbalance indicates (and tries to solve) the problem where balancing
			
 
				  * groups is inadequate due to ->cpus_allowed constraints.
			
 
				  *
			
 
				- * Imagine a situation of two groups of 4 cpus each and 4 tasks each with a
			
 
				- * cpumask covering 1 cpu of the first group and 3 cpus of the second group.
			
 
				+ * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a
			
 
				+ * cpumask covering 1 CPU of the first group and 3 CPUs of the second group.
			
 
				  * Something like:
			
 
				  *
			
 
				  *	{ 0 1 2 3 } { 4 5 6 7 }
			
@@ -7703,7 +8007,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 
				  *
			
 
				  * If we were to balance group-wise we'd place two tasks in the first group and
			
 
				  * two tasks in the second group. Clearly this is undesired as it will overload
			
 
				- * cpu 3 and leave one of the cpus in the second group unused.
			
 
				+ * cpu 3 and leave one of the CPUs in the second group unused.
			
 
				  *
			
 
				  * The current solution to this issue is detecting the skew in the first group
			
 
				  * by noticing the lower domain failed to reach balance and had difficulty
			
@@ -7794,6 +8098,28 @@ group_type group_classify(struct sched_group *group,
 
				 	return group_other;
			
 
				 }
			
 
				 
			
 
				+static bool update_nohz_stats(struct rq *rq, bool force)
			
 
				+{
			
 
				+#ifdef CONFIG_NO_HZ_COMMON
			
 
				+	unsigned int cpu = rq->cpu;
			
 
				+
			
 
				+	if (!rq->has_blocked_load)
			
 
				+		return false;
			
 
				+
			
 
				+	if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
			
 
				+		return false;
			
 
				+
			
 
				+	if (!force && !time_after(jiffies, rq->last_blocked_load_update_tick))
			
 
				+		return true;
			
 
				+
			
 
				+	update_blocked_averages(cpu);
			
 
				+
			
 
				+	return rq->has_blocked_load;
			
 
				+#else
			
 
				+	return false;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
			
 
				  * @env: The load balancing environment.
			
@@ -7816,7 +8142,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
				 	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
			
 
				 		struct rq *rq = cpu_rq(i);
			
 
				 
			
 
				-		/* Bias balancing toward cpus of our domain */
			
 
				+		if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
			
 
				+			env->flags |= LBF_NOHZ_AGAIN;
			
 
				+
			
 
				+		/* Bias balancing toward CPUs of our domain: */
			
 
				 		if (local_group)
			
 
				 			load = target_load(i, load_idx);
			
 
				 		else
			
@@ -7902,7 +8231,7 @@ asym_packing:
 
				 	if (!(env->sd->flags & SD_ASYM_PACKING))
			
 
				 		return true;
			
 
				 
			
 
				-	/* No ASYM_PACKING if target cpu is already busy */
			
 
				+	/* No ASYM_PACKING if target CPU is already busy */
			
 
				 	if (env->idle == CPU_NOT_IDLE)
			
 
				 		return true;
			
 
				 	/*
			
@@ -7915,7 +8244,7 @@ asym_packing:
 
				 		if (!sds->busiest)
			
 
				 			return true;
			
 
				 
			
 
				-		/* Prefer to move from lowest priority cpu's work */
			
 
				+		/* Prefer to move from lowest priority CPU's work */
			
 
				 		if (sched_asym_prefer(sds->busiest->asym_prefer_cpu,
			
 
				 				      sg->asym_prefer_cpu))
			
 
				 			return true;
			
@@ -7971,6 +8300,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 
				 	if (child && child->flags & SD_PREFER_SIBLING)
			
 
				 		prefer_sibling = 1;
			
 
				 
			
 
				+#ifdef CONFIG_NO_HZ_COMMON
			
 
				+	if (env->idle == CPU_NEWLY_IDLE && READ_ONCE(nohz.has_blocked))
			
 
				+		env->flags |= LBF_NOHZ_STATS;
			
 
				+#endif
			
 
				+
			
 
				 	load_idx = get_sd_load_idx(env->sd, env->idle);
			
 
				 
			
 
				 	do {
			
@@ -8024,6 +8358,15 @@ next_group:
 
				 		sg = sg->next;
			
 
				 	} while (sg != env->sd->groups);
			
 
				 
			
 
				+#ifdef CONFIG_NO_HZ_COMMON
			
 
				+	if ((env->flags & LBF_NOHZ_AGAIN) &&
			
 
				+	    cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd))) {
			
 
				+
			
 
				+		WRITE_ONCE(nohz.next_blocked,
			
 
				+			   jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD));
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				 	if (env->sd->flags & SD_NUMA)
			
 
				 		env->fbq_type = fbq_classify_group(&sds->busiest_stat);
			
 
				 
			
@@ -8168,7 +8511,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 
				 	if (busiest->group_type == group_imbalanced) {
			
 
				 		/*
			
 
				 		 * In the group_imb case we cannot rely on group-wide averages
			
 
				-		 * to ensure cpu-load equilibrium, look at wider averages. XXX
			
 
				+		 * to ensure CPU-load equilibrium, look at wider averages. XXX
			
 
				 		 */
			
 
				 		busiest->load_per_task =
			
 
				 			min(busiest->load_per_task, sds->avg_load);
			
@@ -8187,7 +8530,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 
				 	}
			
 
				 
			
 
				 	/*
			
 
				-	 * If there aren't any idle cpus, avoid creating some.
			
 
				+	 * If there aren't any idle CPUs, avoid creating some.
			
 
				 	 */
			
 
				 	if (busiest->group_type == group_overloaded &&
			
 
				 	    local->group_type   == group_overloaded) {
			
@@ -8201,9 +8544,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 
				 	}
			
 
				 
			
 
				 	/*
			
 
				-	 * We're trying to get all the cpus to the average_load, so we don't
			
 
				+	 * We're trying to get all the CPUs to the average_load, so we don't
			
 
				 	 * want to push ourselves above the average load, nor do we wish to
			
 
				-	 * reduce the max loaded cpu below the average load. At the same time,
			
 
				+	 * reduce the max loaded CPU below the average load. At the same time,
			
 
				 	 * we also don't want to reduce the group load below the group
			
 
				 	 * capacity. Thus we look for the minimum possible imbalance.
			
 
				 	 */
			
@@ -8297,9 +8640,9 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 
				 
			
 
				 	if (env->idle == CPU_IDLE) {
			
 
				 		/*
			
 
				-		 * This cpu is idle. If the busiest group is not overloaded
			
 
				+		 * This CPU is idle. If the busiest group is not overloaded
			
 
				 		 * and there is no imbalance between this and busiest group
			
 
				-		 * wrt idle cpus, it is balanced. The imbalance becomes
			
 
				+		 * wrt idle CPUs, it is balanced. The imbalance becomes
			
 
				 		 * significant if the diff is greater than 1 otherwise we
			
 
				 		 * might end up to just move the imbalance on another group
			
 
				 		 */
			
@@ -8327,7 +8670,7 @@ out_balanced:
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * find_busiest_queue - find the busiest runqueue among the cpus in group.
			
 
				+ * find_busiest_queue - find the busiest runqueue among the CPUs in the group.
			
 
				  */
			
 
				 static struct rq *find_busiest_queue(struct lb_env *env,
			
 
				 				     struct sched_group *group)
			
@@ -8371,7 +8714,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 
				 
			
 
				 		/*
			
 
				 		 * When comparing with imbalance, use weighted_cpuload()
			
 
				-		 * which is not scaled with the cpu capacity.
			
 
				+		 * which is not scaled with the CPU capacity.
			
 
				 		 */
			
 
				 
			
 
				 		if (rq->nr_running == 1 && wl > env->imbalance &&
			
@@ -8379,9 +8722,9 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 
				 			continue;
			
 
				 
			
 
				 		/*
			
 
				-		 * For the load comparisons with the other cpu's, consider
			
 
				-		 * the weighted_cpuload() scaled with the cpu capacity, so
			
 
				-		 * that the load can be moved away from the cpu that is
			
 
				+		 * For the load comparisons with the other CPU's, consider
			
 
				+		 * the weighted_cpuload() scaled with the CPU capacity, so
			
 
				+		 * that the load can be moved away from the CPU that is
			
 
				 		 * potentially running at a lower capacity.
			
 
				 		 *
			
 
				 		 * Thus we're looking for max(wl_i / capacity_i), crosswise
			
@@ -8452,13 +8795,13 @@ static int should_we_balance(struct lb_env *env)
 
				 		return 0;
			
 
				 
			
 
				 	/*
			
 
				-	 * In the newly idle case, we will allow all the cpu's
			
 
				+	 * In the newly idle case, we will allow all the CPUs
			
 
				 	 * to do the newly idle load balance.
			
 
				 	 */
			
 
				 	if (env->idle == CPU_NEWLY_IDLE)
			
 
				 		return 1;
			
 
				 
			
 
				-	/* Try to find first idle cpu */
			
 
				+	/* Try to find first idle CPU */
			
 
				 	for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
			
 
				 		if (!idle_cpu(cpu))
			
 
				 			continue;
			
@@ -8471,7 +8814,7 @@ static int should_we_balance(struct lb_env *env)
 
				 		balance_cpu = group_balance_cpu(sg);
			
 
				 
			
 
				 	/*
			
 
				-	 * First idle cpu or the first cpu(busiest) in this sched group
			
 
				+	 * First idle CPU or the first CPU(busiest) in this sched group
			
 
				 	 * is eligible for doing load balancing at this and above domains.
			
 
				 	 */
			
 
				 	return balance_cpu == env->dst_cpu;
			
@@ -8580,7 +8923,7 @@ more_balance:
 
				 		 * Revisit (affine) tasks on src_cpu that couldn't be moved to
			
 
				 		 * us and move them to an alternate dst_cpu in our sched_group
			
 
				 		 * where they can run. The upper limit on how many times we
			
 
				-		 * iterate on same src_cpu is dependent on number of cpus in our
			
 
				+		 * iterate on same src_cpu is dependent on number of CPUs in our
			
 
				 		 * sched_group.
			
 
				 		 *
			
 
				 		 * This changes load balance semantics a bit on who can move
			
@@ -8597,7 +8940,7 @@ more_balance:
 
				 		 */
			
 
				 		if ((env.flags & LBF_DST_PINNED) && env.imbalance > 0) {
			
 
				 
			
 
				-			/* Prevent to re-select dst_cpu via env's cpus */
			
 
				+			/* Prevent to re-select dst_cpu via env's CPUs */
			
 
				 			cpumask_clear_cpu(env.dst_cpu, env.cpus);
			
 
				 
			
 
				 			env.dst_rq	 = cpu_rq(env.new_dst_cpu);
			
@@ -8659,9 +9002,10 @@ more_balance:
 
				 
			
 
				 			raw_spin_lock_irqsave(&busiest->lock, flags);
			
 
				 
			
 
				-			/* don't kick the active_load_balance_cpu_stop,
			
 
				-			 * if the curr task on busiest cpu can't be
			
 
				-			 * moved to this_cpu
			
 
				+			/*
			
 
				+			 * Don't kick the active_load_balance_cpu_stop,
			
 
				+			 * if the curr task on busiest CPU can't be
			
 
				+			 * moved to this_cpu:
			
 
				 			 */
			
 
				 			if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
			
 
				 				raw_spin_unlock_irqrestore(&busiest->lock,
			
@@ -8773,167 +9117,53 @@ update_next_balance(struct sched_domain *sd, unsigned long *next_balance)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * idle_balance is called by schedule() if this_cpu is about to become
			
 
				- * idle. Attempts to pull tasks from other CPUs.
			
 
				+ * active_load_balance_cpu_stop is run by the CPU stopper. It pushes
			
 
				+ * running tasks off the busiest CPU onto idle CPUs. It requires at
			
 
				+ * least 1 task to be running on each physical CPU where possible, and
			
 
				+ * avoids physical / logical imbalances.
			
 
				  */
			
 
				-static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
			
 
				+static int active_load_balance_cpu_stop(void *data)
			
 
				 {
			
 
				-	unsigned long next_balance = jiffies + HZ;
			
 
				-	int this_cpu = this_rq->cpu;
			
 
				+	struct rq *busiest_rq = data;
			
 
				+	int busiest_cpu = cpu_of(busiest_rq);
			
 
				+	int target_cpu = busiest_rq->push_cpu;
			
 
				+	struct rq *target_rq = cpu_rq(target_cpu);
			
 
				 	struct sched_domain *sd;
			
 
				-	int pulled_task = 0;
			
 
				-	u64 curr_cost = 0;
			
 
				+	struct task_struct *p = NULL;
			
 
				+	struct rq_flags rf;
			
 
				 
			
 
				+	rq_lock_irq(busiest_rq, &rf);
			
 
				 	/*
			
 
				-	 * We must set idle_stamp _before_ calling idle_balance(), such that we
			
 
				-	 * measure the duration of idle_balance() as idle time.
			
 
				+	 * Between queueing the stop-work and running it is a hole in which
			
 
				+	 * CPUs can become inactive. We should not move tasks from or to
			
 
				+	 * inactive CPUs.
			
 
				 	 */
			
 
				-	this_rq->idle_stamp = rq_clock(this_rq);
			
 
				+	if (!cpu_active(busiest_cpu) || !cpu_active(target_cpu))
			
 
				+		goto out_unlock;
			
 
				 
			
 
				-	/*
			
 
				-	 * Do not pull tasks towards !active CPUs...
			
 
				-	 */
			
 
				-	if (!cpu_active(this_cpu))
			
 
				-		return 0;
			
 
				+	/* Make sure the requested CPU hasn't gone down in the meantime: */
			
 
				+	if (unlikely(busiest_cpu != smp_processor_id() ||
			
 
				+		     !busiest_rq->active_balance))
			
 
				+		goto out_unlock;
			
 
				+
			
 
				+	/* Is there any task to move? */
			
 
				+	if (busiest_rq->nr_running <= 1)
			
 
				+		goto out_unlock;
			
 
				 
			
 
				 	/*
			
 
				-	 * This is OK, because current is on_cpu, which avoids it being picked
			
 
				-	 * for load-balance and preemption/IRQs are still disabled avoiding
			
 
				-	 * further scheduler activity on it and we're being very careful to
			
 
				-	 * re-start the picking loop.
			
 
				+	 * This condition is "impossible", if it occurs
			
 
				+	 * we need to fix it. Originally reported by
			
 
				+	 * Bjorn Helgaas on a 128-CPU setup.
			
 
				 	 */
			
 
				-	rq_unpin_lock(this_rq, rf);
			
 
				-
			
 
				-	if (this_rq->avg_idle < sysctl_sched_migration_cost ||
			
 
				-	    !this_rq->rd->overload) {
			
 
				-		rcu_read_lock();
			
 
				-		sd = rcu_dereference_check_sched_domain(this_rq->sd);
			
 
				-		if (sd)
			
 
				-			update_next_balance(sd, &next_balance);
			
 
				-		rcu_read_unlock();
			
 
				-
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	raw_spin_unlock(&this_rq->lock);
			
 
				+	BUG_ON(busiest_rq == target_rq);
			
 
				 
			
 
				-	update_blocked_averages(this_cpu);
			
 
				+	/* Search for an sd spanning us and the target CPU. */
			
 
				 	rcu_read_lock();
			
 
				-	for_each_domain(this_cpu, sd) {
			
 
				-		int continue_balancing = 1;
			
 
				-		u64 t0, domain_cost;
			
 
				-
			
 
				-		if (!(sd->flags & SD_LOAD_BALANCE))
			
 
				-			continue;
			
 
				-
			
 
				-		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
			
 
				-			update_next_balance(sd, &next_balance);
			
 
				-			break;
			
 
				-		}
			
 
				-
			
 
				-		if (sd->flags & SD_BALANCE_NEWIDLE) {
			
 
				-			t0 = sched_clock_cpu(this_cpu);
			
 
				-
			
 
				-			pulled_task = load_balance(this_cpu, this_rq,
			
 
				-						   sd, CPU_NEWLY_IDLE,
			
 
				-						   &continue_balancing);
			
 
				-
			
 
				-			domain_cost = sched_clock_cpu(this_cpu) - t0;
			
 
				-			if (domain_cost > sd->max_newidle_lb_cost)
			
 
				-				sd->max_newidle_lb_cost = domain_cost;
			
 
				-
			
 
				-			curr_cost += domain_cost;
			
 
				-		}
			
 
				-
			
 
				-		update_next_balance(sd, &next_balance);
			
 
				-
			
 
				-		/*
			
 
				-		 * Stop searching for tasks to pull if there are
			
 
				-		 * now runnable tasks on this rq.
			
 
				-		 */
			
 
				-		if (pulled_task || this_rq->nr_running > 0)
			
 
				-			break;
			
 
				-	}
			
 
				-	rcu_read_unlock();
			
 
				-
			
 
				-	raw_spin_lock(&this_rq->lock);
			
 
				-
			
 
				-	if (curr_cost > this_rq->max_idle_balance_cost)
			
 
				-		this_rq->max_idle_balance_cost = curr_cost;
			
 
				-
			
 
				-	/*
			
 
				-	 * While browsing the domains, we released the rq lock, a task could
			
 
				-	 * have been enqueued in the meantime. Since we're not going idle,
			
 
				-	 * pretend we pulled a task.
			
 
				-	 */
			
 
				-	if (this_rq->cfs.h_nr_running && !pulled_task)
			
 
				-		pulled_task = 1;
			
 
				-
			
 
				-out:
			
 
				-	/* Move the next balance forward */
			
 
				-	if (time_after(this_rq->next_balance, next_balance))
			
 
				-		this_rq->next_balance = next_balance;
			
 
				-
			
 
				-	/* Is there a task of a high priority class? */
			
 
				-	if (this_rq->nr_running != this_rq->cfs.h_nr_running)
			
 
				-		pulled_task = -1;
			
 
				-
			
 
				-	if (pulled_task)
			
 
				-		this_rq->idle_stamp = 0;
			
 
				-
			
 
				-	rq_repin_lock(this_rq, rf);
			
 
				-
			
 
				-	return pulled_task;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * active_load_balance_cpu_stop is run by cpu stopper. It pushes
			
 
				- * running tasks off the busiest CPU onto idle CPUs. It requires at
			
 
				- * least 1 task to be running on each physical CPU where possible, and
			
 
				- * avoids physical / logical imbalances.
			
 
				- */
			
 
				-static int active_load_balance_cpu_stop(void *data)
			
 
				-{
			
 
				-	struct rq *busiest_rq = data;
			
 
				-	int busiest_cpu = cpu_of(busiest_rq);
			
 
				-	int target_cpu = busiest_rq->push_cpu;
			
 
				-	struct rq *target_rq = cpu_rq(target_cpu);
			
 
				-	struct sched_domain *sd;
			
 
				-	struct task_struct *p = NULL;
			
 
				-	struct rq_flags rf;
			
 
				-
			
 
				-	rq_lock_irq(busiest_rq, &rf);
			
 
				-	/*
			
 
				-	 * Between queueing the stop-work and running it is a hole in which
			
 
				-	 * CPUs can become inactive. We should not move tasks from or to
			
 
				-	 * inactive CPUs.
			
 
				-	 */
			
 
				-	if (!cpu_active(busiest_cpu) || !cpu_active(target_cpu))
			
 
				-		goto out_unlock;
			
 
				-
			
 
				-	/* make sure the requested cpu hasn't gone down in the meantime */
			
 
				-	if (unlikely(busiest_cpu != smp_processor_id() ||
			
 
				-		     !busiest_rq->active_balance))
			
 
				-		goto out_unlock;
			
 
				-
			
 
				-	/* Is there any task to move? */
			
 
				-	if (busiest_rq->nr_running <= 1)
			
 
				-		goto out_unlock;
			
 
				-
			
 
				-	/*
			
 
				-	 * This condition is "impossible", if it occurs
			
 
				-	 * we need to fix it. Originally reported by
			
 
				-	 * Bjorn Helgaas on a 128-cpu setup.
			
 
				-	 */
			
 
				-	BUG_ON(busiest_rq == target_rq);
			
 
				-
			
 
				-	/* Search for an sd spanning us and the target CPU. */
			
 
				-	rcu_read_lock();
			
 
				-	for_each_domain(target_cpu, sd) {
			
 
				-		if ((sd->flags & SD_LOAD_BALANCE) &&
			
 
				-		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
			
 
				-				break;
			
 
				-	}
			
 
				+	for_each_domain(target_cpu, sd) {
			
 
				+		if ((sd->flags & SD_LOAD_BALANCE) &&
			
 
				+		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
			
 
				+				break;
			
 
				+	}
			
 
				 
			
 
				 	if (likely(sd)) {
			
 
				 		struct lb_env env = {
			
@@ -8977,141 +9207,6 @@ out_unlock:
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static inline int on_null_domain(struct rq *rq)
			
 
				-{
			
 
				-	return unlikely(!rcu_dereference_sched(rq->sd));
			
 
				-}
			
 
				-
			
 
				-#ifdef CONFIG_NO_HZ_COMMON
			
 
				-/*
			
 
				- * idle load balancing details
			
 
				- * - When one of the busy CPUs notice that there may be an idle rebalancing
			
 
				- *   needed, they will kick the idle load balancer, which then does idle
			
 
				- *   load balancing for all the idle CPUs.
			
 
				- */
			
 
				-static struct {
			
 
				-	cpumask_var_t idle_cpus_mask;
			
 
				-	atomic_t nr_cpus;
			
 
				-	unsigned long next_balance;     /* in jiffy units */
			
 
				-} nohz ____cacheline_aligned;
			
 
				-
			
 
				-static inline int find_new_ilb(void)
			
 
				-{
			
 
				-	int ilb = cpumask_first(nohz.idle_cpus_mask);
			
 
				-
			
 
				-	if (ilb < nr_cpu_ids && idle_cpu(ilb))
			
 
				-		return ilb;
			
 
				-
			
 
				-	return nr_cpu_ids;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
			
 
				- * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
			
 
				- * CPU (if there is one).
			
 
				- */
			
 
				-static void nohz_balancer_kick(void)
			
 
				-{
			
 
				-	int ilb_cpu;
			
 
				-
			
 
				-	nohz.next_balance++;
			
 
				-
			
 
				-	ilb_cpu = find_new_ilb();
			
 
				-
			
 
				-	if (ilb_cpu >= nr_cpu_ids)
			
 
				-		return;
			
 
				-
			
 
				-	if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu)))
			
 
				-		return;
			
 
				-	/*
			
 
				-	 * Use smp_send_reschedule() instead of resched_cpu().
			
 
				-	 * This way we generate a sched IPI on the target cpu which
			
 
				-	 * is idle. And the softirq performing nohz idle load balance
			
 
				-	 * will be run before returning from the IPI.
			
 
				-	 */
			
 
				-	smp_send_reschedule(ilb_cpu);
			
 
				-	return;
			
 
				-}
			
 
				-
			
 
				-void nohz_balance_exit_idle(unsigned int cpu)
			
 
				-{
			
 
				-	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
			
 
				-		/*
			
 
				-		 * Completely isolated CPUs don't ever set, so we must test.
			
 
				-		 */
			
 
				-		if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) {
			
 
				-			cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
			
 
				-			atomic_dec(&nohz.nr_cpus);
			
 
				-		}
			
 
				-		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static inline void set_cpu_sd_state_busy(void)
			
 
				-{
			
 
				-	struct sched_domain *sd;
			
 
				-	int cpu = smp_processor_id();
			
 
				-
			
 
				-	rcu_read_lock();
			
 
				-	sd = rcu_dereference(per_cpu(sd_llc, cpu));
			
 
				-
			
 
				-	if (!sd || !sd->nohz_idle)
			
 
				-		goto unlock;
			
 
				-	sd->nohz_idle = 0;
			
 
				-
			
 
				-	atomic_inc(&sd->shared->nr_busy_cpus);
			
 
				-unlock:
			
 
				-	rcu_read_unlock();
			
 
				-}
			
 
				-
			
 
				-void set_cpu_sd_state_idle(void)
			
 
				-{
			
 
				-	struct sched_domain *sd;
			
 
				-	int cpu = smp_processor_id();
			
 
				-
			
 
				-	rcu_read_lock();
			
 
				-	sd = rcu_dereference(per_cpu(sd_llc, cpu));
			
 
				-
			
 
				-	if (!sd || sd->nohz_idle)
			
 
				-		goto unlock;
			
 
				-	sd->nohz_idle = 1;
			
 
				-
			
 
				-	atomic_dec(&sd->shared->nr_busy_cpus);
			
 
				-unlock:
			
 
				-	rcu_read_unlock();
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * This routine will record that the cpu is going idle with tick stopped.
			
 
				- * This info will be used in performing idle load balancing in the future.
			
 
				- */
			
 
				-void nohz_balance_enter_idle(int cpu)
			
 
				-{
			
 
				-	/*
			
 
				-	 * If this cpu is going down, then nothing needs to be done.
			
 
				-	 */
			
 
				-	if (!cpu_active(cpu))
			
 
				-		return;
			
 
				-
			
 
				-	/* Spare idle load balancing on CPUs that don't want to be disturbed: */
			
 
				-	if (!housekeeping_cpu(cpu, HK_FLAG_SCHED))
			
 
				-		return;
			
 
				-
			
 
				-	if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
			
 
				-		return;
			
 
				-
			
 
				-	/*
			
 
				-	 * If we're a completely isolated CPU, we don't play.
			
 
				-	 */
			
 
				-	if (on_null_domain(cpu_rq(cpu)))
			
 
				-		return;
			
 
				-
			
 
				-	cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
			
 
				-	atomic_inc(&nohz.nr_cpus);
			
 
				-	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				 static DEFINE_SPINLOCK(balancing);
			
 
				 
			
 
				 /*
			
@@ -9141,8 +9236,6 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 
				 	int need_serialize, need_decay = 0;
			
 
				 	u64 max_cost = 0;
			
 
				 
			
 
				-	update_blocked_averages(cpu);
			
 
				-
			
 
				 	rcu_read_lock();
			
 
				 	for_each_domain(cpu, sd) {
			
 
				 		/*
			
@@ -9232,68 +9325,56 @@ out:
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static inline int on_null_domain(struct rq *rq)
			
 
				+{
			
 
				+	return unlikely(!rcu_dereference_sched(rq->sd));
			
 
				+}
			
 
				+
			
 
				 #ifdef CONFIG_NO_HZ_COMMON
			
 
				 /*
			
 
				- * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
			
 
				- * rebalancing for all the cpus for whom scheduler ticks are stopped.
			
 
				+ * idle load balancing details
			
 
				+ * - When one of the busy CPUs notice that there may be an idle rebalancing
			
 
				+ *   needed, they will kick the idle load balancer, which then does idle
			
 
				+ *   load balancing for all the idle CPUs.
			
 
				  */
			
 
				-static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
			
 
				+
			
 
				+static inline int find_new_ilb(void)
			
 
				 {
			
 
				-	int this_cpu = this_rq->cpu;
			
 
				-	struct rq *rq;
			
 
				-	int balance_cpu;
			
 
				-	/* Earliest time when we have to do rebalance again */
			
 
				-	unsigned long next_balance = jiffies + 60*HZ;
			
 
				-	int update_next_balance = 0;
			
 
				+	int ilb = cpumask_first(nohz.idle_cpus_mask);
			
 
				 
			
 
				-	if (idle != CPU_IDLE ||
			
 
				-	    !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
			
 
				-		goto end;
			
 
				+	if (ilb < nr_cpu_ids && idle_cpu(ilb))
			
 
				+		return ilb;
			
 
				 
			
 
				-	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
			
 
				-		if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
			
 
				-			continue;
			
 
				+	return nr_cpu_ids;
			
 
				+}
			
 
				 
			
 
				-		/*
			
 
				-		 * If this cpu gets work to do, stop the load balancing
			
 
				-		 * work being done for other cpus. Next load
			
 
				-		 * balancing owner will pick it up.
			
 
				-		 */
			
 
				-		if (need_resched())
			
 
				-			break;
			
 
				-
			
 
				-		rq = cpu_rq(balance_cpu);
			
 
				+/*
			
 
				+ * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
			
 
				+ * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
			
 
				+ * CPU (if there is one).
			
 
				+ */
			
 
				+static void kick_ilb(unsigned int flags)
			
 
				+{
			
 
				+	int ilb_cpu;
			
 
				 
			
 
				-		/*
			
 
				-		 * If time for next balance is due,
			
 
				-		 * do the balance.
			
 
				-		 */
			
 
				-		if (time_after_eq(jiffies, rq->next_balance)) {
			
 
				-			struct rq_flags rf;
			
 
				+	nohz.next_balance++;
			
 
				 
			
 
				-			rq_lock_irq(rq, &rf);
			
 
				-			update_rq_clock(rq);
			
 
				-			cpu_load_update_idle(rq);
			
 
				-			rq_unlock_irq(rq, &rf);
			
 
				+	ilb_cpu = find_new_ilb();
			
 
				 
			
 
				-			rebalance_domains(rq, CPU_IDLE);
			
 
				-		}
			
 
				+	if (ilb_cpu >= nr_cpu_ids)
			
 
				+		return;
			
 
				 
			
 
				-		if (time_after(next_balance, rq->next_balance)) {
			
 
				-			next_balance = rq->next_balance;
			
 
				-			update_next_balance = 1;
			
 
				-		}
			
 
				-	}
			
 
				+	flags = atomic_fetch_or(flags, nohz_flags(ilb_cpu));
			
 
				+	if (flags & NOHZ_KICK_MASK)
			
 
				+		return;
			
 
				 
			
 
				 	/*
			
 
				-	 * next_balance will be updated only when there is a need.
			
 
				-	 * When the CPU is attached to null domain for ex, it will not be
			
 
				-	 * updated.
			
 
				+	 * Use smp_send_reschedule() instead of resched_cpu().
			
 
				+	 * This way we generate a sched IPI on the target CPU which
			
 
				+	 * is idle. And the softirq performing nohz idle load balance
			
 
				+	 * will be run before returning from the IPI.
			
 
				 	 */
			
 
				-	if (likely(update_next_balance))
			
 
				-		nohz.next_balance = next_balance;
			
 
				-end:
			
 
				-	clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu));
			
 
				+	smp_send_reschedule(ilb_cpu);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -9307,36 +9388,41 @@ end:
 
				  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
			
 
				  *     domain span are idle.
			
 
				  */
			
 
				-static inline bool nohz_kick_needed(struct rq *rq)
			
 
				+static void nohz_balancer_kick(struct rq *rq)
			
 
				 {
			
 
				 	unsigned long now = jiffies;
			
 
				 	struct sched_domain_shared *sds;
			
 
				 	struct sched_domain *sd;
			
 
				 	int nr_busy, i, cpu = rq->cpu;
			
 
				-	bool kick = false;
			
 
				+	unsigned int flags = 0;
			
 
				 
			
 
				 	if (unlikely(rq->idle_balance))
			
 
				-		return false;
			
 
				+		return;
			
 
				 
			
 
				-       /*
			
 
				-	* We may be recently in ticked or tickless idle mode. At the first
			
 
				-	* busy tick after returning from idle, we will update the busy stats.
			
 
				-	*/
			
 
				-	set_cpu_sd_state_busy();
			
 
				-	nohz_balance_exit_idle(cpu);
			
 
				+	/*
			
 
				+	 * We may be recently in ticked or tickless idle mode. At the first
			
 
				+	 * busy tick after returning from idle, we will update the busy stats.
			
 
				+	 */
			
 
				+	nohz_balance_exit_idle(rq);
			
 
				 
			
 
				 	/*
			
 
				 	 * None are in tickless mode and hence no need for NOHZ idle load
			
 
				 	 * balancing.
			
 
				 	 */
			
 
				 	if (likely(!atomic_read(&nohz.nr_cpus)))
			
 
				-		return false;
			
 
				+		return;
			
 
				+
			
 
				+	if (READ_ONCE(nohz.has_blocked) &&
			
 
				+	    time_after(now, READ_ONCE(nohz.next_blocked)))
			
 
				+		flags = NOHZ_STATS_KICK;
			
 
				 
			
 
				 	if (time_before(now, nohz.next_balance))
			
 
				-		return false;
			
 
				+		goto out;
			
 
				 
			
 
				-	if (rq->nr_running >= 2)
			
 
				-		return true;
			
 
				+	if (rq->nr_running >= 2) {
			
 
				+		flags = NOHZ_KICK_MASK;
			
 
				+		goto out;
			
 
				+	}
			
 
				 
			
 
				 	rcu_read_lock();
			
 
				 	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
			
@@ -9347,7 +9433,7 @@ static inline bool nohz_kick_needed(struct rq *rq)
 
				 		 */
			
 
				 		nr_busy = atomic_read(&sds->nr_busy_cpus);
			
 
				 		if (nr_busy > 1) {
			
 
				-			kick = true;
			
 
				+			flags = NOHZ_KICK_MASK;
			
 
				 			goto unlock;
			
 
				 		}
			
 
				 
			
@@ -9357,7 +9443,7 @@ static inline bool nohz_kick_needed(struct rq *rq)
 
				 	if (sd) {
			
 
				 		if ((rq->cfs.h_nr_running >= 1) &&
			
 
				 				check_cpu_capacity(rq, sd)) {
			
 
				-			kick = true;
			
 
				+			flags = NOHZ_KICK_MASK;
			
 
				 			goto unlock;
			
 
				 		}
			
 
				 	}
			
@@ -9370,18 +9456,421 @@ static inline bool nohz_kick_needed(struct rq *rq)
 
				 				continue;
			
 
				 
			
 
				 			if (sched_asym_prefer(i, cpu)) {
			
 
				-				kick = true;
			
 
				+				flags = NOHZ_KICK_MASK;
			
 
				 				goto unlock;
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				 unlock:
			
 
				 	rcu_read_unlock();
			
 
				-	return kick;
			
 
				+out:
			
 
				+	if (flags)
			
 
				+		kick_ilb(flags);
			
 
				+}
			
 
				+
			
 
				+static void set_cpu_sd_state_busy(int cpu)
			
 
				+{
			
 
				+	struct sched_domain *sd;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
			
 
				+
			
 
				+	if (!sd || !sd->nohz_idle)
			
 
				+		goto unlock;
			
 
				+	sd->nohz_idle = 0;
			
 
				+
			
 
				+	atomic_inc(&sd->shared->nr_busy_cpus);
			
 
				+unlock:
			
 
				+	rcu_read_unlock();
			
 
				+}
			
 
				+
			
 
				+void nohz_balance_exit_idle(struct rq *rq)
			
 
				+{
			
 
				+	SCHED_WARN_ON(rq != this_rq());
			
 
				+
			
 
				+	if (likely(!rq->nohz_tick_stopped))
			
 
				+		return;
			
 
				+
			
 
				+	rq->nohz_tick_stopped = 0;
			
 
				+	cpumask_clear_cpu(rq->cpu, nohz.idle_cpus_mask);
			
 
				+	atomic_dec(&nohz.nr_cpus);
			
 
				+
			
 
				+	set_cpu_sd_state_busy(rq->cpu);
			
 
				+}
			
 
				+
			
 
				+static void set_cpu_sd_state_idle(int cpu)
			
 
				+{
			
 
				+	struct sched_domain *sd;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
			
 
				+
			
 
				+	if (!sd || sd->nohz_idle)
			
 
				+		goto unlock;
			
 
				+	sd->nohz_idle = 1;
			
 
				+
			
 
				+	atomic_dec(&sd->shared->nr_busy_cpus);
			
 
				+unlock:
			
 
				+	rcu_read_unlock();
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This routine will record that the CPU is going idle with tick stopped.
			
 
				+ * This info will be used in performing idle load balancing in the future.
			
 
				+ */
			
 
				+void nohz_balance_enter_idle(int cpu)
			
 
				+{
			
 
				+	struct rq *rq = cpu_rq(cpu);
			
 
				+
			
 
				+	SCHED_WARN_ON(cpu != smp_processor_id());
			
 
				+
			
 
				+	/* If this CPU is going down, then nothing needs to be done: */
			
 
				+	if (!cpu_active(cpu))
			
 
				+		return;
			
 
				+
			
 
				+	/* Spare idle load balancing on CPUs that don't want to be disturbed: */
			
 
				+	if (!housekeeping_cpu(cpu, HK_FLAG_SCHED))
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * Can be set safely without rq->lock held
			
 
				+	 * If a clear happens, it will have evaluated last additions because
			
 
				+	 * rq->lock is held during the check and the clear
			
 
				+	 */
			
 
				+	rq->has_blocked_load = 1;
			
 
				+
			
 
				+	/*
			
 
				+	 * The tick is still stopped but load could have been added in the
			
 
				+	 * meantime. We set the nohz.has_blocked flag to trigger a check of the
			
 
				+	 * *_avg. The CPU is already part of nohz.idle_cpus_mask so the clear
			
 
				+	 * of nohz.has_blocked can only happen after checking the new load
			
 
				+	 */
			
 
				+	if (rq->nohz_tick_stopped)
			
 
				+		goto out;
			
 
				+
			
 
				+	/* If we're a completely isolated CPU, we don't play: */
			
 
				+	if (on_null_domain(rq))
			
 
				+		return;
			
 
				+
			
 
				+	rq->nohz_tick_stopped = 1;
			
 
				+
			
 
				+	cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
			
 
				+	atomic_inc(&nohz.nr_cpus);
			
 
				+
			
 
				+	/*
			
 
				+	 * Ensures that if nohz_idle_balance() fails to observe our
			
 
				+	 * @idle_cpus_mask store, it must observe the @has_blocked
			
 
				+	 * store.
			
 
				+	 */
			
 
				+	smp_mb__after_atomic();
			
 
				+
			
 
				+	set_cpu_sd_state_idle(cpu);
			
 
				+
			
 
				+out:
			
 
				+	/*
			
 
				+	 * Each time a CPU enters idle, we assume that it has blocked load and
			
 
				+	 * enable the periodic update of the load of idle cpus
			
 
				+	 */
			
 
				+	WRITE_ONCE(nohz.has_blocked, 1);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Internal function that runs load balance for all idle cpus. The load balance
			
 
				+ * can be a simple update of blocked load or a complete load balance with
			
 
				+ * task movement depending on flags.
			
 
				+ * The function returns false if the loop has stopped before running
			
 
				+ * through all idle CPUs.
			
 
				+ */
			
 
				+static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
			
 
				+			       enum cpu_idle_type idle)
			
 
				+{
			
 
				+	/* Earliest time when we have to do rebalance again */
			
 
				+	unsigned long now = jiffies;
			
 
				+	unsigned long next_balance = now + 60*HZ;
			
 
				+	bool has_blocked_load = false;
			
 
				+	int update_next_balance = 0;
			
 
				+	int this_cpu = this_rq->cpu;
			
 
				+	int balance_cpu;
			
 
				+	int ret = false;
			
 
				+	struct rq *rq;
			
 
				+
			
 
				+	SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
			
 
				+
			
 
				+	/*
			
 
				+	 * We assume there will be no idle load after this update and clear
			
 
				+	 * the has_blocked flag. If a cpu enters idle in the mean time, it will
			
 
				+	 * set the has_blocked flag and trigger another update of idle load.
			
 
				+	 * Because a cpu that becomes idle, is added to idle_cpus_mask before
			
 
				+	 * setting the flag, we are sure to not clear the state and not
			
 
				+	 * check the load of an idle cpu.
			
 
				+	 */
			
 
				+	WRITE_ONCE(nohz.has_blocked, 0);
			
 
				+
			
 
				+	/*
			
 
				+	 * Ensures that if we miss the CPU, we must see the has_blocked
			
 
				+	 * store from nohz_balance_enter_idle().
			
 
				+	 */
			
 
				+	smp_mb();
			
 
				+
			
 
				+	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
			
 
				+		if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
			
 
				+			continue;
			
 
				+
			
 
				+		/*
			
 
				+		 * If this CPU gets work to do, stop the load balancing
			
 
				+		 * work being done for other CPUs. Next load
			
 
				+		 * balancing owner will pick it up.
			
 
				+		 */
			
 
				+		if (need_resched()) {
			
 
				+			has_blocked_load = true;
			
 
				+			goto abort;
			
 
				+		}
			
 
				+
			
 
				+		rq = cpu_rq(balance_cpu);
			
 
				+
			
 
				+		has_blocked_load |= update_nohz_stats(rq, true);
			
 
				+
			
 
				+		/*
			
 
				+		 * If time for next balance is due,
			
 
				+		 * do the balance.
			
 
				+		 */
			
 
				+		if (time_after_eq(jiffies, rq->next_balance)) {
			
 
				+			struct rq_flags rf;
			
 
				+
			
 
				+			rq_lock_irqsave(rq, &rf);
			
 
				+			update_rq_clock(rq);
			
 
				+			cpu_load_update_idle(rq);
			
 
				+			rq_unlock_irqrestore(rq, &rf);
			
 
				+
			
 
				+			if (flags & NOHZ_BALANCE_KICK)
			
 
				+				rebalance_domains(rq, CPU_IDLE);
			
 
				+		}
			
 
				+
			
 
				+		if (time_after(next_balance, rq->next_balance)) {
			
 
				+			next_balance = rq->next_balance;
			
 
				+			update_next_balance = 1;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/* Newly idle CPU doesn't need an update */
			
 
				+	if (idle != CPU_NEWLY_IDLE) {
			
 
				+		update_blocked_averages(this_cpu);
			
 
				+		has_blocked_load |= this_rq->has_blocked_load;
			
 
				+	}
			
 
				+
			
 
				+	if (flags & NOHZ_BALANCE_KICK)
			
 
				+		rebalance_domains(this_rq, CPU_IDLE);
			
 
				+
			
 
				+	WRITE_ONCE(nohz.next_blocked,
			
 
				+		now + msecs_to_jiffies(LOAD_AVG_PERIOD));
			
 
				+
			
 
				+	/* The full idle balance loop has been done */
			
 
				+	ret = true;
			
 
				+
			
 
				+abort:
			
 
				+	/* There is still blocked load, enable periodic update */
			
 
				+	if (has_blocked_load)
			
 
				+		WRITE_ONCE(nohz.has_blocked, 1);
			
 
				+
			
 
				+	/*
			
 
				+	 * next_balance will be updated only when there is a need.
			
 
				+	 * When the CPU is attached to null domain for ex, it will not be
			
 
				+	 * updated.
			
 
				+	 */
			
 
				+	if (likely(update_next_balance))
			
 
				+		nohz.next_balance = next_balance;
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
			
 
				+ * rebalancing for all the cpus for whom scheduler ticks are stopped.
			
 
				+ */
			
 
				+static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
			
 
				+{
			
 
				+	int this_cpu = this_rq->cpu;
			
 
				+	unsigned int flags;
			
 
				+
			
 
				+	if (!(atomic_read(nohz_flags(this_cpu)) & NOHZ_KICK_MASK))
			
 
				+		return false;
			
 
				+
			
 
				+	if (idle != CPU_IDLE) {
			
 
				+		atomic_andnot(NOHZ_KICK_MASK, nohz_flags(this_cpu));
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * barrier, pairs with nohz_balance_enter_idle(), ensures ...
			
 
				+	 */
			
 
				+	flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(this_cpu));
			
 
				+	if (!(flags & NOHZ_KICK_MASK))
			
 
				+		return false;
			
 
				+
			
 
				+	_nohz_idle_balance(this_rq, flags, idle);
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static void nohz_newidle_balance(struct rq *this_rq)
			
 
				+{
			
 
				+	int this_cpu = this_rq->cpu;
			
 
				+
			
 
				+	/*
			
 
				+	 * This CPU doesn't want to be disturbed by scheduler
			
 
				+	 * housekeeping
			
 
				+	 */
			
 
				+	if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED))
			
 
				+		return;
			
 
				+
			
 
				+	/* Will wake up very soon. No time for doing anything else */
			
 
				+	if (this_rq->avg_idle < sysctl_sched_migration_cost)
			
 
				+		return;
			
 
				+
			
 
				+	/* Don't need to update blocked load of idle CPUs */
			
 
				+	if (!READ_ONCE(nohz.has_blocked) ||
			
 
				+	    time_before(jiffies, READ_ONCE(nohz.next_blocked)))
			
 
				+		return;
			
 
				+
			
 
				+	raw_spin_unlock(&this_rq->lock);
			
 
				+	/*
			
 
				+	 * This CPU is going to be idle and blocked load of idle CPUs
			
 
				+	 * need to be updated. Run the ilb locally as it is a good
			
 
				+	 * candidate for ilb instead of waking up another idle CPU.
			
 
				+	 * Kick a normal ilb if we failed to do the update.
			
 
				+	 */
			
 
				+	if (!_nohz_idle_balance(this_rq, NOHZ_STATS_KICK, CPU_NEWLY_IDLE))
			
 
				+		kick_ilb(NOHZ_STATS_KICK);
			
 
				+	raw_spin_lock(&this_rq->lock);
			
 
				+}
			
 
				+
			
 
				+#else /* !CONFIG_NO_HZ_COMMON */
			
 
				+static inline void nohz_balancer_kick(struct rq *rq) { }
			
 
				+
			
 
				+static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
			
 
				+{
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+static inline void nohz_newidle_balance(struct rq *this_rq) { }
			
 
				+#endif /* CONFIG_NO_HZ_COMMON */
			
 
				+
			
 
				+/*
			
 
				+ * idle_balance is called by schedule() if this_cpu is about to become
			
 
				+ * idle. Attempts to pull tasks from other CPUs.
			
 
				+ */
			
 
				+static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
			
 
				+{
			
 
				+	unsigned long next_balance = jiffies + HZ;
			
 
				+	int this_cpu = this_rq->cpu;
			
 
				+	struct sched_domain *sd;
			
 
				+	int pulled_task = 0;
			
 
				+	u64 curr_cost = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * We must set idle_stamp _before_ calling idle_balance(), such that we
			
 
				+	 * measure the duration of idle_balance() as idle time.
			
 
				+	 */
			
 
				+	this_rq->idle_stamp = rq_clock(this_rq);
			
 
				+
			
 
				+	/*
			
 
				+	 * Do not pull tasks towards !active CPUs...
			
 
				+	 */
			
 
				+	if (!cpu_active(this_cpu))
			
 
				+		return 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * This is OK, because current is on_cpu, which avoids it being picked
			
 
				+	 * for load-balance and preemption/IRQs are still disabled avoiding
			
 
				+	 * further scheduler activity on it and we're being very careful to
			
 
				+	 * re-start the picking loop.
			
 
				+	 */
			
 
				+	rq_unpin_lock(this_rq, rf);
			
 
				+
			
 
				+	if (this_rq->avg_idle < sysctl_sched_migration_cost ||
			
 
				+	    !this_rq->rd->overload) {
			
 
				+
			
 
				+		rcu_read_lock();
			
 
				+		sd = rcu_dereference_check_sched_domain(this_rq->sd);
			
 
				+		if (sd)
			
 
				+			update_next_balance(sd, &next_balance);
			
 
				+		rcu_read_unlock();
			
 
				+
			
 
				+		nohz_newidle_balance(this_rq);
			
 
				+
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	raw_spin_unlock(&this_rq->lock);
			
 
				+
			
 
				+	update_blocked_averages(this_cpu);
			
 
				+	rcu_read_lock();
			
 
				+	for_each_domain(this_cpu, sd) {
			
 
				+		int continue_balancing = 1;
			
 
				+		u64 t0, domain_cost;
			
 
				+
			
 
				+		if (!(sd->flags & SD_LOAD_BALANCE))
			
 
				+			continue;
			
 
				+
			
 
				+		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
			
 
				+			update_next_balance(sd, &next_balance);
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		if (sd->flags & SD_BALANCE_NEWIDLE) {
			
 
				+			t0 = sched_clock_cpu(this_cpu);
			
 
				+
			
 
				+			pulled_task = load_balance(this_cpu, this_rq,
			
 
				+						   sd, CPU_NEWLY_IDLE,
			
 
				+						   &continue_balancing);
			
 
				+
			
 
				+			domain_cost = sched_clock_cpu(this_cpu) - t0;
			
 
				+			if (domain_cost > sd->max_newidle_lb_cost)
			
 
				+				sd->max_newidle_lb_cost = domain_cost;
			
 
				+
			
 
				+			curr_cost += domain_cost;
			
 
				+		}
			
 
				+
			
 
				+		update_next_balance(sd, &next_balance);
			
 
				+
			
 
				+		/*
			
 
				+		 * Stop searching for tasks to pull if there are
			
 
				+		 * now runnable tasks on this rq.
			
 
				+		 */
			
 
				+		if (pulled_task || this_rq->nr_running > 0)
			
 
				+			break;
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+
			
 
				+	raw_spin_lock(&this_rq->lock);
			
 
				+
			
 
				+	if (curr_cost > this_rq->max_idle_balance_cost)
			
 
				+		this_rq->max_idle_balance_cost = curr_cost;
			
 
				+
			
 
				+	/*
			
 
				+	 * While browsing the domains, we released the rq lock, a task could
			
 
				+	 * have been enqueued in the meantime. Since we're not going idle,
			
 
				+	 * pretend we pulled a task.
			
 
				+	 */
			
 
				+	if (this_rq->cfs.h_nr_running && !pulled_task)
			
 
				+		pulled_task = 1;
			
 
				+
			
 
				+out:
			
 
				+	/* Move the next balance forward */
			
 
				+	if (time_after(this_rq->next_balance, next_balance))
			
 
				+		this_rq->next_balance = next_balance;
			
 
				+
			
 
				+	/* Is there a task of a high priority class? */
			
 
				+	if (this_rq->nr_running != this_rq->cfs.h_nr_running)
			
 
				+		pulled_task = -1;
			
 
				+
			
 
				+	if (pulled_task)
			
 
				+		this_rq->idle_stamp = 0;
			
 
				+
			
 
				+	rq_repin_lock(this_rq, rf);
			
 
				+
			
 
				+	return pulled_task;
			
 
				 }
			
 
				-#else
			
 
				-static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
			
 
				-#endif
			
 
				 
			
 
				 /*
			
 
				  * run_rebalance_domains is triggered when needed from the scheduler tick.
			
@@ -9394,14 +9883,18 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
 
				 						CPU_IDLE : CPU_NOT_IDLE;
			
 
				 
			
 
				 	/*
			
 
				-	 * If this cpu has a pending nohz_balance_kick, then do the
			
 
				-	 * balancing on behalf of the other idle cpus whose ticks are
			
 
				+	 * If this CPU has a pending nohz_balance_kick, then do the
			
 
				+	 * balancing on behalf of the other idle CPUs whose ticks are
			
 
				 	 * stopped. Do nohz_idle_balance *before* rebalance_domains to
			
 
				-	 * give the idle cpus a chance to load balance. Else we may
			
 
				+	 * give the idle CPUs a chance to load balance. Else we may
			
 
				 	 * load balance only within the local sched_domain hierarchy
			
 
				 	 * and abort nohz_idle_balance altogether if we pull some load.
			
 
				 	 */
			
 
				-	nohz_idle_balance(this_rq, idle);
			
 
				+	if (nohz_idle_balance(this_rq, idle))
			
 
				+		return;
			
 
				+
			
 
				+	/* normal load balance */
			
 
				+	update_blocked_averages(this_rq->cpu);
			
 
				 	rebalance_domains(this_rq, idle);
			
 
				 }
			
 
				 
			
@@ -9416,10 +9909,8 @@ void trigger_load_balance(struct rq *rq)
 
				 
			
 
				 	if (time_after_eq(jiffies, rq->next_balance))
			
 
				 		raise_softirq(SCHED_SOFTIRQ);
			
 
				-#ifdef CONFIG_NO_HZ_COMMON
			
 
				-	if (nohz_kick_needed(rq))
			
 
				-		nohz_balancer_kick();
			
 
				-#endif
			
 
				+
			
 
				+	nohz_balancer_kick(rq);
			
 
				 }
			
 
				 
			
 
				 static void rq_online_fair(struct rq *rq)
			
@@ -9440,7 +9931,12 @@ static void rq_offline_fair(struct rq *rq)
 
				 #endif /* CONFIG_SMP */
			
 
				 
			
 
				 /*
			
 
				- * scheduler tick hitting a task of our scheduling class:
			
 
				+ * scheduler tick hitting a task of our scheduling class.
			
 
				+ *
			
 
				+ * NOTE: This function can be called remotely by the tick offload that
			
 
				+ * goes along full dynticks. Therefore no local assumption can be made
			
 
				+ * and everything must be accessed through the @rq and @curr passed in
			
 
				+ * parameters.
			
 
				  */
			
 
				 static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
			
 
				 {
			
@@ -9591,7 +10087,7 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
 
				 
			
 
				 	/* Synchronize entity with its cfs_rq */
			
 
				 	update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
			
 
				-	attach_entity_load_avg(cfs_rq, se);
			
 
				+	attach_entity_load_avg(cfs_rq, se, 0);
			
 
				 	update_tg_load_avg(cfs_rq, false);
			
 
				 	propagate_entity_cfs_rq(se);
			
 
				 }
			
@@ -9993,6 +10489,7 @@ __init void init_sched_fair_class(void)
 
				 
			
 
				 #ifdef CONFIG_NO_HZ_COMMON
			
 
				 	nohz.next_balance = jiffies;
			
 
				+	nohz.next_blocked = jiffies;
			
 
				 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
			
 
				 #endif
			
 
				 #endif /* SMP */
			
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -85,3 +85,8 @@ SCHED_FEAT(ATTACH_AGE_LOAD, true)
 
				 SCHED_FEAT(WA_IDLE, true)
			
 
				 SCHED_FEAT(WA_WEIGHT, true)
			
 
				 SCHED_FEAT(WA_BIAS, true)
			
 
				+
			
 
				+/*
			
 
				+ * UtilEstimation. Use estimated CPU utilization.
			
 
				+ */
			
 
				+SCHED_FEAT(UTIL_EST, true)
			
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -1,23 +1,14 @@
 
				 /*
			
 
				- * Generic entry point for the idle threads
			
 
				+ * Generic entry points for the idle threads and
			
 
				+ * implementation of the idle task scheduling class.
			
 
				+ *
			
 
				+ * (NOTE: these are not related to SCHED_IDLE batch scheduled
			
 
				+ *        tasks which are handled in sched/fair.c )
			
 
				  */
			
 
				-#include <linux/sched.h>
			
 
				-#include <linux/sched/idle.h>
			
 
				-#include <linux/cpu.h>
			
 
				-#include <linux/cpuidle.h>
			
 
				-#include <linux/cpuhotplug.h>
			
 
				-#include <linux/tick.h>
			
 
				-#include <linux/mm.h>
			
 
				-#include <linux/stackprotector.h>
			
 
				-#include <linux/suspend.h>
			
 
				-#include <linux/livepatch.h>
			
 
				-
			
 
				-#include <asm/tlb.h>
			
 
				+#include "sched.h"
			
 
				 
			
 
				 #include <trace/events/power.h>
			
 
				 
			
 
				-#include "sched.h"
			
 
				-
			
 
				 /* Linker adds these: start and end of __cpuidle functions */
			
 
				 extern char __cpuidle_text_start[], __cpuidle_text_end[];
			
 
				 
			
@@ -46,6 +37,7 @@ void cpu_idle_poll_ctrl(bool enable)
 
				 static int __init cpu_idle_poll_setup(char *__unused)
			
 
				 {
			
 
				 	cpu_idle_force_poll = 1;
			
 
				+
			
 
				 	return 1;
			
 
				 }
			
 
				 __setup("nohlt", cpu_idle_poll_setup);
			
@@ -53,6 +45,7 @@ __setup("nohlt", cpu_idle_poll_setup);
 
				 static int __init cpu_idle_nopoll_setup(char *__unused)
			
 
				 {
			
 
				 	cpu_idle_force_poll = 0;
			
 
				+
			
 
				 	return 1;
			
 
				 }
			
 
				 __setup("hlt", cpu_idle_nopoll_setup);
			
@@ -64,12 +57,14 @@ static noinline int __cpuidle cpu_idle_poll(void)
 
				 	trace_cpu_idle_rcuidle(0, smp_processor_id());
			
 
				 	local_irq_enable();
			
 
				 	stop_critical_timings();
			
 
				+
			
 
				 	while (!tif_need_resched() &&
			
 
				 		(cpu_idle_force_poll || tick_check_broadcast_expired()))
			
 
				 		cpu_relax();
			
 
				 	start_critical_timings();
			
 
				 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
			
 
				 	rcu_idle_exit();
			
 
				+
			
 
				 	return 1;
			
 
				 }
			
 
				 
			
@@ -332,8 +327,8 @@ void cpu_startup_entry(enum cpuhp_state state)
 
				 {
			
 
				 	/*
			
 
				 	 * This #ifdef needs to die, but it's too late in the cycle to
			
 
				-	 * make this generic (arm and sh have never invoked the canary
			
 
				-	 * init for the non boot cpus!). Will be fixed in 3.11
			
 
				+	 * make this generic (ARM and SH have never invoked the canary
			
 
				+	 * init for the non boot CPUs!). Will be fixed in 3.11
			
 
				 	 */
			
 
				 #ifdef CONFIG_X86
			
 
				 	/*
			
@@ -350,3 +345,116 @@ void cpu_startup_entry(enum cpuhp_state state)
 
				 	while (1)
			
 
				 		do_idle();
			
 
				 }
			
 
				+
			
 
				+/*
			
 
				+ * idle-task scheduling class.
			
 
				+ */
			
 
				+
			
 
				+#ifdef CONFIG_SMP
			
 
				+static int
			
 
				+select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
			
 
				+{
			
 
				+	return task_cpu(p); /* IDLE tasks are never migrated */
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Idle tasks are unconditionally rescheduled:
			
 
				+ */
			
 
				+static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
			
 
				+{
			
 
				+	resched_curr(rq);
			
 
				+}
			
 
				+
			
 
				+static struct task_struct *
			
 
				+pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
			
 
				+{
			
 
				+	put_prev_task(rq, prev);
			
 
				+	update_idle_core(rq);
			
 
				+	schedstat_inc(rq->sched_goidle);
			
 
				+
			
 
				+	return rq->idle;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * It is not legal to sleep in the idle task - print a warning
			
 
				+ * message if some code attempts to do it:
			
 
				+ */
			
 
				+static void
			
 
				+dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
			
 
				+{
			
 
				+	raw_spin_unlock_irq(&rq->lock);
			
 
				+	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
			
 
				+	dump_stack();
			
 
				+	raw_spin_lock_irq(&rq->lock);
			
 
				+}
			
 
				+
			
 
				+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * scheduler tick hitting a task of our scheduling class.
			
 
				+ *
			
 
				+ * NOTE: This function can be called remotely by the tick offload that
			
 
				+ * goes along full dynticks. Therefore no local assumption can be made
			
 
				+ * and everything must be accessed through the @rq and @curr passed in
			
 
				+ * parameters.
			
 
				+ */
			
 
				+static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+static void set_curr_task_idle(struct rq *rq)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+static void switched_to_idle(struct rq *rq, struct task_struct *p)
			
 
				+{
			
 
				+	BUG();
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
			
 
				+{
			
 
				+	BUG();
			
 
				+}
			
 
				+
			
 
				+static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
			
 
				+{
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void update_curr_idle(struct rq *rq)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Simple, special scheduling class for the per-CPU idle tasks:
			
 
				+ */
			
 
				+const struct sched_class idle_sched_class = {
			
 
				+	/* .next is NULL */
			
 
				+	/* no enqueue/yield_task for idle tasks */
			
 
				+
			
 
				+	/* dequeue is not valid, we print a debug message there: */
			
 
				+	.dequeue_task		= dequeue_task_idle,
			
 
				+
			
 
				+	.check_preempt_curr	= check_preempt_curr_idle,
			
 
				+
			
 
				+	.pick_next_task		= pick_next_task_idle,
			
 
				+	.put_prev_task		= put_prev_task_idle,
			
 
				+
			
 
				+#ifdef CONFIG_SMP
			
 
				+	.select_task_rq		= select_task_rq_idle,
			
 
				+	.set_cpus_allowed	= set_cpus_allowed_common,
			
 
				+#endif
			
 
				+
			
 
				+	.set_curr_task          = set_curr_task_idle,
			
 
				+	.task_tick		= task_tick_idle,
			
 
				+
			
 
				+	.get_rr_interval	= get_rr_interval_idle,
			
 
				+
			
 
				+	.prio_changed		= prio_changed_idle,
			
 
				+	.switched_to		= switched_to_idle,
			
 
				+	.update_curr		= update_curr_idle,
			
 
				+};
			
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -1,110 +0,0 @@
 
				-// SPDX-License-Identifier: GPL-2.0
			
 
				-#include "sched.h"
			
 
				-
			
 
				-/*
			
 
				- * idle-task scheduling class.
			
 
				- *
			
 
				- * (NOTE: these are not related to SCHED_IDLE tasks which are
			
 
				- *  handled in sched/fair.c)
			
 
				- */
			
 
				-
			
 
				-#ifdef CONFIG_SMP
			
 
				-static int
			
 
				-select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
			
 
				-{
			
 
				-	return task_cpu(p); /* IDLE tasks as never migrated */
			
 
				-}
			
 
				-#endif /* CONFIG_SMP */
			
 
				-
			
 
				-/*
			
 
				- * Idle tasks are unconditionally rescheduled:
			
 
				- */
			
 
				-static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
			
 
				-{
			
 
				-	resched_curr(rq);
			
 
				-}
			
 
				-
			
 
				-static struct task_struct *
			
 
				-pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
			
 
				-{
			
 
				-	put_prev_task(rq, prev);
			
 
				-	update_idle_core(rq);
			
 
				-	schedstat_inc(rq->sched_goidle);
			
 
				-	return rq->idle;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * It is not legal to sleep in the idle task - print a warning
			
 
				- * message if some code attempts to do it:
			
 
				- */
			
 
				-static void
			
 
				-dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
			
 
				-{
			
 
				-	raw_spin_unlock_irq(&rq->lock);
			
 
				-	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
			
 
				-	dump_stack();
			
 
				-	raw_spin_lock_irq(&rq->lock);
			
 
				-}
			
 
				-
			
 
				-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
			
 
				-{
			
 
				-	rq_last_tick_reset(rq);
			
 
				-}
			
 
				-
			
 
				-static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-static void set_curr_task_idle(struct rq *rq)
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-static void switched_to_idle(struct rq *rq, struct task_struct *p)
			
 
				-{
			
 
				-	BUG();
			
 
				-}
			
 
				-
			
 
				-static void
			
 
				-prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
			
 
				-{
			
 
				-	BUG();
			
 
				-}
			
 
				-
			
 
				-static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
			
 
				-{
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void update_curr_idle(struct rq *rq)
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Simple, special scheduling class for the per-CPU idle tasks:
			
 
				- */
			
 
				-const struct sched_class idle_sched_class = {
			
 
				-	/* .next is NULL */
			
 
				-	/* no enqueue/yield_task for idle tasks */
			
 
				-
			
 
				-	/* dequeue is not valid, we print a debug message there: */
			
 
				-	.dequeue_task		= dequeue_task_idle,
			
 
				-
			
 
				-	.check_preempt_curr	= check_preempt_curr_idle,
			
 
				-
			
 
				-	.pick_next_task		= pick_next_task_idle,
			
 
				-	.put_prev_task		= put_prev_task_idle,
			
 
				-
			
 
				-#ifdef CONFIG_SMP
			
 
				-	.select_task_rq		= select_task_rq_idle,
			
 
				-	.set_cpus_allowed	= set_cpus_allowed_common,
			
 
				-#endif
			
 
				-
			
 
				-	.set_curr_task          = set_curr_task_idle,
			
 
				-	.task_tick		= task_tick_idle,
			
 
				-
			
 
				-	.get_rr_interval	= get_rr_interval_idle,
			
 
				-
			
 
				-	.prio_changed		= prio_changed_idle,
			
 
				-	.switched_to		= switched_to_idle,
			
 
				-	.update_curr		= update_curr_idle,
			
 
				-};
			
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -3,15 +3,10 @@
 
				  *  any CPU: unbound workqueues, timers, kthreads and any offloadable work.
			
 
				  *
			
 
				  * Copyright (C) 2017 Red Hat, Inc., Frederic Weisbecker
			
 
				+ * Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
			
 
				  *
			
 
				  */
			
 
				-
			
 
				-#include <linux/sched/isolation.h>
			
 
				-#include <linux/tick.h>
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/kernel.h>
			
 
				-#include <linux/static_key.h>
			
 
				-#include <linux/ctype.h>
			
 
				+#include "sched.h"
			
 
				 
			
 
				 DEFINE_STATIC_KEY_FALSE(housekeeping_overriden);
			
 
				 EXPORT_SYMBOL_GPL(housekeeping_overriden);
			
@@ -60,6 +55,9 @@ void __init housekeeping_init(void)
 
				 
			
 
				 	static_branch_enable(&housekeeping_overriden);
			
 
				 
			
 
				+	if (housekeeping_flags & HK_FLAG_TICK)
			
 
				+		sched_tick_offload_init();
			
 
				+
			
 
				 	/* We need at least one CPU to handle housekeeping work */
			
 
				 	WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
			
 
				 }
			
@@ -119,7 +117,7 @@ static int __init housekeeping_nohz_full_setup(char *str)
 
				 {
			
 
				 	unsigned int flags;
			
 
				 
			
 
				-	flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;
			
 
				+	flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;
			
 
				 
			
 
				 	return housekeeping_setup(str, flags);
			
 
				 }
			
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -6,10 +6,6 @@
 
				  * figure. Its a silly number but people think its important. We go through
			
 
				  * great pains to make it work on big machines and tickless kernels.
			
 
				  */
			
 
				-
			
 
				-#include <linux/export.h>
			
 
				-#include <linux/sched/loadavg.h>
			
 
				-
			
 
				 #include "sched.h"
			
 
				 
			
 
				 /*
			
@@ -32,29 +28,29 @@
 
				  * Due to a number of reasons the above turns in the mess below:
			
 
				  *
			
 
				  *  - for_each_possible_cpu() is prohibitively expensive on machines with
			
 
				- *    serious number of cpus, therefore we need to take a distributed approach
			
 
				+ *    serious number of CPUs, therefore we need to take a distributed approach
			
 
				  *    to calculating nr_active.
			
 
				  *
			
 
				  *        \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
			
 
				  *                      = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
			
 
				  *
			
 
				  *    So assuming nr_active := 0 when we start out -- true per definition, we
			
 
				- *    can simply take per-cpu deltas and fold those into a global accumulate
			
 
				+ *    can simply take per-CPU deltas and fold those into a global accumulate
			
 
				  *    to obtain the same result. See calc_load_fold_active().
			
 
				  *
			
 
				- *    Furthermore, in order to avoid synchronizing all per-cpu delta folding
			
 
				+ *    Furthermore, in order to avoid synchronizing all per-CPU delta folding
			
 
				  *    across the machine, we assume 10 ticks is sufficient time for every
			
 
				- *    cpu to have completed this task.
			
 
				+ *    CPU to have completed this task.
			
 
				  *
			
 
				  *    This places an upper-bound on the IRQ-off latency of the machine. Then
			
 
				  *    again, being late doesn't loose the delta, just wrecks the sample.
			
 
				  *
			
 
				- *  - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because
			
 
				- *    this would add another cross-cpu cacheline miss and atomic operation
			
 
				- *    to the wakeup path. Instead we increment on whatever cpu the task ran
			
 
				- *    when it went into uninterruptible state and decrement on whatever cpu
			
 
				+ *  - cpu_rq()->nr_uninterruptible isn't accurately tracked per-CPU because
			
 
				+ *    this would add another cross-CPU cacheline miss and atomic operation
			
 
				+ *    to the wakeup path. Instead we increment on whatever CPU the task ran
			
 
				+ *    when it went into uninterruptible state and decrement on whatever CPU
			
 
				  *    did the wakeup. This means that only the sum of nr_uninterruptible over
			
 
				- *    all cpus yields the correct result.
			
 
				+ *    all CPUs yields the correct result.
			
 
				  *
			
 
				  *  This covers the NO_HZ=n code, for extra head-aches, see the comment below.
			
 
				  */
			
@@ -115,11 +111,11 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
 
				  * Handle NO_HZ for the global load-average.
			
 
				  *
			
 
				  * Since the above described distributed algorithm to compute the global
			
 
				- * load-average relies on per-cpu sampling from the tick, it is affected by
			
 
				+ * load-average relies on per-CPU sampling from the tick, it is affected by
			
 
				  * NO_HZ.
			
 
				  *
			
 
				  * The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon
			
 
				- * entering NO_HZ state such that we can include this as an 'extra' cpu delta
			
 
				+ * entering NO_HZ state such that we can include this as an 'extra' CPU delta
			
 
				  * when we read the global state.
			
 
				  *
			
 
				  * Obviously reality has to ruin such a delightfully simple scheme:
			
@@ -146,9 +142,9 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
 
				  *    busy state.
			
 
				  *
			
 
				  *    This is solved by pushing the window forward, and thus skipping the
			
 
				- *    sample, for this cpu (effectively using the NO_HZ-delta for this cpu which
			
 
				+ *    sample, for this CPU (effectively using the NO_HZ-delta for this CPU which
			
 
				  *    was in effect at the time the window opened). This also solves the issue
			
 
				- *    of having to deal with a cpu having been in NO_HZ for multiple LOAD_FREQ
			
 
				+ *    of having to deal with a CPU having been in NO_HZ for multiple LOAD_FREQ
			
 
				  *    intervals.
			
 
				  *
			
 
				  * When making the ILB scale, we should try to pull this in as well.
			
@@ -299,7 +295,7 @@ calc_load_n(unsigned long load, unsigned long exp,
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * NO_HZ can leave us missing all per-cpu ticks calling
			
 
				+ * NO_HZ can leave us missing all per-CPU ticks calling
			
 
				  * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
			
 
				  * calc_load_nohz per calc_load_nohz_start(), all we need to do is fold
			
 
				  * in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary.
			
@@ -363,7 +359,7 @@ void calc_global_load(unsigned long ticks)
 
				 		return;
			
 
				 
			
 
				 	/*
			
 
				-	 * Fold the 'old' NO_HZ-delta to include all NO_HZ cpus.
			
 
				+	 * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
			
 
				 	 */
			
 
				 	delta = calc_load_nohz_fold();
			
 
				 	if (delta)
			
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -13,32 +13,25 @@
 
				  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				  * GNU General Public License for more details.
			
 
				  */
			
 
				-
			
 
				-#include <linux/syscalls.h>
			
 
				-#include <linux/membarrier.h>
			
 
				-#include <linux/tick.h>
			
 
				-#include <linux/cpumask.h>
			
 
				-#include <linux/atomic.h>
			
 
				-
			
 
				-#include "sched.h"	/* for cpu_rq(). */
			
 
				+#include "sched.h"
			
 
				 
			
 
				 /*
			
 
				  * Bitmask made from a "or" of all commands within enum membarrier_cmd,
			
 
				  * except MEMBARRIER_CMD_QUERY.
			
 
				  */
			
 
				 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
			
 
				-#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK	\
			
 
				-	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \
			
 
				+#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK			\
			
 
				+	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE			\
			
 
				 	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
			
 
				 #else
			
 
				 #define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK	0
			
 
				 #endif
			
 
				 
			
 
				-#define MEMBARRIER_CMD_BITMASK	\
			
 
				-	(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
			
 
				-	| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
			
 
				-	| MEMBARRIER_CMD_PRIVATE_EXPEDITED	\
			
 
				-	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED	\
			
 
				+#define MEMBARRIER_CMD_BITMASK						\
			
 
				+	(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED	\
			
 
				+	| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED			\
			
 
				+	| MEMBARRIER_CMD_PRIVATE_EXPEDITED				\
			
 
				+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED			\
			
 
				 	| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
			
 
				 
			
 
				 static void ipi_mb(void *info)
			
@@ -85,6 +78,7 @@ static int membarrier_global_expedited(void)
 
				 		 */
			
 
				 		if (cpu == raw_smp_processor_id())
			
 
				 			continue;
			
 
				+
			
 
				 		rcu_read_lock();
			
 
				 		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
			
 
				 		if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
			
@@ -188,6 +182,7 @@ static int membarrier_private_expedited(int flags)
 
				 	 * rq->curr modification in scheduler.
			
 
				 	 */
			
 
				 	smp_mb();	/* exit from system call is not a mb */
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -219,6 +214,7 @@ static int membarrier_register_global_expedited(void)
 
				 	}
			
 
				 	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
			
 
				 		  &mm->membarrier_state);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -253,6 +249,7 @@ static int membarrier_register_private_expedited(int flags)
 
				 		synchronize_sched();
			
 
				 	}
			
 
				 	atomic_or(state, &mm->membarrier_state);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -3,12 +3,8 @@
 
				  * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
			
 
				  * policies)
			
 
				  */
			
 
				-
			
 
				 #include "sched.h"
			
 
				 
			
 
				-#include <linux/slab.h>
			
 
				-#include <linux/irq_work.h>
			
 
				-
			
 
				 int sched_rr_timeslice = RR_TIMESLICE;
			
 
				 int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
			
 
				 
			
@@ -359,7 +355,7 @@ static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
 
				 static void push_rt_tasks(struct rq *);
			
 
				 static void pull_rt_task(struct rq *);
			
 
				 
			
 
				-static inline void queue_push_tasks(struct rq *rq)
			
 
				+static inline void rt_queue_push_tasks(struct rq *rq)
			
 
				 {
			
 
				 	if (!has_pushable_tasks(rq))
			
 
				 		return;
			
@@ -367,7 +363,7 @@ static inline void queue_push_tasks(struct rq *rq)
 
				 	queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
			
 
				 }
			
 
				 
			
 
				-static inline void queue_pull_task(struct rq *rq)
			
 
				+static inline void rt_queue_pull_task(struct rq *rq)
			
 
				 {
			
 
				 	queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
			
 
				 }
			
@@ -425,7 +421,7 @@ static inline void pull_rt_task(struct rq *this_rq)
 
				 {
			
 
				 }
			
 
				 
			
 
				-static inline void queue_push_tasks(struct rq *rq)
			
 
				+static inline void rt_queue_push_tasks(struct rq *rq)
			
 
				 {
			
 
				 }
			
 
				 #endif /* CONFIG_SMP */
			
@@ -961,9 +957,6 @@ static void update_curr_rt(struct rq *rq)
 
				 	if (unlikely((s64)delta_exec <= 0))
			
 
				 		return;
			
 
				 
			
 
				-	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
			
 
				-	cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
			
 
				-
			
 
				 	schedstat_set(curr->se.statistics.exec_max,
			
 
				 		      max(curr->se.statistics.exec_max, delta_exec));
			
 
				 
			
@@ -1005,6 +998,9 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
 
				 
			
 
				 	sub_nr_running(rq, rt_rq->rt_nr_running);
			
 
				 	rt_rq->rt_queued = 0;
			
 
				+
			
 
				+	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
			
 
				+	cpufreq_update_util(rq, 0);
			
 
				 }
			
 
				 
			
 
				 static void
			
@@ -1021,6 +1017,9 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
 
				 
			
 
				 	add_nr_running(rq, rt_rq->rt_nr_running);
			
 
				 	rt_rq->rt_queued = 1;
			
 
				+
			
 
				+	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
			
 
				+	cpufreq_update_util(rq, 0);
			
 
				 }
			
 
				 
			
 
				 #if defined CONFIG_SMP
			
@@ -1453,9 +1452,9 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 
				 		return;
			
 
				 
			
 
				 	/*
			
 
				-	 * There appears to be other cpus that can accept
			
 
				-	 * current and none to run 'p', so lets reschedule
			
 
				-	 * to try and push current away:
			
 
				+	 * There appear to be other CPUs that can accept
			
 
				+	 * the current task but none can run 'p', so let's reschedule
			
 
				+	 * to try and push the current task away:
			
 
				 	 */
			
 
				 	requeue_task_rt(rq, p, 1);
			
 
				 	resched_curr(rq);
			
@@ -1569,7 +1568,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
				 	/* The running task is never eligible for pushing */
			
 
				 	dequeue_pushable_task(rq, p);
			
 
				 
			
 
				-	queue_push_tasks(rq);
			
 
				+	rt_queue_push_tasks(rq);
			
 
				 
			
 
				 	return p;
			
 
				 }
			
@@ -1596,12 +1595,13 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 
				 	if (!task_running(rq, p) &&
			
 
				 	    cpumask_test_cpu(cpu, &p->cpus_allowed))
			
 
				 		return 1;
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				  * Return the highest pushable rq's task, which is suitable to be executed
			
 
				- * on the cpu, NULL otherwise
			
 
				+ * on the CPU, NULL otherwise
			
 
				  */
			
 
				 static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
			
 
				 {
			
@@ -1639,11 +1639,11 @@ static int find_lowest_rq(struct task_struct *task)
 
				 		return -1; /* No targets found */
			
 
				 
			
 
				 	/*
			
 
				-	 * At this point we have built a mask of cpus representing the
			
 
				+	 * At this point we have built a mask of CPUs representing the
			
 
				 	 * lowest priority tasks in the system.  Now we want to elect
			
 
				 	 * the best one based on our affinity and topology.
			
 
				 	 *
			
 
				-	 * We prioritize the last cpu that the task executed on since
			
 
				+	 * We prioritize the last CPU that the task executed on since
			
 
				 	 * it is most likely cache-hot in that location.
			
 
				 	 */
			
 
				 	if (cpumask_test_cpu(cpu, lowest_mask))
			
@@ -1651,7 +1651,7 @@ static int find_lowest_rq(struct task_struct *task)
 
				 
			
 
				 	/*
			
 
				 	 * Otherwise, we consult the sched_domains span maps to figure
			
 
				-	 * out which cpu is logically closest to our hot cache data.
			
 
				+	 * out which CPU is logically closest to our hot cache data.
			
 
				 	 */
			
 
				 	if (!cpumask_test_cpu(this_cpu, lowest_mask))
			
 
				 		this_cpu = -1; /* Skip this_cpu opt if not among lowest */
			
@@ -1692,6 +1692,7 @@ static int find_lowest_rq(struct task_struct *task)
 
				 	cpu = cpumask_any(lowest_mask);
			
 
				 	if (cpu < nr_cpu_ids)
			
 
				 		return cpu;
			
 
				+
			
 
				 	return -1;
			
 
				 }
			
 
				 
			
@@ -1827,7 +1828,7 @@ retry:
 
				 			 * The task hasn't migrated, and is still the next
			
 
				 			 * eligible task, but we failed to find a run-queue
			
 
				 			 * to push it to.  Do not retry in this case, since
			
 
				-			 * other cpus will pull from us when ready.
			
 
				+			 * other CPUs will pull from us when ready.
			
 
				 			 */
			
 
				 			goto out;
			
 
				 		}
			
@@ -1919,7 +1920,7 @@ static int rto_next_cpu(struct root_domain *rd)
 
				 	 * rt_next_cpu() will simply return the first CPU found in
			
 
				 	 * the rto_mask.
			
 
				 	 *
			
 
				-	 * If rto_next_cpu() is called with rto_cpu is a valid cpu, it
			
 
				+	 * If rto_next_cpu() is called with rto_cpu set to a valid CPU, it
			
 
				 	 * will return the next CPU found in the rto_mask.
			
 
				 	 *
			
 
				 	 * If there are no more CPUs left in the rto_mask, then a check is made
			
@@ -1980,7 +1981,7 @@ static void tell_cpu_to_push(struct rq *rq)
 
				 	raw_spin_lock(&rq->rd->rto_lock);
			
 
				 
			
 
				 	/*
			
 
				-	 * The rto_cpu is updated under the lock, if it has a valid cpu
			
 
				+	 * The rto_cpu is updated under the lock, if it has a valid CPU
			
 
				 	 * then the IPI is still running and will continue due to the
			
 
				 	 * update to loop_next, and nothing needs to be done here.
			
 
				 	 * Otherwise it is finishing up and an ipi needs to be sent.
			
@@ -2105,7 +2106,7 @@ static void pull_rt_task(struct rq *this_rq)
 
				 
			
 
				 			/*
			
 
				 			 * There's a chance that p is higher in priority
			
 
				-			 * than what's currently running on its cpu.
			
 
				+			 * than what's currently running on its CPU.
			
 
				 			 * This is just that p is wakeing up and hasn't
			
 
				 			 * had a chance to schedule. We only pull
			
 
				 			 * p if it is lower in priority than the
			
@@ -2187,7 +2188,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 
				 	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
			
 
				 		return;
			
 
				 
			
 
				-	queue_pull_task(rq);
			
 
				+	rt_queue_pull_task(rq);
			
 
				 }
			
 
				 
			
 
				 void __init init_sched_rt_class(void)
			
@@ -2218,7 +2219,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 
				 	if (task_on_rq_queued(p) && rq->curr != p) {
			
 
				 #ifdef CONFIG_SMP
			
 
				 		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
			
 
				-			queue_push_tasks(rq);
			
 
				+			rt_queue_push_tasks(rq);
			
 
				 #endif /* CONFIG_SMP */
			
 
				 		if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
			
 
				 			resched_curr(rq);
			
@@ -2242,7 +2243,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
 
				 		 * may need to pull tasks to this runqueue.
			
 
				 		 */
			
 
				 		if (oldprio < p->prio)
			
 
				-			queue_pull_task(rq);
			
 
				+			rt_queue_pull_task(rq);
			
 
				 
			
 
				 		/*
			
 
				 		 * If there's a higher priority task waiting to run
			
@@ -2292,6 +2293,14 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 
				 static inline void watchdog(struct rq *rq, struct task_struct *p) { }
			
 
				 #endif
			
 
				 
			
 
				+/*
			
 
				+ * scheduler tick hitting a task of our scheduling class.
			
 
				+ *
			
 
				+ * NOTE: This function can be called remotely by the tick offload that
			
 
				+ * goes along with full dynticks. Therefore no local assumption can be made
			
 
				+ * and everything must be accessed through the @rq and @curr passed in
			
 
				+ * parameters.
			
 
				+ */
			
 
				 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
			
 
				 {
			
 
				 	struct sched_rt_entity *rt_se = &p->rt;
			
@@ -2685,6 +2694,7 @@ int sched_rr_handler(struct ctl_table *table, int write,
 
				 			msecs_to_jiffies(sysctl_sched_rr_timeslice);
			
 
				 	}
			
 
				 	mutex_unlock(&mutex);
			
 
				+
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,39 +1,73 @@
 
				 /* SPDX-License-Identifier: GPL-2.0 */
			
 
				-
			
 
				+/*
			
 
				+ * Scheduler internal types and methods:
			
 
				+ */
			
 
				 #include <linux/sched.h>
			
 
				+
			
 
				 #include <linux/sched/autogroup.h>
			
 
				-#include <linux/sched/sysctl.h>
			
 
				-#include <linux/sched/topology.h>
			
 
				-#include <linux/sched/rt.h>
			
 
				-#include <linux/sched/deadline.h>
			
 
				 #include <linux/sched/clock.h>
			
 
				-#include <linux/sched/wake_q.h>
			
 
				-#include <linux/sched/signal.h>
			
 
				-#include <linux/sched/numa_balancing.h>
			
 
				-#include <linux/sched/mm.h>
			
 
				+#include <linux/sched/coredump.h>
			
 
				 #include <linux/sched/cpufreq.h>
			
 
				-#include <linux/sched/stat.h>
			
 
				-#include <linux/sched/nohz.h>
			
 
				+#include <linux/sched/cputime.h>
			
 
				+#include <linux/sched/deadline.h>
			
 
				 #include <linux/sched/debug.h>
			
 
				 #include <linux/sched/hotplug.h>
			
 
				+#include <linux/sched/idle.h>
			
 
				+#include <linux/sched/init.h>
			
 
				+#include <linux/sched/isolation.h>
			
 
				+#include <linux/sched/jobctl.h>
			
 
				+#include <linux/sched/loadavg.h>
			
 
				+#include <linux/sched/mm.h>
			
 
				+#include <linux/sched/nohz.h>
			
 
				+#include <linux/sched/numa_balancing.h>
			
 
				+#include <linux/sched/prio.h>
			
 
				+#include <linux/sched/rt.h>
			
 
				+#include <linux/sched/signal.h>
			
 
				+#include <linux/sched/stat.h>
			
 
				+#include <linux/sched/sysctl.h>
			
 
				 #include <linux/sched/task.h>
			
 
				 #include <linux/sched/task_stack.h>
			
 
				-#include <linux/sched/cputime.h>
			
 
				-#include <linux/sched/init.h>
			
 
				+#include <linux/sched/topology.h>
			
 
				+#include <linux/sched/user.h>
			
 
				+#include <linux/sched/wake_q.h>
			
 
				+#include <linux/sched/xacct.h>
			
 
				+
			
 
				+#include <uapi/linux/sched/types.h>
			
 
				 
			
 
				-#include <linux/u64_stats_sync.h>
			
 
				-#include <linux/kernel_stat.h>
			
 
				 #include <linux/binfmts.h>
			
 
				-#include <linux/mutex.h>
			
 
				-#include <linux/spinlock.h>
			
 
				+#include <linux/blkdev.h>
			
 
				+#include <linux/compat.h>
			
 
				+#include <linux/context_tracking.h>
			
 
				+#include <linux/cpufreq.h>
			
 
				+#include <linux/cpuidle.h>
			
 
				+#include <linux/cpuset.h>
			
 
				+#include <linux/ctype.h>
			
 
				+#include <linux/debugfs.h>
			
 
				+#include <linux/delayacct.h>
			
 
				+#include <linux/init_task.h>
			
 
				+#include <linux/kprobes.h>
			
 
				+#include <linux/kthread.h>
			
 
				+#include <linux/membarrier.h>
			
 
				+#include <linux/migrate.h>
			
 
				+#include <linux/mmu_context.h>
			
 
				+#include <linux/nmi.h>
			
 
				+#include <linux/proc_fs.h>
			
 
				+#include <linux/prefetch.h>
			
 
				+#include <linux/profile.h>
			
 
				+#include <linux/rcupdate_wait.h>
			
 
				+#include <linux/security.h>
			
 
				+#include <linux/stackprotector.h>
			
 
				 #include <linux/stop_machine.h>
			
 
				-#include <linux/irq_work.h>
			
 
				-#include <linux/tick.h>
			
 
				-#include <linux/slab.h>
			
 
				-#include <linux/cgroup.h>
			
 
				+#include <linux/suspend.h>
			
 
				+#include <linux/swait.h>
			
 
				+#include <linux/syscalls.h>
			
 
				+#include <linux/task_work.h>
			
 
				+#include <linux/tsacct_kern.h>
			
 
				+
			
 
				+#include <asm/tlb.h>
			
 
				 
			
 
				 #ifdef CONFIG_PARAVIRT
			
 
				-#include <asm/paravirt.h>
			
 
				+# include <asm/paravirt.h>
			
 
				 #endif
			
 
				 
			
 
				 #include "cpupri.h"
			
@@ -79,11 +113,11 @@ static inline void cpu_load_update_active(struct rq *this_rq) { }
 
				  * and does not change the user-interface for setting shares/weights.
			
 
				  *
			
 
				  * We increase resolution only if we have enough bits to allow this increased
			
 
				- * resolution (i.e. 64bit). The costs for increasing resolution when 32bit are
			
 
				- * pretty high and the returns do not justify the increased costs.
			
 
				+ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
			
 
				+ * are pretty high and the returns do not justify the increased costs.
			
 
				  *
			
 
				- * Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to
			
 
				- * increase coverage and consistency always enable it on 64bit platforms.
			
 
				+ * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
			
 
				+ * increase coverage and consistency always enable it on 64-bit platforms.
			
 
				  */
			
 
				 #ifdef CONFIG_64BIT
			
 
				 # define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
			
@@ -111,16 +145,12 @@ static inline void cpu_load_update_active(struct rq *this_rq) { }
 
				  * 10 -> just above 1us
			
 
				  * 9  -> just above 0.5us
			
 
				  */
			
 
				-#define DL_SCALE (10)
			
 
				+#define DL_SCALE		10
			
 
				 
			
 
				 /*
			
 
				- * These are the 'tuning knobs' of the scheduler:
			
 
				+ * Single value that denotes runtime == period, i.e. unlimited time.
			
 
				  */
			
 
				-
			
 
				-/*
			
 
				- * single value that denotes runtime == period, ie unlimited time.
			
 
				- */
			
 
				-#define RUNTIME_INF	((u64)~0ULL)
			
 
				+#define RUNTIME_INF		((u64)~0ULL)
			
 
				 
			
 
				 static inline int idle_policy(int policy)
			
 
				 {
			
@@ -235,9 +265,9 @@ void __dl_clear_params(struct task_struct *p);
 
				  * control.
			
 
				  */
			
 
				 struct dl_bandwidth {
			
 
				-	raw_spinlock_t dl_runtime_lock;
			
 
				-	u64 dl_runtime;
			
 
				-	u64 dl_period;
			
 
				+	raw_spinlock_t		dl_runtime_lock;
			
 
				+	u64			dl_runtime;
			
 
				+	u64			dl_period;
			
 
				 };
			
 
				 
			
 
				 static inline int dl_bandwidth_enabled(void)
			
@@ -246,8 +276,9 @@ static inline int dl_bandwidth_enabled(void)
 
				 }
			
 
				 
			
 
				 struct dl_bw {
			
 
				-	raw_spinlock_t lock;
			
 
				-	u64 bw, total_bw;
			
 
				+	raw_spinlock_t		lock;
			
 
				+	u64			bw;
			
 
				+	u64			total_bw;
			
 
				 };
			
 
				 
			
 
				 static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
			
@@ -273,20 +304,17 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
 
				 	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
			
 
				 }
			
 
				 
			
 
				-void dl_change_utilization(struct task_struct *p, u64 new_bw);
			
 
				+extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
			
 
				 extern void init_dl_bw(struct dl_bw *dl_b);
			
 
				-extern int sched_dl_global_validate(void);
			
 
				+extern int  sched_dl_global_validate(void);
			
 
				 extern void sched_dl_do_global(void);
			
 
				-extern int sched_dl_overflow(struct task_struct *p, int policy,
			
 
				-			     const struct sched_attr *attr);
			
 
				+extern int  sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
			
 
				 extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
			
 
				 extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
			
 
				 extern bool __checkparam_dl(const struct sched_attr *attr);
			
 
				 extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
			
 
				-extern int dl_task_can_attach(struct task_struct *p,
			
 
				-			      const struct cpumask *cs_cpus_allowed);
			
 
				-extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
			
 
				-					const struct cpumask *trial);
			
 
				+extern int  dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
			
 
				+extern int  dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
			
 
				 extern bool dl_cpu_busy(unsigned int cpu);
			
 
				 
			
 
				 #ifdef CONFIG_CGROUP_SCHED
			
@@ -300,32 +328,36 @@ extern struct list_head task_groups;
 
				 
			
 
				 struct cfs_bandwidth {
			
 
				 #ifdef CONFIG_CFS_BANDWIDTH
			
 
				-	raw_spinlock_t lock;
			
 
				-	ktime_t period;
			
 
				-	u64 quota, runtime;
			
 
				-	s64 hierarchical_quota;
			
 
				-	u64 runtime_expires;
			
 
				-
			
 
				-	int idle, period_active;
			
 
				-	struct hrtimer period_timer, slack_timer;
			
 
				-	struct list_head throttled_cfs_rq;
			
 
				-
			
 
				-	/* statistics */
			
 
				-	int nr_periods, nr_throttled;
			
 
				-	u64 throttled_time;
			
 
				+	raw_spinlock_t		lock;
			
 
				+	ktime_t			period;
			
 
				+	u64			quota;
			
 
				+	u64			runtime;
			
 
				+	s64			hierarchical_quota;
			
 
				+	u64			runtime_expires;
			
 
				+
			
 
				+	int			idle;
			
 
				+	int			period_active;
			
 
				+	struct hrtimer		period_timer;
			
 
				+	struct hrtimer		slack_timer;
			
 
				+	struct list_head	throttled_cfs_rq;
			
 
				+
			
 
				+	/* Statistics: */
			
 
				+	int			nr_periods;
			
 
				+	int			nr_throttled;
			
 
				+	u64			throttled_time;
			
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-/* task group related information */
			
 
				+/* Task group related information */
			
 
				 struct task_group {
			
 
				 	struct cgroup_subsys_state css;
			
 
				 
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				-	/* schedulable entities of this group on each cpu */
			
 
				-	struct sched_entity **se;
			
 
				-	/* runqueue "owned" by this group on each cpu */
			
 
				-	struct cfs_rq **cfs_rq;
			
 
				-	unsigned long shares;
			
 
				+	/* schedulable entities of this group on each CPU */
			
 
				+	struct sched_entity	**se;
			
 
				+	/* runqueue "owned" by this group on each CPU */
			
 
				+	struct cfs_rq		**cfs_rq;
			
 
				+	unsigned long		shares;
			
 
				 
			
 
				 #ifdef	CONFIG_SMP
			
 
				 	/*
			
@@ -333,29 +365,29 @@ struct task_group {
 
				 	 * it in its own cacheline separated from the fields above which
			
 
				 	 * will also be accessed at each tick.
			
 
				 	 */
			
 
				-	atomic_long_t load_avg ____cacheline_aligned;
			
 
				+	atomic_long_t		load_avg ____cacheline_aligned;
			
 
				 #endif
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_RT_GROUP_SCHED
			
 
				-	struct sched_rt_entity **rt_se;
			
 
				-	struct rt_rq **rt_rq;
			
 
				+	struct sched_rt_entity	**rt_se;
			
 
				+	struct rt_rq		**rt_rq;
			
 
				 
			
 
				-	struct rt_bandwidth rt_bandwidth;
			
 
				+	struct rt_bandwidth	rt_bandwidth;
			
 
				 #endif
			
 
				 
			
 
				-	struct rcu_head rcu;
			
 
				-	struct list_head list;
			
 
				+	struct rcu_head		rcu;
			
 
				+	struct list_head	list;
			
 
				 
			
 
				-	struct task_group *parent;
			
 
				-	struct list_head siblings;
			
 
				-	struct list_head children;
			
 
				+	struct task_group	*parent;
			
 
				+	struct list_head	siblings;
			
 
				+	struct list_head	children;
			
 
				 
			
 
				 #ifdef CONFIG_SCHED_AUTOGROUP
			
 
				-	struct autogroup *autogroup;
			
 
				+	struct autogroup	*autogroup;
			
 
				 #endif
			
 
				 
			
 
				-	struct cfs_bandwidth cfs_bandwidth;
			
 
				+	struct cfs_bandwidth	cfs_bandwidth;
			
 
				 };
			
 
				 
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
@@ -369,8 +401,8 @@ struct task_group {
 
				  * (The default weight is 1024 - so there's no practical
			
 
				  *  limitation from this.)
			
 
				  */
			
 
				-#define MIN_SHARES	(1UL <<  1)
			
 
				-#define MAX_SHARES	(1UL << 18)
			
 
				+#define MIN_SHARES		(1UL <<  1)
			
 
				+#define MAX_SHARES		(1UL << 18)
			
 
				 #endif
			
 
				 
			
 
				 typedef int (*tg_visitor)(struct task_group *, void *);
			
@@ -443,35 +475,39 @@ struct cfs_bandwidth { };
 
				 
			
 
				 /* CFS-related fields in a runqueue */
			
 
				 struct cfs_rq {
			
 
				-	struct load_weight load;
			
 
				-	unsigned long runnable_weight;
			
 
				-	unsigned int nr_running, h_nr_running;
			
 
				+	struct load_weight	load;
			
 
				+	unsigned long		runnable_weight;
			
 
				+	unsigned int		nr_running;
			
 
				+	unsigned int		h_nr_running;
			
 
				 
			
 
				-	u64 exec_clock;
			
 
				-	u64 min_vruntime;
			
 
				+	u64			exec_clock;
			
 
				+	u64			min_vruntime;
			
 
				 #ifndef CONFIG_64BIT
			
 
				-	u64 min_vruntime_copy;
			
 
				+	u64			min_vruntime_copy;
			
 
				 #endif
			
 
				 
			
 
				-	struct rb_root_cached tasks_timeline;
			
 
				+	struct rb_root_cached	tasks_timeline;
			
 
				 
			
 
				 	/*
			
 
				 	 * 'curr' points to currently running entity on this cfs_rq.
			
 
				 	 * It is set to NULL otherwise (i.e when none are currently running).
			
 
				 	 */
			
 
				-	struct sched_entity *curr, *next, *last, *skip;
			
 
				+	struct sched_entity	*curr;
			
 
				+	struct sched_entity	*next;
			
 
				+	struct sched_entity	*last;
			
 
				+	struct sched_entity	*skip;
			
 
				 
			
 
				 #ifdef	CONFIG_SCHED_DEBUG
			
 
				-	unsigned int nr_spread_over;
			
 
				+	unsigned int		nr_spread_over;
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				 	/*
			
 
				 	 * CFS load tracking
			
 
				 	 */
			
 
				-	struct sched_avg avg;
			
 
				+	struct sched_avg	avg;
			
 
				 #ifndef CONFIG_64BIT
			
 
				-	u64 load_last_update_time_copy;
			
 
				+	u64			load_last_update_time_copy;
			
 
				 #endif
			
 
				 	struct {
			
 
				 		raw_spinlock_t	lock ____cacheline_aligned;
			
@@ -482,9 +518,9 @@ struct cfs_rq {
 
				 	} removed;
			
 
				 
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				-	unsigned long tg_load_avg_contrib;
			
 
				-	long propagate;
			
 
				-	long prop_runnable_sum;
			
 
				+	unsigned long		tg_load_avg_contrib;
			
 
				+	long			propagate;
			
 
				+	long			prop_runnable_sum;
			
 
				 
			
 
				 	/*
			
 
				 	 *   h_load = weight * f(tg)
			
@@ -492,36 +528,38 @@ struct cfs_rq {
 
				 	 * Where f(tg) is the recursive weight fraction assigned to
			
 
				 	 * this group.
			
 
				 	 */
			
 
				-	unsigned long h_load;
			
 
				-	u64 last_h_load_update;
			
 
				-	struct sched_entity *h_load_next;
			
 
				+	unsigned long		h_load;
			
 
				+	u64			last_h_load_update;
			
 
				+	struct sched_entity	*h_load_next;
			
 
				 #endif /* CONFIG_FAIR_GROUP_SCHED */
			
 
				 #endif /* CONFIG_SMP */
			
 
				 
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				-	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
			
 
				+	struct rq		*rq;	/* CPU runqueue to which this cfs_rq is attached */
			
 
				 
			
 
				 	/*
			
 
				 	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
			
 
				 	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
			
 
				 	 * (like users, containers etc.)
			
 
				 	 *
			
 
				-	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
			
 
				-	 * list is used during load balance.
			
 
				+	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
			
 
				+	 * This list is used during load balance.
			
 
				 	 */
			
 
				-	int on_list;
			
 
				-	struct list_head leaf_cfs_rq_list;
			
 
				-	struct task_group *tg;	/* group that "owns" this runqueue */
			
 
				+	int			on_list;
			
 
				+	struct list_head	leaf_cfs_rq_list;
			
 
				+	struct task_group	*tg;	/* group that "owns" this runqueue */
			
 
				 
			
 
				 #ifdef CONFIG_CFS_BANDWIDTH
			
 
				-	int runtime_enabled;
			
 
				-	u64 runtime_expires;
			
 
				-	s64 runtime_remaining;
			
 
				-
			
 
				-	u64 throttled_clock, throttled_clock_task;
			
 
				-	u64 throttled_clock_task_time;
			
 
				-	int throttled, throttle_count;
			
 
				-	struct list_head throttled_list;
			
 
				+	int			runtime_enabled;
			
 
				+	u64			runtime_expires;
			
 
				+	s64			runtime_remaining;
			
 
				+
			
 
				+	u64			throttled_clock;
			
 
				+	u64			throttled_clock_task;
			
 
				+	u64			throttled_clock_task_time;
			
 
				+	int			throttled;
			
 
				+	int			throttle_count;
			
 
				+	struct list_head	throttled_list;
			
 
				 #endif /* CONFIG_CFS_BANDWIDTH */
			
 
				 #endif /* CONFIG_FAIR_GROUP_SCHED */
			
 
				 };
			
@@ -538,45 +576,45 @@ static inline int rt_bandwidth_enabled(void)
 
				 
			
 
				 /* Real-Time classes' related field in a runqueue: */
			
 
				 struct rt_rq {
			
 
				-	struct rt_prio_array active;
			
 
				-	unsigned int rt_nr_running;
			
 
				-	unsigned int rr_nr_running;
			
 
				+	struct rt_prio_array	active;
			
 
				+	unsigned int		rt_nr_running;
			
 
				+	unsigned int		rr_nr_running;
			
 
				 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
			
 
				 	struct {
			
 
				-		int curr; /* highest queued rt task prio */
			
 
				+		int		curr; /* highest queued rt task prio */
			
 
				 #ifdef CONFIG_SMP
			
 
				-		int next; /* next highest */
			
 
				+		int		next; /* next highest */
			
 
				 #endif
			
 
				 	} highest_prio;
			
 
				 #endif
			
 
				 #ifdef CONFIG_SMP
			
 
				-	unsigned long rt_nr_migratory;
			
 
				-	unsigned long rt_nr_total;
			
 
				-	int overloaded;
			
 
				-	struct plist_head pushable_tasks;
			
 
				+	unsigned long		rt_nr_migratory;
			
 
				+	unsigned long		rt_nr_total;
			
 
				+	int			overloaded;
			
 
				+	struct plist_head	pushable_tasks;
			
 
				 #endif /* CONFIG_SMP */
			
 
				-	int rt_queued;
			
 
				+	int			rt_queued;
			
 
				 
			
 
				-	int rt_throttled;
			
 
				-	u64 rt_time;
			
 
				-	u64 rt_runtime;
			
 
				+	int			rt_throttled;
			
 
				+	u64			rt_time;
			
 
				+	u64			rt_runtime;
			
 
				 	/* Nests inside the rq lock: */
			
 
				-	raw_spinlock_t rt_runtime_lock;
			
 
				+	raw_spinlock_t		rt_runtime_lock;
			
 
				 
			
 
				 #ifdef CONFIG_RT_GROUP_SCHED
			
 
				-	unsigned long rt_nr_boosted;
			
 
				+	unsigned long		rt_nr_boosted;
			
 
				 
			
 
				-	struct rq *rq;
			
 
				-	struct task_group *tg;
			
 
				+	struct rq		*rq;
			
 
				+	struct task_group	*tg;
			
 
				 #endif
			
 
				 };
			
 
				 
			
 
				 /* Deadline class' related fields in a runqueue */
			
 
				 struct dl_rq {
			
 
				 	/* runqueue is an rbtree, ordered by deadline */
			
 
				-	struct rb_root_cached root;
			
 
				+	struct rb_root_cached	root;
			
 
				 
			
 
				-	unsigned long dl_nr_running;
			
 
				+	unsigned long		dl_nr_running;
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				 	/*
			
@@ -586,28 +624,28 @@ struct dl_rq {
 
				 	 * should migrate somewhere else.
			
 
				 	 */
			
 
				 	struct {
			
 
				-		u64 curr;
			
 
				-		u64 next;
			
 
				+		u64		curr;
			
 
				+		u64		next;
			
 
				 	} earliest_dl;
			
 
				 
			
 
				-	unsigned long dl_nr_migratory;
			
 
				-	int overloaded;
			
 
				+	unsigned long		dl_nr_migratory;
			
 
				+	int			overloaded;
			
 
				 
			
 
				 	/*
			
 
				 	 * Tasks on this rq that can be pushed away. They are kept in
			
 
				 	 * an rb-tree, ordered by tasks' deadlines, with caching
			
 
				 	 * of the leftmost (earliest deadline) element.
			
 
				 	 */
			
 
				-	struct rb_root_cached pushable_dl_tasks_root;
			
 
				+	struct rb_root_cached	pushable_dl_tasks_root;
			
 
				 #else
			
 
				-	struct dl_bw dl_bw;
			
 
				+	struct dl_bw		dl_bw;
			
 
				 #endif
			
 
				 	/*
			
 
				 	 * "Active utilization" for this runqueue: increased when a
			
 
				 	 * task wakes up (becomes TASK_RUNNING) and decreased when a
			
 
				 	 * task blocks
			
 
				 	 */
			
 
				-	u64 running_bw;
			
 
				+	u64			running_bw;
			
 
				 
			
 
				 	/*
			
 
				 	 * Utilization of the tasks "assigned" to this runqueue (including
			
@@ -618,14 +656,14 @@ struct dl_rq {
 
				 	 * This is needed to compute the "inactive utilization" for the
			
 
				 	 * runqueue (inactive utilization = this_bw - running_bw).
			
 
				 	 */
			
 
				-	u64 this_bw;
			
 
				-	u64 extra_bw;
			
 
				+	u64			this_bw;
			
 
				+	u64			extra_bw;
			
 
				 
			
 
				 	/*
			
 
				 	 * Inverse of the fraction of CPU utilization that can be reclaimed
			
 
				 	 * by the GRUB algorithm.
			
 
				 	 */
			
 
				-	u64 bw_ratio;
			
 
				+	u64			bw_ratio;
			
 
				 };
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
@@ -638,51 +676,51 @@ static inline bool sched_asym_prefer(int a, int b)
 
				 /*
			
 
				  * We add the notion of a root-domain which will be used to define per-domain
			
 
				  * variables. Each exclusive cpuset essentially defines an island domain by
			
 
				- * fully partitioning the member cpus from any other cpuset. Whenever a new
			
 
				+ * fully partitioning the member CPUs from any other cpuset. Whenever a new
			
 
				  * exclusive cpuset is created, we also create and attach a new root-domain
			
 
				  * object.
			
 
				  *
			
 
				  */
			
 
				 struct root_domain {
			
 
				-	atomic_t refcount;
			
 
				-	atomic_t rto_count;
			
 
				-	struct rcu_head rcu;
			
 
				-	cpumask_var_t span;
			
 
				-	cpumask_var_t online;
			
 
				+	atomic_t		refcount;
			
 
				+	atomic_t		rto_count;
			
 
				+	struct rcu_head		rcu;
			
 
				+	cpumask_var_t		span;
			
 
				+	cpumask_var_t		online;
			
 
				 
			
 
				 	/* Indicate more than one runnable task for any CPU */
			
 
				-	bool overload;
			
 
				+	bool			overload;
			
 
				 
			
 
				 	/*
			
 
				 	 * The bit corresponding to a CPU gets set here if such CPU has more
			
 
				 	 * than one runnable -deadline task (as it is below for RT tasks).
			
 
				 	 */
			
 
				-	cpumask_var_t dlo_mask;
			
 
				-	atomic_t dlo_count;
			
 
				-	struct dl_bw dl_bw;
			
 
				-	struct cpudl cpudl;
			
 
				+	cpumask_var_t		dlo_mask;
			
 
				+	atomic_t		dlo_count;
			
 
				+	struct dl_bw		dl_bw;
			
 
				+	struct cpudl		cpudl;
			
 
				 
			
 
				 #ifdef HAVE_RT_PUSH_IPI
			
 
				 	/*
			
 
				 	 * For IPI pull requests, loop across the rto_mask.
			
 
				 	 */
			
 
				-	struct irq_work rto_push_work;
			
 
				-	raw_spinlock_t rto_lock;
			
 
				+	struct irq_work		rto_push_work;
			
 
				+	raw_spinlock_t		rto_lock;
			
 
				 	/* These are only updated and read within rto_lock */
			
 
				-	int rto_loop;
			
 
				-	int rto_cpu;
			
 
				+	int			rto_loop;
			
 
				+	int			rto_cpu;
			
 
				 	/* These atomics are updated outside of a lock */
			
 
				-	atomic_t rto_loop_next;
			
 
				-	atomic_t rto_loop_start;
			
 
				+	atomic_t		rto_loop_next;
			
 
				+	atomic_t		rto_loop_start;
			
 
				 #endif
			
 
				 	/*
			
 
				 	 * The "RT overload" flag: it gets set if a CPU has more than
			
 
				 	 * one runnable RT task.
			
 
				 	 */
			
 
				-	cpumask_var_t rto_mask;
			
 
				-	struct cpupri cpupri;
			
 
				+	cpumask_var_t		rto_mask;
			
 
				+	struct cpupri		cpupri;
			
 
				 
			
 
				-	unsigned long max_cpu_capacity;
			
 
				+	unsigned long		max_cpu_capacity;
			
 
				 };
			
 
				 
			
 
				 extern struct root_domain def_root_domain;
			
@@ -708,41 +746,42 @@ extern void rto_push_irq_work_func(struct irq_work *work);
 
				  */
			
 
				 struct rq {
			
 
				 	/* runqueue lock: */
			
 
				-	raw_spinlock_t lock;
			
 
				+	raw_spinlock_t		lock;
			
 
				 
			
 
				 	/*
			
 
				 	 * nr_running and cpu_load should be in the same cacheline because
			
 
				 	 * remote CPUs use both these fields when doing load calculation.
			
 
				 	 */
			
 
				-	unsigned int nr_running;
			
 
				+	unsigned int		nr_running;
			
 
				 #ifdef CONFIG_NUMA_BALANCING
			
 
				-	unsigned int nr_numa_running;
			
 
				-	unsigned int nr_preferred_running;
			
 
				+	unsigned int		nr_numa_running;
			
 
				+	unsigned int		nr_preferred_running;
			
 
				 #endif
			
 
				 	#define CPU_LOAD_IDX_MAX 5
			
 
				-	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
			
 
				+	unsigned long		cpu_load[CPU_LOAD_IDX_MAX];
			
 
				 #ifdef CONFIG_NO_HZ_COMMON
			
 
				 #ifdef CONFIG_SMP
			
 
				-	unsigned long last_load_update_tick;
			
 
				+	unsigned long		last_load_update_tick;
			
 
				+	unsigned long		last_blocked_load_update_tick;
			
 
				+	unsigned int		has_blocked_load;
			
 
				 #endif /* CONFIG_SMP */
			
 
				-	unsigned long nohz_flags;
			
 
				+	unsigned int		nohz_tick_stopped;
			
 
				+	atomic_t nohz_flags;
			
 
				 #endif /* CONFIG_NO_HZ_COMMON */
			
 
				-#ifdef CONFIG_NO_HZ_FULL
			
 
				-	unsigned long last_sched_tick;
			
 
				-#endif
			
 
				-	/* capture load from *all* tasks on this cpu: */
			
 
				-	struct load_weight load;
			
 
				-	unsigned long nr_load_updates;
			
 
				-	u64 nr_switches;
			
 
				 
			
 
				-	struct cfs_rq cfs;
			
 
				-	struct rt_rq rt;
			
 
				-	struct dl_rq dl;
			
 
				+	/* capture load from *all* tasks on this CPU: */
			
 
				+	struct load_weight	load;
			
 
				+	unsigned long		nr_load_updates;
			
 
				+	u64			nr_switches;
			
 
				+
			
 
				+	struct cfs_rq		cfs;
			
 
				+	struct rt_rq		rt;
			
 
				+	struct dl_rq		dl;
			
 
				 
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				-	/* list of leaf cfs_rq on this cpu: */
			
 
				-	struct list_head leaf_cfs_rq_list;
			
 
				-	struct list_head *tmp_alone_branch;
			
 
				+	/* list of leaf cfs_rq on this CPU: */
			
 
				+	struct list_head	leaf_cfs_rq_list;
			
 
				+	struct list_head	*tmp_alone_branch;
			
 
				 #endif /* CONFIG_FAIR_GROUP_SCHED */
			
 
				 
			
 
				 	/*
			
@@ -751,94 +790,98 @@ struct rq {
 
				 	 * one CPU and if it got migrated afterwards it may decrease
			
 
				 	 * it on another CPU. Always updated under the runqueue lock:
			
 
				 	 */
			
 
				-	unsigned long nr_uninterruptible;
			
 
				+	unsigned long		nr_uninterruptible;
			
 
				 
			
 
				-	struct task_struct *curr, *idle, *stop;
			
 
				-	unsigned long next_balance;
			
 
				-	struct mm_struct *prev_mm;
			
 
				+	struct task_struct	*curr;
			
 
				+	struct task_struct	*idle;
			
 
				+	struct task_struct	*stop;
			
 
				+	unsigned long		next_balance;
			
 
				+	struct mm_struct	*prev_mm;
			
 
				 
			
 
				-	unsigned int clock_update_flags;
			
 
				-	u64 clock;
			
 
				-	u64 clock_task;
			
 
				+	unsigned int		clock_update_flags;
			
 
				+	u64			clock;
			
 
				+	u64			clock_task;
			
 
				 
			
 
				-	atomic_t nr_iowait;
			
 
				+	atomic_t		nr_iowait;
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				-	struct root_domain *rd;
			
 
				-	struct sched_domain *sd;
			
 
				+	struct root_domain	*rd;
			
 
				+	struct sched_domain	*sd;
			
 
				 
			
 
				-	unsigned long cpu_capacity;
			
 
				-	unsigned long cpu_capacity_orig;
			
 
				+	unsigned long		cpu_capacity;
			
 
				+	unsigned long		cpu_capacity_orig;
			
 
				 
			
 
				-	struct callback_head *balance_callback;
			
 
				+	struct callback_head	*balance_callback;
			
 
				+
			
 
				+	unsigned char		idle_balance;
			
 
				 
			
 
				-	unsigned char idle_balance;
			
 
				 	/* For active balancing */
			
 
				-	int active_balance;
			
 
				-	int push_cpu;
			
 
				-	struct cpu_stop_work active_balance_work;
			
 
				-	/* cpu of this runqueue: */
			
 
				-	int cpu;
			
 
				-	int online;
			
 
				+	int			active_balance;
			
 
				+	int			push_cpu;
			
 
				+	struct cpu_stop_work	active_balance_work;
			
 
				+
			
 
				+	/* CPU of this runqueue: */
			
 
				+	int			cpu;
			
 
				+	int			online;
			
 
				 
			
 
				 	struct list_head cfs_tasks;
			
 
				 
			
 
				-	u64 rt_avg;
			
 
				-	u64 age_stamp;
			
 
				-	u64 idle_stamp;
			
 
				-	u64 avg_idle;
			
 
				+	u64			rt_avg;
			
 
				+	u64			age_stamp;
			
 
				+	u64			idle_stamp;
			
 
				+	u64			avg_idle;
			
 
				 
			
 
				 	/* This is used to determine avg_idle's max value */
			
 
				-	u64 max_idle_balance_cost;
			
 
				+	u64			max_idle_balance_cost;
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
			
 
				-	u64 prev_irq_time;
			
 
				+	u64			prev_irq_time;
			
 
				 #endif
			
 
				 #ifdef CONFIG_PARAVIRT
			
 
				-	u64 prev_steal_time;
			
 
				+	u64			prev_steal_time;
			
 
				 #endif
			
 
				 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
			
 
				-	u64 prev_steal_time_rq;
			
 
				+	u64			prev_steal_time_rq;
			
 
				 #endif
			
 
				 
			
 
				 	/* calc_load related fields */
			
 
				-	unsigned long calc_load_update;
			
 
				-	long calc_load_active;
			
 
				+	unsigned long		calc_load_update;
			
 
				+	long			calc_load_active;
			
 
				 
			
 
				 #ifdef CONFIG_SCHED_HRTICK
			
 
				 #ifdef CONFIG_SMP
			
 
				-	int hrtick_csd_pending;
			
 
				-	call_single_data_t hrtick_csd;
			
 
				+	int			hrtick_csd_pending;
			
 
				+	call_single_data_t	hrtick_csd;
			
 
				 #endif
			
 
				-	struct hrtimer hrtick_timer;
			
 
				+	struct hrtimer		hrtick_timer;
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_SCHEDSTATS
			
 
				 	/* latency stats */
			
 
				-	struct sched_info rq_sched_info;
			
 
				-	unsigned long long rq_cpu_time;
			
 
				+	struct sched_info	rq_sched_info;
			
 
				+	unsigned long long	rq_cpu_time;
			
 
				 	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
			
 
				 
			
 
				 	/* sys_sched_yield() stats */
			
 
				-	unsigned int yld_count;
			
 
				+	unsigned int		yld_count;
			
 
				 
			
 
				 	/* schedule() stats */
			
 
				-	unsigned int sched_count;
			
 
				-	unsigned int sched_goidle;
			
 
				+	unsigned int		sched_count;
			
 
				+	unsigned int		sched_goidle;
			
 
				 
			
 
				 	/* try_to_wake_up() stats */
			
 
				-	unsigned int ttwu_count;
			
 
				-	unsigned int ttwu_local;
			
 
				+	unsigned int		ttwu_count;
			
 
				+	unsigned int		ttwu_local;
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				-	struct llist_head wake_list;
			
 
				+	struct llist_head	wake_list;
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_CPU_IDLE
			
 
				 	/* Must be inspected within a rcu lock section */
			
 
				-	struct cpuidle_state *idle_state;
			
 
				+	struct cpuidle_state	*idle_state;
			
 
				 #endif
			
 
				 };
			
 
				 
			
@@ -904,9 +947,9 @@ static inline u64 __rq_clock_broken(struct rq *rq)
 
				  * one position though, because the next rq_unpin_lock() will shift it
			
 
				  * back.
			
 
				  */
			
 
				-#define RQCF_REQ_SKIP	0x01
			
 
				-#define RQCF_ACT_SKIP	0x02
			
 
				-#define RQCF_UPDATED	0x04
			
 
				+#define RQCF_REQ_SKIP		0x01
			
 
				+#define RQCF_ACT_SKIP		0x02
			
 
				+#define RQCF_UPDATED		0x04
			
 
				 
			
 
				 static inline void assert_clock_updated(struct rq *rq)
			
 
				 {
			
@@ -1059,12 +1102,12 @@ extern void sched_ttwu_pending(void);
 
				 
			
 
				 /**
			
 
				  * highest_flag_domain - Return highest sched_domain containing flag.
			
 
				- * @cpu:	The cpu whose highest level of sched domain is to
			
 
				+ * @cpu:	The CPU whose highest level of sched domain is to
			
 
				  *		be returned.
			
 
				  * @flag:	The flag to check for the highest sched_domain
			
 
				- *		for the given cpu.
			
 
				+ *		for the given CPU.
			
 
				  *
			
 
				- * Returns the highest sched_domain of a cpu which contains the given flag.
			
 
				+ * Returns the highest sched_domain of a CPU which contains the given flag.
			
 
				  */
			
 
				 static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
			
 
				 {
			
@@ -1099,30 +1142,30 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa);
 
				 DECLARE_PER_CPU(struct sched_domain *, sd_asym);
			
 
				 
			
 
				 struct sched_group_capacity {
			
 
				-	atomic_t ref;
			
 
				+	atomic_t		ref;
			
 
				 	/*
			
 
				 	 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
			
 
				 	 * for a single CPU.
			
 
				 	 */
			
 
				-	unsigned long capacity;
			
 
				-	unsigned long min_capacity; /* Min per-CPU capacity in group */
			
 
				-	unsigned long next_update;
			
 
				-	int imbalance; /* XXX unrelated to capacity but shared group state */
			
 
				+	unsigned long		capacity;
			
 
				+	unsigned long		min_capacity;		/* Min per-CPU capacity in group */
			
 
				+	unsigned long		next_update;
			
 
				+	int			imbalance;		/* XXX unrelated to capacity but shared group state */
			
 
				 
			
 
				 #ifdef CONFIG_SCHED_DEBUG
			
 
				-	int id;
			
 
				+	int			id;
			
 
				 #endif
			
 
				 
			
 
				-	unsigned long cpumask[0]; /* balance mask */
			
 
				+	unsigned long		cpumask[0];		/* Balance mask */
			
 
				 };
			
 
				 
			
 
				 struct sched_group {
			
 
				-	struct sched_group *next;	/* Must be a circular list */
			
 
				-	atomic_t ref;
			
 
				+	struct sched_group	*next;			/* Must be a circular list */
			
 
				+	atomic_t		ref;
			
 
				 
			
 
				-	unsigned int group_weight;
			
 
				+	unsigned int		group_weight;
			
 
				 	struct sched_group_capacity *sgc;
			
 
				-	int asym_prefer_cpu;		/* cpu of highest priority in group */
			
 
				+	int			asym_prefer_cpu;	/* CPU of highest priority in group */
			
 
				 
			
 
				 	/*
			
 
				 	 * The CPUs this group covers.
			
@@ -1131,7 +1174,7 @@ struct sched_group {
 
				 	 * by attaching extra space to the end of the structure,
			
 
				 	 * depending on how many CPUs the kernel has booted up with)
			
 
				 	 */
			
 
				-	unsigned long cpumask[0];
			
 
				+	unsigned long		cpumask[0];
			
 
				 };
			
 
				 
			
 
				 static inline struct cpumask *sched_group_span(struct sched_group *sg)
			
@@ -1148,8 +1191,8 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg)
 
				 }
			
 
				 
			
 
				 /**
			
 
				- * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
			
 
				- * @group: The group whose first cpu is to be returned.
			
 
				+ * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
			
 
				+ * @group: The group whose first CPU is to be returned.
			
 
				  */
			
 
				 static inline unsigned int group_first_cpu(struct sched_group *group)
			
 
				 {
			
@@ -1349,19 +1392,12 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 
				 	return p->on_rq == TASK_ON_RQ_MIGRATING;
			
 
				 }
			
 
				 
			
 
				-#ifndef prepare_arch_switch
			
 
				-# define prepare_arch_switch(next)	do { } while (0)
			
 
				-#endif
			
 
				-#ifndef finish_arch_post_lock_switch
			
 
				-# define finish_arch_post_lock_switch()	do { } while (0)
			
 
				-#endif
			
 
				-
			
 
				 /*
			
 
				  * wake flags
			
 
				  */
			
 
				-#define WF_SYNC		0x01		/* waker goes to sleep after wakeup */
			
 
				-#define WF_FORK		0x02		/* child wakeup after fork */
			
 
				-#define WF_MIGRATED	0x4		/* internal use, task got migrated */
			
 
				+#define WF_SYNC			0x01		/* Waker goes to sleep after wakeup */
			
 
				+#define WF_FORK			0x02		/* Child wakeup after fork */
			
 
				+#define WF_MIGRATED		0x4		/* Internal use, task got migrated */
			
 
				 
			
 
				 /*
			
 
				  * To aid in avoiding the subversion of "niceness" due to uneven distribution
			
@@ -1372,11 +1408,11 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 
				  * slice expiry etc.
			
 
				  */
			
 
				 
			
 
				-#define WEIGHT_IDLEPRIO                3
			
 
				-#define WMULT_IDLEPRIO         1431655765
			
 
				+#define WEIGHT_IDLEPRIO		3
			
 
				+#define WMULT_IDLEPRIO		1431655765
			
 
				 
			
 
				-extern const int sched_prio_to_weight[40];
			
 
				-extern const u32 sched_prio_to_wmult[40];
			
 
				+extern const int		sched_prio_to_weight[40];
			
 
				+extern const u32		sched_prio_to_wmult[40];
			
 
				 
			
 
				 /*
			
 
				  * {de,en}queue flags:
			
@@ -1398,9 +1434,9 @@ extern const u32 sched_prio_to_wmult[40];
 
				  */
			
 
				 
			
 
				 #define DEQUEUE_SLEEP		0x01
			
 
				-#define DEQUEUE_SAVE		0x02 /* matches ENQUEUE_RESTORE */
			
 
				-#define DEQUEUE_MOVE		0x04 /* matches ENQUEUE_MOVE */
			
 
				-#define DEQUEUE_NOCLOCK		0x08 /* matches ENQUEUE_NOCLOCK */
			
 
				+#define DEQUEUE_SAVE		0x02 /* Matches ENQUEUE_RESTORE */
			
 
				+#define DEQUEUE_MOVE		0x04 /* Matches ENQUEUE_MOVE */
			
 
				+#define DEQUEUE_NOCLOCK		0x08 /* Matches ENQUEUE_NOCLOCK */
			
 
				 
			
 
				 #define ENQUEUE_WAKEUP		0x01
			
 
				 #define ENQUEUE_RESTORE		0x02
			
@@ -1422,10 +1458,10 @@ struct sched_class {
 
				 
			
 
				 	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
			
 
				 	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
			
 
				-	void (*yield_task) (struct rq *rq);
			
 
				-	bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
			
 
				+	void (*yield_task)   (struct rq *rq);
			
 
				+	bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
			
 
				 
			
 
				-	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
			
 
				+	void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
			
 
				 
			
 
				 	/*
			
 
				 	 * It is the responsibility of the pick_next_task() method that will
			
@@ -1435,16 +1471,16 @@ struct sched_class {
 
				 	 * May return RETRY_TASK when it finds a higher prio class has runnable
			
 
				 	 * tasks.
			
 
				 	 */
			
 
				-	struct task_struct * (*pick_next_task) (struct rq *rq,
			
 
				-						struct task_struct *prev,
			
 
				-						struct rq_flags *rf);
			
 
				-	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
			
 
				+	struct task_struct * (*pick_next_task)(struct rq *rq,
			
 
				+					       struct task_struct *prev,
			
 
				+					       struct rq_flags *rf);
			
 
				+	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
			
 
				 	void (*migrate_task_rq)(struct task_struct *p);
			
 
				 
			
 
				-	void (*task_woken) (struct rq *this_rq, struct task_struct *task);
			
 
				+	void (*task_woken)(struct rq *this_rq, struct task_struct *task);
			
 
				 
			
 
				 	void (*set_cpus_allowed)(struct task_struct *p,
			
 
				 				 const struct cpumask *newmask);
			
@@ -1453,31 +1489,31 @@ struct sched_class {
 
				 	void (*rq_offline)(struct rq *rq);
			
 
				 #endif
			
 
				 
			
 
				-	void (*set_curr_task) (struct rq *rq);
			
 
				-	void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
			
 
				-	void (*task_fork) (struct task_struct *p);
			
 
				-	void (*task_dead) (struct task_struct *p);
			
 
				+	void (*set_curr_task)(struct rq *rq);
			
 
				+	void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
			
 
				+	void (*task_fork)(struct task_struct *p);
			
 
				+	void (*task_dead)(struct task_struct *p);
			
 
				 
			
 
				 	/*
			
 
				 	 * The switched_from() call is allowed to drop rq->lock, therefore we
			
 
				 	 * cannot assume the switched_from/switched_to pair is serliazed by
			
 
				 	 * rq->lock. They are however serialized by p->pi_lock.
			
 
				 	 */
			
 
				-	void (*switched_from) (struct rq *this_rq, struct task_struct *task);
			
 
				-	void (*switched_to) (struct rq *this_rq, struct task_struct *task);
			
 
				+	void (*switched_from)(struct rq *this_rq, struct task_struct *task);
			
 
				+	void (*switched_to)  (struct rq *this_rq, struct task_struct *task);
			
 
				 	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
			
 
				-			     int oldprio);
			
 
				+			      int oldprio);
			
 
				 
			
 
				-	unsigned int (*get_rr_interval) (struct rq *rq,
			
 
				-					 struct task_struct *task);
			
 
				+	unsigned int (*get_rr_interval)(struct rq *rq,
			
 
				+					struct task_struct *task);
			
 
				 
			
 
				-	void (*update_curr) (struct rq *rq);
			
 
				+	void (*update_curr)(struct rq *rq);
			
 
				 
			
 
				-#define TASK_SET_GROUP  0
			
 
				-#define TASK_MOVE_GROUP	1
			
 
				+#define TASK_SET_GROUP		0
			
 
				+#define TASK_MOVE_GROUP		1
			
 
				 
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				-	void (*task_change_group) (struct task_struct *p, int type);
			
 
				+	void (*task_change_group)(struct task_struct *p, int type);
			
 
				 #endif
			
 
				 };
			
 
				 
			
@@ -1526,6 +1562,7 @@ static inline void idle_set_state(struct rq *rq,
 
				 static inline struct cpuidle_state *idle_get_state(struct rq *rq)
			
 
				 {
			
 
				 	SCHED_WARN_ON(!rcu_read_lock_held());
			
 
				+
			
 
				 	return rq->idle_state;
			
 
				 }
			
 
				 #else
			
@@ -1564,9 +1601,9 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
 
				 extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
			
 
				 extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
			
 
				 
			
 
				-#define BW_SHIFT	20
			
 
				-#define BW_UNIT		(1 << BW_SHIFT)
			
 
				-#define RATIO_SHIFT	8
			
 
				+#define BW_SHIFT		20
			
 
				+#define BW_UNIT			(1 << BW_SHIFT)
			
 
				+#define RATIO_SHIFT		8
			
 
				 unsigned long to_ratio(u64 period, u64 runtime);
			
 
				 
			
 
				 extern void init_entity_runnable_average(struct sched_entity *se);
			
@@ -1574,6 +1611,7 @@ extern void post_init_entity_util_avg(struct sched_entity *se);
 
				 
			
 
				 #ifdef CONFIG_NO_HZ_FULL
			
 
				 extern bool sched_can_stop_tick(struct rq *rq);
			
 
				+extern int __init sched_tick_offload_init(void);
			
 
				 
			
 
				 /*
			
 
				  * Tick may be needed by tasks in the runqueue depending on their policy and
			
@@ -1598,6 +1636,7 @@ static inline void sched_update_tick_dependency(struct rq *rq)
 
				 		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
			
 
				 }
			
 
				 #else
			
 
				+static inline int sched_tick_offload_init(void) { return 0; }
			
 
				 static inline void sched_update_tick_dependency(struct rq *rq) { }
			
 
				 #endif
			
 
				 
			
@@ -1624,13 +1663,6 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
 
				 	sched_update_tick_dependency(rq);
			
 
				 }
			
 
				 
			
 
				-static inline void rq_last_tick_reset(struct rq *rq)
			
 
				-{
			
 
				-#ifdef CONFIG_NO_HZ_FULL
			
 
				-	rq->last_sched_tick = jiffies;
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				 extern void update_rq_clock(struct rq *rq);
			
 
				 
			
 
				 extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
			
@@ -1821,8 +1853,8 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 
				 /*
			
 
				  * Unfair double_lock_balance: Optimizes throughput at the expense of
			
 
				  * latency by eliminating extra atomic operations when the locks are
			
 
				- * already in proper order on entry.  This favors lower cpu-ids and will
			
 
				- * grant the double lock to lower cpus over higher ids under contention,
			
 
				+ * already in proper order on entry.  This favors lower CPU-ids and will
			
 
				+ * grant the double lock to lower CPUs over higher ids under contention,
			
 
				  * regardless of entry order into the function.
			
 
				  */
			
 
				 static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
			
@@ -1854,7 +1886,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 
				 static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
			
 
				 {
			
 
				 	if (unlikely(!irqs_disabled())) {
			
 
				-		/* printk() doesn't work good under rq->lock */
			
 
				+		/* printk() doesn't work well under rq->lock */
			
 
				 		raw_spin_unlock(&this_rq->lock);
			
 
				 		BUG_ON(1);
			
 
				 	}
			
@@ -2005,16 +2037,19 @@ extern void cfs_bandwidth_usage_inc(void);
 
				 extern void cfs_bandwidth_usage_dec(void);
			
 
				 
			
 
				 #ifdef CONFIG_NO_HZ_COMMON
			
 
				-enum rq_nohz_flag_bits {
			
 
				-	NOHZ_TICK_STOPPED,
			
 
				-	NOHZ_BALANCE_KICK,
			
 
				-};
			
 
				+#define NOHZ_BALANCE_KICK_BIT	0
			
 
				+#define NOHZ_STATS_KICK_BIT	1
			
 
				+
			
 
				+#define NOHZ_BALANCE_KICK	BIT(NOHZ_BALANCE_KICK_BIT)
			
 
				+#define NOHZ_STATS_KICK		BIT(NOHZ_STATS_KICK_BIT)
			
 
				+
			
 
				+#define NOHZ_KICK_MASK	(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
			
 
				 
			
 
				 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
			
 
				 
			
 
				-extern void nohz_balance_exit_idle(unsigned int cpu);
			
 
				+extern void nohz_balance_exit_idle(struct rq *rq);
			
 
				 #else
			
 
				-static inline void nohz_balance_exit_idle(unsigned int cpu) { }
			
 
				+static inline void nohz_balance_exit_idle(struct rq *rq) { }
			
 
				 #endif
			
 
				 
			
 
				 
			
@@ -2113,15 +2148,14 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 
				 #endif /* CONFIG_CPU_FREQ */
			
 
				 
			
 
				 #ifdef arch_scale_freq_capacity
			
 
				-#ifndef arch_scale_freq_invariant
			
 
				-#define arch_scale_freq_invariant()	(true)
			
 
				-#endif
			
 
				-#else /* arch_scale_freq_capacity */
			
 
				-#define arch_scale_freq_invariant()	(false)
			
 
				+# ifndef arch_scale_freq_invariant
			
 
				+#  define arch_scale_freq_invariant()	true
			
 
				+# endif
			
 
				+#else
			
 
				+# define arch_scale_freq_invariant()	false
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
			
 
				-
			
 
				 static inline unsigned long cpu_util_dl(struct rq *rq)
			
 
				 {
			
 
				 	return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
			
@@ -2129,7 +2163,13 @@ static inline unsigned long cpu_util_dl(struct rq *rq)
 
				 
			
 
				 static inline unsigned long cpu_util_cfs(struct rq *rq)
			
 
				 {
			
 
				-	return rq->cfs.avg.util_avg;
			
 
				-}
			
 
				+	unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);
			
 
				+
			
 
				+	if (sched_feat(UTIL_EST)) {
			
 
				+		util = max_t(unsigned long, util,
			
 
				+			     READ_ONCE(rq->cfs.avg.util_est.enqueued));
			
 
				+	}
			
 
				 
			
 
				+	return util;
			
 
				+}
			
 
				 #endif
			
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -1,14 +1,13 @@
 
				 // SPDX-License-Identifier: GPL-2.0
			
 
				-
			
 
				-#include <linux/slab.h>
			
 
				-#include <linux/fs.h>
			
 
				-#include <linux/seq_file.h>
			
 
				-#include <linux/proc_fs.h>
			
 
				-
			
 
				+/*
			
 
				+ * /proc/schedstat implementation
			
 
				+ */
			
 
				 #include "sched.h"
			
 
				 
			
 
				 /*
			
 
				- * bump this up when changing the output format or the meaning of an existing
			
 
				+ * Current schedstat API version.
			
 
				+ *
			
 
				+ * Bump this up when changing the output format or the meaning of an existing
			
 
				  * format, so that tools can adapt (or abort)
			
 
				  */
			
 
				 #define SCHEDSTAT_VERSION 15
			
@@ -78,8 +77,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
 
				  * This itererator needs some explanation.
			
 
				  * It returns 1 for the header position.
			
 
				  * This means 2 is cpu 0.
			
 
				- * In a hotplugged system some cpus, including cpu 0, may be missing so we have
			
 
				- * to use cpumask_* to iterate over the cpus.
			
 
				+ * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
			
 
				+ * to use cpumask_* to iterate over the CPUs.
			
 
				  */
			
 
				 static void *schedstat_start(struct seq_file *file, loff_t *offset)
			
 
				 {
			
@@ -99,12 +98,14 @@ static void *schedstat_start(struct seq_file *file, loff_t *offset)
 
				 
			
 
				 	if (n < nr_cpu_ids)
			
 
				 		return (void *)(unsigned long)(n + 2);
			
 
				+
			
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				 static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset)
			
 
				 {
			
 
				 	(*offset)++;
			
 
				+
			
 
				 	return schedstat_start(file, offset);
			
 
				 }
			
 
				 
			
@@ -134,6 +135,7 @@ static const struct file_operations proc_schedstat_operations = {
 
				 static int __init proc_schedstat_init(void)
			
 
				 {
			
 
				 	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 subsys_initcall(proc_schedstat_init);
			
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -30,35 +30,29 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 
				 	if (rq)
			
 
				 		rq->rq_sched_info.run_delay += delta;
			
 
				 }
			
 
				-#define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
			
 
				+#define   schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
			
 
				 #define __schedstat_inc(var)		do { var++; } while (0)
			
 
				-#define schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
			
 
				+#define   schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
			
 
				 #define __schedstat_add(var, amt)	do { var += (amt); } while (0)
			
 
				-#define schedstat_add(var, amt)		do { if (schedstat_enabled()) { var += (amt); } } while (0)
			
 
				-#define __schedstat_set(var, val)		do { var = (val); } while (0)
			
 
				-#define schedstat_set(var, val)		do { if (schedstat_enabled()) { var = (val); } } while (0)
			
 
				-#define schedstat_val(var)		(var)
			
 
				-#define schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
			
 
				-
			
 
				-#else /* !CONFIG_SCHEDSTATS */
			
 
				-static inline void
			
 
				-rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
			
 
				-{}
			
 
				-static inline void
			
 
				-rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
			
 
				-{}
			
 
				-static inline void
			
 
				-rq_sched_info_depart(struct rq *rq, unsigned long long delta)
			
 
				-{}
			
 
				-#define schedstat_enabled()		0
			
 
				-#define __schedstat_inc(var)		do { } while (0)
			
 
				-#define schedstat_inc(var)		do { } while (0)
			
 
				-#define __schedstat_add(var, amt)	do { } while (0)
			
 
				-#define schedstat_add(var, amt)		do { } while (0)
			
 
				-#define __schedstat_set(var, val)	do { } while (0)
			
 
				-#define schedstat_set(var, val)		do { } while (0)
			
 
				-#define schedstat_val(var)		0
			
 
				-#define schedstat_val_or_zero(var)	0
			
 
				+#define   schedstat_add(var, amt)	do { if (schedstat_enabled()) { var += (amt); } } while (0)
			
 
				+#define __schedstat_set(var, val)	do { var = (val); } while (0)
			
 
				+#define   schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
			
 
				+#define   schedstat_val(var)		(var)
			
 
				+#define   schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
			
 
				+
			
 
				+#else /* !CONFIG_SCHEDSTATS: */
			
 
				+static inline void rq_sched_info_arrive  (struct rq *rq, unsigned long long delta) { }
			
 
				+static inline void rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) { }
			
 
				+static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delta) { }
			
 
				+# define   schedstat_enabled()		0
			
 
				+# define __schedstat_inc(var)		do { } while (0)
			
 
				+# define   schedstat_inc(var)		do { } while (0)
			
 
				+# define __schedstat_add(var, amt)	do { } while (0)
			
 
				+# define   schedstat_add(var, amt)	do { } while (0)
			
 
				+# define __schedstat_set(var, val)	do { } while (0)
			
 
				+# define   schedstat_set(var, val)	do { } while (0)
			
 
				+# define   schedstat_val(var)		0
			
 
				+# define   schedstat_val_or_zero(var)	0
			
 
				 #endif /* CONFIG_SCHEDSTATS */
			
 
				 
			
 
				 #ifdef CONFIG_SCHED_INFO
			
@@ -69,9 +63,9 @@ static inline void sched_info_reset_dequeued(struct task_struct *t)
 
				 
			
 
				 /*
			
 
				  * We are interested in knowing how long it was from the *first* time a
			
 
				- * task was queued to the time that it finally hit a cpu, we call this routine
			
 
				- * from dequeue_task() to account for possible rq->clock skew across cpus. The
			
 
				- * delta taken on each cpu would annul the skew.
			
 
				+ * task was queued to the time that it finally hit a CPU, we call this routine
			
 
				+ * from dequeue_task() to account for possible rq->clock skew across CPUs. The
			
 
				+ * delta taken on each CPU would annul the skew.
			
 
				  */
			
 
				 static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
			
 
				 {
			
@@ -87,7 +81,7 @@ static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Called when a task finally hits the cpu.  We can now calculate how
			
 
				+ * Called when a task finally hits the CPU.  We can now calculate how
			
 
				  * long it was waiting to run.  We also note when it began so that we
			
 
				  * can keep stats on how long its timeslice is.
			
 
				  */
			
@@ -112,9 +106,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
 
				  */
			
 
				 static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
			
 
				 {
			
 
				-	if (unlikely(sched_info_on()))
			
 
				+	if (unlikely(sched_info_on())) {
			
 
				 		if (!t->sched_info.last_queued)
			
 
				 			t->sched_info.last_queued = rq_clock(rq);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -127,8 +122,7 @@ static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
 
				  */
			
 
				 static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
			
 
				 {
			
 
				-	unsigned long long delta = rq_clock(rq) -
			
 
				-					t->sched_info.last_arrival;
			
 
				+	unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival;
			
 
				 
			
 
				 	rq_sched_info_depart(rq, delta);
			
 
				 
			
@@ -142,11 +136,10 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
 
				  * the idle task.)  We are only called when prev != next.
			
 
				  */
			
 
				 static inline void
			
 
				-__sched_info_switch(struct rq *rq,
			
 
				-		    struct task_struct *prev, struct task_struct *next)
			
 
				+__sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
			
 
				 {
			
 
				 	/*
			
 
				-	 * prev now departs the cpu.  It's not interesting to record
			
 
				+	 * prev now departs the CPU.  It's not interesting to record
			
 
				 	 * stats about how efficient we were at scheduling the idle
			
 
				 	 * process, however.
			
 
				 	 */
			
@@ -156,18 +149,19 @@ __sched_info_switch(struct rq *rq,
 
				 	if (next != rq->idle)
			
 
				 		sched_info_arrive(rq, next);
			
 
				 }
			
 
				+
			
 
				 static inline void
			
 
				-sched_info_switch(struct rq *rq,
			
 
				-		  struct task_struct *prev, struct task_struct *next)
			
 
				+sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
			
 
				 {
			
 
				 	if (unlikely(sched_info_on()))
			
 
				 		__sched_info_switch(rq, prev, next);
			
 
				 }
			
 
				-#else
			
 
				-#define sched_info_queued(rq, t)		do { } while (0)
			
 
				-#define sched_info_reset_dequeued(t)	do { } while (0)
			
 
				-#define sched_info_dequeued(rq, t)		do { } while (0)
			
 
				-#define sched_info_depart(rq, t)		do { } while (0)
			
 
				-#define sched_info_arrive(rq, next)		do { } while (0)
			
 
				-#define sched_info_switch(rq, t, next)		do { } while (0)
			
 
				+
			
 
				+#else /* !CONFIG_SCHED_INFO: */
			
 
				+# define sched_info_queued(rq, t)	do { } while (0)
			
 
				+# define sched_info_reset_dequeued(t)	do { } while (0)
			
 
				+# define sched_info_dequeued(rq, t)	do { } while (0)
			
 
				+# define sched_info_depart(rq, t)	do { } while (0)
			
 
				+# define sched_info_arrive(rq, next)	do { } while (0)
			
 
				+# define sched_info_switch(rq, t, next)	do { } while (0)
			
 
				 #endif /* CONFIG_SCHED_INFO */
			
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -1,6 +1,4 @@
 
				 // SPDX-License-Identifier: GPL-2.0
			
 
				-#include "sched.h"
			
 
				-
			
 
				 /*
			
 
				  * stop-task scheduling class.
			
 
				  *
			
@@ -9,6 +7,7 @@
 
				  *
			
 
				  * See kernel/stop_machine.c
			
 
				  */
			
 
				+#include "sched.h"
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				 static int
			
@@ -75,6 +74,14 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 
				 	cgroup_account_cputime(curr, delta_exec);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * scheduler tick hitting a task of our scheduling class.
			
 
				+ *
			
 
				+ * NOTE: This function can be called remotely by the tick offload that
			
 
				+ * goes along full dynticks. Therefore no local assumption can be made
			
 
				+ * and everything must be accessed through the @rq and @curr passed in
			
 
				+ * parameters.
			
 
				+ */
			
 
				 static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
			
 
				 {
			
 
				 }
			
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -1,6 +1,8 @@
 
				 // SPDX-License-Identifier: GPL-2.0
			
 
				-#include <linux/sched/signal.h>
			
 
				-#include <linux/swait.h>
			
 
				+/*
			
 
				+ * <linux/swait.h> (simple wait queues) implementation:
			
 
				+ */
			
 
				+#include "sched.h"
			
 
				 
			
 
				 void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
			
 
				 			     struct lock_class_key *key)
			
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2,10 +2,6 @@
 
				 /*
			
 
				  * Scheduler topology setup/handling methods
			
 
				  */
			
 
				-#include <linux/sched.h>
			
 
				-#include <linux/mutex.h>
			
 
				-#include <linux/sched/isolation.h>
			
 
				-
			
 
				 #include "sched.h"
			
 
				 
			
 
				 DEFINE_MUTEX(sched_domains_mutex);
			
@@ -41,8 +37,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
				 	if (!(sd->flags & SD_LOAD_BALANCE)) {
			
 
				 		printk("does not load-balance\n");
			
 
				 		if (sd->parent)
			
 
				-			printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain"
			
 
				-					" has parent");
			
 
				+			printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
			
 
				 		return -1;
			
 
				 	}
			
 
				 
			
@@ -50,12 +45,10 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
				 	       cpumask_pr_args(sched_domain_span(sd)), sd->name);
			
 
				 
			
 
				 	if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
			
 
				-		printk(KERN_ERR "ERROR: domain->span does not contain "
			
 
				-				"CPU%d\n", cpu);
			
 
				+		printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
			
 
				 	}
			
 
				 	if (!cpumask_test_cpu(cpu, sched_group_span(group))) {
			
 
				-		printk(KERN_ERR "ERROR: domain->groups does not contain"
			
 
				-				" CPU%d\n", cpu);
			
 
				+		printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
			
 
				 	}
			
 
				 
			
 
				 	printk(KERN_DEBUG "%*s groups:", level + 1, "");
			
@@ -115,8 +108,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
				 
			
 
				 	if (sd->parent &&
			
 
				 	    !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
			
 
				-		printk(KERN_ERR "ERROR: parent span is not a superset "
			
 
				-			"of domain->span\n");
			
 
				+		printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -595,7 +587,7 @@ int group_balance_cpu(struct sched_group *sg)
 
				  * are not.
			
 
				  *
			
 
				  * This leads to a few particularly weird cases where the sched_domain's are
			
 
				- * not of the same number for each cpu. Consider:
			
 
				+ * not of the same number for each CPU. Consider:
			
 
				  *
			
 
				  * NUMA-2	0-3						0-3
			
 
				  *  groups:	{0-2},{1-3}					{1-3},{0-2}
			
@@ -780,7 +772,7 @@ fail:
 
				  *	    ^ ^             ^ ^
			
 
				  *          `-'             `-'
			
 
				  *
			
 
				- * The sched_domains are per-cpu and have a two way link (parent & child) and
			
 
				+ * The sched_domains are per-CPU and have a two way link (parent & child) and
			
 
				  * denote the ever growing mask of CPUs belonging to that level of topology.
			
 
				  *
			
 
				  * Each sched_domain has a circular (double) linked list of sched_group's, each
			
@@ -1021,6 +1013,7 @@ __visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map)
 
				 	d->rd = alloc_rootdomain();
			
 
				 	if (!d->rd)
			
 
				 		return sa_sd;
			
 
				+
			
 
				 	return sa_rootdomain;
			
 
				 }
			
 
				 
			
@@ -1047,12 +1040,14 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_NUMA
			
 
				-static int sched_domains_numa_levels;
			
 
				 enum numa_topology_type sched_numa_topology_type;
			
 
				-static int *sched_domains_numa_distance;
			
 
				-int sched_max_numa_distance;
			
 
				-static struct cpumask ***sched_domains_numa_masks;
			
 
				-static int sched_domains_curr_level;
			
 
				+
			
 
				+static int			sched_domains_numa_levels;
			
 
				+static int			sched_domains_curr_level;
			
 
				+
			
 
				+int				sched_max_numa_distance;
			
 
				+static int			*sched_domains_numa_distance;
			
 
				+static struct cpumask		***sched_domains_numa_masks;
			
 
				 #endif
			
 
				 
			
 
				 /*
			
@@ -1074,11 +1069,11 @@ static int sched_domains_curr_level;
 
				  *   SD_ASYM_PACKING        - describes SMT quirks
			
 
				  */
			
 
				 #define TOPOLOGY_SD_FLAGS		\
			
 
				-	(SD_SHARE_CPUCAPACITY |		\
			
 
				+	(SD_SHARE_CPUCAPACITY	|	\
			
 
				 	 SD_SHARE_PKG_RESOURCES |	\
			
 
				-	 SD_NUMA |			\
			
 
				-	 SD_ASYM_PACKING |		\
			
 
				-	 SD_ASYM_CPUCAPACITY |		\
			
 
				+	 SD_NUMA		|	\
			
 
				+	 SD_ASYM_PACKING	|	\
			
 
				+	 SD_ASYM_CPUCAPACITY	|	\
			
 
				 	 SD_SHARE_POWERDOMAIN)
			
 
				 
			
 
				 static struct sched_domain *
			
@@ -1628,7 +1623,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
 
				 			pr_err("     the %s domain not a subset of the %s domain\n",
			
 
				 					child->name, sd->name);
			
 
				 #endif
			
 
				-			/* Fixup, ensure @sd has at least @child cpus. */
			
 
				+			/* Fixup, ensure @sd has at least @child CPUs. */
			
 
				 			cpumask_or(sched_domain_span(sd),
			
 
				 				   sched_domain_span(sd),
			
 
				 				   sched_domain_span(child));
			
@@ -1720,6 +1715,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 
				 	ret = 0;
			
 
				 error:
			
 
				 	__free_domain_allocs(&d, alloc_state, cpu_map);
			
 
				+
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -1824,6 +1820,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
 
				 		return 1;
			
 
				 
			
 
				 	tmp = SD_ATTR_INIT;
			
 
				+
			
 
				 	return !memcmp(cur ? (cur + idx_cur) : &tmp,
			
 
				 			new ? (new + idx_new) : &tmp,
			
 
				 			sizeof(struct sched_domain_attr));
			
@@ -1929,4 +1926,3 @@ match2:
 
				 
			
 
				 	mutex_unlock(&sched_domains_mutex);
			
 
				 }
			
 
				-
			
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -3,14 +3,7 @@
 
				  *
			
 
				  * (C) 2004 Nadia Yvette Chambers, Oracle
			
 
				  */
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/export.h>
			
 
				-#include <linux/sched/signal.h>
			
 
				-#include <linux/sched/debug.h>
			
 
				-#include <linux/mm.h>
			
 
				-#include <linux/wait.h>
			
 
				-#include <linux/hash.h>
			
 
				-#include <linux/kthread.h>
			
 
				+#include "sched.h"
			
 
				 
			
 
				 void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
			
 
				 {
			
@@ -107,6 +100,7 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 
				 			break;
			
 
				 		}
			
 
				 	}
			
 
				+
			
 
				 	return nr_exclusive;
			
 
				 }
			
 
				 
			
@@ -317,6 +311,7 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 
				 	spin_unlock(&wq->lock);
			
 
				 	schedule();
			
 
				 	spin_lock(&wq->lock);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 EXPORT_SYMBOL(do_wait_intr);
			
@@ -333,6 +328,7 @@ int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 
				 	spin_unlock_irq(&wq->lock);
			
 
				 	schedule();
			
 
				 	spin_lock_irq(&wq->lock);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 EXPORT_SYMBOL(do_wait_intr_irq);
			
@@ -378,6 +374,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
 
				 
			
 
				 	if (ret)
			
 
				 		list_del_init(&wq_entry->entry);
			
 
				+
			
 
				 	return ret;
			
 
				 }
			
 
				 EXPORT_SYMBOL(autoremove_wake_function);
			
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -1,10 +1,7 @@
 
				 /*
			
 
				  * The implementation of the wait_bit*() and related waiting APIs:
			
 
				  */
			
 
				-#include <linux/wait_bit.h>
			
 
				-#include <linux/sched/signal.h>
			
 
				-#include <linux/sched/debug.h>
			
 
				-#include <linux/hash.h>
			
 
				+#include "sched.h"
			
 
				 
			
 
				 #define WAIT_TABLE_BITS 8
			
 
				 #define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
			
@@ -29,8 +26,8 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
 
				 			wait_bit->key.bit_nr != key->bit_nr ||
			
 
				 			test_bit(key->bit_nr, key->flags))
			
 
				 		return 0;
			
 
				-	else
			
 
				-		return autoremove_wake_function(wq_entry, mode, sync, key);
			
 
				+
			
 
				+	return autoremove_wake_function(wq_entry, mode, sync, key);
			
 
				 }
			
 
				 EXPORT_SYMBOL(wake_bit_function);
			
 
				 
			
@@ -50,7 +47,9 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
 
				 		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
			
 
				 			ret = (*action)(&wbq_entry->key, mode);
			
 
				 	} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
			
 
				+
			
 
				 	finish_wait(wq_head, &wbq_entry->wq_entry);
			
 
				+
			
 
				 	return ret;
			
 
				 }
			
 
				 EXPORT_SYMBOL(__wait_on_bit);
			
@@ -73,6 +72,7 @@ int __sched out_of_line_wait_on_bit_timeout(
 
				 	DEFINE_WAIT_BIT(wq_entry, word, bit);
			
 
				 
			
 
				 	wq_entry.key.timeout = jiffies + timeout;
			
 
				+
			
 
				 	return __wait_on_bit(wq_head, &wq_entry, action, mode);
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
			
@@ -120,6 +120,7 @@ EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
 
				 void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
			
 
				 {
			
 
				 	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
			
 
				+
			
 
				 	if (waitqueue_active(wq_head))
			
 
				 		__wake_up(wq_head, TASK_NORMAL, 1, &key);
			
 
				 }
			
@@ -157,6 +158,7 @@ static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
 
				 {
			
 
				 	if (BITS_PER_LONG == 64) {
			
 
				 		unsigned long q = (unsigned long)p;
			
 
				+
			
 
				 		return bit_waitqueue((void *)(q & ~1), q & 1);
			
 
				 	}
			
 
				 	return bit_waitqueue(p, 0);
			
@@ -173,6 +175,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo
 
				 	    wait_bit->key.bit_nr != key->bit_nr ||
			
 
				 	    atomic_read(val) != 0)
			
 
				 		return 0;
			
 
				+
			
 
				 	return autoremove_wake_function(wq_entry, mode, sync, key);
			
 
				 }
			
 
				 
			
@@ -196,6 +199,7 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en
 
				 		ret = (*action)(val, mode);
			
 
				 	} while (!ret && atomic_read(val) != 0);
			
 
				 	finish_wait(wq_head, &wbq_entry->wq_entry);
			
 
				+
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -226,6 +230,7 @@ __sched int atomic_t_wait(atomic_t *counter, unsigned int mode)
 
				 	schedule();
			
 
				 	if (signal_pending_state(mode, current))
			
 
				 		return -EINTR;
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 EXPORT_SYMBOL(atomic_t_wait);
			
@@ -250,6 +255,7 @@ __sched int bit_wait(struct wait_bit_key *word, int mode)
 
				 	schedule();
			
 
				 	if (signal_pending_state(mode, current))
			
 
				 		return -EINTR;
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 EXPORT_SYMBOL(bit_wait);
			
@@ -259,6 +265,7 @@ __sched int bit_wait_io(struct wait_bit_key *word, int mode)
 
				 	io_schedule();
			
 
				 	if (signal_pending_state(mode, current))
			
 
				 		return -EINTR;
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 EXPORT_SYMBOL(bit_wait_io);
			
@@ -266,11 +273,13 @@ EXPORT_SYMBOL(bit_wait_io);
 
				 __sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
			
 
				 {
			
 
				 	unsigned long now = READ_ONCE(jiffies);
			
 
				+
			
 
				 	if (time_after_eq(now, word->timeout))
			
 
				 		return -EAGAIN;
			
 
				 	schedule_timeout(word->timeout - now);
			
 
				 	if (signal_pending_state(mode, current))
			
 
				 		return -EINTR;
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(bit_wait_timeout);
			
@@ -278,11 +287,13 @@ EXPORT_SYMBOL_GPL(bit_wait_timeout);
 
				 __sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
			
 
				 {
			
 
				 	unsigned long now = READ_ONCE(jiffies);
			
 
				+
			
 
				 	if (time_after_eq(now, word->timeout))
			
 
				 		return -EAGAIN;
			
 
				 	io_schedule_timeout(word->timeout - now);
			
 
				 	if (signal_pending_state(mode, current))
			
 
				 		return -EINTR;
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
			
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -463,11 +463,18 @@ static int __init setup_tick_nohz(char *str)
 
				 
			
 
				 __setup("nohz=", setup_tick_nohz);
			
 
				 
			
 
				-int tick_nohz_tick_stopped(void)
			
 
				+bool tick_nohz_tick_stopped(void)
			
 
				 {
			
 
				 	return __this_cpu_read(tick_cpu_sched.tick_stopped);
			
 
				 }
			
 
				 
			
 
				+bool tick_nohz_tick_stopped_cpu(int cpu)
			
 
				+{
			
 
				+	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
			
 
				+
			
 
				+	return ts->tick_stopped;
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * tick_nohz_update_jiffies - update jiffies when idle was interrupted
			
 
				  *
			
@@ -723,12 +730,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 
				 		delta = KTIME_MAX;
			
 
				 	}
			
 
				 
			
 
				-#ifdef CONFIG_NO_HZ_FULL
			
 
				-	/* Limit the tick delta to the maximum scheduler deferment */
			
 
				-	if (!ts->inidle)
			
 
				-		delta = min(delta, scheduler_tick_max_deferment());
			
 
				-#endif
			
 
				-
			
 
				 	/* Calculate the next expiry time */
			
 
				 	if (delta < (KTIME_MAX - basemono))
			
 
				 		expires = basemono + delta;
			
@@ -935,13 +936,6 @@ void tick_nohz_idle_enter(void)
 
				 	struct tick_sched *ts;
			
 
				 
			
 
				 	lockdep_assert_irqs_enabled();
			
 
				-	/*
			
 
				-	 * Update the idle state in the scheduler domain hierarchy
			
 
				-	 * when tick_nohz_stop_sched_tick() is called from the idle loop.
			
 
				-	 * State will be updated to busy during the first busy tick after
			
 
				-	 * exiting idle.
			
 
				-	 */
			
 
				-	set_cpu_sd_state_idle();
			
 
				 
			
 
				 	local_irq_disable();
			
 
				 
			
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5573,12 +5573,13 @@ static void __init wq_numa_init(void)
 
				 int __init workqueue_init_early(void)
			
 
				 {
			
 
				 	int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
			
 
				+	int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
			
 
				 	int i, cpu;
			
 
				 
			
 
				 	WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
			
 
				 
			
 
				 	BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
			
 
				-	cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN));
			
 
				+	cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
			
 
				 
			
 
				 	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);