@@ -1095,6 +1095,34 @@ static void task_numa_assign(struct task_numa_env *env,
 	env->best_cpu = env->dst_cpu;
 }
 
+static bool load_too_imbalanced(long orig_src_load, long orig_dst_load,
+				long src_load, long dst_load,
+				struct task_numa_env *env)
+{
+	long imb, old_imb;
+
+	/* We care about the slope of the imbalance, not the direction. */
+	if (dst_load < src_load)
+		swap(dst_load, src_load);
+
+	/* Is the difference below the threshold? */
+	imb = dst_load * 100 - src_load * env->imbalance_pct;
+	if (imb <= 0)
+		return false;
+
+	/*
+	 * The imbalance is above the allowed threshold.
+	 * Compare it with the old imbalance.
+	 */
+	if (orig_dst_load < orig_src_load)
+		swap(orig_dst_load, orig_src_load);
+
+	old_imb = orig_dst_load * 100 - orig_src_load * env->imbalance_pct;
+
+	/* Would this change make things worse? */
+	return (old_imb > imb);
+}
+
 /*
  * This checks if the overall compute and NUMA accesses of the system would
  * be improved if the source tasks was migrated to the target dst_cpu taking
@@ -1107,7 +1135,8 @@ static void task_numa_compare(struct task_numa_env *env,
 	struct rq *src_rq = cpu_rq(env->src_cpu);
 	struct rq *dst_rq = cpu_rq(env->dst_cpu);
 	struct task_struct *cur;
-	long dst_load, src_load;
+	long orig_src_load, src_load;
+	long orig_dst_load, dst_load;
 	long load;
 	long imp = (groupimp > 0) ? groupimp : taskimp;
 
@@ -1181,13 +1210,13 @@ static void task_numa_compare(struct task_numa_env *env,
 	 * In the overloaded case, try and keep the load balanced.
 	 */
 balance:
-	dst_load = env->dst_stats.load;
-	src_load = env->src_stats.load;
+	orig_dst_load = env->dst_stats.load;
+	orig_src_load = env->src_stats.load;
 
 	/* XXX missing power terms */
 	load = task_h_load(env->p);
-	dst_load += load;
-	src_load -= load;
+	dst_load = orig_dst_load + load;
+	src_load = orig_src_load - load;
 
 	if (cur) {
 		load = task_h_load(cur);
@@ -1195,11 +1224,8 @@ balance:
 		src_load += load;
 	}
 
-	/* make src_load the smaller */
-	if (dst_load < src_load)
-		swap(dst_load, src_load);
-
-	if (src_load * env->imbalance_pct < dst_load * 100)
+	if (load_too_imbalanced(orig_src_load, orig_dst_load,
+				src_load, dst_load, env))
 		goto unlock;
 
 assign:
@@ -1301,7 +1327,16 @@ static int task_numa_migrate(struct task_struct *p)
 	if (env.best_cpu == -1)
 		return -EAGAIN;
 
-	sched_setnuma(p, env.dst_nid);
+	/*
+	 * If the task is part of a workload that spans multiple NUMA nodes,
+	 * and is migrating into one of the workload's active nodes, remember
+	 * this node as the task's preferred numa node, so the workload can
+	 * settle down.
+	 * A task that migrated to a second choice node will be better off
+	 * trying for a better one later. Do not set the preferred node here.
+	 */
+	if (p->numa_group && node_isset(env.dst_nid, p->numa_group->active_nodes))
+		sched_setnuma(p, env.dst_nid);
 
 	/*
 	 * Reset the scan period if the task is being rescheduled on an
@@ -1326,12 +1361,15 @@ static int task_numa_migrate(struct task_struct *p)
 /* Attempt to migrate a task to a CPU on the preferred node. */
 static void numa_migrate_preferred(struct task_struct *p)
 {
+	unsigned long interval = HZ;
+
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults_memory))
 		return;
 
 	/* Periodically retry migrating the task to the preferred node */
-	p->numa_migrate_retry = jiffies + HZ;
+	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
+	p->numa_migrate_retry = jiffies + interval;
 
 	/* Success if task is already running on preferred CPU */
 	if (task_node(p) == p->numa_preferred_nid)
@@ -1738,6 +1776,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 	struct task_struct *p = current;
 	bool migrated = flags & TNF_MIGRATED;
 	int cpu_node = task_node(current);
+	int local = !!(flags & TNF_FAULT_LOCAL);
 	int priv;
 
 	if (!numabalancing_enabled)
@@ -1786,6 +1825,17 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 		task_numa_group(p, last_cpupid, flags, &priv);
 	}
 
+	/*
+	 * If a workload spans multiple NUMA nodes, a shared fault that
+	 * occurs wholly within the set of nodes that the workload is
+	 * actively using should be counted as local. This allows the
+	 * scan rate to slow down when a workload has settled down.
+	 */
+	if (!priv && !local && p->numa_group &&
+	    node_isset(cpu_node, p->numa_group->active_nodes) &&
+	    node_isset(mem_node, p->numa_group->active_nodes))
+		local = 1;
+
 	task_numa_placement(p);
 
 	/*
@@ -1800,7 +1850,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 
 	p->numa_faults_buffer_memory[task_faults_idx(mem_node, priv)] += pages;
 	p->numa_faults_buffer_cpu[task_faults_idx(cpu_node, priv)] += pages;
-	p->numa_faults_locality[!!(flags & TNF_FAULT_LOCAL)] += pages;
+	p->numa_faults_locality[local] += pages;
 }
 
 static void reset_ptenuma_scan(struct task_struct *p)
@@ -3301,7 +3351,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	}
 
 	if (!se)
-		rq->nr_running -= task_delta;
+		sub_nr_running(rq, task_delta);
 
 	cfs_rq->throttled = 1;
 	cfs_rq->throttled_clock = rq_clock(rq);
@@ -3352,7 +3402,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	}
 
 	if (!se)
-		rq->nr_running += task_delta;
+		add_nr_running(rq, task_delta);
 
 	/* determine whether we need to wake up potentially idle cpu */
 	if (rq->curr == rq->idle && rq->cfs.nr_running)
@@ -3884,7 +3934,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 	if (!se) {
 		update_rq_runnable_avg(rq, rq->nr_running);
-		inc_nr_running(rq);
+		add_nr_running(rq, 1);
 	}
 	hrtick_update(rq);
 }
@@ -3944,7 +3994,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	}
 
 	if (!se) {
-		dec_nr_running(rq);
+		sub_nr_running(rq, 1);
 		update_rq_runnable_avg(rq, 1);
 	}
 	hrtick_update(rq);
@@ -4015,7 +4065,7 @@ static void record_wakee(struct task_struct *p)
 	 * about the loss.
 	 */
 	if (jiffies > current->wakee_flip_decay_ts + HZ) {
-		current->wakee_flips = 0;
+		current->wakee_flips >>= 1;
 		current->wakee_flip_decay_ts = jiffies;
 	}
 
@@ -4449,10 +4499,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 		sd = tmp;
 	}
 
-	if (affine_sd) {
-		if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
-			prev_cpu = cpu;
+	if (affine_sd && cpu != prev_cpu && wake_affine(affine_sd, p, sync))
+		prev_cpu = cpu;
 
+	if (sd_flag & SD_BALANCE_WAKE) {
 		new_cpu = select_idle_sibling(p, prev_cpu);
 		goto unlock;
 	}
@@ -4520,6 +4570,9 @@ migrate_task_rq_fair(struct task_struct *p, int next_cpu)
 		atomic_long_add(se->avg.load_avg_contrib,
 				&cfs_rq->removed_load);
 	}
+
+	/* We have migrated, no longer consider this task hot */
+	se->exec_start = 0;
 }
 #endif /* CONFIG_SMP */
 
@@ -5070,6 +5123,7 @@ task_hot(struct task_struct *p, u64 now)
 /* Returns true if the destination node has incurred more faults */
 static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 {
+	struct numa_group *numa_group = rcu_dereference(p->numa_group);
 	int src_nid, dst_nid;
 
 	if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults_memory ||
@@ -5083,21 +5137,29 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 	if (src_nid == dst_nid)
 		return false;
 
-	/* Always encourage migration to the preferred node. */
-	if (dst_nid == p->numa_preferred_nid)
-		return true;
+	if (numa_group) {
+		/* Task is already in the group's interleave set. */
+		if (node_isset(src_nid, numa_group->active_nodes))
+			return false;
+
+		/* Task is moving into the group's interleave set. */
+		if (node_isset(dst_nid, numa_group->active_nodes))
+			return true;
 
-	/* If both task and group weight improve, this move is a winner. */
-	if (task_weight(p, dst_nid) > task_weight(p, src_nid) &&
-	    group_weight(p, dst_nid) > group_weight(p, src_nid))
+		return group_faults(p, dst_nid) > group_faults(p, src_nid);
+	}
+
+	/* Encourage migration to the preferred node. */
+	if (dst_nid == p->numa_preferred_nid)
 		return true;
 
-	return false;
+	return task_faults(p, dst_nid) > task_faults(p, src_nid);
 }
 
 
 static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 {
+	struct numa_group *numa_group = rcu_dereference(p->numa_group);
 	int src_nid, dst_nid;
 
 	if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER))
@@ -5112,16 +5174,23 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 	if (src_nid == dst_nid)
 		return false;
 
+	if (numa_group) {
+		/* Task is moving within/into the group's interleave set. */
+		if (node_isset(dst_nid, numa_group->active_nodes))
+			return false;
+
+		/* Task is moving out of the group's interleave set. */
+		if (node_isset(src_nid, numa_group->active_nodes))
+			return true;
+
+		return group_faults(p, dst_nid) < group_faults(p, src_nid);
+	}
+
 	/* Migrating away from the preferred node is always bad. */
 	if (src_nid == p->numa_preferred_nid)
 		return true;
 
-	/* If either task or group weight get worse, don't do it. */
-	if (task_weight(p, dst_nid) < task_weight(p, src_nid) ||
-	    group_weight(p, dst_nid) < group_weight(p, src_nid))
-		return true;
-
-	return false;
+	return task_faults(p, dst_nid) < task_faults(p, src_nid);
 }
 
 #else
@@ -5564,6 +5633,7 @@ static unsigned long scale_rt_power(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	u64 total, available, age_stamp, avg;
+	s64 delta;
 
 	/*
 	 * Since we're reading these variables without serialization make sure
@@ -5572,7 +5642,11 @@ static unsigned long scale_rt_power(int cpu)
 	age_stamp = ACCESS_ONCE(rq->age_stamp);
 	avg = ACCESS_ONCE(rq->rt_avg);
 
-	total = sched_avg_period() + (rq_clock(rq) - age_stamp);
+	delta = rq_clock(rq) - age_stamp;
+	if (unlikely(delta < 0))
+		delta = 0;
+
+	total = sched_avg_period() + delta;
 
 	if (unlikely(total < avg)) {
 		/* Ensures that power won't end up being negative */
@@ -6640,17 +6714,44 @@ out:
 	return ld_moved;
 }
 
+static inline unsigned long
+get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
+{
+	unsigned long interval = sd->balance_interval;
+
+	if (cpu_busy)
+		interval *= sd->busy_factor;
+
+	/* scale ms to jiffies */
+	interval = msecs_to_jiffies(interval);
+	interval = clamp(interval, 1UL, max_load_balance_interval);
+
+	return interval;
+}
+
+static inline void
+update_next_balance(struct sched_domain *sd, int cpu_busy, unsigned long *next_balance)
+{
+	unsigned long interval, next;
+
+	interval = get_sd_balance_interval(sd, cpu_busy);
+	next = sd->last_balance + interval;
+
+	if (time_after(*next_balance, next))
+		*next_balance = next;
+}
+
 /*
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
  */
 static int idle_balance(struct rq *this_rq)
 {
+	unsigned long next_balance = jiffies + HZ;
+	int this_cpu = this_rq->cpu;
 	struct sched_domain *sd;
 	int pulled_task = 0;
-	unsigned long next_balance = jiffies + HZ;
 	u64 curr_cost = 0;
-	int this_cpu = this_rq->cpu;
 
 	idle_enter_fair(this_rq);
 
@@ -6660,8 +6761,15 @@ static int idle_balance(struct rq *this_rq)
 	 */
 	this_rq->idle_stamp = rq_clock(this_rq);
 
-	if (this_rq->avg_idle < sysctl_sched_migration_cost)
+	if (this_rq->avg_idle < sysctl_sched_migration_cost) {
+		rcu_read_lock();
+		sd = rcu_dereference_check_sched_domain(this_rq->sd);
+		if (sd)
+			update_next_balance(sd, 0, &next_balance);
+		rcu_read_unlock();
+
 		goto out;
+	}
 
 	/*
 	 * Drop the rq->lock, but keep IRQ/preempt disabled.
@@ -6671,20 +6779,20 @@ static int idle_balance(struct rq *this_rq)
 	update_blocked_averages(this_cpu);
 	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
-		unsigned long interval;
 		int continue_balancing = 1;
 		u64 t0, domain_cost;
 
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
-		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
+			update_next_balance(sd, 0, &next_balance);
 			break;
+		}
 
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
 			t0 = sched_clock_cpu(this_cpu);
 
-			/* If we've pulled tasks over stop searching: */
 			pulled_task = load_balance(this_cpu, this_rq,
 						   sd, CPU_NEWLY_IDLE,
 						   &continue_balancing);
@@ -6696,10 +6804,13 @@ static int idle_balance(struct rq *this_rq)
 			curr_cost += domain_cost;
 		}
 
-		interval = msecs_to_jiffies(sd->balance_interval);
-		if (time_after(next_balance, sd->last_balance + interval))
-			next_balance = sd->last_balance + interval;
-		if (pulled_task)
+		update_next_balance(sd, 0, &next_balance);
+
+		/*
+		 * Stop searching for tasks to pull if there are
+		 * now runnable tasks on this rq.
+		 */
+		if (pulled_task || this_rq->nr_running > 0)
 			break;
 	}
 	rcu_read_unlock();
@@ -6717,20 +6828,13 @@ static int idle_balance(struct rq *this_rq)
 	if (this_rq->cfs.h_nr_running && !pulled_task)
 		pulled_task = 1;
 
-	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
-		/*
-		 * We are going idle. next_balance may be set based on
-		 * a busy processor. So reset next_balance.
-		 */
+out:
+	/* Move the next balance forward */
+	if (time_after(this_rq->next_balance, next_balance))
 		this_rq->next_balance = next_balance;
-	}
 
-out:
 	/* Is there a task of a high priority class? */
-	if (this_rq->nr_running != this_rq->cfs.h_nr_running &&
-	    ((this_rq->stop && this_rq->stop->on_rq) ||
-	     this_rq->dl.dl_nr_running ||
-	     (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt))))
+	if (this_rq->nr_running != this_rq->cfs.h_nr_running)
 		pulled_task = -1;
 
 	if (pulled_task) {
@@ -7011,16 +7115,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 			break;
 		}
 
-		interval = sd->balance_interval;
-		if (idle != CPU_IDLE)
-			interval *= sd->busy_factor;
-
-		/* scale ms to jiffies */
-		interval = msecs_to_jiffies(interval);
-		interval = clamp(interval, 1UL, max_load_balance_interval);
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 
 		need_serialize = sd->flags & SD_SERIALIZE;
-
 		if (need_serialize) {
 			if (!spin_trylock(&balancing))
 				goto out;
@@ -7036,6 +7133,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 			idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
 		}
 		sd->last_balance = jiffies;
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 	}
 	if (need_serialize)
 		spin_unlock(&balancing);