@@ -20,8 +20,8 @@
  * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  */
 
-#include <linux/latencytop.h>
 #include <linux/sched.h>
+#include <linux/latencytop.h>
 #include <linux/cpumask.h>
 #include <linux/cpuidle.h>
 #include <linux/slab.h>
@@ -755,7 +755,9 @@ static void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         struct task_struct *p;
-        u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+        u64 delta;
+
+        delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
 
         if (entity_is_task(se)) {
                 p = task_of(se);
@@ -776,22 +778,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
         se->statistics.wait_sum += delta;
         se->statistics.wait_start = 0;
 }
-#else
-static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-#endif
 
 /*
  * Task is being enqueued - update stats:
  */
-static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         /*
          * Are we enqueueing a waiting task? (for current tasks
@@ -802,7 +794,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
         /*
          * Mark the end of the wait period if dequeueing a
@@ -810,8 +802,41 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
          */
         if (se != cfs_rq->curr)
                 update_stats_wait_end(cfs_rq, se);
+
+        if (flags & DEQUEUE_SLEEP) {
+                if (entity_is_task(se)) {
+                        struct task_struct *tsk = task_of(se);
+
+                        if (tsk->state & TASK_INTERRUPTIBLE)
+                                se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
+                        if (tsk->state & TASK_UNINTERRUPTIBLE)
+                                se->statistics.block_start = rq_clock(rq_of(cfs_rq));
+                }
+        }
+
+}
+#else
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
 }
 
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+{
+}
+#endif
+
 /*
  * We are picking a new current task - update its stats:
  */
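Note that update_stats_dequeue() now takes the dequeue flags, so the sleep/block timestamping that used to sit behind an #ifdef in dequeue_entity() lives inside the CONFIG_SCHEDSTATS copy of this helper. Below is a minimal standalone sketch of that flag-gated bookkeeping; the struct, the flag values and now_ns() are illustrative stand-ins for sched_entity, DEQUEUE_SLEEP and rq_clock(), not the kernel's definitions.

#include <stdio.h>
#include <time.h>

#define DEQUEUE_SLEEP           0x01    /* illustrative flag value */
#define TASK_INTERRUPTIBLE      0x01
#define TASK_UNINTERRUPTIBLE    0x02

struct stats {
        unsigned long long sleep_start;
        unsigned long long block_start;
};

static unsigned long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Record when a task started sleeping or blocking, but only on a sleep dequeue. */
static void stats_dequeue(struct stats *st, unsigned int task_state, int flags)
{
        if (!(flags & DEQUEUE_SLEEP))
                return;

        if (task_state & TASK_INTERRUPTIBLE)
                st->sleep_start = now_ns();
        if (task_state & TASK_UNINTERRUPTIBLE)
                st->block_start = now_ns();
}

int main(void)
{
        struct stats st = { 0, 0 };

        stats_dequeue(&st, TASK_UNINTERRUPTIBLE, DEQUEUE_SLEEP);
        printf("sleep_start=%llu block_start=%llu\n", st.sleep_start, st.block_start);
        return 0;
}

Passing the flags down lets the !CONFIG_SCHEDSTATS stubs above compile the whole block away instead of leaving an #ifdef at the call site.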
@@ -907,10 +932,11 @@ struct numa_group {
         spinlock_t lock; /* nr_tasks, tasks */
         int nr_tasks;
         pid_t gid;
+        int active_nodes;
 
         struct rcu_head rcu;
-        nodemask_t active_nodes;
         unsigned long total_faults;
+        unsigned long max_faults_cpu;
         /*
          * Faults_cpu is used to decide whether memory should move
          * towards the CPU. As a consequence, these stats are weighted
@@ -969,6 +995,18 @@ static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
                 group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
 }
 
+/*
+ * A node triggering more than 1/3 as many NUMA faults as the maximum is
+ * considered part of a numa group's pseudo-interleaving set. Migrations
+ * between these nodes are slowed down, to allow things to settle down.
+ */
+#define ACTIVE_NODE_FRACTION 3
+
+static bool numa_is_active_node(int nid, struct numa_group *ng)
+{
+        return group_faults_cpu(ng, nid) * ACTIVE_NODE_FRACTION > ng->max_faults_cpu;
+}
+
 /* Handle placement on systems where not all nodes are directly connected. */
 static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
                                         int maxdist, bool task)
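The numa_is_active_node() helper added here treats a node as part of the group's active set when its CPU faults exceed one third of the busiest node's count. A standalone sketch of the same comparison with plain integers follows; is_active_node() and the sample numbers are illustrative, not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define ACTIVE_NODE_FRACTION 3

/* A node is "active" if it sees more than 1/3 of the busiest node's faults. */
static bool is_active_node(unsigned long node_faults, unsigned long max_faults)
{
        return node_faults * ACTIVE_NODE_FRACTION > max_faults;
}

int main(void)
{
        unsigned long max_faults = 900;
        unsigned long samples[] = { 0, 100, 300, 301, 900 };

        for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                printf("faults=%lu active=%d\n", samples[i],
                       is_active_node(samples[i], max_faults));
        return 0;
}

Multiplying rather than dividing keeps the comparison exact for small fault counts and avoids a division in the fault path.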
@@ -1118,27 +1156,23 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
                 return true;
 
         /*
-         * Do not migrate if the destination is not a node that
-         * is actively used by this numa group.
+         * Destination node is much more heavily used than the source
+         * node? Allow migration.
          */
-        if (!node_isset(dst_nid, ng->active_nodes))
-                return false;
-
-        /*
-         * Source is a node that is not actively used by this
-         * numa group, while the destination is. Migrate.
-         */
-        if (!node_isset(src_nid, ng->active_nodes))
+        if (group_faults_cpu(ng, dst_nid) > group_faults_cpu(ng, src_nid) *
+                                        ACTIVE_NODE_FRACTION)
                 return true;
 
         /*
-         * Both source and destination are nodes in active
-         * use by this numa group. Maximize memory bandwidth
-         * by migrating from more heavily used groups, to less
-         * heavily used ones, spreading the load around.
-         * Use a 1/4 hysteresis to avoid spurious page movement.
+         * Distribute memory according to CPU & memory use on each node,
+         * with 3/4 hysteresis to avoid unnecessary memory migrations:
+         *
+         *      faults_cpu(dst)   3   faults_cpu(src)
+         *      --------------- * - > ---------------
+         *      faults_mem(dst)   4   faults_mem(src)
          */
-        return group_faults(p, dst_nid) < (group_faults(p, src_nid) * 3 / 4);
+        return group_faults_cpu(ng, dst_nid) * group_faults(p, src_nid) * 3 >
+               group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
 }
 
 static unsigned long weighted_cpuload(const int cpu);
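The rewritten should_numa_migrate_memory() tail compares per-node CPU/memory fault ratios by cross-multiplying, so no division or divide-by-zero guard is needed. The standalone model below reproduces the two checks with plain unsigned longs; struct node_faults and the sample figures are illustrative only.

#include <stdbool.h>
#include <stdio.h>

#define ACTIVE_NODE_FRACTION 3

struct node_faults {
        unsigned long cpu;      /* NUMA hinting faults triggered from this node's CPUs */
        unsigned long mem;      /* group faults against memory on this node */
};

static bool should_migrate(struct node_faults src, struct node_faults dst)
{
        /* Destination node much more heavily used than the source? Migrate. */
        if (dst.cpu > src.cpu * ACTIVE_NODE_FRACTION)
                return true;

        /*
         * The 3/4 hysteresis, written as a cross-multiplication:
         *   dst.cpu / dst.mem * 3/4  >  src.cpu / src.mem
         * becomes
         *   dst.cpu * src.mem * 3  >  src.cpu * dst.mem * 4
         */
        return dst.cpu * src.mem * 3 > src.cpu * dst.mem * 4;
}

int main(void)
{
        struct node_faults src = { .cpu = 400, .mem = 100 };
        struct node_faults dst = { .cpu = 500, .mem = 100 };

        /* dst ratio 5 * 3/4 = 3.75 is below src ratio 4: stay put */
        printf("migrate=%d\n", should_migrate(src, dst));

        dst.cpu = 600;  /* 6 * 3/4 = 4.5 beats 4: migrate */
        printf("migrate=%d\n", should_migrate(src, dst));
        return 0;
}

The hysteresis means memory only moves toward a node whose CPU-to-memory fault ratio is clearly higher, which damps page ping-pong between nodes in the active set.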
@@ -1484,7 +1518,7 @@ static int task_numa_migrate(struct task_struct *p)
 
                 .best_task = NULL,
                 .best_imp = 0,
-                .best_cpu = -1
+                .best_cpu = -1,
         };
         struct sched_domain *sd;
         unsigned long taskweight, groupweight;
@@ -1536,8 +1570,7 @@ static int task_numa_migrate(struct task_struct *p)
          * multiple NUMA nodes; in order to better consolidate the group,
          * we need to check other locations.
          */
-        if (env.best_cpu == -1 || (p->numa_group &&
-                        nodes_weight(p->numa_group->active_nodes) > 1)) {
+        if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
                 for_each_online_node(nid) {
                         if (nid == env.src_nid || nid == p->numa_preferred_nid)
                                 continue;
@@ -1572,12 +1605,14 @@ static int task_numa_migrate(struct task_struct *p)
          * trying for a better one later. Do not set the preferred node here.
          */
         if (p->numa_group) {
+                struct numa_group *ng = p->numa_group;
+
                 if (env.best_cpu == -1)
                         nid = env.src_nid;
                 else
                         nid = env.dst_nid;
 
-                if (node_isset(nid, p->numa_group->active_nodes))
+                if (ng->active_nodes > 1 && numa_is_active_node(env.dst_nid, ng))
                         sched_setnuma(p, env.dst_nid);
         }
 
@@ -1627,20 +1662,15 @@ static void numa_migrate_preferred(struct task_struct *p)
 }
 
 /*
- * Find the nodes on which the workload is actively running. We do this by
+ * Find out how many nodes the workload is actively running on. Do this by
  * tracking the nodes from which NUMA hinting faults are triggered. This can
  * be different from the set of nodes where the workload's memory is currently
  * located.
- *
- * The bitmask is used to make smarter decisions on when to do NUMA page
- * migrations, To prevent flip-flopping, and excessive page migrations, nodes
- * are added when they cause over 6/16 of the maximum number of faults, but
- * only removed when they drop below 3/16.
  */
-static void update_numa_active_node_mask(struct numa_group *numa_group)
+static void numa_group_count_active_nodes(struct numa_group *numa_group)
 {
         unsigned long faults, max_faults = 0;
-        int nid;
+        int nid, active_nodes = 0;
 
         for_each_online_node(nid) {
                 faults = group_faults_cpu(numa_group, nid);
@@ -1650,12 +1680,12 @@ static void update_numa_active_node_mask(struct numa_group *numa_group)
 
         for_each_online_node(nid) {
                 faults = group_faults_cpu(numa_group, nid);
-                if (!node_isset(nid, numa_group->active_nodes)) {
-                        if (faults > max_faults * 6 / 16)
-                                node_set(nid, numa_group->active_nodes);
-                } else if (faults < max_faults * 3 / 16)
-                        node_clear(nid, numa_group->active_nodes);
+                if (faults * ACTIVE_NODE_FRACTION > max_faults)
+                        active_nodes++;
         }
+
+        numa_group->max_faults_cpu = max_faults;
+        numa_group->active_nodes = active_nodes;
 }
 
 /*
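numa_group_count_active_nodes() now makes two passes: find the busiest node's CPU-fault count, then count how many nodes clear the 1/3 threshold. A standalone sketch over a plain array, without the kernel's node iterators or locking (all names illustrative):

#include <stdio.h>

#define ACTIVE_NODE_FRACTION 3

static void count_active_nodes(const unsigned long *faults, int nr_nodes,
                               unsigned long *max_out, int *active_out)
{
        unsigned long max_faults = 0;
        int nid, active_nodes = 0;

        for (nid = 0; nid < nr_nodes; nid++)
                if (faults[nid] > max_faults)
                        max_faults = faults[nid];

        for (nid = 0; nid < nr_nodes; nid++)
                if (faults[nid] * ACTIVE_NODE_FRACTION > max_faults)
                        active_nodes++;

        *max_out = max_faults;
        *active_out = active_nodes;
}

int main(void)
{
        unsigned long faults[] = { 900, 350, 100, 0 };  /* per-node CPU faults */
        unsigned long max;
        int active;

        count_active_nodes(faults, 4, &max, &active);
        printf("max_faults_cpu=%lu active_nodes=%d\n", max, active);   /* 900, 2 */
        return 0;
}

Keeping just max_faults_cpu and an integer count replaces the old nodemask plus its 6/16 add / 3/16 remove hysteresis; membership can then be rechecked cheaply at fault time via numa_is_active_node().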
@@ -1946,7 +1976,7 @@ static void task_numa_placement(struct task_struct *p)
         update_task_scan_period(p, fault_types[0], fault_types[1]);
 
         if (p->numa_group) {
-                update_numa_active_node_mask(p->numa_group);
+                numa_group_count_active_nodes(p->numa_group);
                 spin_unlock_irq(group_lock);
                 max_nid = preferred_group_nid(p, max_group_nid);
         }
@@ -1990,14 +2020,14 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
                         return;
 
                 atomic_set(&grp->refcount, 1);
+                grp->active_nodes = 1;
+                grp->max_faults_cpu = 0;
                 spin_lock_init(&grp->lock);
                 grp->gid = p->pid;
                 /* Second half of the array tracks nids where faults happen */
                 grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES *
                                                 nr_node_ids;
 
-                node_set(task_node(current), grp->active_nodes);
-
                 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
                         grp->faults[i] = p->numa_faults[i];
 
@@ -2111,6 +2141,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
         bool migrated = flags & TNF_MIGRATED;
         int cpu_node = task_node(current);
         int local = !!(flags & TNF_FAULT_LOCAL);
+        struct numa_group *ng;
         int priv;
 
         if (!static_branch_likely(&sched_numa_balancing))
@@ -2151,9 +2182,10 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
          * actively using should be counted as local. This allows the
          * scan rate to slow down when a workload has settled down.
          */
-        if (!priv && !local && p->numa_group &&
-                        node_isset(cpu_node, p->numa_group->active_nodes) &&
-                        node_isset(mem_node, p->numa_group->active_nodes))
+        ng = p->numa_group;
+        if (!priv && !local && ng && ng->active_nodes > 1 &&
+                                numa_is_active_node(cpu_node, ng) &&
+                                numa_is_active_node(mem_node, ng))
                 local = 1;
 
         task_numa_placement(p);
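With the nodemask gone, a shared fault is promoted to "local" only when the group spans more than one active node and both the faulting CPU's node and the memory's node pass the active-node test. A standalone model of that condition (struct group and the numbers are illustrative):

#include <stdbool.h>
#include <stdio.h>

#define ACTIVE_NODE_FRACTION 3

struct group {
        int active_nodes;
        unsigned long max_faults_cpu;
        unsigned long faults_cpu[4];    /* per-node CPU faults, 4 nodes here */
};

static bool node_is_active(const struct group *ng, int nid)
{
        return ng->faults_cpu[nid] * ACTIVE_NODE_FRACTION > ng->max_faults_cpu;
}

/* Count a shared fault inside the group's active set as local. */
static bool count_as_local(const struct group *ng, bool priv, bool local,
                           int cpu_node, int mem_node)
{
        if (local || priv || !ng)
                return local;
        return ng->active_nodes > 1 &&
               node_is_active(ng, cpu_node) && node_is_active(ng, mem_node);
}

int main(void)
{
        struct group ng = {
                .active_nodes = 2,
                .max_faults_cpu = 900,
                .faults_cpu = { 900, 400, 10, 0 },
        };

        printf("local=%d\n", count_as_local(&ng, false, false, 0, 1));  /* 1 */
        printf("local=%d\n", count_as_local(&ng, false, false, 0, 2));  /* 0 */
        return 0;
}

Counting such faults as local is what lets the scan rate back off once a spanning workload has settled onto its active nodes.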
@@ -3102,6 +3134,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
+static inline void check_schedstat_required(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+        if (schedstat_enabled())
+                return;
+
+        /* Force schedstat enabled if a dependent tracepoint is active */
+        if (trace_sched_stat_wait_enabled()    ||
+            trace_sched_stat_sleep_enabled()   ||
+            trace_sched_stat_iowait_enabled()  ||
+            trace_sched_stat_blocked_enabled() ||
+            trace_sched_stat_runtime_enabled()) {
+                pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+                             "stat_blocked and stat_runtime require the "
+                             "kernel parameter schedstats=enabled or "
+                             "kernel.sched_schedstats=1\n");
+        }
+#endif
+}
+
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
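check_schedstat_required() only warns; it never flips the schedstats static key by itself. A tiny standalone model of the "warn once if a consumer needs stats that are disabled" pattern, with plain booleans standing in for schedstat_enabled() and the tracepoint *_enabled() checks:

#include <stdbool.h>
#include <stdio.h>

static bool schedstats_on;              /* stands in for schedstat_enabled() */
static bool trace_sleep_on = true;      /* stands in for trace_sched_stat_sleep_enabled() */
static bool trace_wait_on;

static void check_stats_required(void)
{
        static bool warned;

        if (schedstats_on)
                return;

        /* A tracepoint that depends on schedstats is active: warn once. */
        if ((trace_sleep_on || trace_wait_on) && !warned) {
                warned = true;
                fprintf(stderr,
                        "stat tracepoints need schedstats=enabled or kernel.sched_schedstats=1\n");
        }
}

int main(void)
{
        check_stats_required();         /* warns */
        check_stats_required();         /* silent: already warned */
        schedstats_on = true;
        check_stats_required();         /* silent: stats are on */
        return 0;
}

In the patch itself the per-call cost is just a static-branch test, so the wait/sleep statistics bookkeeping in the enqueue/dequeue paths below is skipped entirely while schedstats is off.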
@@ -3122,11 +3174,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
         if (flags & ENQUEUE_WAKEUP) {
                 place_entity(cfs_rq, se, 0);
-                enqueue_sleeper(cfs_rq, se);
+                if (schedstat_enabled())
+                        enqueue_sleeper(cfs_rq, se);
         }
 
-        update_stats_enqueue(cfs_rq, se);
-        check_spread(cfs_rq, se);
+        check_schedstat_required();
+        if (schedstat_enabled()) {
+                update_stats_enqueue(cfs_rq, se);
+                check_spread(cfs_rq, se);
+        }
         if (se != cfs_rq->curr)
                 __enqueue_entity(cfs_rq, se);
         se->on_rq = 1;
@@ -3193,19 +3249,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         update_curr(cfs_rq);
         dequeue_entity_load_avg(cfs_rq, se);
 
-        update_stats_dequeue(cfs_rq, se);
-        if (flags & DEQUEUE_SLEEP) {
-#ifdef CONFIG_SCHEDSTATS
-                if (entity_is_task(se)) {
-                        struct task_struct *tsk = task_of(se);
-
-                        if (tsk->state & TASK_INTERRUPTIBLE)
-                                se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
-                        if (tsk->state & TASK_UNINTERRUPTIBLE)
-                                se->statistics.block_start = rq_clock(rq_of(cfs_rq));
-                }
-#endif
-        }
+        if (schedstat_enabled())
+                update_stats_dequeue(cfs_rq, se, flags);
 
         clear_buddies(cfs_rq, se);
 
@@ -3279,7 +3324,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
                  * a CPU. So account for the time it spent waiting on the
                  * runqueue.
                  */
-                update_stats_wait_end(cfs_rq, se);
+                if (schedstat_enabled())
+                        update_stats_wait_end(cfs_rq, se);
                 __dequeue_entity(cfs_rq, se);
                 update_load_avg(se, 1);
         }
@@ -3292,7 +3338,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
          * least twice that of our own weight (i.e. dont track it
          * when there are only lesser-weight tasks around):
          */
-        if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
+        if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
                 se->statistics.slice_max = max(se->statistics.slice_max,
                         se->sum_exec_runtime - se->prev_sum_exec_runtime);
         }
@@ -3375,9 +3421,13 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
         /* throttle cfs_rqs exceeding runtime */
         check_cfs_rq_runtime(cfs_rq);
 
-        check_spread(cfs_rq, prev);
+        if (schedstat_enabled()) {
+                check_spread(cfs_rq, prev);
+                if (prev->on_rq)
+                        update_stats_wait_start(cfs_rq, prev);
+        }
+
         if (prev->on_rq) {
-                update_stats_wait_start(cfs_rq, prev);
                 /* Put 'current' back into the tree. */
                 __enqueue_entity(cfs_rq, prev);
                 /* in !on_rq case, update occurred at dequeue */
@@ -4459,9 +4509,17 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
 
                 /* scale is effectively 1 << i now, and >> i divides by scale */
 
-                old_load = this_rq->cpu_load[i] - tickless_load;
+                old_load = this_rq->cpu_load[i];
                 old_load = decay_load_missed(old_load, pending_updates - 1, i);
-                old_load += tickless_load;
+                if (tickless_load) {
+                        old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
+                        /*
+                         * old_load can never be a negative value because a
+                         * decayed tickless_load cannot be greater than the
+                         * original tickless_load.
+                         */
+                        old_load += tickless_load;
+                }
                 new_load = this_load;
                 /*
                  * Round up the averaging division if load is increasing. This
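The old sequence decayed (cpu_load[i] - tickless_load), and because both operands are unsigned the subtraction wraps whenever tickless_load is larger than the stale cpu_load[i], handing decay_load_missed() an enormous bogus value. The rewrite decays the two terms separately. A standalone sketch, with a simple halve-per-missed-tick decay standing in for decay_load_missed() (that decay model is an assumption for illustration only):

#include <stdio.h>

/* Stand-in for decay_load_missed(): halve the load once per missed tick. */
static unsigned long decay(unsigned long load, unsigned long missed)
{
        while (missed--)
                load /= 2;
        return load;
}

int main(void)
{
        unsigned long cpu_load = 100, tickless_load = 300, missed = 2;

        /* Old ordering: the unsigned subtraction wraps before the decay. */
        unsigned long old_way = decay(cpu_load - tickless_load, missed) + tickless_load;

        /* New ordering: decay each term, then add the undecayed load back. */
        unsigned long new_way = decay(cpu_load, missed);
        new_way -= decay(tickless_load, missed);        /* may wrap temporarily... */
        new_way += tickless_load;                       /* ...but the sum is >= decay(cpu_load) */

        printf("old=%lu new=%lu\n", old_way, new_way);
        return 0;
}

With these numbers the old ordering yields an absurdly large load while the new one gives 250; as the added comment notes, the final value cannot go negative because a decayed tickless_load is never larger than the original.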
@@ -4484,6 +4542,25 @@ static unsigned long weighted_cpuload(const int cpu)
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
+static void __update_cpu_load_nohz(struct rq *this_rq,
+                                   unsigned long curr_jiffies,
+                                   unsigned long load,
+                                   int active)
+{
+        unsigned long pending_updates;
+
+        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+        if (pending_updates) {
+                this_rq->last_load_update_tick = curr_jiffies;
+                /*
+                 * In the regular NOHZ case, we were idle, this means load 0.
+                 * In the NOHZ_FULL case, we were non-idle, we should consider
+                 * its weighted load.
+                 */
+                __update_cpu_load(this_rq, load, pending_updates, active);
+        }
+}
+
 /*
  * There is no sane way to deal with nohz on smp when using jiffies because the
  * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
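__update_cpu_load_nohz() centralises the "how many jiffies were missed, and is there anything to do" logic that update_cpu_load_nohz() and the idle-load path previously duplicated. A standalone model of that pending-updates pattern, with jiffies replaced by a plain counter and the actual load update reduced to an assignment:

#include <stdio.h>

struct rq_model {
        unsigned long last_load_update_tick;
        unsigned long load;
};

/* Fold all ticks missed since the last update into one catch-up update. */
static void update_load_nohz(struct rq_model *rq, unsigned long curr_tick,
                             unsigned long load)
{
        unsigned long pending_updates = curr_tick - rq->last_load_update_tick;

        if (!pending_updates)
                return;

        rq->last_load_update_tick = curr_tick;
        rq->load = load;        /* the kernel calls __update_cpu_load() here */
        printf("caught up %lu missed ticks, load=%lu\n", pending_updates, load);
}

int main(void)
{
        struct rq_model rq = { .last_load_update_tick = 100, .load = 0 };

        update_load_nohz(&rq, 100, 5);  /* nothing pending: no-op */
        update_load_nohz(&rq, 104, 5);  /* 4 missed ticks folded into one update */
        return 0;
}

Idle callers pass a load of 0 (a NOHZ-idle CPU contributes nothing), while the NOHZ_FULL path passes the CPU's current weighted load.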
@@ -4501,22 +4578,15 @@ static unsigned long weighted_cpuload(const int cpu)
  * Called from nohz_idle_balance() to update the load ratings before doing the
  * idle balance.
  */
-static void update_idle_cpu_load(struct rq *this_rq)
+static void update_cpu_load_idle(struct rq *this_rq)
 {
-        unsigned long curr_jiffies = READ_ONCE(jiffies);
-        unsigned long load = weighted_cpuload(cpu_of(this_rq));
-        unsigned long pending_updates;
-
         /*
          * bail if there's load or we're actually up-to-date.
          */
-        if (load || curr_jiffies == this_rq->last_load_update_tick)
+        if (weighted_cpuload(cpu_of(this_rq)))
                 return;
 
-        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-        this_rq->last_load_update_tick = curr_jiffies;
-
-        __update_cpu_load(this_rq, load, pending_updates, 0);
+        __update_cpu_load_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
 }
 
 /*
@@ -4527,22 +4597,12 @@ void update_cpu_load_nohz(int active)
         struct rq *this_rq = this_rq();
         unsigned long curr_jiffies = READ_ONCE(jiffies);
         unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
-        unsigned long pending_updates;
 
         if (curr_jiffies == this_rq->last_load_update_tick)
                 return;
 
         raw_spin_lock(&this_rq->lock);
-        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-        if (pending_updates) {
-                this_rq->last_load_update_tick = curr_jiffies;
-                /*
-                 * In the regular NOHZ case, we were idle, this means load 0.
-                 * In the NOHZ_FULL case, we were non-idle, we should consider
-                 * its weighted load.
-                 */
-                __update_cpu_load(this_rq, load, pending_updates, active);
-        }
+        __update_cpu_load_nohz(this_rq, curr_jiffies, load, active);
         raw_spin_unlock(&this_rq->lock);
 }
 #endif /* CONFIG_NO_HZ */
@@ -4554,7 +4614,7 @@ void update_cpu_load_active(struct rq *this_rq)
 {
         unsigned long load = weighted_cpuload(cpu_of(this_rq));
         /*
-         * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
+         * See the mess around update_cpu_load_idle() / update_cpu_load_nohz().
         */
         this_rq->last_load_update_tick = jiffies;
         __update_cpu_load(this_rq, load, 1, 1);
@@ -7848,7 +7908,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
                 if (time_after_eq(jiffies, rq->next_balance)) {
                         raw_spin_lock_irq(&rq->lock);
                         update_rq_clock(rq);
-                        update_idle_cpu_load(rq);
+                        update_cpu_load_idle(rq);
                         raw_spin_unlock_irq(&rq->lock);
                         rebalance_domains(rq, CPU_IDLE);
                 }
@@ -8234,11 +8294,8 @@ void free_fair_sched_group(struct task_group *tg)
         for_each_possible_cpu(i) {
                 if (tg->cfs_rq)
                         kfree(tg->cfs_rq[i]);
-                if (tg->se) {
-                        if (tg->se[i])
-                                remove_entity_load_avg(tg->se[i]);
+                if (tg->se)
                         kfree(tg->se[i]);
-                }
         }
 
         kfree(tg->cfs_rq);
@@ -8286,21 +8343,29 @@ err:
         return 0;
 }
 
-void unregister_fair_sched_group(struct task_group *tg, int cpu)
+void unregister_fair_sched_group(struct task_group *tg)
 {
-        struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
+        struct rq *rq;
+        int cpu;
 
-        /*
-         * Only empty task groups can be destroyed; so we can speculatively
-         * check on_list without danger of it being re-added.
-         */
-        if (!tg->cfs_rq[cpu]->on_list)
-                return;
+        for_each_possible_cpu(cpu) {
+                if (tg->se[cpu])
+                        remove_entity_load_avg(tg->se[cpu]);
 
-        raw_spin_lock_irqsave(&rq->lock, flags);
-        list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
-        raw_spin_unlock_irqrestore(&rq->lock, flags);
+                /*
+                 * Only empty task groups can be destroyed; so we can speculatively
+                 * check on_list without danger of it being re-added.
+                 */
+                if (!tg->cfs_rq[cpu]->on_list)
+                        continue;
+
+                rq = cpu_rq(cpu);
+
+                raw_spin_lock_irqsave(&rq->lock, flags);
+                list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
+                raw_spin_unlock_irqrestore(&rq->lock, flags);
+        }
 }
 
 void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
@@ -8382,7 +8447,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
         return 1;
 }
 
-void unregister_fair_sched_group(struct task_group *tg, int cpu) { }
+void unregister_fair_sched_group(struct task_group *tg) { }
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
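unregister_fair_sched_group() now iterates every possible CPU itself, detaching the entity load average and unlinking the per-CPU cfs_rq only when it is actually on the leaf list, rather than being invoked once per CPU by the cgroup teardown code; the remove_entity_load_avg() call correspondingly moves here from free_fair_sched_group(). A standalone sketch of the "skip if not linked, otherwise lock and unlink" loop; the array, flags and printouts are illustrative, and the real code takes rq->lock around list_del_leaf_cfs_rq():

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

struct percpu_cfs {
        bool on_list;   /* is this cfs_rq linked on its runqueue's leaf list? */
};

static void unregister_group(struct percpu_cfs *cfs)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                /*
                 * Only empty task groups are destroyed, so on_list can be
                 * checked speculatively; the lock is only taken for CPUs
                 * that actually need the unlink.
                 */
                if (!cfs[cpu].on_list)
                        continue;

                /* raw_spin_lock_irqsave(&rq->lock, flags) in the kernel */
                cfs[cpu].on_list = false;
                /* raw_spin_unlock_irqrestore(&rq->lock, flags) */
                printf("cpu%d: removed from leaf list\n", cpu);
        }
}

int main(void)
{
        struct percpu_cfs cfs[NR_CPUS] = { { true }, { false }, { true }, { false } };

        unregister_group(cfs);
        return 0;
}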