
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler changes from Ingo Molnar:
 "Main changes:

   - scheduler side full-dynticks (user-space execution is undisturbed
     and receives no timer IRQs) preparation changes that convert the
     cputime accounting code to be full-dynticks ready, from Frederic
     Weisbecker.

   - Initial sched.h split-up changes, by Clark Williams

   - select_idle_sibling() performance improvement by Mike Galbraith:

        " 1 tbench pair (worst case) in a 10 core + SMT package:

          pre   15.22 MB/sec 1 procs
          post 252.01 MB/sec 1 procs "

   - sched_rr_get_interval() ABI fix/change.  We think this detail is not
     used by apps (so it's not an ABI in practice), but let's keep it
     under observation.

   - misc RT scheduling cleanups, optimizations"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  sched/rt: Add <linux/sched/rt.h> header to <linux/init_task.h>
  cputime: Remove irqsave from seqlock readers
  sched, powerpc: Fix sched.h split-up build failure
  cputime: Restore CPU_ACCOUNTING config defaults for PPC64
  sched/rt: Move rt specific bits into new header file
  sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice
  sched: Move sched.h sysctl bits into separate header
  sched: Fix signedness bug in yield_to()
  sched: Fix select_idle_sibling() bouncing cow syndrome
  sched/rt: Further simplify pick_rt_task()
  sched/rt: Do not account zero delta_exec in update_curr_rt()
  cputime: Safely read cputime of full dynticks CPUs
  kvm: Prepare to add generic guest entry/exit callbacks
  cputime: Use accessors to read task cputime stats
  cputime: Allow dynamic switch between tick/virtual based cputime accounting
  cputime: Generic on-demand virtual cputime accounting
  cputime: Move default nsecs_to_cputime() to jiffies based cputime file
  cputime: Librarize per nsecs resolution cputime definitions
  cputime: Avoid multiplication overflow on utime scaling
  context_tracking: Export context state for generic vtime
  ...

Fix up conflict in kernel/context_tracking.c due to comment additions.
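
The conversion that recurs throughout the diffs below replaces direct reads of
current->utime / current->stime with the new task_cputime() accessor, so that a
full-dynticks kernel can take the vtime seqlock and return a consistent pair.
A minimal sketch of the call pattern (kernel context assumed):

	cputime_t utime, stime;

	/* Replaces direct p->utime / p->stime reads; either pointer
	 * may be NULL when only one of the two values is needed.
	 */
	task_cputime(current, &utime, &stime);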
Linus Torvalds, 12 years ago
commit d652e1eb8e
83 changed files with 1105 additions and 507 deletions
  1. arch/alpha/kernel/osf_sys.c (+4 -2)
  2. arch/ia64/include/asm/cputime.h (+6 -86)
  3. arch/ia64/include/asm/thread_info.h (+2 -2)
  4. arch/ia64/include/asm/xen/minstate.h (+1 -1)
  5. arch/ia64/kernel/asm-offsets.c (+1 -1)
  6. arch/ia64/kernel/entry.S (+8 -8)
  7. arch/ia64/kernel/fsys.S (+2 -2)
  8. arch/ia64/kernel/head.S (+2 -2)
  9. arch/ia64/kernel/ivt.S (+4 -4)
  10. arch/ia64/kernel/minstate.h (+1 -1)
  11. arch/ia64/kernel/time.c (+3 -2)
  12. arch/powerpc/configs/chroma_defconfig (+1 -1)
  13. arch/powerpc/configs/corenet64_smp_defconfig (+1 -1)
  14. arch/powerpc/configs/pasemi_defconfig (+1 -1)
  15. arch/powerpc/include/asm/cputime.h (+3 -3)
  16. arch/powerpc/include/asm/lppaca.h (+1 -1)
  17. arch/powerpc/include/asm/ppc_asm.h (+2 -2)
  18. arch/powerpc/kernel/entry_64.S (+2 -2)
  19. arch/powerpc/kernel/time.c (+3 -2)
  20. arch/powerpc/platforms/cell/spufs/sched.c (+1 -0)
  21. arch/powerpc/platforms/pseries/dtl.c (+3 -3)
  22. arch/powerpc/platforms/pseries/setup.c (+3 -3)
  23. arch/s390/kernel/vtime.c (+3 -3)
  24. arch/x86/kernel/apm_32.c (+6 -5)
  25. block/blk-exec.c (+1 -0)
  26. drivers/isdn/mISDN/stack.c (+6 -1)
  27. drivers/spi/spi.c (+1 -1)
  28. drivers/staging/csr/bh.c (+1 -1)
  29. drivers/staging/csr/unifi_sme.c (+1 -1)
  30. drivers/tty/sysrq.c (+1 -0)
  31. fs/binfmt_elf.c (+6 -2)
  32. fs/binfmt_elf_fdpic.c (+5 -2)
  33. fs/proc/array.c (+2 -2)
  34. fs/select.c (+1 -0)
  35. include/asm-generic/cputime.h (+6 -60)
  36. include/asm-generic/cputime_jiffies.h (+72 -0)
  37. include/asm-generic/cputime_nsecs.h (+104 -0)
  38. include/linux/context_tracking.h (+28 -0)
  39. include/linux/hardirq.h (+2 -2)
  40. include/linux/init_task.h (+12 -0)
  41. include/linux/kernel_stat.h (+1 -1)
  42. include/linux/kvm_host.h (+46 -9)
  43. include/linux/sched.h (+42 -143)
  44. include/linux/sched/rt.h (+58 -0)
  45. include/linux/sched/sysctl.h (+110 -0)
  46. include/linux/tsacct_kern.h (+3 -0)
  47. include/linux/vtime.h (+40 -19)
  48. init/Kconfig (+23 -2)
  49. init/init_task.c (+2 -0)
  50. kernel/acct.c (+4 -2)
  51. kernel/context_tracking.c (+25 -18)
  52. kernel/cpu.c (+3 -1)
  53. kernel/delayacct.c (+5 -2)
  54. kernel/exit.c (+6 -4)
  55. kernel/fork.c (+6 -0)
  56. kernel/futex.c (+1 -0)
  57. kernel/hrtimer.c (+2 -0)
  58. kernel/irq/manage.c (+1 -0)
  59. kernel/mutex.c (+1 -0)
  60. kernel/posix-cpu-timers.c (+22 -6)
  61. kernel/rtmutex-debug.c (+1 -0)
  62. kernel/rtmutex-tester.c (+1 -0)
  63. kernel/rtmutex.c (+1 -0)
  64. kernel/sched/core.c (+21 -1)
  65. kernel/sched/cpupri.c (+2 -0)
  66. kernel/sched/cputime.c (+271 -43)
  67. kernel/sched/fair.c (+9 -18)
  68. kernel/sched/rt.c (+17 -9)
  69. kernel/sched/sched.h (+2 -0)
  70. kernel/signal.c (+8 -4)
  71. kernel/softirq.c (+3 -3)
  72. kernel/sysctl.c (+8 -0)
  73. kernel/time/tick-sched.c (+4 -1)
  74. kernel/timer.c (+1 -0)
  75. kernel/trace/trace.c (+1 -0)
  76. kernel/trace/trace_sched_wakeup.c (+1 -1)
  77. kernel/tsacct.c (+34 -10)
  78. kernel/watchdog.c (+1 -0)
  79. mm/mmap.c (+1 -0)
  80. mm/mremap.c (+1 -0)
  81. mm/nommu.c (+1 -0)
  82. mm/page-writeback.c (+1 -0)
  83. mm/page_alloc.c (+1 -0)

+ 4 - 2
arch/alpha/kernel/osf_sys.c

@@ -1139,6 +1139,7 @@ struct rusage32 {
 SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 {
 	struct rusage32 r;
+	cputime_t utime, stime;
 
 	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
 		return -EINVAL;
@@ -1146,8 +1147,9 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 	memset(&r, 0, sizeof(r));
 	switch (who) {
 	case RUSAGE_SELF:
-		jiffies_to_timeval32(current->utime, &r.ru_utime);
-		jiffies_to_timeval32(current->stime, &r.ru_stime);
+		task_cputime(current, &utime, &stime);
+		jiffies_to_timeval32(utime, &r.ru_utime);
+		jiffies_to_timeval32(stime, &r.ru_stime);
 		r.ru_minflt = current->min_flt;
 		r.ru_majflt = current->maj_flt;
 		break;

+ 6 - 86
arch/ia64/include/asm/cputime.h

@@ -11,99 +11,19 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
  * Otherwise we measure cpu time in jiffies using the generic definitions.
  */
 
 #ifndef __IA64_CPUTIME_H
 #define __IA64_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-#include <asm-generic/cputime.h>
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+# include <asm-generic/cputime.h>
 #else
-
-#include <linux/time.h>
-#include <linux/jiffies.h>
-#include <asm/processor.h>
-
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
-
-#define cputime_one_jiffy		jiffies_to_cputime(1)
-
-/*
- * Convert cputime <-> jiffies (HZ)
- */
-#define cputime_to_jiffies(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies_to_cputime(__jif)	\
-	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies64_to_cputime64(__jif)	\
-	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
-
-/*
- * Convert cputime <-> microseconds
- */
-#define cputime_to_usecs(__ct)		\
-	((__force u64)(__ct) / NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs)	\
-	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
-#define usecs_to_cputime64(__usecs)	\
-	(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
-
-/*
- * Convert cputime <-> seconds
- */
-#define cputime_to_secs(__ct)		\
-	((__force u64)(__ct) / NSEC_PER_SEC)
-#define secs_to_cputime(__secs)		\
-	(__force cputime_t)((__secs) * NSEC_PER_SEC)
-
-/*
- * Convert cputime <-> timespec (nsec)
- */
-static inline cputime_t timespec_to_cputime(const struct timespec *val)
-{
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
-	return (__force cputime_t) ret;
-}
-static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
-{
-	val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
-	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
-}
-
-/*
- * Convert cputime <-> timeval (msec)
- */
-static inline cputime_t timeval_to_cputime(struct timeval *val)
-{
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
-	return (__force cputime_t) ret;
-}
-static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
-{
-	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
-	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
-}
-
-/*
- * Convert cputime <-> clock (USER_HZ)
- */
-#define cputime_to_clock_t(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x)		\
-	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
-
-/*
- * Convert cputime64 to clock.
- */
-#define cputime64_to_clock_t(__ct)	\
-	cputime_to_clock_t((__force cputime_t)__ct)
-
+# include <asm/processor.h>
+# include <asm-generic/cputime_nsecs.h>
 extern void arch_vtime_task_switch(struct task_struct *tsk);
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __IA64_CPUTIME_H */

+ 2 - 2
arch/ia64/include/asm/thread_info.h

@@ -31,7 +31,7 @@ struct thread_info {
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	struct restart_block restart_block;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	__u64 ac_stamp;
 	__u64 ac_leave;
 	__u64 ac_stime;
@@ -69,7 +69,7 @@ struct thread_info {
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define setup_thread_stack(p, org)			\
 	*task_thread_info(p) = *task_thread_info(org);	\
 	task_thread_info(p)->ac_stime = 0;		\

+ 1 - 1
arch/ia64/include/asm/xen/minstate.h

@@ -1,5 +1,5 @@
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define XEN_ACCOUNT_GET_STAMP		\
 	MOV_FROM_ITC(pUStk, p6, r20, r2);

+ 1 - 1
arch/ia64/kernel/asm-offsets.c

@@ -41,7 +41,7 @@ void foo(void)
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
 	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
 	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));

+ 8 - 8
arch/ia64/kernel/entry.S

@@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
 #endif
 .global __paravirt_work_processed_syscall;
 __paravirt_work_processed_syscall:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	adds r2=PT(LOADRS)+16,r12
 	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -762,7 +762,7 @@ __paravirt_work_processed_syscall:
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
@@ -793,7 +793,7 @@ __paravirt_work_processed_syscall:
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) st1 [r15]=r17				// M2|3
 #else
 (pUStk) st1 [r14]=r17				// M2|3
@@ -813,7 +813,7 @@ __paravirt_work_processed_syscall:
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
 	COVER				// B    add current frame into dirty partition & set cr.ifs
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	st8 [r14]=r22			// M	save time at leave
 	mov f10=f0			// F    clear f10
@@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	.pred.rel.mutex pUStk,pKStk
 	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
 	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
@@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
 #else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
@@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
 #endif
 	;;
@@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
 	//  mib  :  mov add br        ->  mib  : ld8 add br
 	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;

+ 2 - 2
arch/ia64/kernel/fsys.S

@@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
 #else
 	nop.m 0
@@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	// mov.m r30=ar.itc is called in advance
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2

+ 2 - 2
arch/ia64/kernel/head.S

@@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock)
 sched_clock = ia64_native_sched_clock
 #endif
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 GLOBAL_ENTRY(cycle_to_cputime)
 	alloc r16=ar.pfs,1,0,0,0
 	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
@@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
 	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
 	br.ret.sptk.many rp
 END(cycle_to_cputime)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_IA64_BRL_EMU
 

+ 4 - 4
arch/ia64/kernel/ivt.S

@@ -784,7 +784,7 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8				// A    increment ei to next slot
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	;;
 	mov b6=r30				// I0   setup syscall handler branch reg early
 #else
@@ -801,7 +801,7 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	MOV_FROM_ITC(p0, p14, r30, r18)		// M    get cycle for accounting
 #else
 	mov b6=r30				// I0   setup syscall handler branch reg early
@@ -817,7 +817,7 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	// mov.m r30=ar.itc is called in advance, and r13 is current
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
@@ -1043,7 +1043,7 @@ END(ia64_syscall_setup)
 	DBG_FAULT(16)
 	FAULT(16)
 
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
 	/*
 	 * There is no particular reason for this code to be here, other than
 	 * that there happens to be space here that would go unused otherwise.

+ 1 - 1
arch/ia64/kernel/minstate.h

@@ -4,7 +4,7 @@
 #include "entry.h"
 #include "paravirt_inst.h"
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define ACCOUNT_GET_STAMP				\
 (pUStk) mov.m r20=ar.itc;

+ 3 - 2
arch/ia64/kernel/time.c

@@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 
 #include <linux/kernel_stat.h>
 
@@ -136,13 +136,14 @@ void vtime_account_system(struct task_struct *tsk)
 
 	account_system_time(tsk, 0, delta, delta);
 }
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
 	account_idle_time(vtime_delta(tsk));
 }
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)

+ 1 - 1
arch/powerpc/configs/chroma_defconfig

@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=256
 CONFIG_EXPERIMENTAL=y

+ 1 - 1
arch/powerpc/configs/corenet64_smp_defconfig

@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y

+ 1 - 1
arch/powerpc/configs/pasemi_defconfig

@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_ALTIVEC=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y

+ 3 - 3
arch/powerpc/include/asm/cputime.h

@@ -8,7 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
  * the same units as the timebase.  Otherwise we measure cpu time
  * in jiffies using the generic definitions.
  */
@@ -16,7 +16,7 @@
 #ifndef __POWERPC_CPUTIME_H
 #define __POWERPC_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #include <asm-generic/cputime.h>
 #ifdef __KERNEL__
 static inline void setup_cputime_one_jiffy(void) { }
@@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
 
 #endif /* __KERNEL__ */
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #endif /* __POWERPC_CPUTIME_H */

+ 1 - 1
arch/powerpc/include/asm/lppaca.h

@@ -145,7 +145,7 @@ struct dtl_entry {
 extern struct kmem_cache *dtl_cache;
 
 /*
- * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
  * reading from the dispatch trace log.  If other code wants to consume
  * DTL entries, it can set this pointer to a function that will get
  * called once for each DTL entry that gets processed.

+ 2 - 2
arch/powerpc/include/asm/ppc_asm.h

@@ -24,7 +24,7 @@
  * user_time and system_time fields in the paca.
  */
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define ACCOUNT_CPU_USER_ENTRY(ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ra, rb)
 #define ACCOUNT_STOLEN_TIME
@@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 
 #endif /* CONFIG_PPC_SPLPAR */
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
  * Macros for storing registers into and loading registers from

+ 2 - 2
arch/powerpc/kernel/entry_64.S

@@ -94,7 +94,7 @@ system_call_common:
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 	ld	r11,exception_marker@toc(r2)
 	std	r11,-16(r9)		/* "regshere" marker */
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
 	beq	33f
 	/* if from user, see if there are any DTL entries to process */
@@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 33:
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
 
 	/*
 	 * A syscall should always be called with interrupts enabled

+ 3 - 2
arch/powerpc/kernel/time.c

@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 EXPORT_SYMBOL_GPL(ppc_tb_freq);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Factors for converting from cputime_t (timebase ticks) to
  * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
@@ -347,6 +347,7 @@ void vtime_account_system(struct task_struct *tsk)
 	if (stolen)
 		account_steal_time(stolen);
 }
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -377,7 +378,7 @@ void vtime_account_user(struct task_struct *tsk)
 	account_user_time(tsk, utime, utimescaled);
 }
 
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #define calc_cputime_factors()
 #endif
 

+ 1 - 0
arch/powerpc/platforms/cell/spufs/sched.c

@@ -24,6 +24,7 @@
 
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/slab.h>

+ 3 - 3
arch/powerpc/platforms/pseries/dtl.c

@@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7;
  */
 static int dtl_buf_entries = N_DISPATCH_LOG;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 struct dtl_ring {
 	u64	write_index;
 	struct dtl_entry *write_ptr;
@@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 	return per_cpu(dtl_rings, dtl->cpu).write_index;
 }
 
-#else /* CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_start(struct dtl *dtl)
 {
@@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 {
 	return lppaca_of(dtl->cpu).dtl_idx;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_enable(struct dtl *dtl)
 {

+ 3 - 3
arch/powerpc/platforms/pseries/setup.c

@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
 
 struct kmem_cache *dtl_cache;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Allocate space for the dispatch trace log for all possible cpus
  * and register the buffers with the hypervisor.  This is used for
@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
 
 	return 0;
 }
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 static inline int alloc_dispatch_logs(void)
 {
 	return 0;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int alloc_dispatch_log_kmem_cache(void)
 {

+ 3 - 3
arch/s390/kernel/vtime.c

@@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk)
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
  */
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	u64 timer, system;
@@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk)
 
 	virt_timer_forward(system);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 
 void vtime_account_system(struct task_struct *tsk)
-__attribute__((alias("vtime_account")));
+__attribute__((alias("vtime_account_irq_enter")));
 EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void __kprobes vtime_stop_cpu(void)

+ 6 - 5
arch/x86/kernel/apm_32.c

@@ -899,6 +899,7 @@ static void apm_cpu_idle(void)
 	static int use_apm_idle; /* = 0 */
 	static unsigned int last_jiffies; /* = 0 */
 	static unsigned int last_stime; /* = 0 */
+	cputime_t stime;
 
 	int apm_idle_done = 0;
 	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
@@ -906,23 +907,23 @@ static void apm_cpu_idle(void)
 
 	WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
 recalc:
+	task_cputime(current, NULL, &stime);
 	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
 		use_apm_idle = 0;
-		last_jiffies = jiffies;
-		last_stime = current->stime;
 	} else if (jiffies_since_last_check > idle_period) {
 		unsigned int idle_percentage;
 
-		idle_percentage = current->stime - last_stime;
+		idle_percentage = stime - last_stime;
 		idle_percentage *= 100;
 		idle_percentage /= jiffies_since_last_check;
 		use_apm_idle = (idle_percentage > idle_threshold);
 		if (apm_info.forbid_idle)
 			use_apm_idle = 0;
-		last_jiffies = jiffies;
-		last_stime = current->stime;
 	}
 
+	last_jiffies = jiffies;
+	last_stime = stime;
+
 	bucket = IDLE_LEAKY_MAX;
 
 	while (!need_resched()) {

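Note the accessor's NULL convention used in the hunk above: apm_cpu_idle() only
needs the system-time half, so the utime slot is left NULL, and the
last_jiffies/last_stime update is hoisted out of the branches so every pass
records the same snapshot. The sampling call in isolation (sketch):

	cputime_t stime;

	/* only system time is wanted, so pass NULL for utime */
	task_cputime(current, NULL, &stime);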
+ 1 - 0
block/blk-exec.c

@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/sched/sysctl.h>
 
 #include "blk.h"
 

+ 6 - 1
drivers/isdn/mISDN/stack.c

@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/mISDNif.h>
 #include <linux/kthread.h>
+#include <linux/sched.h>
 #include "core.h"
 
 static u_int	*debug;
@@ -202,6 +203,9 @@ static int
 mISDNStackd(void *data)
 {
 	struct mISDNstack *st = data;
+#ifdef MISDN_MSG_STATS
+	cputime_t utime, stime;
+#endif
 	int err = 0;
 
 	sigfillset(&current->blocked);
@@ -303,9 +307,10 @@ mISDNStackd(void *data)
 	       "msg %d sleep %d stopped\n",
 	       dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
 	       st->stopped_cnt);
+	task_cputime(st->thread, &utime, &stime);
 	printk(KERN_DEBUG
 	       "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
-	       dev_name(&st->dev->dev), st->thread->utime, st->thread->stime);
+	       dev_name(&st->dev->dev), utime, stime);
 	printk(KERN_DEBUG
 	       "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n",
 	       dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw);

+ 1 - 1
drivers/spi/spi.c

@@ -33,7 +33,7 @@
 #include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
 #include <linux/export.h>
-#include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/ioport.h>

+ 1 - 1
drivers/staging/csr/bh.c

@@ -15,7 +15,7 @@
  */
 #include "csr_wifi_hip_unifi.h"
 #include "unifi_priv.h"
-
+#include <linux/sched/rt.h>
 
 /*
  * ---------------------------------------------------------------------------

+ 1 - 1
drivers/staging/csr/unifi_sme.c

@@ -15,7 +15,7 @@
 #include "unifi_priv.h"
 #include "csr_wifi_hip_unifi.h"
 #include "csr_wifi_hip_conversions.h"
-
+#include <linux/sched/rt.h>
 
 
 

+ 1 - 0
drivers/tty/sysrq.c

@@ -15,6 +15,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/fs.h>

+ 6 - 2
fs/binfmt_elf.c

@@ -33,6 +33,7 @@
 #include <linux/elf.h>
 #include <linux/utsname.h>
 #include <linux/coredump.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
-		cputime_to_timeval(p->utime, &prstatus->pr_utime);
-		cputime_to_timeval(p->stime, &prstatus->pr_stime);
+		cputime_t utime, stime;
+
+		task_cputime(p, &utime, &stime);
+		cputime_to_timeval(utime, &prstatus->pr_utime);
+		cputime_to_timeval(stime, &prstatus->pr_stime);
 	}
 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);

+ 5 - 2
fs/binfmt_elf_fdpic.c

@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
-		cputime_to_timeval(p->utime, &prstatus->pr_utime);
-		cputime_to_timeval(p->stime, &prstatus->pr_stime);
+		cputime_t utime, stime;
+
+		task_cputime(p, &utime, &stime);
+		cputime_to_timeval(utime, &prstatus->pr_utime);
+		cputime_to_timeval(stime, &prstatus->pr_stime);
 	}
 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);

+ 2 - 2
fs/proc/array.c

@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				gtime += t->gtime;
+				gtime += task_gtime(t);
 				t = next_thread(t);
 			} while (t != task);
 
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
 		task_cputime_adjusted(task, &utime, &stime);
-		gtime = task->gtime;
+		gtime = task_gtime(task);
 	}
 
 	/* scale priority and nice values from timeslices to -20..20 */

+ 1 - 0
fs/select.c

@@ -26,6 +26,7 @@
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
 #include <linux/hrtimer.h>
+#include <linux/sched/rt.h>
 
 #include <asm/uaccess.h>
 

+ 6 - 60
include/asm-generic/cputime.h

@@ -4,66 +4,12 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 
-typedef unsigned long __nocast cputime_t;
-
-#define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_to_jiffies(__ct)	(__force unsigned long)(__ct)
-#define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__hz)	(__force cputime_t)(__hz)
-
-typedef u64 __nocast cputime64_t;
-
-#define cputime64_to_jiffies64(__ct)	(__force u64)(__ct)
-#define jiffies64_to_cputime64(__jif)	(__force cputime64_t)(__jif)
-
-#define nsecs_to_cputime64(__ct)	\
-	jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
-
-
-/*
- * Convert cputime to microseconds and back.
- */
-#define cputime_to_usecs(__ct)		\
-	jiffies_to_usecs(cputime_to_jiffies(__ct))
-#define usecs_to_cputime(__usec)	\
-	jiffies_to_cputime(usecs_to_jiffies(__usec))
-#define usecs_to_cputime64(__usec)	\
-	jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
-
-/*
- * Convert cputime to seconds and back.
- */
-#define cputime_to_secs(jif)		(cputime_to_jiffies(jif) / HZ)
-#define secs_to_cputime(sec)		jiffies_to_cputime((sec) * HZ)
-
-/*
- * Convert cputime to timespec and back.
- */
-#define timespec_to_cputime(__val)	\
-	jiffies_to_cputime(timespec_to_jiffies(__val))
-#define cputime_to_timespec(__ct,__val)	\
-	jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
-
-/*
- * Convert cputime to timeval and back.
- */
-#define timeval_to_cputime(__val)	\
-	jiffies_to_cputime(timeval_to_jiffies(__val))
-#define cputime_to_timeval(__ct,__val)	\
-	jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
-
-/*
- * Convert cputime to clock and back.
- */
-#define cputime_to_clock_t(__ct)	\
-	jiffies_to_clock_t(cputime_to_jiffies(__ct))
-#define clock_t_to_cputime(__x)		\
-	jiffies_to_cputime(clock_t_to_jiffies(__x))
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+# include <asm-generic/cputime_jiffies.h>
+#endif
 
-/*
- * Convert cputime64 to clock.
- */
-#define cputime64_to_clock_t(__ct)	\
-	jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# include <asm-generic/cputime_nsecs.h>
+#endif
 
 #endif

+ 72 - 0
include/asm-generic/cputime_jiffies.h

@@ -0,0 +1,72 @@
+#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H
+#define _ASM_GENERIC_CPUTIME_JIFFIES_H
+
+typedef unsigned long __nocast cputime_t;
+
+#define cputime_one_jiffy		jiffies_to_cputime(1)
+#define cputime_to_jiffies(__ct)	(__force unsigned long)(__ct)
+#define cputime_to_scaled(__ct)		(__ct)
+#define jiffies_to_cputime(__hz)	(__force cputime_t)(__hz)
+
+typedef u64 __nocast cputime64_t;
+
+#define cputime64_to_jiffies64(__ct)	(__force u64)(__ct)
+#define jiffies64_to_cputime64(__jif)	(__force cputime64_t)(__jif)
+
+
+/*
+ * Convert nanoseconds to cputime
+ */
+#define nsecs_to_cputime64(__nsec)	\
+	jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec))
+#define nsecs_to_cputime(__nsec)	\
+	jiffies_to_cputime(nsecs_to_jiffies(__nsec))
+
+
+/*
+ * Convert cputime to microseconds and back.
+ */
+#define cputime_to_usecs(__ct)		\
+	jiffies_to_usecs(cputime_to_jiffies(__ct))
+#define usecs_to_cputime(__usec)	\
+	jiffies_to_cputime(usecs_to_jiffies(__usec))
+#define usecs_to_cputime64(__usec)	\
+	jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
+
+/*
+ * Convert cputime to seconds and back.
+ */
+#define cputime_to_secs(jif)		(cputime_to_jiffies(jif) / HZ)
+#define secs_to_cputime(sec)		jiffies_to_cputime((sec) * HZ)
+
+/*
+ * Convert cputime to timespec and back.
+ */
+#define timespec_to_cputime(__val)	\
+	jiffies_to_cputime(timespec_to_jiffies(__val))
+#define cputime_to_timespec(__ct,__val)	\
+	jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
+
+/*
+ * Convert cputime to timeval and back.
+ */
+#define timeval_to_cputime(__val)	\
+	jiffies_to_cputime(timeval_to_jiffies(__val))
+#define cputime_to_timeval(__ct,__val)	\
+	jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
+
+/*
+ * Convert cputime to clock and back.
+ */
+#define cputime_to_clock_t(__ct)	\
+	jiffies_to_clock_t(cputime_to_jiffies(__ct))
+#define clock_t_to_cputime(__x)		\
+	jiffies_to_cputime(clock_t_to_jiffies(__x))
+
+/*
+ * Convert cputime64 to clock.
+ */
+#define cputime64_to_clock_t(__ct)	\
+	jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
+
+#endif

+ 104 - 0
include/asm-generic/cputime_nsecs.h

@@ -0,0 +1,104 @@
+/*
+ * Definitions for measuring cputime in nsecs resolution.
+ *
+ * Based on <arch/ia64/include/asm/cputime.h>
+ *
+ * Copyright (C) 2007 FUJITSU LIMITED
+ * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef _ASM_GENERIC_CPUTIME_NSECS_H
+#define _ASM_GENERIC_CPUTIME_NSECS_H
+
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
+
+#define cputime_one_jiffy		jiffies_to_cputime(1)
+
+/*
+ * Convert cputime <-> jiffies (HZ)
+ */
+#define cputime_to_jiffies(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define cputime_to_scaled(__ct)		(__ct)
+#define jiffies_to_cputime(__jif)	\
+	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif)	\
+	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
+
+
+/*
+ * Convert cputime <-> nanoseconds
+ */
+#define nsecs_to_cputime(__nsecs)	((__force u64)(__nsecs))
+
+
+/*
+ * Convert cputime <-> microseconds
+ */
+#define cputime_to_usecs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs)	\
+	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
+#define usecs_to_cputime64(__usecs)	\
+	(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
+
+/*
+ * Convert cputime <-> seconds
+ */
+#define cputime_to_secs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs)		\
+	(__force cputime_t)((__secs) * NSEC_PER_SEC)
+
+/*
+ * Convert cputime <-> timespec (nsec)
+ */
+static inline cputime_t timespec_to_cputime(const struct timespec *val)
+{
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+	return (__force cputime_t) ret;
+}
+static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
+{
+	val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
+}
+
+/*
+ * Convert cputime <-> timeval (msec)
+ */
+static inline cputime_t timeval_to_cputime(struct timeval *val)
+{
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+	return (__force cputime_t) ret;
+}
+static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
+{
+	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+}
+
+/*
+ * Convert cputime <-> clock (USER_HZ)
+ */
+#define cputime_to_clock_t(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x)		\
+	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
+
+/*
+ * Convert cputime64 to clock.
+ */
+#define cputime64_to_clock_t(__ct)	\
+	cputime_to_clock_t((__force cputime_t)__ct)
+
+#endif

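Since cputime_t is a plain nanosecond count under the definitions above, the
conversions are simple integer multiplications and divisions; a quick sketch of
what they evaluate to (kernel context assumed):

	cputime_t ct = secs_to_cputime(1);	/* 1 * NSEC_PER_SEC */
	u64 us = cputime_to_usecs(ct);		/* = 1000000 */
	u64 jif = cputime_to_jiffies(ct);	/* = HZ */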
+ 28 - 0
include/linux/context_tracking.h

@@ -3,12 +3,40 @@
 
 #ifdef CONFIG_CONTEXT_TRACKING
 #include <linux/sched.h>
+#include <linux/percpu.h>
+
+struct context_tracking {
+	/*
+	 * When active is false, probes are unset in order
+	 * to minimize overhead: TIF flags are cleared
+	 * and calls to user_enter/exit are ignored. This
+	 * may be further optimized using static keys.
+	 */
+	bool active;
+	enum {
+		IN_KERNEL = 0,
+		IN_USER,
+	} state;
+};
+
+DECLARE_PER_CPU(struct context_tracking, context_tracking);
+
+static inline bool context_tracking_in_user(void)
+{
+	return __this_cpu_read(context_tracking.state) == IN_USER;
+}
+
+static inline bool context_tracking_active(void)
+{
+	return __this_cpu_read(context_tracking.active);
+}
 
 extern void user_enter(void);
 extern void user_exit(void);
 extern void context_tracking_task_switch(struct task_struct *prev,
 					 struct task_struct *next);
 #else
+static inline bool context_tracking_in_user(void) { return false; }
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
 static inline void context_tracking_task_switch(struct task_struct *prev,

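The exported predicates are what let the generic vtime code decide, at tick
time, whether the interrupted context was user or kernel mode. A sketch of an
intended caller (the real call sites land in kernel/sched/cputime.c in this
series; the function name here is illustrative):

	static void vtime_account_tick(struct task_struct *p)
	{
		if (context_tracking_in_user())
			vtime_account_user(p);		/* tick hit user mode */
		else
			vtime_account_system(p);	/* tick hit kernel mode */
	}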
+ 2 - 2
include/linux/hardirq.h

@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
  */
  */
 #define __irq_enter()					\
 #define __irq_enter()					\
 	do {						\
 	do {						\
-		vtime_account_irq_enter(current);	\
+		account_irq_enter_time(current);	\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 		trace_hardirq_enter();			\
 	} while (0)
 	} while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
 #define __irq_exit()					\
 #define __irq_exit()					\
 	do {						\
 	do {						\
 		trace_hardirq_exit();			\
 		trace_hardirq_exit();			\
-		vtime_account_irq_exit(current);	\
+		account_irq_exit_time(current);		\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 	} while (0)
 
 

+ 12 - 0
include/linux/init_task.h

@@ -10,7 +10,9 @@
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include <linux/securebits.h>
+#include <linux/seqlock.h>
 #include <net/net_namespace.h>
+#include <linux/sched/rt.h>
 
 #ifdef CONFIG_SMP
 # define INIT_PUSHABLE_TASKS(tsk)					\
@@ -141,6 +143,15 @@ extern struct task_group root_task_group;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# define INIT_VTIME(tsk)						\
+	.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock),	\
+	.vtime_snap = 0,				\
+	.vtime_snap_whence = VTIME_SYS,
+#else
+# define INIT_VTIME(tsk)
+#endif
+
 #define INIT_TASK_COMM "swapper"
 
 /*
@@ -210,6 +221,7 @@ extern struct task_group root_task_group;
 	INIT_TRACE_RECURSION						\
 	INIT_TASK_RCU_PREEMPT(tsk)					\
 	INIT_CPUSET_SEQ							\
+	INIT_VTIME(tsk)							\
 }
 
 

+ 1 - 1
include/linux/kernel_stat.h

@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline void account_process_tick(struct task_struct *tsk, int user)
 {
 	vtime_account_user(tsk);

+ 46 - 9
include/linux/kvm_host.h

@@ -22,6 +22,7 @@
 #include <linux/rcupdate.h>
 #include <linux/ratelimit.h>
 #include <linux/err.h>
+#include <linux/irqflags.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 }
 #endif /* CONFIG_IOMMU_API */
 
-static inline void kvm_guest_enter(void)
+static inline void __guest_enter(void)
 {
-	BUG_ON(preemptible());
 	/*
 	 * This is running in ioctl context so we can avoid
 	 * the call to vtime_account() with its unnecessary idle check.
 	 */
-	vtime_account_system_irqsafe(current);
+	vtime_account_system(current);
 	current->flags |= PF_VCPU;
+}
+
+static inline void __guest_exit(void)
+{
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system(current);
+	current->flags &= ~PF_VCPU;
+}
+
+#ifdef CONFIG_CONTEXT_TRACKING
+extern void guest_enter(void);
+extern void guest_exit(void);
+
+#else /* !CONFIG_CONTEXT_TRACKING */
+static inline void guest_enter(void)
+{
+	__guest_enter();
+}
+
+static inline void guest_exit(void)
+{
+	__guest_exit();
+}
+#endif /* !CONFIG_CONTEXT_TRACKING */
+
+static inline void kvm_guest_enter(void)
+{
+	unsigned long flags;
+
+	BUG_ON(preemptible());
+
+	local_irq_save(flags);
+	guest_enter();
+	local_irq_restore(flags);
+
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
 	 * is very similar to exiting to userspase from rcu point of view. In
@@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void)
 
 static inline void kvm_guest_exit(void)
 {
-	/*
-	 * This is running in ioctl context so we can avoid
-	 * the call to vtime_account() with its unnecessary idle check.
-	 */
-	vtime_account_system_irqsafe(current);
-	current->flags &= ~PF_VCPU;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	guest_exit();
+	local_irq_restore(flags);
 }
 
 /*

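After this split, __guest_enter()/__guest_exit() do only the accounting work,
guest_enter()/guest_exit() add the context-tracking hook when
CONFIG_CONTEXT_TRACKING is set, and the kvm_* wrappers keep the old irq-safe
behaviour. The shape of a call site stays roughly what it was (a sketch; arch
code differs in detail):

	preempt_disable();
	kvm_guest_enter();	/* account system time, set PF_VCPU */
	/* ... actual VM entry and exit ... */
	kvm_guest_exit();	/* account guest time, clear PF_VCPU */
	preempt_enable();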
+ 42 - 143
include/linux/sched.h

@@ -304,19 +304,6 @@ static inline void lockup_detector_init(void)
 }
 }
 #endif
 #endif
 
 
-#ifdef CONFIG_DETECT_HUNG_TASK
-extern unsigned int  sysctl_hung_task_panic;
-extern unsigned long sysctl_hung_task_check_count;
-extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-					 void __user *buffer,
-					 size_t *lenp, loff_t *ppos);
-#else
-/* Avoid need for ifdefs elsewhere in the code */
-enum { sysctl_hung_task_timeout_secs = 0 };
-#endif
-
 /* Attach to any functions which should be ignored in wchan output. */
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched		__attribute__((__section__(".sched.text")))
 #define __sched		__attribute__((__section__(".sched.text")))
 
 
@@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 struct nsproxy;
 struct user_namespace;

-/*
- * Default maximum number of active map areas, this limits the number of vmas
- * per mm struct. Users can overwrite this number by sysctl but there is a
- * problem.
- *
- * When a program's coredump is generated as ELF format, a section is created
- * per a vma. In ELF, the number of sections is represented in unsigned short.
- * This means the number of sections should be smaller than 65535 at coredump.
- * Because the kernel adds some informative sections to a image of program at
- * generating coredump, we need some margin. The number of extra sections is
- * 1-3 now and depends on arch. We use "5" as safe margin, here.
- */
-#define MAPCOUNT_ELF_CORE_MARGIN	(5)
-#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
-
-extern int sysctl_max_map_count;
-
 #include <linux/aio.h>

 #ifdef CONFIG_MMU
@@ -1194,6 +1164,7 @@ struct sched_entity {
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq		*my_q;
 #endif
+
 /*
  * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
  * removed when useful for applications beyond shares distribution (e.g.
@@ -1208,6 +1179,7 @@ struct sched_entity {
 struct sched_rt_entity {
 	struct list_head run_list;
 	unsigned long timeout;
+	unsigned long watchdog_stamp;
 	unsigned int time_slice;

 	struct sched_rt_entity *back;
@@ -1220,11 +1192,6 @@ struct sched_rt_entity {
 #endif
 };

-/*
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define RR_TIMESLICE		(100 * HZ / 1000)

 struct rcu_node;

@@ -1367,6 +1334,15 @@ struct task_struct {
 	cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	struct cputime prev_cputime;
+#endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+	seqlock_t vtime_seqlock;
+	unsigned long long vtime_snap;
+	enum {
+		VTIME_SLEEPING = 0,
+		VTIME_USER,
+		VTIME_SYS,
+	} vtime_snap_whence;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
@@ -1622,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled)
 }
 #endif

-/*
- * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
- * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
- * values are inverted: lower p->prio value means higher priority.
- *
- * The MAX_USER_RT_PRIO value allows the actual maximum
- * RT priority to be separate from the value exported to
- * user-space.  This allows kernel threads to set their
- * priority to a value higher than any user task. Note:
- * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
- */
-
-#define MAX_USER_RT_PRIO	100
-#define MAX_RT_PRIO		MAX_USER_RT_PRIO
-
-#define MAX_PRIO		(MAX_RT_PRIO + 40)
-#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
-
-static inline int rt_prio(int prio)
-{
-	if (unlikely(prio < MAX_RT_PRIO))
-		return 1;
-	return 0;
-}
-
-static inline int rt_task(struct task_struct *p)
-{
-	return rt_prio(p->prio);
-}
-
 static inline struct pid *task_pid(struct task_struct *task)
 {
 	return task->pids[PIDTYPE_PID].pid;
@@ -1792,6 +1737,37 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }

+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void task_cputime(struct task_struct *t,
+			 cputime_t *utime, cputime_t *stime);
+extern void task_cputime_scaled(struct task_struct *t,
+				cputime_t *utimescaled, cputime_t *stimescaled);
+extern cputime_t task_gtime(struct task_struct *t);
+#else
+static inline void task_cputime(struct task_struct *t,
+				cputime_t *utime, cputime_t *stime)
+{
+	if (utime)
+		*utime = t->utime;
+	if (stime)
+		*stime = t->stime;
+}
+
+static inline void task_cputime_scaled(struct task_struct *t,
+				       cputime_t *utimescaled,
+				       cputime_t *stimescaled)
+{
+	if (utimescaled)
+		*utimescaled = t->utimescaled;
+	if (stimescaled)
+		*stimescaled = t->stimescaled;
+}
+
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+	return t->gtime;
+}
+#endif
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);

@@ -2033,58 +2009,7 @@ extern void wake_up_idle_cpu(int cpu);
 static inline void wake_up_idle_cpu(int cpu) { }
 #endif

-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_child_runs_first;
-
-enum sched_tunable_scaling {
-	SCHED_TUNABLESCALING_NONE,
-	SCHED_TUNABLESCALING_LOG,
-	SCHED_TUNABLESCALING_LINEAR,
-	SCHED_TUNABLESCALING_END,
-};
-extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
-
-extern unsigned int sysctl_numa_balancing_scan_delay;
-extern unsigned int sysctl_numa_balancing_scan_period_min;
-extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
-extern unsigned int sysctl_numa_balancing_scan_size;
-extern unsigned int sysctl_numa_balancing_settle_count;
-
-#ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_migration_cost;
-extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_timer_migration;
-extern unsigned int sysctl_sched_shares_window;
-
-int sched_proc_update_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *length,
-		loff_t *ppos);
-#endif
-#ifdef CONFIG_SCHED_DEBUG
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return sysctl_timer_migration;
-}
-#else
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return 1;
-}
-#endif
-extern unsigned int sysctl_sched_rt_period;
-extern int sysctl_sched_rt_runtime;
-
-int sched_rt_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-
 #ifdef CONFIG_SCHED_AUTOGROUP
-extern unsigned int sysctl_sched_autogroup_enabled;
-
 extern void sched_autogroup_create_attach(struct task_struct *p);
 extern void sched_autogroup_detach(struct task_struct *p);
 extern void sched_autogroup_fork(struct signal_struct *sig);
@@ -2100,30 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif

-#ifdef CONFIG_CFS_BANDWIDTH
-extern unsigned int sysctl_sched_cfs_bandwidth_slice;
-#endif
-
-#ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern void rt_mutex_adjust_pi(struct task_struct *p);
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-	return tsk->pi_blocked_on != NULL;
-}
-#else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
-	return p->normal_prio;
-}
-# define rt_mutex_adjust_pi(p)		do { } while (0)
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-	return false;
-}
-#endif
-
 extern bool yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
@@ -2753,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);

-extern void normalize_rt_tasks(void);
-
 #ifdef CONFIG_CGROUP_SCHED

 extern struct task_group root_task_group;

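The accessors introduced above are the heart of this split: with VIRT_CPU_ACCOUNTING_GEN the utime/stime reads go through the seqlock-protected path added later in kernel/sched/cputime.c, otherwise they collapse to plain field reads. A short consumer sketch; the debug helper itself is made up:

/* Sketch of a hypothetical consumer: always go through task_cputime()
 * rather than reading p->utime/p->stime directly, so full-dynticks
 * kernels can fold in the pending nohz delta.  Casts are for printing
 * only. */
static void print_task_times(struct task_struct *p)
{
	cputime_t utime, stime;

	task_cputime(p, &utime, &stime);
	printk(KERN_INFO "%s: utime=%lu stime=%lu\n",
	       p->comm, (unsigned long)utime, (unsigned long)stime);
}
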
+ 58 - 0
include/linux/sched/rt.h

@@ -0,0 +1,58 @@
+#ifndef _SCHED_RT_H
+#define _SCHED_RT_H
+
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_USER_RT_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space.  This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+
+#define MAX_USER_RT_PRIO	100
+#define MAX_RT_PRIO		MAX_USER_RT_PRIO
+
+#define MAX_PRIO		(MAX_RT_PRIO + 40)
+#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
+
+static inline int rt_prio(int prio)
+{
+	if (unlikely(prio < MAX_RT_PRIO))
+		return 1;
+	return 0;
+}
+
+static inline int rt_task(struct task_struct *p)
+{
+	return rt_prio(p->prio);
+}
+
+#ifdef CONFIG_RT_MUTEXES
+extern int rt_mutex_getprio(struct task_struct *p);
+extern void rt_mutex_setprio(struct task_struct *p, int prio);
+extern void rt_mutex_adjust_pi(struct task_struct *p);
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return tsk->pi_blocked_on != NULL;
+}
+#else
+static inline int rt_mutex_getprio(struct task_struct *p)
+{
+	return p->normal_prio;
+}
+# define rt_mutex_adjust_pi(p)		do { } while (0)
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return false;
+}
+#endif
+
+extern void normalize_rt_tasks(void);
+
+
+#endif /* _SCHED_RT_H */

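The new header is deliberately self-contained: code that only needs the RT priority tests can pull in <linux/sched/rt.h> rather than all of sched.h. A minimal sketch of a consumer; the classification helper is hypothetical:

#include <linux/sched.h>
#include <linux/sched/rt.h>

/* Hypothetical consumer: rt_task() is true for any p->prio in
 * 0..MAX_RT_PRIO-1, which covers SCHED_FIFO, SCHED_RR and PI-boosted
 * SCHED_NORMAL tasks. */
static const char *sched_class_name(struct task_struct *p)
{
	if (rt_task(p))
		return "rt";
	if (tsk_is_pi_blocked(p))
		return "pi-blocked";
	return "fair";
}
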
+ 110 - 0
include/linux/sched/sysctl.h

@@ -0,0 +1,110 @@
+#ifndef _SCHED_SYSCTL_H
+#define _SCHED_SYSCTL_H
+
+#ifdef CONFIG_DETECT_HUNG_TASK
+extern unsigned int  sysctl_hung_task_panic;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
+extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+					 void __user *buffer,
+					 size_t *lenp, loff_t *ppos);
+#else
+/* Avoid need for ifdefs elsewhere in the code */
+enum { sysctl_hung_task_timeout_secs = 0 };
+#endif
+
+/*
+ * Default maximum number of active map areas, this limits the number of vmas
+ * per mm struct. Users can overwrite this number by sysctl but there is a
+ * problem.
+ *
+ * When a program's coredump is generated as ELF format, a section is created
+ * per a vma. In ELF, the number of sections is represented in unsigned short.
+ * This means the number of sections should be smaller than 65535 at coredump.
+ * Because the kernel adds some informative sections to a image of program at
+ * generating coredump, we need some margin. The number of extra sections is
+ * 1-3 now and depends on arch. We use "5" as safe margin, here.
+ */
+#define MAPCOUNT_ELF_CORE_MARGIN	(5)
+#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+
+extern int sysctl_max_map_count;
+
+extern unsigned int sysctl_sched_latency;
+extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_wakeup_granularity;
+extern unsigned int sysctl_sched_child_runs_first;
+
+enum sched_tunable_scaling {
+	SCHED_TUNABLESCALING_NONE,
+	SCHED_TUNABLESCALING_LOG,
+	SCHED_TUNABLESCALING_LINEAR,
+	SCHED_TUNABLESCALING_END,
+};
+extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
+
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
+extern unsigned int sysctl_numa_balancing_scan_size;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
+#ifdef CONFIG_SCHED_DEBUG
+extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
+extern unsigned int sysctl_timer_migration;
+extern unsigned int sysctl_sched_shares_window;
+
+int sched_proc_update_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length,
+		loff_t *ppos);
+#endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return 1;
+}
+#endif
+
+/*
+ *  control realtime throttling:
+ *
+ *  /proc/sys/kernel/sched_rt_period_us
+ *  /proc/sys/kernel/sched_rt_runtime_us
+ */
+extern unsigned int sysctl_sched_rt_period;
+extern int sysctl_sched_rt_runtime;
+
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+#endif
+
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE		(100 * HZ / 1000)
+
+extern int sched_rr_timeslice;
+
+extern int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+
+extern int sched_rt_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+
+#endif /* _SCHED_SYSCTL_H */

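Note the unit conventions collected in this header: RR_TIMESLICE is expressed in jiffies, while the sched_rr_timeslice value that sched_rr_handler() manages is written in milliseconds and stored back in jiffies (the sysctl table entry itself lives in kernel/sysctl.c, outside this excerpt). The arithmetic works out the same at any tick rate:

/* RR_TIMESLICE = 100 * HZ / 1000 is always 100 ms:
 *   HZ=1000 -> 100 * 1000 / 1000 = 100 jiffies
 *   HZ=250  -> 100 * 250  / 1000 =  25 jiffies
 *   HZ=100  -> 100 * 100  / 1000 =  10 jiffies
 * i.e. roughly msecs_to_jiffies(100). */
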
+ 3 - 0
include/linux/tsacct_kern.h

@@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns,
 #ifdef CONFIG_TASK_XACCT
 extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
 extern void acct_update_integrals(struct task_struct *tsk);
+extern void acct_account_cputime(struct task_struct *tsk);
 extern void acct_clear_integrals(struct task_struct *tsk);
 #else
 static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 {}
 static inline void acct_update_integrals(struct task_struct *tsk)
 {}
+static inline void acct_account_cputime(struct task_struct *tsk)
+{}
 static inline void acct_clear_integrals(struct task_struct *tsk)
 {}
 #endif /* CONFIG_TASK_XACCT */

+ 40 - 19
include/linux/vtime.h

@@ -6,15 +6,46 @@ struct task_struct;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
-extern void vtime_account(struct task_struct *tsk);
-#else
+extern void vtime_account_irq_enter(struct task_struct *tsk);
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+static inline bool vtime_accounting_enabled(void) { return true; }
+#endif
+
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
-static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
-static inline void vtime_account(struct task_struct *tsk) { }
+static inline void vtime_account_user(struct task_struct *tsk) { }
+static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
+static inline bool vtime_accounting_enabled(void) { return false; }
+#endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void arch_vtime_task_switch(struct task_struct *tsk);
+extern void vtime_account_irq_exit(struct task_struct *tsk);
+extern bool vtime_accounting_enabled(void);
+extern void vtime_user_enter(struct task_struct *tsk);
+static inline void vtime_user_exit(struct task_struct *tsk)
+{
+	vtime_account_user(tsk);
+}
+extern void vtime_guest_enter(struct task_struct *tsk);
+extern void vtime_guest_exit(struct task_struct *tsk);
+extern void vtime_init_idle(struct task_struct *tsk);
+#else
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+	/* On hard|softirq exit we always account to hard|softirq cputime */
+	vtime_account_system(tsk);
+}
+static inline void vtime_user_enter(struct task_struct *tsk) { }
+static inline void vtime_user_exit(struct task_struct *tsk) { }
+static inline void vtime_guest_enter(struct task_struct *tsk) { }
+static inline void vtime_guest_exit(struct task_struct *tsk) { }
+static inline void vtime_init_idle(struct task_struct *tsk) { }
 #endif

 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -23,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk);
 static inline void irqtime_account_irq(struct task_struct *tsk) { }
 #endif

-static inline void vtime_account_irq_enter(struct task_struct *tsk)
+static inline void account_irq_enter_time(struct task_struct *tsk)
 {
-	/*
-	 * Hardirq can interrupt idle task anytime. So we need vtime_account()
-	 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
-	 * Softirq can also interrupt idle task directly if it calls
-	 * local_bh_enable(). Such case probably don't exist but we never know.
-	 * Ksoftirqd is not concerned because idle time is flushed on context
-	 * switch. Softirqs in the end of hardirqs are also not a problem because
-	 * the idle time is flushed on hardirq time already.
-	 */
-	vtime_account(tsk);
+	vtime_account_irq_enter(tsk);
 	irqtime_account_irq(tsk);
 }

-static inline void vtime_account_irq_exit(struct task_struct *tsk)
+static inline void account_irq_exit_time(struct task_struct *tsk)
 {
-	/* On hard|softirq exit we always account to hard|softirq cputime */
-	vtime_account_system(tsk);
+	vtime_account_irq_exit(tsk);
 	irqtime_account_irq(tsk);
 }


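After the rename, account_irq_enter_time()/account_irq_exit_time() are the single entry points the interrupt path calls; each fans out to both vtime and IRQ-time accounting. A sketch of where they sit — the real callers are irq_enter()/irq_exit() in kernel/softirq.c, outside this excerpt; the functions below only mirror their shape:

/* Sketch, assuming simplified hardirq entry/exit paths. */
void example_irq_enter(void)
{
	account_irq_enter_time(current);	/* vtime + irqtime on entry */
	/* ... preempt_count bookkeeping, RCU notification, etc. ... */
}

void example_irq_exit(void)
{
	account_irq_exit_time(current);		/* vtime + irqtime on exit */
	/* ... run pending softirqs, etc. ... */
}
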
+ 23 - 2
init/Kconfig

@@ -322,10 +322,13 @@ source "kernel/time/Kconfig"

 menu "CPU/Task time and stats accounting"

+config VIRT_CPU_ACCOUNTING
+	bool
+
 choice
 	prompt "Cputime accounting"
 	default TICK_CPU_ACCOUNTING if !PPC64
-	default VIRT_CPU_ACCOUNTING if PPC64
+	default VIRT_CPU_ACCOUNTING_NATIVE if PPC64

 # Kind of a stub config for the pure tick based cputime accounting
 config TICK_CPU_ACCOUNTING
@@ -338,9 +341,10 @@ config TICK_CPU_ACCOUNTING

 	  If unsure, say Y.

-config VIRT_CPU_ACCOUNTING
+config VIRT_CPU_ACCOUNTING_NATIVE
 	bool "Deterministic task and CPU time accounting"
 	depends on HAVE_VIRT_CPU_ACCOUNTING
+	select VIRT_CPU_ACCOUNTING
 	help
 	  Select this option to enable more accurate task and CPU time
 	  accounting.  This is done by reading a CPU counter on each
@@ -350,6 +354,23 @@ config VIRT_CPU_ACCOUNTING
 	  this also enables accounting of stolen time on logically-partitioned
 	  systems.

+config VIRT_CPU_ACCOUNTING_GEN
+	bool "Full dynticks CPU time accounting"
+	depends on HAVE_CONTEXT_TRACKING && 64BIT
+	select VIRT_CPU_ACCOUNTING
+	select CONTEXT_TRACKING
+	help
+	  Select this option to enable task and CPU time accounting on full
+	  dynticks systems. This accounting is implemented by watching every
+	  kernel-user boundary using the context tracking subsystem.
+	  The accounting is thus performed at the expense of some significant
+	  overhead.
+
+	  For now this is only useful if you are working on the full
+	  dynticks subsystem development.
+
+	  If unsure, say N.
+
 config IRQ_TIME_ACCOUNTING
 	bool "Fine granularity task level IRQ time accounting"
 	depends on HAVE_IRQ_TIME_ACCOUNTING

+ 2 - 0
init/init_task.c

@@ -2,6 +2,8 @@
 #include <linux/export.h>
 #include <linux/mqueue.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>

+ 4 - 2
kernel/acct.c

@@ -566,6 +566,7 @@ out:
 void acct_collect(long exitcode, int group_dead)
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
+	cputime_t utime, stime;
 	unsigned long vsize = 0;

 	if (group_dead && current->mm) {
@@ -593,8 +594,9 @@ void acct_collect(long exitcode, int group_dead)
 		pacct->ac_flag |= ACORE;
 	if (current->flags & PF_SIGNALED)
 		pacct->ac_flag |= AXSIG;
-	pacct->ac_utime += current->utime;
-	pacct->ac_stime += current->stime;
+	task_cputime(current, &utime, &stime);
+	pacct->ac_utime += utime;
+	pacct->ac_stime += stime;
 	pacct->ac_minflt += current->min_flt;
 	pacct->ac_majflt += current->maj_flt;
 	spin_unlock_irq(&current->sighand->siglock);

+ 25 - 18
kernel/context_tracking.c

@@ -15,26 +15,13 @@
  */

 #include <linux/context_tracking.h>
+#include <linux/kvm_host.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/export.h>

-struct context_tracking {
-	/*
-	 * When active is false, probes are unset in order
-	 * to minimize overhead: TIF flags are cleared
-	 * and calls to user_enter/exit are ignored. This
-	 * may be further optimized using static keys.
-	 */
-	bool active;
-	enum {
-		IN_KERNEL = 0,
-		IN_USER,
-	} state;
-};
-
-static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
+DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
 	.active = true,
 #endif
@@ -70,7 +57,6 @@ void user_enter(void)
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.active) &&
 	    __this_cpu_read(context_tracking.state) != IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_USER);
 		/*
 		 * At this stage, only low level arch entry code remains and
 		 * then we'll run in userspace. We can assume there won't be
@@ -78,7 +64,9 @@
 		 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
 		 * on the tick.
 		 */
+		vtime_user_enter(current);
 		rcu_user_enter();
+		__this_cpu_write(context_tracking.state, IN_USER);
 	}
 	local_irq_restore(flags);
 }
@@ -104,16 +92,35 @@ void user_exit(void)

 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.state) == IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_KERNEL);
 		/*
 		 * We are going to run code that may use RCU. Inform
 		 * RCU core about that (ie: we may need the tick again).
 		 */
 		rcu_user_exit();
+		vtime_user_exit(current);
+		__this_cpu_write(context_tracking.state, IN_KERNEL);
 	}
 	local_irq_restore(flags);
 }

+void guest_enter(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_enter(current);
+	else
+		__guest_enter();
+}
+EXPORT_SYMBOL_GPL(guest_enter);
+
+void guest_exit(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_exit(current);
+	else
+		__guest_exit();
+}
+EXPORT_SYMBOL_GPL(guest_exit);
+

 /**
  * context_tracking_task_switch - context switch the syscall callbacks

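Note the ordering in the two probes above: the vtime hooks run while context_tracking.state still holds the previous context, so pending time is charged to where it was actually spent. The probes themselves are driven from the arch syscall path; a condensed sketch with hypothetical glue — the real hooks are arch-specific and do_syscall() is made up:

/* Sketch, assuming simplified syscall glue. */
long example_syscall_entry(long nr)
{
	user_exit();		/* back in the kernel: flush user cputime */
	return do_syscall(nr);	/* hypothetical dispatcher */
}

void example_resume_userspace(void)
{
	user_enter();		/* resuming userspace: snapshot kernel time */
}
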
+ 3 - 1
kernel/cpu.c

@@ -224,11 +224,13 @@ void clear_tasks_mm_cpumask(int cpu)
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
+	cputime_t utime, stime;

 	write_lock_irq(&tasklist_lock);
 	for_each_process(p) {
+		task_cputime(p, &utime, &stime);
 		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
-		    (p->utime || p->stime))
+		    (utime || stime))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
 				"(state = %ld, flags = %x)\n",
 				p->comm, task_pid_nr(p), cpu,

+ 5 - 2
kernel/delayacct.c

@@ -106,6 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	unsigned long long t2, t3;
 	unsigned long flags;
 	struct timespec ts;
+	cputime_t utime, stime, stimescaled, utimescaled;

 	/* Though tsk->delays accessed later, early exit avoids
 	 * unnecessary returning of other data
@@ -114,12 +115,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 		goto done;

 	tmp = (s64)d->cpu_run_real_total;
-	cputime_to_timespec(tsk->utime + tsk->stime, &ts);
+	task_cputime(tsk, &utime, &stime);
+	cputime_to_timespec(utime + stime, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;

 	tmp = (s64)d->cpu_scaled_run_real_total;
-	cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts);
+	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+	cputime_to_timespec(utimescaled + stimescaled, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_scaled_run_real_total =
 		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;

+ 6 - 4
kernel/exit.c

@@ -85,6 +85,7 @@ static void __exit_signal(struct task_struct *tsk)
 	bool group_dead = thread_group_leader(tsk);
 	struct sighand_struct *sighand;
 	struct tty_struct *uninitialized_var(tty);
+	cputime_t utime, stime;

 	sighand = rcu_dereference_check(tsk->sighand,
 					lockdep_tasklist_lock_is_held());
@@ -123,9 +124,10 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime += tsk->utime;
-		sig->stime += tsk->stime;
-		sig->gtime += tsk->gtime;
+		task_cputime(tsk, &utime, &stime);
+		sig->utime += utime;
+		sig->stime += stime;
+		sig->gtime += task_gtime(tsk);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
@@ -1092,7 +1094,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		sig = p->signal;
 		psig->cutime += tgutime + sig->cutime;
 		psig->cstime += tgstime + sig->cstime;
-		psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
+		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
 		psig->cmin_flt +=
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=

+ 6 - 0
kernel/fork.c

@@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+	seqlock_init(&p->vtime_seqlock);
+	p->vtime_snap = 0;
+	p->vtime_snap_whence = VTIME_SLEEPING;
+#endif
+
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif

+ 1 - 0
kernel/futex.c

@@ -60,6 +60,7 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/ptrace.h>
+#include <linux/sched/rt.h>

 #include <asm/futex.h>


+ 2 - 0
kernel/hrtimer.c

@@ -44,6 +44,8 @@
 #include <linux/err.h>
 #include <linux/debugobjects.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/timer.h>

 #include <asm/uaccess.h>

+ 1 - 0
kernel/irq/manage.c

@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/task_work.h>

 #include "internals.h"

+ 1 - 0
kernel/mutex.c

@@ -19,6 +19,7 @@
  */
 #include <linux/mutex.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>

+ 22 - 6
kernel/posix-cpu-timers.c

@@ -155,11 +155,19 @@ static void bump_cpu_timer(struct k_itimer *timer,

 static inline cputime_t prof_ticks(struct task_struct *p)
 {
-	return p->utime + p->stime;
+	cputime_t utime, stime;
+
+	task_cputime(p, &utime, &stime);
+
+	return utime + stime;
 }
 static inline cputime_t virt_ticks(struct task_struct *p)
 {
-	return p->utime;
+	cputime_t utime;
+
+	task_cputime(p, &utime, NULL);
+
+	return utime;
 }

 static int
@@ -471,18 +479,23 @@ static void cleanup_timers(struct list_head *head,
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
+	cputime_t utime, stime;
+
 	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 						sizeof(unsigned long long));
+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->cpu_timers,
-		       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
+		       utime, stime, tsk->se.sum_exec_runtime);

 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
 	struct signal_struct *const sig = tsk->signal;
+	cputime_t utime, stime;

+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->signal->cpu_timers,
-		       tsk->utime + sig->utime, tsk->stime + sig->stime,
+		       utime + sig->utime, stime + sig->stime,
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }

@@ -1226,11 +1239,14 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);

 	if (!task_cputime_zero(&tsk->cputime_expires)) {
 		struct task_cputime task_sample = {
-			.utime = tsk->utime,
-			.stime = tsk->stime,
+			.utime = utime,
+			.stime = stime,
 			.sum_exec_runtime = tsk->se.sum_exec_runtime
 		};


+ 1 - 0
kernel/rtmutex-debug.c

@@ -17,6 +17,7 @@
  * See rt.c in preempt-rt for proper credits and further information
  */
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>

+ 1 - 0
kernel/rtmutex-tester.c

@@ -10,6 +10,7 @@
 #include <linux/kthread.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>

+ 1 - 0
kernel/rtmutex.c

@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/timer.h>

 #include "rtmutex_common.h"

+ 21 - 1
kernel/sched/core.c

@@ -4371,7 +4371,7 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
 	unsigned long flags;
-	bool yielded = 0;
+	int yielded = 0;

 	local_irq_save(flags);
 	rq = this_rq();
@@ -4667,6 +4667,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+	vtime_init_idle(idle);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
@@ -7508,6 +7509,25 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */

+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/* make sure that internally we keep jiffies */
+	/* also, writing zero resets timeslice to default */
+	if (!ret && write) {
+		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)

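sched_rr_handler() above leans on proc_dointvec(): the user's write lands in sched_rr_timeslice in milliseconds, and the handler then normalizes it in place, with zero or a negative value restoring the default. Restated as a standalone helper (illustrative, not part of the patch):

/* Illustrative restatement of the post-write normalization. */
static void normalize_rr_timeslice(void)
{
	sched_rr_timeslice = (sched_rr_timeslice <= 0)
		? RR_TIMESLICE			/* reset to the 100 ms default */
		: msecs_to_jiffies(sched_rr_timeslice);
}
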
+ 2 - 0
kernel/sched/cpupri.c

@@ -28,6 +28,8 @@
  */

 #include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include "cpupri.h"

 /* Convert between a 140 based task->prio, and our 102 based cpupri */

+ 271 - 43
kernel/sched/cputime.c

@@ -3,6 +3,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kernel_stat.h>
 #include <linux/static_key.h>
+#include <linux/context_tracking.h>
 #include "sched.h"


@@ -163,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);

 	/* Account for user time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }

 /*
@@ -213,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);

 	/* Account for system time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }

 /*
@@ -295,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct signal_struct *sig = tsk->signal;
+	cputime_t utime, stime;
 	struct task_struct *t;

 	times->utime = sig->utime;
@@ -308,16 +310,15 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)

 	t = tsk;
 	do {
-		times->utime += t->utime;
-		times->stime += t->stime;
+		task_cputime(t, &utime, &stime);
+		times->utime += utime;
+		times->stime += stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 out:
 	rcu_read_unlock();
 }

-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 /*
  * Account a tick to a process and cpustat
@@ -382,11 +383,12 @@ static void irqtime_account_idle_ticks(int ticks)
 		irqtime_account_process_tick(current, 0, rq);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static void irqtime_account_idle_ticks(int ticks) {}
-static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+static inline void irqtime_account_idle_ticks(int ticks) {}
+static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 						struct rq *rq) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */

+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Account a single tick of cpu time.
  * @p: the process that the cpu time gets accounted to
@@ -397,6 +399,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();

+	if (vtime_accounting_enabled())
+		return;
+
 	if (sched_clock_irqtime) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
@@ -438,8 +443,7 @@ void account_idle_ticks(unsigned long ticks)

 	account_idle_time(jiffies_to_cputime(ticks));
 }
-
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

 /*
  * Use precise platform statistics if available:
@@ -461,25 +465,20 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	*st = cputime.stime;
 }

-void vtime_account_system_irqsafe(struct task_struct *tsk)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	vtime_account_system(tsk);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
-
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_task_switch(struct task_struct *prev)
 {
+	if (!vtime_accounting_enabled())
+		return;
+
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
 		vtime_account_system(prev);

+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	vtime_account_user(prev);
+#endif
 	arch_vtime_task_switch(prev);
 }
 #endif
@@ -493,27 +492,40 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
-	if (in_interrupt() || !is_idle_task(tsk))
-		vtime_account_system(tsk);
-	else
-		vtime_account_idle(tsk);
+	if (!vtime_accounting_enabled())
+		return;
+
+	if (!in_interrupt()) {
+		/*
+		 * If we interrupted user, context_tracking_in_user()
+		 * is 1 because the context tracking doesn't hook
+		 * on irq entry/exit. This way we know if
+		 * we need to flush user time on kernel entry.
+		 */
+		if (context_tracking_in_user()) {
+			vtime_account_user(tsk);
+			return;
+		}
+
+		if (is_idle_task(tsk)) {
+			vtime_account_idle(tsk);
+			return;
+		}
+	}
+	vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */

-#else
-
-#ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
-#endif
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */

-static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total)
 {
 	u64 temp = (__force u64) rtime;

-	temp *= (__force u64) utime;
+	temp *= (__force u64) stime;

 	if (sizeof(cputime_t) == 4)
 		temp = div_u64(temp, (__force u32) total);
@@ -531,10 +543,10 @@ static void cputime_adjust(struct task_cputime *curr,
 			   struct cputime *prev,
 			   cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, utime, total;
+	cputime_t rtime, stime, total;
-	utime = curr->utime;
-	total = utime + curr->stime;
+	stime = curr->stime;
+	total = stime + curr->utime;

 	/*
 	 * Tick based cputime accounting depend on random scheduling
@@ -549,17 +561,17 @@ static void cputime_adjust(struct task_cputime *curr,
 	rtime = nsecs_to_cputime(curr->sum_exec_runtime);

 	if (total)
-		utime = scale_utime(utime, rtime, total);
+		stime = scale_stime(stime, rtime, total);
 	else
 	else
-		utime = rtime;
+		stime = rtime;

 	/*
 	 * If the tick based count grows faster than the scheduler one,
 	 * the result of the scaling may go backward.
 	 * Let's enforce monotonicity.
 	 */
-	prev->utime = max(prev->utime, utime);
-	prev->stime = max(prev->stime, rtime - prev->utime);
+	prev->stime = max(prev->stime, stime);
+	prev->utime = max(prev->utime, rtime - prev->stime);

 	*ut = prev->utime;
 	*st = prev->stime;
@@ -568,11 +580,10 @@ static void cputime_adjust(struct task_cputime *curr,
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime = {
-		.utime = p->utime,
-		.stime = p->stime,
 		.sum_exec_runtime = p->se.sum_exec_runtime,
 	};

+	task_cputime(p, &cputime.utime, &cputime.stime);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }

@@ -586,4 +597,221 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	thread_group_cputime(p, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static unsigned long long vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long clock;
+
+	clock = sched_clock();
+	if (clock < tsk->vtime_snap)
+		return 0;
+
+	return clock - tsk->vtime_snap;
+}
+
+static cputime_t get_vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long delta = vtime_delta(tsk);
+
+	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
+	tsk->vtime_snap += delta;
+
+	/* CHECKME: always safe to convert nsecs to cputime? */
+	return nsecs_to_cputime(delta);
+}
+
+static void __vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_irq_exit(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	if (context_tracking_in_user())
+		tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_user(struct task_struct *tsk)
+{
+	cputime_t delta_cpu;
+
+	if (!vtime_accounting_enabled())
+		return;
+
+	delta_cpu = get_vtime_delta(tsk);
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_SYS;
+	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_user_enter(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_enter(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags |= PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_exit(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags &= ~PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_idle_time(delta_cpu);
+}
+
+bool vtime_accounting_enabled(void)
+{
+	return context_tracking_active();
+}
+
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+	write_seqlock(&prev->vtime_seqlock);
+	prev->vtime_snap_whence = VTIME_SLEEPING;
+	write_sequnlock(&prev->vtime_seqlock);
+
+	write_seqlock(&current->vtime_seqlock);
+	current->vtime_snap_whence = VTIME_SYS;
+	current->vtime_snap = sched_clock();
+	write_sequnlock(&current->vtime_seqlock);
+}
+
+void vtime_init_idle(struct task_struct *t)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&t->vtime_seqlock, flags);
+	t->vtime_snap_whence = VTIME_SYS;
+	t->vtime_snap = sched_clock();
+	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
+}
+
+cputime_t task_gtime(struct task_struct *t)
+{
+	unsigned int seq;
+	cputime_t gtime;
+
+	do {
+		seq = read_seqbegin(&t->vtime_seqlock);
+
+		gtime = t->gtime;
+		if (t->flags & PF_VCPU)
+			gtime += vtime_delta(t);
+
+	} while (read_seqretry(&t->vtime_seqlock, seq));
+
+	return gtime;
+}
+
+/*
+ * Fetch cputime raw values from fields of task_struct and
+ * add up the pending nohz execution time since the last
+ * cputime snapshot.
+ */
+static void
+fetch_task_cputime(struct task_struct *t,
+		   cputime_t *u_dst, cputime_t *s_dst,
+		   cputime_t *u_src, cputime_t *s_src,
+		   cputime_t *udelta, cputime_t *sdelta)
+{
+	unsigned int seq;
+	unsigned long long delta;
+
+	do {
+		*udelta = 0;
+		*sdelta = 0;
+
+		seq = read_seqbegin(&t->vtime_seqlock);
+
+		if (u_dst)
+			*u_dst = *u_src;
+		if (s_dst)
+			*s_dst = *s_src;
+
+		/* Task is sleeping, nothing to add */
+		if (t->vtime_snap_whence == VTIME_SLEEPING ||
+		    is_idle_task(t))
+			continue;
+
+		delta = vtime_delta(t);
+
+		/*
+		 * Task runs either in user or kernel space, add pending nohz time to
+		 * the right place.
+		 */
+		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
+			*udelta = delta;
+		} else {
+			if (t->vtime_snap_whence == VTIME_SYS)
+				*sdelta = delta;
+		}
+	} while (read_seqretry(&t->vtime_seqlock, seq));
+}
+
+
+void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utime, stime, &t->utime,
+			   &t->stime, &udelta, &sdelta);
+	if (utime)
+		*utime += udelta;
+	if (stime)
+		*stime += sdelta;
+}
+
+void task_cputime_scaled(struct task_struct *t,
+			 cputime_t *utimescaled, cputime_t *stimescaled)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utimescaled, stimescaled,
+			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
+	if (utimescaled)
+		*utimescaled += cputime_to_scaled(udelta);
+	if (stimescaled)
+		*stimescaled += cputime_to_scaled(sdelta);
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

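The vtime_seqlock usage above is the classic seqlock pairing: the task itself is the only writer and takes write_seqlock() around every snapshot update, while remote readers such as task_gtime() and fetch_task_cputime() loop until they observe an unchanged sequence. Condensed to its skeleton (illustrative, not part of the patch):

/* Illustrative skeleton of the writer/reader protocol used above. */
static void vtime_write_snapshot(struct task_struct *t)
{
	write_seqlock(&t->vtime_seqlock);
	/* ... update t->vtime_snap and t->vtime_snap_whence ... */
	write_sequnlock(&t->vtime_seqlock);
}

static unsigned long long vtime_read_snapshot(struct task_struct *t)
{
	unsigned long long snap;
	unsigned int seq;

	do {
		seq = read_seqbegin(&t->vtime_seqlock);
		snap = t->vtime_snap;	/* stable once seq is unchanged */
	} while (read_seqretry(&t->vtime_seqlock, seq));

	return snap;
}
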
+ 9 - 18
kernel/sched/fair.c

@@ -1680,9 +1680,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	}

 	/* ensure we never gain time by being placed backwards. */
-	vruntime = max_vruntime(se->vruntime, vruntime);
-
-	se->vruntime = vruntime;
+	se->vruntime = max_vruntime(se->vruntime, vruntime);
 }

 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -3254,25 +3252,18 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  */
 static int select_idle_sibling(struct task_struct *p, int target)
 {
-	int cpu = smp_processor_id();
-	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
 	struct sched_domain *sd;
 	struct sched_group *sg;
 	struct sched_group *sg;
-	int i;
+	int i = task_cpu(p);
-	/*
-	 * If the task is going to be woken-up on this cpu and if it is
-	 * already idle, then it is the right target.
-	 */
-	if (target == cpu && idle_cpu(cpu))
-		return cpu;
+	if (idle_cpu(target))
+		return target;

 	/*
-	 * If the task is going to be woken-up on the cpu where it previously
-	 * ran and if it is currently idle, then it the right target.
+	 * If the previous cpu is cache affine and idle, don't be stupid.
 	 */
-	if (target == prev_cpu && idle_cpu(prev_cpu))
-		return prev_cpu;
+	if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
+		return i;

 	/*
 	 * Otherwise, iterate the domains and find an eligible idle cpu.
@@ -3286,7 +3277,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 				goto next;

 			for_each_cpu(i, sched_group_cpus(sg)) {
-				if (!idle_cpu(i))
+				if (i == target || !idle_cpu(i))
 					goto next;
 			}

@@ -6101,7 +6092,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
 	 * idle runqueue:
 	 */
 	if (rq->cfs.load.weight)
-		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+		rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));

 	return rr_interval;
 }

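The rewritten select_idle_sibling() now considers exactly two fast-path candidates — the wake target and the task's previous CPU when it is cache-affine — before falling back to scanning for a fully idle group; this is the "bouncing cow" fix cited in the merge summary. A condensed restatement of the decision order (illustrative):

/* Condensed decision order of the rewritten select_idle_sibling(). */
static int pick_wake_cpu(struct task_struct *p, int target)
{
	int prev = task_cpu(p);

	if (idle_cpu(target))
		return target;		/* wake target itself is idle */
	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
		return prev;		/* cache-affine previous cpu is idle */
	return -1;			/* fall back to the idle-group scan */
}
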
+ 17 - 9
kernel/sched/rt.c

@@ -7,6 +7,8 @@

 #include <linux/slab.h>

+int sched_rr_timeslice = RR_TIMESLICE;
+
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

 struct rt_bandwidth def_rt_bandwidth;
@@ -925,8 +927,8 @@ static void update_curr_rt(struct rq *rq)
 		return;

 	delta_exec = rq->clock_task - curr->se.exec_start;
-	if (unlikely((s64)delta_exec < 0))
-		delta_exec = 0;
+	if (unlikely((s64)delta_exec <= 0))
+		return;

 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
@@ -1427,8 +1429,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
-	    (p->nr_cpus_allowed > 1))
+	    cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
 		return 1;
 		return 1;
 	return 0;
 	return 0;
 }
 }
@@ -1889,8 +1890,11 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	 * we may need to handle the pulling of RT tasks
 	 * we may need to handle the pulling of RT tasks
 	 * now.
 	 * now.
 	 */
 	 */
-	if (p->on_rq && !rq->rt.rt_nr_running)
-		pull_rt_task(rq);
+	if (!p->on_rq || rq->rt.rt_nr_running)
+		return;
+
+	if (pull_rt_task(rq))
+		resched_task(rq->curr);
 }
 }
 
 
 void init_sched_rt_class(void)
 void init_sched_rt_class(void)
@@ -1985,7 +1989,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 	if (soft != RLIM_INFINITY) {
 	if (soft != RLIM_INFINITY) {
 		unsigned long next;
 		unsigned long next;
 
 
-		p->rt.timeout++;
+		if (p->rt.watchdog_stamp != jiffies) {
+			p->rt.timeout++;
+			p->rt.watchdog_stamp = jiffies;
+		}
+
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
 		if (p->rt.timeout > next)
 			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
 			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
@@ -2010,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	if (--p->rt.time_slice)
 	if (--p->rt.time_slice)
 		return;
 		return;
 
 
-	p->rt.time_slice = RR_TIMESLICE;
+	p->rt.time_slice = sched_rr_timeslice;
 
 
 	/*
 	/*
 	 * Requeue to the end of queue if we (and all of our ancestors) are the
 	 * Requeue to the end of queue if we (and all of our ancestors) are the
@@ -2041,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 	 * Time slice is 0 for SCHED_FIFO tasks
 	 * Time slice is 0 for SCHED_FIFO tasks
 	 */
 	 */
 	if (task->policy == SCHED_RR)
 	if (task->policy == SCHED_RR)
-		return RR_TIMESLICE;
+		return sched_rr_timeslice;
 	else
 	else
 		return 0;
 		return 0;
 }
 }

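With RR_TIMESLICE replaced by the sched_rr_timeslice variable, the SCHED_RR quantum that get_rr_interval_rt() reports becomes runtime-tunable. From user space the effective value can be observed with the standard sched_rr_get_interval(2) syscall; a minimal sketch (assumes Linux and the privilege to enter SCHED_RR):

#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 1 };
	struct timespec ts;

	/* Switching to SCHED_RR normally needs CAP_SYS_NICE or root. */
	if (sched_setscheduler(0, SCHED_RR, &sp) != 0)
		perror("sched_setscheduler");

	/* Reports the round-robin quantum; 0 for SCHED_FIFO tasks,
	 * per get_rr_interval_rt() above. */
	if (sched_rr_get_interval(0, &ts) == 0)
		printf("RR timeslice: %ld.%09ld s\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
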
+ 2 - 0
kernel/sched/sched.h

@@ -1,5 +1,7 @@
 
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>

+ 8 - 4
kernel/signal.c

@@ -1632,6 +1632,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 	unsigned long flags;
 	struct sighand_struct *psig;
 	bool autoreap = false;
+	cputime_t utime, stime;
 
 	BUG_ON(sig == -1);
 
@@ -1669,8 +1670,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 				       task_uid(tsk));
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime);
-	info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime);
+	task_cputime(tsk, &utime, &stime);
+	info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime);
+	info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
@@ -1734,6 +1736,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	unsigned long flags;
 	struct task_struct *parent;
 	struct sighand_struct *sighand;
+	cputime_t utime, stime;
 
 	if (for_ptracer) {
 		parent = tsk->parent;
@@ -1752,8 +1755,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(tsk->utime);
-	info.si_stime = cputime_to_clock_t(tsk->stime);
+	task_cputime(tsk, &utime, &stime);
+	info.si_utime = cputime_to_clock_t(utime);
+	info.si_stime = cputime_to_clock_t(stime);
 
 	info.si_code = why;
 	switch (why) {

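Both call sites now go through the task_cputime() accessor rather than dereferencing tsk->utime/tsk->stime directly. The point of the indirection is that a full-dynticks kernel may hold cputime that no tick has flushed into those fields yet; an accessor can fold that in. A rough user-space model of the idea (the struct and its pending_user field are invented for illustration, not the kernel's):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t cputime_t;

/* Hypothetical stand-in for task_struct's accounting fields. */
struct task {
	cputime_t utime, stime;   /* time already flushed to the fields */
	cputime_t pending_user;   /* un-flushed delta a dynticks CPU still owes */
};

/* Modeled on the task_cputime() accessor used above: because callers no
 * longer read the raw fields, the implementation is free to add in time
 * that has not been flushed by a tick yet. */
static void task_cputime(struct task *t, cputime_t *utime, cputime_t *stime)
{
	if (utime)
		*utime = t->utime + t->pending_user;
	if (stime)
		*stime = t->stime;
}

int main(void)
{
	struct task t = { .utime = 100, .stime = 40, .pending_user = 7 };
	cputime_t u, s;

	task_cputime(&t, &u, &s);
	printf("utime=%llu stime=%llu\n",
	       (unsigned long long)u, (unsigned long long)s);
	return 0;
}
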
+ 3 - 3
kernel/softirq.c

@@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void)
 	current->flags &= ~PF_MEMALLOC;
 
 	pending = local_softirq_pending();
-	vtime_account_irq_enter(current);
+	account_irq_enter_time(current);
 
 	__local_bh_disable((unsigned long)__builtin_return_address(0),
 				SOFTIRQ_OFFSET);
@@ -272,7 +272,7 @@ restart:
 
 	lockdep_softirq_exit();
 
-	vtime_account_irq_exit(current);
+	account_irq_exit_time(current);
 	__local_bh_enable(SOFTIRQ_OFFSET);
 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
 }
@@ -341,7 +341,7 @@ static inline void invoke_softirq(void)
  */
 void irq_exit(void)
 {
-	vtime_account_irq_exit(current);
+	account_irq_exit_time(current);
 	trace_hardirq_exit();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())

+ 8 - 0
kernel/sysctl.c

@@ -61,6 +61,7 @@
 #include <linux/kmod.h>
 #include <linux/capability.h>
 #include <linux/binfmts.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -403,6 +404,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rt_handler,
 	},
+	{
+		.procname	= "sched_rr_timeslice_ms",
+		.data		= &sched_rr_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sched_rr_handler,
+	},
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",

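The new ctl_table entry surfaces as /proc/sys/kernel/sched_rr_timeslice_ms, so the RR quantum can be read (or, with root, written) like any other proc tunable. A quick read-side sketch (assumes a kernel carrying this entry):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_rr_timeslice_ms", "r");
	int ms;

	if (f && fscanf(f, "%d", &ms) == 1)
		printf("SCHED_RR timeslice: %d ms\n", ms);
	else
		perror("sched_rr_timeslice_ms");
	if (f)
		fclose(f);
	return 0;
}
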
+ 4 - 1
kernel/time/tick-sched.c

@@ -632,8 +632,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 
 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 {
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	unsigned long ticks;
+
+	if (vtime_accounting_enabled())
+		return;
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
 	 * time we slept as update_process_times does only a 1 tick

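The guard changes shape because generic virtual cputime accounting can be compiled in yet enabled only at runtime: the #ifndef now rules out just the native flavour, while vtime_accounting_enabled() skips the tick-based fallback whenever vtime is actually live. A toy illustration of that compile-time-plus-runtime gating (vtime_enabled is a made-up stand-in for the real predicate):

#include <stdbool.h>
#include <stdio.h>

static bool vtime_enabled;   /* stand-in for vtime_accounting_enabled() */

static void account_idle_ticks(unsigned long ticks)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	/* Native vtime compiles this path out entirely; generic vtime
	 * keeps it but must bail out when vtime owns the accounting. */
	if (vtime_enabled)
		return;
	printf("tick-based accounting of %lu idle ticks\n", ticks);
#endif
}

int main(void)
{
	account_idle_ticks(3);   /* accounted the tick-based way */
	vtime_enabled = true;
	account_idle_ticks(5);   /* skipped: vtime is live */
	return 0;
}
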
+ 1 - 0
kernel/timer.c

@@ -39,6 +39,7 @@
 #include <linux/kallsyms.h>
 #include <linux/irq_work.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/slab.h>
 
 #include <asm/uaccess.h>

+ 1 - 0
kernel/trace/trace.c

@@ -39,6 +39,7 @@
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/fs.h>
+#include <linux/sched/rt.h>
 
 #include "trace.h"
 #include "trace_output.h"

+ 1 - 1
kernel/trace/trace_sched_wakeup.c

@@ -15,8 +15,8 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
+#include <linux/sched/rt.h>
 #include <trace/events/sched.h>
-
 #include "trace.h"
 
 static struct trace_array	*wakeup_trace;

+ 34 - 10
kernel/tsacct.c

@@ -32,6 +32,7 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 {
 	const struct cred *tcred;
 	struct timespec uptime, ts;
+	cputime_t utime, stime, utimescaled, stimescaled;
 	u64 ac_etime;
 
 	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
@@ -65,10 +66,15 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 	stats->ac_ppid	 = pid_alive(tsk) ?
 		task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0;
 	rcu_read_unlock();
-	stats->ac_utime = cputime_to_usecs(tsk->utime);
-	stats->ac_stime = cputime_to_usecs(tsk->stime);
-	stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
-	stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
+
+	task_cputime(tsk, &utime, &stime);
+	stats->ac_utime = cputime_to_usecs(utime);
+	stats->ac_stime = cputime_to_usecs(stime);
+
+	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+	stats->ac_utimescaled = cputime_to_usecs(utimescaled);
+	stats->ac_stimescaled = cputime_to_usecs(stimescaled);
+
 	stats->ac_minflt = tsk->min_flt;
 	stats->ac_majflt = tsk->maj_flt;
 
@@ -115,11 +121,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 #undef KB
 #undef MB
 
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
+static void __acct_update_integrals(struct task_struct *tsk,
+				    cputime_t utime, cputime_t stime)
 {
 	if (likely(tsk->mm)) {
 		cputime_t time, dtime;
@@ -128,7 +131,7 @@ void acct_update_integrals(struct task_struct *tsk)
 		u64 delta;
 
 		local_irq_save(flags);
-		time = tsk->stime + tsk->utime;
+		time = stime + utime;
 		dtime = time - tsk->acct_timexpd;
 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
 		delta = value.tv_sec;
@@ -144,6 +147,27 @@ void acct_update_integrals(struct task_struct *tsk)
 	}
 }
 
+/**
+ * acct_update_integrals - update mm integral fields in task_struct
+ * @tsk: task_struct for accounting
+ */
+void acct_update_integrals(struct task_struct *tsk)
+{
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
+	__acct_update_integrals(tsk, utime, stime);
+}
+
+/**
+ * acct_account_cputime - update mm integral after cputime update
+ * @tsk: task_struct for accounting
+ */
+void acct_account_cputime(struct task_struct *tsk)
+{
+	__acct_update_integrals(tsk, tsk->utime, tsk->stime);
+}
+
 /**
  * acct_clear_integrals - clear the mm integral fields in task_struct
  * @tsk: task_struct whose accounting fields are cleared

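__acct_update_integrals() accumulates a time-weighted memory integral: the cputime delta since the last update (time - tsk->acct_timexpd) scales the task's current memory footprint, then the watermark advances. A standalone model of that bookkeeping (struct and units invented for illustration only):

#include <stdio.h>
#include <stdint.h>

/* Invented stand-in for the task's accounting state. */
struct acct_state {
	uint64_t timexpd;    /* cputime already integrated, in usecs */
	uint64_t rss_mem1;   /* integral of RSS over cputime         */
};

static void update_integrals(struct acct_state *a, uint64_t cputime_us,
			     uint64_t rss_pages)
{
	uint64_t delta = cputime_us - a->timexpd;  /* like time - acct_timexpd */

	if (!delta)
		return;
	a->rss_mem1 += delta * rss_pages;  /* weight footprint by CPU time */
	a->timexpd = cputime_us;           /* advance the watermark */
}

int main(void)
{
	struct acct_state a = { 0, 0 };

	update_integrals(&a, 1000, 50);    /* 1000 us at 50 pages */
	update_integrals(&a, 3000, 80);    /* 2000 more us at 80 pages */
	printf("rss integral: %llu page-usecs\n",
	       (unsigned long long)a.rss_mem1);
	return 0;
}
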
+ 1 - 0
kernel/watchdog.c

@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/smpboot.h>
+#include <linux/sched/rt.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>

+ 1 - 0
mm/mmap.c

@@ -32,6 +32,7 @@
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
 #include <linux/rbtree_augmented.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>

+ 1 - 0
mm/mremap.c

@@ -19,6 +19,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/mmu_notifier.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>

+ 1 - 0
mm/nommu.c

@@ -29,6 +29,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/audit.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/tlb.h>

+ 1 - 0
mm/page-writeback.c

@@ -35,6 +35,7 @@
 #include <linux/buffer_head.h> /* __set_page_dirty_buffers */
 #include <linux/pagevec.h>
 #include <linux/timer.h>
+#include <linux/sched/rt.h>
 #include <trace/events/writeback.h>
 
 /*

+ 1 - 0
mm/page_alloc.c

@@ -58,6 +58,7 @@
 #include <linux/prefetch.h>
 #include <linux/migrate.h>
 #include <linux/page-debug-flags.h>
+#include <linux/sched/rt.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>