
Merge branches 'doc.2017.08.17a', 'fixes.2017.08.17a', 'hotplug.2017.07.25b', 'misc.2017.08.17a', 'spin_unlock_wait_no.2017.08.17a', 'srcu.2017.07.27c' and 'torture.2017.07.24c' into HEAD

doc.2017.08.17a: Documentation updates.
fixes.2017.08.17a: RCU fixes.
hotplug.2017.07.25b: CPU-hotplug updates.
misc.2017.08.17a: Miscellaneous fixes outside of RCU (give or take conflicts).
spin_unlock_wait_no.2017.08.17a: Remove spin_unlock_wait().
srcu.2017.07.27c: SRCU updates.
torture.2017.07.24c: Torture-test updates.
Paul E. McKenney, August 2017
75 changed files with 898 additions and 1214 deletions
  1. MAINTAINERS (+1 -1)
  2. arch/alpha/include/asm/spinlock.h (+0 -5)
  3. arch/arc/include/asm/spinlock.h (+0 -5)
  4. arch/arm/include/asm/spinlock.h (+0 -16)
  5. arch/arm64/include/asm/spinlock.h (+5 -53)
  6. arch/arm64/kernel/process.c (+2 -0)
  7. arch/blackfin/include/asm/spinlock.h (+0 -5)
  8. arch/blackfin/kernel/module.c (+21 -18)
  9. arch/hexagon/include/asm/spinlock.h (+0 -5)
  10. arch/ia64/include/asm/spinlock.h (+0 -21)
  11. arch/m32r/include/asm/spinlock.h (+0 -5)
  12. arch/metag/include/asm/spinlock.h (+0 -5)
  13. arch/mn10300/include/asm/spinlock.h (+0 -5)
  14. arch/parisc/include/asm/spinlock.h (+0 -7)
  15. arch/powerpc/include/asm/spinlock.h (+0 -33)
  16. arch/s390/include/asm/spinlock.h (+0 -7)
  17. arch/sh/include/asm/spinlock-cas.h (+0 -5)
  18. arch/sh/include/asm/spinlock-llsc.h (+0 -5)
  19. arch/sparc/include/asm/spinlock_32.h (+0 -5)
  20. arch/tile/include/asm/spinlock_32.h (+0 -2)
  21. arch/tile/include/asm/spinlock_64.h (+0 -2)
  22. arch/tile/lib/spinlock_32.c (+0 -23)
  23. arch/tile/lib/spinlock_64.c (+0 -22)
  24. arch/xtensa/include/asm/spinlock.h (+0 -5)
  25. drivers/ata/libata-eh.c (+3 -5)
  26. include/asm-generic/qspinlock.h (+0 -14)
  27. include/linux/init_task.h (+1 -7)
  28. include/linux/rcupdate.h (+10 -5)
  29. include/linux/rcutiny.h (+3 -5)
  30. include/linux/sched.h (+3 -2)
  31. include/linux/spinlock.h (+0 -31)
  32. include/linux/spinlock_up.h (+0 -6)
  33. include/linux/srcutiny.h (+13 -0)
  34. include/linux/srcutree.h (+1 -2)
  35. include/linux/swait.h (+55 -0)
  36. include/trace/events/rcu.h (+5 -2)
  37. include/uapi/linux/membarrier.h (+21 -2)
  38. ipc/sem.c (+2 -1)
  39. kernel/Makefile (+0 -1)
  40. kernel/cpu.c (+1 -0)
  41. kernel/exit.c (+4 -6)
  42. kernel/locking/qspinlock.c (+0 -117)
  43. kernel/membarrier.c (+0 -70)
  44. kernel/rcu/Kconfig (+1 -2)
  45. kernel/rcu/rcu.h (+20 -108)
  46. kernel/rcu/rcu_segcblist.c (+24 -84)
  47. kernel/rcu/rcu_segcblist.h (+2 -26)
  48. kernel/rcu/rcuperf.c (+1 -16)
  49. kernel/rcu/rcutorture.c (+24 -59)
  50. kernel/rcu/srcutiny.c (+8 -0)
  51. kernel/rcu/srcutree.c (+48 -2)
  52. kernel/rcu/tiny.c (+0 -2)
  53. kernel/rcu/tiny_plugin.h (+0 -47)
  54. kernel/rcu/tree.c (+85 -128)
  55. kernel/rcu/tree.h (+3 -12)
  56. kernel/rcu/tree_exp.h (+1 -1)
  57. kernel/rcu/tree_plugin.h (+140 -98)
  58. kernel/rcu/update.c (+17 -1)
  59. kernel/sched/Makefile (+1 -0)
  60. kernel/sched/completion.c (+4 -7)
  61. kernel/sched/core.c (+34 -4)
  62. kernel/sched/membarrier.c (+152 -0)
  63. kernel/task_work.c (+2 -6)
  64. kernel/torture.c (+1 -1)
  65. net/netfilter/nf_conntrack_core.c (+29 -23)
  66. tools/testing/selftests/rcutorture/bin/config_override.sh (+61 -0)
  67. tools/testing/selftests/rcutorture/bin/functions.sh (+26 -1)
  68. tools/testing/selftests/rcutorture/bin/kvm-build.sh (+2 -9)
  69. tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh (+29 -29)
  70. tools/testing/selftests/rcutorture/bin/kvm.sh (+27 -7)
  71. tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot (+1 -1)
  72. tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot (+0 -1)
  73. tools/testing/selftests/rcutorture/configs/rcu/SRCU-u (+2 -1)
  74. tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot (+1 -1)
  75. tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt (+1 -1)
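
Note on the spin_unlock_wait() removal (spin_unlock_wait_no.2017.08.17a), as seen in the ipc/sem.c, kernel/exit.c and drivers/ata/libata-eh.c diffs below: each caller is converted to a full acquire/release pair. A minimal before/after sketch, using the ipc/sem.c lock as the example; it is illustrative only, not verbatim from any one hunk:

	/* Before: wait until any current holder of ulp->lock releases it. */
	spin_unlock_wait(&ulp->lock);

	/*
	 * After: acquire and immediately release the lock.  Per the removed
	 * kernel-doc in include/linux/spinlock.h, spin_unlock_wait() was
	 * semantically equivalent to this pair, so the conversion preserves
	 * the "interpose between critical sections" ordering guarantee.
	 */
	spin_lock(&ulp->lock);
	spin_unlock(&ulp->lock);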

+ 1 - 1
MAINTAINERS

@@ -8621,7 +8621,7 @@ M:	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 M:	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
 L:	linux-kernel@vger.kernel.org
 S:	Supported
-F:	kernel/membarrier.c
+F:	kernel/sched/membarrier.c
 F:	include/uapi/linux/membarrier.h
 
 MEMORY MANAGEMENT

+ 0 - 5
arch/alpha/include/asm/spinlock.h

@@ -16,11 +16,6 @@
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 #define arch_spin_is_locked(x)	((x)->lock != 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
         return lock.lock == 0;

+ 0 - 5
arch/arc/include/asm/spinlock.h

@@ -16,11 +16,6 @@
 #define arch_spin_is_locked(x)	((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
 #define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->slock, !VAL);
-}
-
 #ifdef CONFIG_ARC_HAS_LLSC
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)

+ 0 - 16
arch/arm/include/asm/spinlock.h

@@ -52,22 +52,6 @@ static inline void dsb_sev(void)
  * memory.
  */
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	u16 owner = READ_ONCE(lock->tickets.owner);
-
-	for (;;) {
-		arch_spinlock_t tmp = READ_ONCE(*lock);
-
-		if (tmp.tickets.owner == tmp.tickets.next ||
-		    tmp.tickets.owner != owner)
-			break;
-
-		wfe();
-	}
-	smp_acquire__after_ctrl_dep();
-}
-
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)

+ 5 - 53
arch/arm64/include/asm/spinlock.h

@@ -26,58 +26,6 @@
  * The memory barriers are implicit with the load-acquire and store-release
  * instructions.
  */
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	unsigned int tmp;
-	arch_spinlock_t lockval;
-	u32 owner;
-
-	/*
-	 * Ensure prior spin_lock operations to other locks have completed
-	 * on this CPU before we test whether "lock" is locked.
-	 */
-	smp_mb();
-	owner = READ_ONCE(lock->owner) << 16;
-
-	asm volatile(
-"	sevl\n"
-"1:	wfe\n"
-"2:	ldaxr	%w0, %2\n"
-	/* Is the lock free? */
-"	eor	%w1, %w0, %w0, ror #16\n"
-"	cbz	%w1, 3f\n"
-	/* Lock taken -- has there been a subsequent unlock->lock transition? */
-"	eor	%w1, %w3, %w0, lsl #16\n"
-"	cbz	%w1, 1b\n"
-	/*
-	 * The owner has been updated, so there was an unlock->lock
-	 * transition that we missed. That means we can rely on the
-	 * store-release of the unlock operation paired with the
-	 * load-acquire of the lock operation to publish any of our
-	 * previous stores to the new lock owner and therefore don't
-	 * need to bother with the writeback below.
-	 */
-"	b	4f\n"
-"3:\n"
-	/*
-	 * Serialise against any concurrent lockers by writing back the
-	 * unlocked lock value
-	 */
-	ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-"	stxr	%w1, %w0, %2\n"
-	__nops(2),
-	/* LSE atomics */
-"	mov	%w1, %w0\n"
-"	cas	%w0, %w0, %2\n"
-"	eor	%w1, %w1, %w0\n")
-	/* Somebody else wrote to the lock, GOTO 10 and reload the value */
-"	cbnz	%w1, 2b\n"
-"4:"
-	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
-	: "r" (owner)
-	: "memory");
-}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
@@ -176,7 +124,11 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	smp_mb(); /* See arch_spin_unlock_wait */
+	/*
+	 * Ensure prior spin_lock operations to other locks have completed
+	 * on this CPU before we test whether "lock" is locked.
+	 */
+	smp_mb(); /* ^^^ */
 	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 

+ 2 - 0
arch/arm64/kernel/process.c

@@ -360,6 +360,8 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
 	 * the thread migrates to a different CPU.
+	 * This full barrier is also required by the membarrier system
+	 * call.
 	 */
 	dsb(ish);
 

+ 0 - 5
arch/blackfin/include/asm/spinlock.h

@@ -48,11 +48,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	__raw_spin_unlock_asm(&lock->lock);
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 static inline int arch_read_can_lock(arch_rwlock_t *rw)
 {
 	return __raw_uncached_fetch_asm(&rw->lock) > 0;

+ 21 - 18
arch/blackfin/kernel/module.c

@@ -4,8 +4,6 @@
  * Licensed under the GPL-2 or later
  */
 
-#define pr_fmt(fmt) "module %s: " fmt, mod->name
-
 #include <linux/moduleloader.h>
 #include <linux/elf.h>
 #include <linux/vmalloc.h>
@@ -16,6 +14,11 @@
 #include <asm/cacheflush.h>
 #include <linux/uaccess.h>
 
+#define mod_err(mod, fmt, ...)						\
+	pr_err("module %s: " fmt, (mod)->name, ##__VA_ARGS__)
+#define mod_debug(mod, fmt, ...)					\
+	pr_debug("module %s: " fmt, (mod)->name, ##__VA_ARGS__)
+
 /* Transfer the section to the L1 memory */
 int
 module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
@@ -44,7 +47,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_inst_sram_alloc(s->sh_size);
 			mod->arch.text_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 inst memory allocation failed\n");
+				mod_err(mod, "L1 inst memory allocation failed\n");
 				return -1;
 			}
 			dma_memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -56,7 +59,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_data_sram_alloc(s->sh_size);
 			mod->arch.data_a_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 data memory allocation failed\n");
+				mod_err(mod, "L1 data memory allocation failed\n");
 				return -1;
 			}
 			memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -68,7 +71,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_data_sram_zalloc(s->sh_size);
 			mod->arch.bss_a_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 data memory allocation failed\n");
+				mod_err(mod, "L1 data memory allocation failed\n");
 				return -1;
 			}
 
@@ -77,7 +80,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_data_B_sram_alloc(s->sh_size);
 			mod->arch.data_b_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 data memory allocation failed\n");
+				mod_err(mod, "L1 data memory allocation failed\n");
 				return -1;
 			}
 			memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -87,7 +90,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_data_B_sram_alloc(s->sh_size);
 			mod->arch.bss_b_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 data memory allocation failed\n");
+				mod_err(mod, "L1 data memory allocation failed\n");
 				return -1;
 			}
 			memset(dest, 0, s->sh_size);
@@ -99,7 +102,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l2_sram_alloc(s->sh_size);
 			mod->arch.text_l2 = dest;
 			if (dest == NULL) {
-				pr_err("L2 SRAM allocation failed\n");
+				mod_err(mod, "L2 SRAM allocation failed\n");
 				return -1;
 			}
 			memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -111,7 +114,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l2_sram_alloc(s->sh_size);
 			mod->arch.data_l2 = dest;
 			if (dest == NULL) {
-				pr_err("L2 SRAM allocation failed\n");
+				mod_err(mod, "L2 SRAM allocation failed\n");
 				return -1;
 			}
 			memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -123,7 +126,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l2_sram_zalloc(s->sh_size);
 			mod->arch.bss_l2 = dest;
 			if (dest == NULL) {
-				pr_err("L2 SRAM allocation failed\n");
+				mod_err(mod, "L2 SRAM allocation failed\n");
 				return -1;
 			}
 
@@ -157,8 +160,8 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 	Elf32_Sym *sym;
 	unsigned long location, value, size;
 
-	pr_debug("applying relocate section %u to %u\n",
-		relsec, sechdrs[relsec].sh_info);
+	mod_debug(mod, "applying relocate section %u to %u\n",
+		  relsec, sechdrs[relsec].sh_info);
 
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
 		/* This is where to make the change */
@@ -174,14 +177,14 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 
 #ifdef CONFIG_SMP
 		if (location >= COREB_L1_DATA_A_START) {
-			pr_err("cannot relocate in L1: %u (SMP kernel)\n",
+			mod_err(mod, "cannot relocate in L1: %u (SMP kernel)\n",
 				ELF32_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 		}
 #endif
 
-		pr_debug("location is %lx, value is %lx type is %d\n",
-			location, value, ELF32_R_TYPE(rel[i].r_info));
+		mod_debug(mod, "location is %lx, value is %lx type is %d\n",
+			  location, value, ELF32_R_TYPE(rel[i].r_info));
 
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 
@@ -200,12 +203,12 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 		case R_BFIN_PCREL12_JUMP:
 		case R_BFIN_PCREL12_JUMP_S:
 		case R_BFIN_PCREL10:
-			pr_err("unsupported relocation: %u (no -mlong-calls?)\n",
+			mod_err(mod, "unsupported relocation: %u (no -mlong-calls?)\n",
 				ELF32_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 
 		default:
-			pr_err("unknown relocation: %u\n",
+			mod_err(mod, "unknown relocation: %u\n",
 				ELF32_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 		}
@@ -222,7 +225,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 			isram_memcpy((void *)location, &value, size);
 			break;
 		default:
-			pr_err("invalid relocation for %#lx\n", location);
+			mod_err(mod, "invalid relocation for %#lx\n", location);
 			return -ENOEXEC;
 		}
 	}

+ 0 - 5
arch/hexagon/include/asm/spinlock.h

@@ -179,11 +179,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
  */
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 #define arch_spin_is_locked(x) ((x)->lock != 0)
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)

+ 0 - 21
arch/ia64/include/asm/spinlock.h

@@ -76,22 +76,6 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
 }
 
-static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	int	*p = (int *)&lock->lock, ticket;
-
-	ia64_invala();
-
-	for (;;) {
-		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory");
-		if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
-			return;
-		cpu_relax();
-	}
-
-	smp_acquire__after_ctrl_dep();
-}
-
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
@@ -143,11 +127,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 	arch_spin_lock(lock);
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	__ticket_spin_unlock_wait(lock);
-}
-
 #define arch_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
 #define arch_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
 

+ 0 - 5
arch/m32r/include/asm/spinlock.h

@@ -30,11 +30,6 @@
 #define arch_spin_is_locked(x)		(*(volatile int *)(&(x)->slock) <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->slock, VAL > 0);
-}
-
 /**
  * arch_spin_trylock - Try spin lock and return a result
  * @lock: Pointer to the lock variable

+ 0 - 5
arch/metag/include/asm/spinlock.h

@@ -15,11 +15,6 @@
  * locked.
  */
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
 #define	arch_read_lock_flags(lock, flags) arch_read_lock(lock)

+ 0 - 5
arch/mn10300/include/asm/spinlock.h

@@ -26,11 +26,6 @@
 
 #define arch_spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) != 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->slock, !VAL);
-}
-
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	asm volatile(

+ 0 - 7
arch/parisc/include/asm/spinlock.h

@@ -14,13 +14,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x)
 
 #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *x)
-{
-	volatile unsigned int *a = __ldcw_align(x);
-
-	smp_cond_load_acquire(a, VAL);
-}
-
 static inline void arch_spin_lock_flags(arch_spinlock_t *x,
 					 unsigned long flags)
 {

+ 0 - 33
arch/powerpc/include/asm/spinlock.h

@@ -170,39 +170,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->slock = 0;
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	arch_spinlock_t lock_val;
-
-	smp_mb();
-
-	/*
-	 * Atomically load and store back the lock value (unchanged). This
-	 * ensures that our observation of the lock value is ordered with
-	 * respect to other lock operations.
-	 */
-	__asm__ __volatile__(
-"1:	" PPC_LWARX(%0, 0, %2, 0) "\n"
-"	stwcx. %0, 0, %2\n"
-"	bne- 1b\n"
-	: "=&r" (lock_val), "+m" (*lock)
-	: "r" (lock)
-	: "cr0", "xer");
-
-	if (arch_spin_value_unlocked(lock_val))
-		goto out;
-
-	while (lock->slock) {
-		HMT_low();
-		if (SHARED_PROCESSOR)
-			__spin_yield(lock);
-	}
-	HMT_medium();
-
-out:
-	smp_mb();
-}
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.

+ 0 - 7
arch/s390/include/asm/spinlock.h

@@ -98,13 +98,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
 		: "cc", "memory");
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	while (arch_spin_is_locked(lock))
-		arch_spin_relax(lock);
-	smp_acquire__after_ctrl_dep();
-}
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.

+ 0 - 5
arch/sh/include/asm/spinlock-cas.h

@@ -29,11 +29,6 @@ static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new
 #define arch_spin_is_locked(x)		((x)->lock <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, VAL > 0);
-}
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	while (!__sl_cas(&lock->lock, 1, 0));

+ 0 - 5
arch/sh/include/asm/spinlock-llsc.h

@@ -21,11 +21,6 @@
 #define arch_spin_is_locked(x)		((x)->lock <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, VAL > 0);
-}
-
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
  * on the local processor, one does not.

+ 0 - 5
arch/sparc/include/asm/spinlock_32.h

@@ -14,11 +14,6 @@
 
 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__(

+ 0 - 2
arch/tile/include/asm/spinlock_32.h

@@ -64,8 +64,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->current_ticket = old_ticket + TICKET_QUANTUM;
 }
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock);
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.

+ 0 - 2
arch/tile/include/asm/spinlock_64.h

@@ -58,8 +58,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	__insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT);
 }
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock);
-
 void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val);
 
 /* Grab the "next" ticket number and bump it atomically.

+ 0 - 23
arch/tile/lib/spinlock_32.c

@@ -62,29 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock)
 }
 EXPORT_SYMBOL(arch_spin_trylock);
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	u32 iterations = 0;
-	int curr = READ_ONCE(lock->current_ticket);
-	int next = READ_ONCE(lock->next_ticket);
-
-	/* Return immediately if unlocked. */
-	if (next == curr)
-		return;
-
-	/* Wait until the current locker has released the lock. */
-	do {
-		delay_backoff(iterations++);
-	} while (READ_ONCE(lock->current_ticket) == curr);
-
-	/*
-	 * The TILE architecture doesn't do read speculation; therefore
-	 * a control dependency guarantees a LOAD->{LOAD,STORE} order.
-	 */
-	barrier();
-}
-EXPORT_SYMBOL(arch_spin_unlock_wait);
-
 /*
  * The low byte is always reserved to be the marker for a "tns" operation
  * since the low bit is set to "1" by a tns.  The next seven bits are

+ 0 - 22
arch/tile/lib/spinlock_64.c

@@ -62,28 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock)
 }
 EXPORT_SYMBOL(arch_spin_trylock);
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	u32 iterations = 0;
-	u32 val = READ_ONCE(lock->lock);
-	u32 curr = arch_spin_current(val);
-
-	/* Return immediately if unlocked. */
-	if (arch_spin_next(val) == curr)
-		return;
-
-	/* Wait until the current locker has released the lock. */
-	do {
-		delay_backoff(iterations++);
-	} while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
-
-	/*
-	 * The TILE architecture doesn't do read speculation; therefore
-	 * a control dependency guarantees a LOAD->{LOAD,STORE} order.
-	 */
-	barrier();
-}
-EXPORT_SYMBOL(arch_spin_unlock_wait);
 
 /*
  * If the read lock fails due to a writer, we retry periodically

+ 0 - 5
arch/xtensa/include/asm/spinlock.h

@@ -33,11 +33,6 @@
 
 #define arch_spin_is_locked(x) ((x)->slock != 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->slock, !VAL);
-}
-
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)

+ 3 - 5
drivers/ata/libata-eh.c

@@ -645,12 +645,11 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
 	 * completions are honored.  A scmd is determined to have
 	 * timed out iff its associated qc is active and not failed.
 	 */
+	spin_lock_irqsave(ap->lock, flags);
 	if (ap->ops->error_handler) {
 		struct scsi_cmnd *scmd, *tmp;
 		int nr_timedout = 0;
 
-		spin_lock_irqsave(ap->lock, flags);
-
 		/* This must occur under the ap->lock as we don't want
 		   a polled recovery to race the real interrupt handler
 
@@ -700,12 +699,11 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
 		if (nr_timedout)
 			__ata_port_freeze(ap);
 
-		spin_unlock_irqrestore(ap->lock, flags);
 
 		/* initialize eh_tries */
 		ap->eh_tries = ATA_EH_MAX_TRIES;
-	} else
-		spin_unlock_wait(ap->lock);
+	}
+	spin_unlock_irqrestore(ap->lock, flags);
 
 }
 EXPORT_SYMBOL(ata_scsi_cmd_error_handler);

+ 0 - 14
include/asm-generic/qspinlock.h

@@ -21,17 +21,6 @@
 
 #include <asm-generic/qspinlock_types.h>
 
-/**
- * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock
- * @lock : Pointer to queued spinlock structure
- *
- * There is a very slight possibility of live-lock if the lockers keep coming
- * and the waiter is just unfortunate enough to not see any unlock state.
- */
-#ifndef queued_spin_unlock_wait
-extern void queued_spin_unlock_wait(struct qspinlock *lock);
-#endif
-
 /**
  * queued_spin_is_locked - is the spinlock locked?
  * @lock: Pointer to queued spinlock structure
@@ -41,8 +30,6 @@ extern void queued_spin_unlock_wait(struct qspinlock *lock);
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	/*
-	 * See queued_spin_unlock_wait().
-	 *
 	 * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL
 	 * isn't immediately observable.
 	 */
@@ -135,6 +122,5 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 #define arch_spin_trylock(l)		queued_spin_trylock(l)
 #define arch_spin_unlock(l)		queued_spin_unlock(l)
 #define arch_spin_lock_flags(l, f)	queued_spin_lock(l)
-#define arch_spin_unlock_wait(l)	queued_spin_unlock_wait(l)
 
 #endif /* __ASM_GENERIC_QSPINLOCK_H */

+ 1 - 7
include/linux/init_task.h

@@ -125,18 +125,12 @@ extern struct group_info init_groups;
 #define INIT_IDS
 #endif
 
-#ifdef CONFIG_PREEMPT_RCU
-#define INIT_TASK_RCU_TREE_PREEMPT()					\
-	.rcu_blocked_node = NULL,
-#else
-#define INIT_TASK_RCU_TREE_PREEMPT(tsk)
-#endif
 #ifdef CONFIG_PREEMPT_RCU
 #define INIT_TASK_RCU_PREEMPT(tsk)					\
 	.rcu_read_lock_nesting = 0,					\
 	.rcu_read_unlock_special.s = 0,					\
 	.rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),		\
-	INIT_TASK_RCU_TREE_PREEMPT()
+	.rcu_blocked_node = NULL,
 #else
 #define INIT_TASK_RCU_PREEMPT(tsk)
 #endif

+ 10 - 5
include/linux/rcupdate.h

@@ -58,8 +58,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 void synchronize_sched(void);
-void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
-void synchronize_rcu_tasks(void);
 void rcu_barrier_tasks(void);
 
 #ifdef CONFIG_PREEMPT_RCU
@@ -105,11 +103,13 @@ static inline int rcu_preempt_depth(void)
 
 /* Internal to kernel */
 void rcu_init(void);
+extern int rcu_scheduler_active __read_mostly;
 void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcu_cpu_starting(unsigned int cpu);
+void rcutree_migrate_callbacks(int cpu);
 
 #ifdef CONFIG_RCU_STALL_COMMON
 void rcu_sysrq_start(void);
@@ -164,8 +164,6 @@ static inline void rcu_init_nohz(void) { }
  * macro rather than an inline function to avoid #include hell.
  */
 #ifdef CONFIG_TASKS_RCU
-#define TASKS_RCU(x) x
-extern struct srcu_struct tasks_rcu_exit_srcu;
 #define rcu_note_voluntary_context_switch_lite(t) \
 	do { \
 		if (READ_ONCE((t)->rcu_tasks_holdout)) \
@@ -176,10 +174,17 @@ extern struct srcu_struct tasks_rcu_exit_srcu;
 		rcu_all_qs(); \
 		rcu_note_voluntary_context_switch_lite(t); \
 	} while (0)
+void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
+void synchronize_rcu_tasks(void);
+void exit_tasks_rcu_start(void);
+void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU */
-#define TASKS_RCU(x) do { } while (0)
 #define rcu_note_voluntary_context_switch_lite(t)	do { } while (0)
 #define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
+#define call_rcu_tasks call_rcu_sched
+#define synchronize_rcu_tasks synchronize_sched
+static inline void exit_tasks_rcu_start(void) { }
+static inline void exit_tasks_rcu_finish(void) { }
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
 /**

+ 3 - 5
include/linux/rcutiny.h

@@ -116,13 +116,11 @@ static inline void rcu_irq_exit_irqson(void) { }
 static inline void rcu_irq_enter_irqson(void) { }
 static inline void rcu_irq_exit(void) { }
 static inline void exit_rcu(void) { }
-
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
-extern int rcu_scheduler_active __read_mostly;
+#ifdef CONFIG_SRCU
 void rcu_scheduler_starting(void);
-#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
+#else /* #ifndef CONFIG_SRCU */
 static inline void rcu_scheduler_starting(void) { }
-#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
+#endif /* #else #ifndef CONFIG_SRCU */
 static inline void rcu_end_inkernel_boot(void) { }
 static inline bool rcu_is_watching(void) { return true; }
 

+ 3 - 2
include/linux/sched.h

@@ -589,9 +589,10 @@ struct task_struct {
 
 #ifdef CONFIG_TASKS_RCU
 	unsigned long			rcu_tasks_nvcsw;
-	bool				rcu_tasks_holdout;
-	struct list_head		rcu_tasks_holdout_list;
+	u8				rcu_tasks_holdout;
+	u8				rcu_tasks_idx;
 	int				rcu_tasks_idle_cpu;
+	struct list_head		rcu_tasks_holdout_list;
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
 	struct sched_info		sched_info;

+ 0 - 31
include/linux/spinlock.h

@@ -130,12 +130,6 @@ do {								\
 #define smp_mb__before_spinlock()	smp_wmb()
 #endif
 
-/**
- * raw_spin_unlock_wait - wait until the spinlock gets unlocked
- * @lock: the spinlock in question.
- */
-#define raw_spin_unlock_wait(lock)	arch_spin_unlock_wait(&(lock)->raw_lock)
-
 #ifdef CONFIG_DEBUG_SPINLOCK
  extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
 #define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
@@ -369,31 +363,6 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock)
 	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
 })
 
-/**
- * spin_unlock_wait - Interpose between successive critical sections
- * @lock: the spinlock whose critical sections are to be interposed.
- *
- * Semantically this is equivalent to a spin_lock() immediately
- * followed by a spin_unlock().  However, most architectures have
- * more efficient implementations in which the spin_unlock_wait()
- * cannot block concurrent lock acquisition, and in some cases
- * where spin_unlock_wait() does not write to the lock variable.
- * Nevertheless, spin_unlock_wait() can have high overhead, so if
- * you feel the need to use it, please check to see if there is
- * a better way to get your job done.
- *
- * The ordering guarantees provided by spin_unlock_wait() are:
- *
- * 1.  All accesses preceding the spin_unlock_wait() happen before
- *     any accesses in later critical sections for this same lock.
- * 2.  All accesses following the spin_unlock_wait() happen after
- *     any accesses in earlier critical sections for this same lock.
- */
-static __always_inline void spin_unlock_wait(spinlock_t *lock)
-{
-	raw_spin_unlock_wait(&lock->rlock);
-}
-
 static __always_inline int spin_is_locked(spinlock_t *lock)
 {
 	return raw_spin_is_locked(&lock->rlock);

+ 0 - 6
include/linux/spinlock_up.h

@@ -26,11 +26,6 @@
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define arch_spin_is_locked(x)		((x)->slock == 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->slock, VAL);
-}
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	lock->slock = 0;
@@ -73,7 +68,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 #else /* DEBUG_SPINLOCK */
 #define arch_spin_is_locked(lock)	((void)(lock), 0)
-#define arch_spin_unlock_wait(lock)	do { barrier(); (void)(lock); } while (0)
 /* for sched/core.c and kernel_lock.c: */
 # define arch_spin_lock(lock)		do { barrier(); (void)(lock); } while (0)
 # define arch_spin_lock_flags(lock, flags)	do { barrier(); (void)(lock); } while (0)

+ 13 - 0
include/linux/srcutiny.h

@@ -87,4 +87,17 @@ static inline void srcu_barrier(struct srcu_struct *sp)
 	synchronize_srcu(sp);
 }
 
+/* Defined here to avoid size increase for non-torture kernels. */
+static inline void srcu_torture_stats_print(struct srcu_struct *sp,
+					    char *tt, char *tf)
+{
+	int idx;
+
+	idx = READ_ONCE(sp->srcu_idx) & 0x1;
+	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
+		 tt, tf, idx,
+		 READ_ONCE(sp->srcu_lock_nesting[!idx]),
+		 READ_ONCE(sp->srcu_lock_nesting[idx]));
+}
+
 #endif

+ 1 - 2
include/linux/srcutree.h

@@ -104,8 +104,6 @@ struct srcu_struct {
 #define SRCU_STATE_SCAN1	1
 #define SRCU_STATE_SCAN2	2
 
-void process_srcu(struct work_struct *work);
-
 #define __SRCU_STRUCT_INIT(name)					\
 	{								\
 		.sda = &name##_srcu_data,				\
@@ -141,5 +139,6 @@ void process_srcu(struct work_struct *work);
 
 void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
+void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf);
 
 #endif

+ 55 - 0
include/linux/swait.h

@@ -169,4 +169,59 @@ do {									\
 	__ret;								\
 })
 
+#define __swait_event_idle(wq, condition)				\
+	(void)___swait_event(wq, condition, TASK_IDLE, 0, schedule())
+
+/**
+ * swait_event_idle - wait without system load contribution
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_IDLE) until the @condition evaluates to
+ * true. The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * This function is mostly used when a kthread or workqueue waits for some
+ * condition and doesn't want to contribute to system load. Signals are
+ * ignored.
+ */
+#define swait_event_idle(wq, condition)					\
+do {									\
+	if (condition)							\
+		break;							\
+	__swait_event_idle(wq, condition);				\
+} while (0)
+
+#define __swait_event_idle_timeout(wq, condition, timeout)		\
+	___swait_event(wq, ___wait_cond_timeout(condition),		\
+		       TASK_IDLE, timeout,				\
+		       __ret = schedule_timeout(__ret))
+
+/**
+ * swait_event_idle_timeout - wait up to timeout without load contribution
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout at which we'll give up in jiffies
+ *
+ * The process is put to sleep (TASK_IDLE) until the @condition evaluates to
+ * true. The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * This function is mostly used when a kthread or workqueue waits for some
+ * condition and doesn't want to contribute to system load. Signals are
+ * ignored.
+ *
+ * Returns:
+ * 0 if the @condition evaluated to %false after the @timeout elapsed,
+ * 1 if the @condition evaluated to %true after the @timeout elapsed,
+ * or the remaining jiffies (at least 1) if the @condition evaluated
+ * to %true before the @timeout elapsed.
+ */
+#define swait_event_idle_timeout(wq, condition, timeout)		\
+({									\
+	long __ret = timeout;						\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __swait_event_idle_timeout(wq,			\
+						   condition, timeout);	\
+	__ret;								\
+})
+
 #endif /* _LINUX_SWAIT_H */
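
Usage note (not part of this commit): the new swait_event_idle() lets a kthread sleep in TASK_IDLE so that its wait does not count toward the load average. A hypothetical sketch; the queue, flag and kthread names are illustrative, and the waker is assumed to set the flag and then call swake_up():

	#include <linux/kthread.h>
	#include <linux/swait.h>

	static DECLARE_SWAIT_QUEUE_HEAD(my_swq);
	static bool my_event_pending;

	static int my_kthread(void *unused)
	{
		while (!kthread_should_stop()) {
			/* Sleep without contributing to system load. */
			swait_event_idle(my_swq, READ_ONCE(my_event_pending) ||
					 kthread_should_stop());
			if (READ_ONCE(my_event_pending)) {
				WRITE_ONCE(my_event_pending, false);
				/* ... handle the event ... */
			}
		}
		return 0;
	}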

+ 5 - 2
include/trace/events/rcu.h

@@ -703,6 +703,7 @@ TRACE_EVENT(rcu_batch_end,
  * at the beginning and end of the read, respectively.  Note that the
  * callback address can be NULL.
  */
+#define RCUTORTURENAME_LEN 8
 TRACE_EVENT(rcu_torture_read,
 
 	TP_PROTO(const char *rcutorturename, struct rcu_head *rhp,
@@ -711,7 +712,7 @@ TRACE_EVENT(rcu_torture_read,
 	TP_ARGS(rcutorturename, rhp, secs, c_old, c),
 
 	TP_STRUCT__entry(
-		__field(const char *, rcutorturename)
+		__field(char, rcutorturename[RCUTORTURENAME_LEN])
 		__field(struct rcu_head *, rhp)
 		__field(unsigned long, secs)
 		__field(unsigned long, c_old)
@@ -719,7 +720,9 @@ TRACE_EVENT(rcu_torture_read,
 	),
 
 	TP_fast_assign(
-		__entry->rcutorturename = rcutorturename;
+		strncpy(__entry->rcutorturename, rcutorturename,
+			RCUTORTURENAME_LEN);
+		__entry->rcutorturename[RCUTORTURENAME_LEN - 1] = 0;
 		__entry->rhp = rhp;
 		__entry->secs = secs;
 		__entry->c_old = c_old;

+ 21 - 2
include/uapi/linux/membarrier.h

@@ -40,14 +40,33 @@
  *                          (non-running threads are de facto in such a
  *                          state). This covers threads from all processes
  *                          running on the system. This command returns 0.
+ * @MEMBARRIER_CMD_PRIVATE_EXPEDITED:
+ *                          Execute a memory barrier on each running
+ *                          thread belonging to the same process as the current
+ *                          thread. Upon return from system call, the
+ *                          caller thread is ensured that all its running
+ *                          threads siblings have passed through a state
+ *                          where all memory accesses to user-space
+ *                          addresses match program order between entry
+ *                          to and return from the system call
+ *                          (non-running threads are de facto in such a
+ *                          state). This only covers threads from the
+ *                          same processes as the caller thread. This
+ *                          command returns 0. The "expedited" commands
+ *                          complete faster than the non-expedited ones,
+ *                          they never block, but have the downside of
+ *                          causing extra overhead.
  *
  * Command to be passed to the membarrier system call. The commands need to
  * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
  * the value 0.
  */
 enum membarrier_cmd {
-	MEMBARRIER_CMD_QUERY = 0,
-	MEMBARRIER_CMD_SHARED = (1 << 0),
+	MEMBARRIER_CMD_QUERY			= 0,
+	MEMBARRIER_CMD_SHARED			= (1 << 0),
+	/* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
+	/* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
+	MEMBARRIER_CMD_PRIVATE_EXPEDITED	= (1 << 3),
 };
 
 #endif /* _UAPI_LINUX_MEMBARRIER_H */
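
A hypothetical user-space sketch of the new private expedited command, based only on the UAPI shown above (the wrapper name is illustrative, and later kernels may additionally require a registration command before this one succeeds):

	#include <linux/membarrier.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <stdio.h>

	static int membarrier(int cmd, int flags)
	{
		return syscall(__NR_membarrier, cmd, flags);
	}

	int main(void)
	{
		int mask = membarrier(MEMBARRIER_CMD_QUERY, 0);

		if (mask < 0 || !(mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED)) {
			fprintf(stderr, "private expedited membarrier unavailable\n");
			return 1;
		}
		/*
		 * Barrier against the running threads of this process only;
		 * per the documentation above, the expedited command never
		 * blocks, at the cost of extra overhead.
		 */
		return membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0) ? 1 : 0;
	}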

+ 2 - 1
ipc/sem.c

@@ -2091,7 +2091,8 @@ void exit_sem(struct task_struct *tsk)
 			 * possibility where we exit while freeary() didn't
 			 * finish unlocking sem_undo_list.
 			 */
-			spin_unlock_wait(&ulp->lock);
+			spin_lock(&ulp->lock);
+			spin_unlock(&ulp->lock);
 			rcu_read_unlock();
 			break;
 		}

+ 0 - 1
kernel/Makefile

@@ -108,7 +108,6 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
-obj-$(CONFIG_MEMBARRIER) += membarrier.o
 
 obj-$(CONFIG_HAS_IOMEM) += memremap.o
 

+ 1 - 0
kernel/cpu.c

@@ -650,6 +650,7 @@ static int takedown_cpu(unsigned int cpu)
 	__cpu_die(cpu);
 
 	tick_cleanup_dead_cpu(cpu);
+	rcutree_migrate_callbacks(cpu);
 	return 0;
 }
 

+ 4 - 6
kernel/exit.c

@@ -764,7 +764,6 @@ void __noreturn do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
-	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
@@ -819,7 +818,8 @@ void __noreturn do_exit(long code)
 	 * Ensure that we must observe the pi_state in exit_mm() ->
 	 * mm_release() -> exit_pi_state_list().
 	 */
-	raw_spin_unlock_wait(&tsk->pi_lock);
+	raw_spin_lock_irq(&tsk->pi_lock);
+	raw_spin_unlock_irq(&tsk->pi_lock);
 
 	if (unlikely(in_atomic())) {
 		pr_info("note: %s[%d] exited with preempt_count %d\n",
@@ -881,9 +881,7 @@ void __noreturn do_exit(long code)
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
 
-	TASKS_RCU(preempt_disable());
-	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
-	TASKS_RCU(preempt_enable());
+	exit_tasks_rcu_start();
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
 	mpol_put_task_policy(tsk);
@@ -918,7 +916,7 @@ void __noreturn do_exit(long code)
 	if (tsk->nr_dirtied)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
-	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
+	exit_tasks_rcu_finish();
 
 	do_task_dead();
 }

+ 0 - 117
kernel/locking/qspinlock.c

@@ -268,123 +268,6 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
-/*
- * Various notes on spin_is_locked() and spin_unlock_wait(), which are
- * 'interesting' functions:
- *
- * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE
- * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64,
- * PPC). Also qspinlock has a similar issue per construction, the setting of
- * the locked byte can be unordered acquiring the lock proper.
- *
- * This gets to be 'interesting' in the following cases, where the /should/s
- * end up false because of this issue.
- *
- *
- * CASE 1:
- *
- * So the spin_is_locked() correctness issue comes from something like:
- *
- *   CPU0				CPU1
- *
- *   global_lock();			local_lock(i)
- *     spin_lock(&G)			  spin_lock(&L[i])
- *     for (i)				  if (!spin_is_locked(&G)) {
- *       spin_unlock_wait(&L[i]);	    smp_acquire__after_ctrl_dep();
- *					    return;
- *					  }
- *					  // deal with fail
- *
- * Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such
- * that there is exclusion between the two critical sections.
- *
- * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from
- * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i])
- * /should/ be constrained by the ACQUIRE from spin_lock(&G).
- *
- * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB.
- *
- *
- * CASE 2:
- *
- * For spin_unlock_wait() there is a second correctness issue, namely:
- *
- *   CPU0				CPU1
- *
- *   flag = set;
- *   smp_mb();				spin_lock(&l)
- *   spin_unlock_wait(&l);		if (!flag)
- *					  // add to lockless list
- *					spin_unlock(&l);
- *   // iterate lockless list
- *
- * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0
- * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE
- * semantics etc..)
- *
- * Where flag /should/ be ordered against the locked store of l.
- */
-
-/*
- * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
- * issuing an _unordered_ store to set _Q_LOCKED_VAL.
- *
- * This means that the store can be delayed, but no later than the
- * store-release from the unlock. This means that simply observing
- * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
- *
- * There are two paths that can issue the unordered store:
- *
- *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
- *
- *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
- *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
- *
- * However, in both cases we have other !0 state we've set before to queue
- * ourseves:
- *
- * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
- * load is constrained by that ACQUIRE to not pass before that, and thus must
- * observe the store.
- *
- * For (2) we have a more intersting scenario. We enqueue ourselves using
- * xchg_tail(), which ends up being a RELEASE. This in itself is not
- * sufficient, however that is followed by an smp_cond_acquire() on the same
- * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
- * guarantees we must observe that store.
- *
- * Therefore both cases have other !0 state that is observable before the
- * unordered locked byte store comes through. This means we can use that to
- * wait for the lock store, and then wait for an unlock.
- */
-#ifndef queued_spin_unlock_wait
-void queued_spin_unlock_wait(struct qspinlock *lock)
-{
-	u32 val;
-
-	for (;;) {
-		val = atomic_read(&lock->val);
-
-		if (!val) /* not locked, we're done */
-			goto done;
-
-		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
-			break;
-
-		/* not locked, but pending, wait until we observe the lock */
-		cpu_relax();
-	}
-
-	/* any unlock is good */
-	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
-		cpu_relax();
-
-done:
-	smp_acquire__after_ctrl_dep();
-}
-EXPORT_SYMBOL(queued_spin_unlock_wait);
-#endif
-
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**

+ 0 - 70
kernel/membarrier.c

@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * membarrier system call
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/syscalls.h>
-#include <linux/membarrier.h>
-#include <linux/tick.h>
-
-/*
- * Bitmask made from a "or" of all commands within enum membarrier_cmd,
- * except MEMBARRIER_CMD_QUERY.
- */
-#define MEMBARRIER_CMD_BITMASK	(MEMBARRIER_CMD_SHARED)
-
-/**
- * sys_membarrier - issue memory barriers on a set of threads
- * @cmd:   Takes command values defined in enum membarrier_cmd.
- * @flags: Currently needs to be 0. For future extensions.
- *
- * If this system call is not implemented, -ENOSYS is returned. If the
- * command specified does not exist, or if the command argument is invalid,
- * this system call returns -EINVAL. For a given command, with flags argument
- * set to 0, this system call is guaranteed to always return the same value
- * until reboot.
- *
- * All memory accesses performed in program order from each targeted thread
- * is guaranteed to be ordered with respect to sys_membarrier(). If we use
- * the semantic "barrier()" to represent a compiler barrier forcing memory
- * accesses to be performed in program order across the barrier, and
- * smp_mb() to represent explicit memory barriers forcing full memory
- * ordering across the barrier, we have the following ordering table for
- * each pair of barrier(), sys_membarrier() and smp_mb():
- *
- * The pair ordering is detailed as (O: ordered, X: not ordered):
- *
- *                        barrier()   smp_mb() sys_membarrier()
- *        barrier()          X           X            O
- *        smp_mb()           X           O            O
- *        sys_membarrier()   O           O            O
- */
-SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
-{
-	/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
-	if (tick_nohz_full_enabled())
-		return -ENOSYS;
-	if (unlikely(flags))
-		return -EINVAL;
-	switch (cmd) {
-	case MEMBARRIER_CMD_QUERY:
-		return MEMBARRIER_CMD_BITMASK;
-	case MEMBARRIER_CMD_SHARED:
-		if (num_online_cpus() > 1)
-			synchronize_sched();
-		return 0;
-	default:
-		return -EINVAL;
-	}
-}

+ 1 - 2
kernel/rcu/Kconfig

@@ -69,8 +69,7 @@ config TREE_SRCU
 	  This option selects the full-fledged version of SRCU.
 
 config TASKS_RCU
-	bool
-	default n
+	def_bool PREEMPT
 	select SRCU
 	help
 	  This option enables a task-based RCU implementation that uses

+ 20 - 108
kernel/rcu/rcu.h

@@ -356,22 +356,10 @@ do {									\
 
 #ifdef CONFIG_TINY_RCU
 /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
-static inline bool rcu_gp_is_normal(void)  /* Internal RCU use. */
-{
-	return true;
-}
-static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
-{
-	return false;
-}
-
-static inline void rcu_expedite_gp(void)
-{
-}
-
-static inline void rcu_unexpedite_gp(void)
-{
-}
+static inline bool rcu_gp_is_normal(void) { return true; }
+static inline bool rcu_gp_is_expedited(void) { return false; }
+static inline void rcu_expedite_gp(void) { }
+static inline void rcu_unexpedite_gp(void) { }
 #else /* #ifdef CONFIG_TINY_RCU */
 bool rcu_gp_is_normal(void);     /* Internal RCU use. */
 bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
@@ -419,12 +407,8 @@ static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
 	*gpnum = 0;
 	*completed = 0;
 }
-static inline void rcutorture_record_test_transition(void)
-{
-}
-static inline void rcutorture_record_progress(unsigned long vernum)
-{
-}
+static inline void rcutorture_record_test_transition(void) { }
+static inline void rcutorture_record_progress(unsigned long vernum) { }
 #ifdef CONFIG_RCU_TRACE
 void do_trace_rcu_torture_read(const char *rcutorturename,
 			       struct rcu_head *rhp,
@@ -460,92 +444,20 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
 #endif
 
 #ifdef CONFIG_TINY_RCU
-
-/*
- * Return the number of grace periods started.
- */
-static inline unsigned long rcu_batches_started(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of bottom-half grace periods started.
- */
-static inline unsigned long rcu_batches_started_bh(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of sched grace periods started.
- */
-static inline unsigned long rcu_batches_started_sched(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of grace periods completed.
- */
-static inline unsigned long rcu_batches_completed(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of bottom-half grace periods completed.
- */
-static inline unsigned long rcu_batches_completed_bh(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of sched grace periods completed.
- */
-static inline unsigned long rcu_batches_completed_sched(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of expedited grace periods completed.
- */
-static inline unsigned long rcu_exp_batches_completed(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of expedited sched grace periods completed.
- */
-static inline unsigned long rcu_exp_batches_completed_sched(void)
-{
-	return 0;
-}
-
-static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
-{
-	return 0;
-}
-
-static inline void rcu_force_quiescent_state(void)
-{
-}
-
-static inline void rcu_bh_force_quiescent_state(void)
-{
-}
-
-static inline void rcu_sched_force_quiescent_state(void)
-{
-}
-
-static inline void show_rcu_gp_kthreads(void)
-{
-}
-
+static inline unsigned long rcu_batches_started(void) { return 0; }
+static inline unsigned long rcu_batches_started_bh(void) { return 0; }
+static inline unsigned long rcu_batches_started_sched(void) { return 0; }
+static inline unsigned long rcu_batches_completed(void) { return 0; }
+static inline unsigned long rcu_batches_completed_bh(void) { return 0; }
+static inline unsigned long rcu_batches_completed_sched(void) { return 0; }
+static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
+static inline unsigned long rcu_exp_batches_completed_sched(void) { return 0; }
+static inline unsigned long
+srcu_batches_completed(struct srcu_struct *sp) { return 0; }
+static inline void rcu_force_quiescent_state(void) { }
+static inline void rcu_bh_force_quiescent_state(void) { }
+static inline void rcu_sched_force_quiescent_state(void) { }
+static inline void show_rcu_gp_kthreads(void) { }
 #else /* #ifdef CONFIG_TINY_RCU */
 extern unsigned long rcutorture_testseq;
 extern unsigned long rcutorture_vernum;

+ 24 - 84
kernel/rcu/rcu_segcblist.c

@@ -35,24 +35,6 @@ void rcu_cblist_init(struct rcu_cblist *rclp)
 	rclp->len_lazy = 0;
 }
 
-/*
- * Debug function to actually count the number of callbacks.
- * If the number exceeds the limit specified, return -1.
- */
-long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
-{
-	int cnt = 0;
-	struct rcu_head **rhpp = &rclp->head;
-
-	for (;;) {
-		if (!*rhpp)
-			return cnt;
-		if (++cnt > lim)
-			return -1;
-		rhpp = &(*rhpp)->next;
-	}
-}
-
 /*
  * Dequeue the oldest rcu_head structure from the specified callback
  * list.  This function assumes that the callback is non-lazy, but
@@ -102,17 +84,6 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
 	rsclp->tails[RCU_NEXT_TAIL] = NULL;
 }
 
-/*
- * Is the specified segment of the specified rcu_segcblist structure
- * empty of callbacks?
- */
-bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
-{
-	if (seg == RCU_DONE_TAIL)
-		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
-	return rsclp->tails[seg - 1] == rsclp->tails[seg];
-}
-
 /*
  * Does the specified rcu_segcblist structure contain callbacks that
  * are ready to be invoked?
@@ -133,50 +104,6 @@ bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
 	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
 }
 
-/*
- * Dequeue and return the first ready-to-invoke callback.  If there
- * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
- * to avoid interference.  Does not protect from interference from other
- * CPUs or tasks.
- */
-struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
-{
-	unsigned long flags;
-	int i;
-	struct rcu_head *rhp;
-
-	local_irq_save(flags);
-	if (!rcu_segcblist_ready_cbs(rsclp)) {
-		local_irq_restore(flags);
-		return NULL;
-	}
-	rhp = rsclp->head;
-	BUG_ON(!rhp);
-	rsclp->head = rhp->next;
-	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
-		if (rsclp->tails[i] != &rhp->next)
-			break;
-		rsclp->tails[i] = &rsclp->head;
-	}
-	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
-	WRITE_ONCE(rsclp->len, rsclp->len - 1);
-	local_irq_restore(flags);
-	return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	rsclp->len_lazy--;
-	local_irq_restore(flags);
-}
-
 /*
  * Return a pointer to the first callback in the specified rcu_segcblist
  * structure.  This is useful for diagnostics.
@@ -202,17 +129,6 @@ struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
 	return NULL;
 }
 
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * have not yet been processed beyond having been posted, that is,
- * does it contain callbacks in its last segment?
- */
-bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
-}
-
 /*
  * Enqueue the specified callback onto the specified rcu_segcblist
  * structure, updating accounting as needed.  Note that the ->len
@@ -503,3 +419,27 @@ bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
 			return true;
 	return false;
 }
+
+/*
+ * Merge the source rcu_segcblist structure into the destination
+ * rcu_segcblist structure, then initialize the source.  Any pending
+ * callbacks from the source get to start over.  It is best to
+ * advance and accelerate both the destination and the source
+ * before merging.
+ */
+void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
+			 struct rcu_segcblist *src_rsclp)
+{
+	struct rcu_cblist donecbs;
+	struct rcu_cblist pendcbs;
+
+	rcu_cblist_init(&donecbs);
+	rcu_cblist_init(&pendcbs);
+	rcu_segcblist_extract_count(src_rsclp, &donecbs);
+	rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
+	rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
+	rcu_segcblist_insert_count(dst_rsclp, &donecbs);
+	rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
+	rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
+	rcu_segcblist_init(src_rsclp);
+}

+ 2 - 26
kernel/rcu/rcu_segcblist.h

@@ -31,29 +31,7 @@ static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
 	rclp->len_lazy--;
 }
 
-/*
- * Interim function to return rcu_cblist head pointer.  Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
-{
-	return rclp->head;
-}
-
-/*
- * Interim function to return rcu_cblist head pointer.  Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
-{
-	WARN_ON_ONCE(!rclp->head);
-	return rclp->tail;
-}
-
 void rcu_cblist_init(struct rcu_cblist *rclp);
-long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim);
 struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp);
 
 /*
@@ -134,14 +112,10 @@ static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
 
 void rcu_segcblist_init(struct rcu_segcblist *rsclp);
 void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
-bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg);
 bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
 bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
-struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp);
-void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp);
 struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
 struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp);
-bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp);
 void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
 			   struct rcu_head *rhp, bool lazy);
 bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
@@ -162,3 +136,5 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
 bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
 bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
 				    unsigned long seq);
+void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
+			 struct rcu_segcblist *src_rsclp);

+ 1 - 16
kernel/rcu/rcuperf.c

@@ -317,8 +317,6 @@ static struct rcu_perf_ops sched_ops = {
 	.name		= "sched"
 };
 
-#ifdef CONFIG_TASKS_RCU
-
 /*
  * Definitions for RCU-tasks perf testing.
  */
@@ -346,24 +344,11 @@ static struct rcu_perf_ops tasks_ops = {
 	.name		= "tasks"
 };
 
-#define RCUPERF_TASKS_OPS &tasks_ops,
-
 static bool __maybe_unused torturing_tasks(void)
 {
 	return cur_ops == &tasks_ops;
 }
 
-#else /* #ifdef CONFIG_TASKS_RCU */
-
-#define RCUPERF_TASKS_OPS
-
-static bool __maybe_unused torturing_tasks(void)
-{
-	return false;
-}
-
-#endif /* #else #ifdef CONFIG_TASKS_RCU */
-
 /*
  * If performance tests complete, wait for shutdown to commence.
  */
@@ -658,7 +643,7 @@ rcu_perf_init(void)
 	int firsterr = 0;
 	static struct rcu_perf_ops *perf_ops[] = {
 		&rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops,
-		RCUPERF_TASKS_OPS
+		&tasks_ops,
 	};
 
 	if (!torture_init_begin(perf_type, verbose, &perf_runnable))

+ 24 - 59
kernel/rcu/rcutorture.c

@@ -199,7 +199,8 @@ MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot");
 static u64 notrace rcu_trace_clock_local(void)
 {
 	u64 ts = trace_clock_local();
-	unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
+
+	(void)do_div(ts, NSEC_PER_USEC);
 	return ts;
 }
 #else /* #ifdef CONFIG_RCU_TRACE */
@@ -496,7 +497,7 @@ static struct rcu_torture_ops rcu_busted_ops = {
 	.fqs		= NULL,
 	.stats		= NULL,
 	.irq_capable	= 1,
-	.name		= "rcu_busted"
+	.name		= "busted"
 };
 
 /*
@@ -522,7 +523,7 @@ static void srcu_read_delay(struct torture_random_state *rrsp)
 
 	delay = torture_random(rrsp) %
 		(nrealreaders * 2 * longdelay * uspertick);
-	if (!delay)
+	if (!delay && in_task())
 		schedule_timeout_interruptible(longdelay);
 	else
 		rcu_read_delay(rrsp);
@@ -561,44 +562,7 @@ static void srcu_torture_barrier(void)
 
 static void srcu_torture_stats(void)
 {
-	int __maybe_unused cpu;
-	int idx;
-
-#ifdef CONFIG_TREE_SRCU
-	idx = srcu_ctlp->srcu_idx & 0x1;
-	pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
-		 torture_type, TORTURE_FLAG, idx);
-	for_each_possible_cpu(cpu) {
-		unsigned long l0, l1;
-		unsigned long u0, u1;
-		long c0, c1;
-		struct srcu_data *counts;
-
-		counts = per_cpu_ptr(srcu_ctlp->sda, cpu);
-		u0 = counts->srcu_unlock_count[!idx];
-		u1 = counts->srcu_unlock_count[idx];
-
-		/*
-		 * Make sure that a lock is always counted if the corresponding
-		 * unlock is counted.
-		 */
-		smp_rmb();
-
-		l0 = counts->srcu_lock_count[!idx];
-		l1 = counts->srcu_lock_count[idx];
-
-		c0 = l0 - u0;
-		c1 = l1 - u1;
-		pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
-	}
-	pr_cont("\n");
-#elif defined(CONFIG_TINY_SRCU)
-	idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1;
-	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
-		 torture_type, TORTURE_FLAG, idx,
-		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]),
-		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx]));
-#endif
+	srcu_torture_stats_print(srcu_ctlp, torture_type, TORTURE_FLAG);
 }
 
 static void srcu_torture_synchronize_expedited(void)
@@ -620,6 +584,7 @@ static struct rcu_torture_ops srcu_ops = {
 	.call		= srcu_torture_call,
 	.cb_barrier	= srcu_torture_barrier,
 	.stats		= srcu_torture_stats,
+	.irq_capable	= 1,
 	.name		= "srcu"
 };
 
@@ -652,6 +617,7 @@ static struct rcu_torture_ops srcud_ops = {
 	.call		= srcu_torture_call,
 	.cb_barrier	= srcu_torture_barrier,
 	.stats		= srcu_torture_stats,
+	.irq_capable	= 1,
 	.name		= "srcud"
 };
 
@@ -696,8 +662,6 @@ static struct rcu_torture_ops sched_ops = {
 	.name		= "sched"
 };
 
-#ifdef CONFIG_TASKS_RCU
-
 /*
  * Definitions for RCU-tasks torture testing.
  */
@@ -735,24 +699,11 @@ static struct rcu_torture_ops tasks_ops = {
 	.name		= "tasks"
 };
 
-#define RCUTORTURE_TASKS_OPS &tasks_ops,
-
 static bool __maybe_unused torturing_tasks(void)
 {
 	return cur_ops == &tasks_ops;
 }
 
-#else /* #ifdef CONFIG_TASKS_RCU */
-
-#define RCUTORTURE_TASKS_OPS
-
-static bool __maybe_unused torturing_tasks(void)
-{
-	return false;
-}
-
-#endif /* #else #ifdef CONFIG_TASKS_RCU */
-
 /*
  * RCU torture priority-boost testing.  Runs one real-time thread per
  * CPU for moderate bursts, repeatedly registering RCU callbacks and
@@ -1114,6 +1065,11 @@ rcu_torture_fakewriter(void *arg)
 	return 0;
 }
 
+static void rcu_torture_timer_cb(struct rcu_head *rhp)
+{
+	kfree(rhp);
+}
+
 /*
  * RCU torture reader from timer handler.  Dereferences rcu_torture_current,
  * incrementing the corresponding element of the pipeline array.  The
@@ -1176,6 +1132,14 @@ static void rcu_torture_timer(unsigned long unused)
 	__this_cpu_inc(rcu_torture_batch[completed]);
 	preempt_enable();
 	cur_ops->readunlock(idx);
+
+	/* Test call_rcu() invocation from interrupt handler. */
+	if (cur_ops->call) {
+		struct rcu_head *rhp = kmalloc(sizeof(*rhp), GFP_NOWAIT);
+
+		if (rhp)
+			cur_ops->call(rhp, rcu_torture_timer_cb);
+	}
 }
 
 /*
@@ -1354,11 +1318,12 @@ rcu_torture_stats_print(void)
 		srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
 					&flags, &gpnum, &completed);
 		wtp = READ_ONCE(writer_task);
-		pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n",
+		pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx cpu %d\n",
 			 rcu_torture_writer_state_getname(),
 			 rcu_torture_writer_state,
 			 gpnum, completed, flags,
-			 wtp == NULL ? ~0UL : wtp->state);
+			 wtp == NULL ? ~0UL : wtp->state,
+			 wtp == NULL ? -1 : (int)task_cpu(wtp));
 		show_rcu_gp_kthreads();
 		rcu_ftrace_dump(DUMP_ALL);
 	}
@@ -1749,7 +1714,7 @@ rcu_torture_init(void)
 	int firsterr = 0;
 	static struct rcu_torture_ops *torture_ops[] = {
 		&rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
-		&sched_ops, RCUTORTURE_TASKS_OPS
+		&sched_ops, &tasks_ops,
 	};
 
 	if (!torture_init_begin(torture_type, verbose, &torture_runnable))
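
The rcu_torture_timer() hunk above adds a call_rcu() invocation from interrupt context whose callback, rcu_torture_timer_cb(), simply kfree()s its own rcu_head; a GFP_NOWAIT allocation failure is handled by skipping the test.  Below is a rough userspace model of that self-freeing-callback pattern; the names are made up and the grace period is reduced to an explicit invocation pass.

#include <stdio.h>
#include <stdlib.h>

struct cb {
	struct cb *next;
	void (*func)(struct cb *);
};

static struct cb *pending;

/* Analogue of rcu_torture_timer_cb(): the callback frees its own container. */
static void free_cb(struct cb *c)
{
	free(c);
}

/* Analogue of the call_rcu() added to rcu_torture_timer(). */
static void queue_self_freeing_cb(void)
{
	struct cb *c = malloc(sizeof(*c));	/* GFP_NOWAIT analogue: may fail */

	if (!c)
		return;				/* simply skip on failure */
	c->func = free_cb;
	c->next = pending;
	pending = c;
}

/* Analogue of callback invocation after a grace period has elapsed. */
static void invoke_cbs(void)
{
	struct cb *c, *next;

	for (c = pending, pending = NULL; c; c = next) {
		next = c->next;		/* fetch ->next before the callback frees c */
		c->func(c);
	}
}

int main(void)
{
	queue_self_freeing_cb();
	queue_self_freeing_cb();
	invoke_cbs();
	printf("all queued callbacks invoked and freed\n");
	return 0;
}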

+ 8 - 0
kernel/rcu/srcutiny.c

@@ -33,6 +33,8 @@
 #include "rcu_segcblist.h"
 #include "rcu.h"
 
+int rcu_scheduler_active __read_mostly;
+
 static int init_srcu_struct_fields(struct srcu_struct *sp)
 {
 	sp->srcu_lock_nesting[0] = 0;
@@ -193,3 +195,9 @@ void synchronize_srcu(struct srcu_struct *sp)
 	destroy_rcu_head_on_stack(&rs.head);
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu);
+
+/* Lockdep diagnostics.  */
+void __init rcu_scheduler_starting(void)
+{
+	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
+}

+ 48 - 2
kernel/rcu/srcutree.c

@@ -51,6 +51,7 @@ module_param(counter_wrap_check, ulong, 0444);
 
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
+static void process_srcu(struct work_struct *work);
 
 /*
  * Initialize SRCU combining tree.  Note that statically allocated
@@ -896,6 +897,15 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
 	__call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
 	wait_for_completion(&rcu.completion);
 	destroy_rcu_head_on_stack(&rcu.head);
+
+	/*
+	 * Make sure that later code is ordered after the SRCU grace
+	 * period.  This pairs with the raw_spin_lock_irq_rcu_node()
+	 * in srcu_invoke_callbacks().  Unlike Tree RCU, this is needed
+	 * because the current CPU might have been totally uninvolved with
+	 * (and thus unordered against) that grace period.
+	 */
+	smp_mb();
 }
 
 /**
@@ -1194,7 +1204,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 /*
  * This is the work-queue function that handles SRCU grace periods.
  */
-void process_srcu(struct work_struct *work)
+static void process_srcu(struct work_struct *work)
 {
 	struct srcu_struct *sp;
 
@@ -1203,7 +1213,6 @@ void process_srcu(struct work_struct *work)
 	srcu_advance_state(sp);
 	srcu_reschedule(sp, srcu_get_delay(sp));
 }
-EXPORT_SYMBOL_GPL(process_srcu);
 
 void srcutorture_get_gp_data(enum rcutorture_type test_type,
 			     struct srcu_struct *sp, int *flags,
@@ -1217,6 +1226,43 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
 }
 EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
 
+void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf)
+{
+	int cpu;
+	int idx;
+	unsigned long s0 = 0, s1 = 0;
+
+	idx = sp->srcu_idx & 0x1;
+	pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", tt, tf, idx);
+	for_each_possible_cpu(cpu) {
+		unsigned long l0, l1;
+		unsigned long u0, u1;
+		long c0, c1;
+		struct srcu_data *counts;
+
+		counts = per_cpu_ptr(sp->sda, cpu);
+		u0 = counts->srcu_unlock_count[!idx];
+		u1 = counts->srcu_unlock_count[idx];
+
+		/*
+		 * Make sure that a lock is always counted if the corresponding
+		 * unlock is counted.
+		 */
+		smp_rmb();
+
+		l0 = counts->srcu_lock_count[!idx];
+		l1 = counts->srcu_lock_count[idx];
+
+		c0 = l0 - u0;
+		c1 = l1 - u1;
+		pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
+		s0 += c0;
+		s1 += c1;
+	}
+	pr_cont(" T(%ld,%ld)\n", s0, s1);
+}
+EXPORT_SYMBOL_GPL(srcu_torture_stats_print);
+
 static int __init srcu_bootup_announce(void)
 {
 	pr_info("Hierarchical SRCU implementation.\n");
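
Setting aside the ordering role of the smp_rmb(), the arithmetic in srcu_torture_stats_print() is straightforward: for each CPU and each index, the readers still in flight are the lock count minus the unlock count, and this patch additionally prints the T(s0,s1) totals across CPUs.  A toy aggregation with invented per-CPU counters:

#include <stdio.h>

#define NCPUS 4

int main(void)
{
	/* Hypothetical per-CPU counters, indexed as [cpu][srcu_idx]. */
	unsigned long lock_count[NCPUS][2]   = { {5, 7}, {3, 3}, {9, 2}, {4, 4} };
	unsigned long unlock_count[NCPUS][2] = { {5, 6}, {3, 3}, {8, 2}, {4, 3} };
	long s0 = 0, s1 = 0;
	int idx = 1;		/* the currently active index */
	int cpu;

	printf("Tree SRCU per-CPU(idx=%d):", idx);
	for (cpu = 0; cpu < NCPUS; cpu++) {
		/* Readers still inside their SRCU read-side critical sections. */
		long c0 = (long)(lock_count[cpu][!idx] - unlock_count[cpu][!idx]);
		long c1 = (long)(lock_count[cpu][idx] - unlock_count[cpu][idx]);

		printf(" %d(%ld,%ld)", cpu, c0, c1);
		s0 += c0;
		s1 += c1;
	}
	printf(" T(%ld,%ld)\n", s0, s1);	/* the totals this patch adds */
	return 0;
}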

+ 0 - 2
kernel/rcu/tiny.c

@@ -56,8 +56,6 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = {
 	.curtail	= &rcu_bh_ctrlblk.rcucblist,
 };
 
-#include "tiny_plugin.h"
-
 void rcu_barrier_bh(void)
 {
 	wait_rcu_gp(call_rcu_bh);

+ 0 - 47
kernel/rcu/tiny_plugin.h

@@ -1,47 +0,0 @@
-/*
- * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
- * Internal non-public definitions that provide either classic
- * or preemptible semantics.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright (c) 2010 Linaro
- *
- * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
- */
-
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
-#include <linux/kernel_stat.h>
-
-int rcu_scheduler_active __read_mostly;
-EXPORT_SYMBOL_GPL(rcu_scheduler_active);
-
-/*
- * During boot, we forgive RCU lockdep issues.  After this function is
- * invoked, we start taking RCU lockdep issues seriously.  Note that unlike
- * Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE
- * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage.
- * The reason for this is that Tiny RCU does not need kthreads, so does
- * not have to care about the fact that the scheduler is half-initialized
- * at a certain phase of the boot process.  Unless SRCU is in the mix.
- */
-void __init rcu_scheduler_starting(void)
-{
-	WARN_ON(nr_context_switches() > 0);
-	rcu_scheduler_active = IS_ENABLED(CONFIG_SRCU)
-		? RCU_SCHEDULER_INIT : RCU_SCHEDULER_RUNNING;
-}
-
-#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */

+ 85 - 128
kernel/rcu/tree.c

@@ -97,9 +97,6 @@ struct rcu_state sname##_state = { \
 	.gp_state = RCU_GP_IDLE, \
 	.gpnum = 0UL - 300UL, \
 	.completed = 0UL - 300UL, \
-	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
-	.orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
-	.orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
@@ -843,13 +840,9 @@ static void rcu_eqs_enter(bool user)
  */
 void rcu_idle_enter(void)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_idle_enter() invoked with irqs enabled!!!");
 	rcu_eqs_enter(false);
-	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**
@@ -862,7 +855,8 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
  */
 void rcu_user_enter(void)
 {
-	rcu_eqs_enter(1);
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_user_enter() invoked with irqs enabled!!!");
+	rcu_eqs_enter(true);
 }
 #endif /* CONFIG_NO_HZ_FULL */
 
@@ -955,8 +949,10 @@ static void rcu_eqs_exit(bool user)
 	if (oldval & DYNTICK_TASK_NEST_MASK) {
 		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
 	} else {
+		__this_cpu_inc(disable_rcu_irq_enter);
 		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 		rcu_eqs_exit_common(oldval, user);
+		__this_cpu_dec(disable_rcu_irq_enter);
 	}
 }
 
@@ -979,7 +975,6 @@ void rcu_idle_exit(void)
 	rcu_eqs_exit(false);
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**
@@ -1358,12 +1353,13 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
 	j = jiffies;
 	gpa = READ_ONCE(rsp->gp_activity);
 	if (j - gpa > 2 * HZ) {
-		pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
+		pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
 		       rsp->name, j - gpa,
 		       rsp->gpnum, rsp->completed,
 		       rsp->gp_flags,
 		       gp_state_getname(rsp->gp_state), rsp->gp_state,
-		       rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
+		       rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
+		       rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
 		if (rsp->gp_kthread) {
 			sched_show_task(rsp->gp_kthread);
 			wake_up_process(rsp->gp_kthread);
@@ -2067,8 +2063,8 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 }
 
 /*
- * Helper function for wait_event_interruptible_timeout() wakeup
- * at force-quiescent-state time.
+ * Helper function for swait_event_idle() wakeup at force-quiescent-state
+ * time.
  */
 static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
 {
@@ -2206,9 +2202,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					       READ_ONCE(rsp->gpnum),
 					       TPS("reqwait"));
 			rsp->gp_state = RCU_GP_WAIT_GPS;
-			swait_event_interruptible(rsp->gp_wq,
-						 READ_ONCE(rsp->gp_flags) &
-						 RCU_GP_FLAG_INIT);
+			swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
+						     RCU_GP_FLAG_INIT);
 			rsp->gp_state = RCU_GP_DONE_GPS;
 			/* Locking provides needed memory barrier. */
 			if (rcu_gp_init(rsp))
@@ -2239,7 +2234,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					       READ_ONCE(rsp->gpnum),
 					       TPS("fqswait"));
 			rsp->gp_state = RCU_GP_WAIT_FQS;
-			ret = swait_event_interruptible_timeout(rsp->gp_wq,
+			ret = swait_event_idle_timeout(rsp->gp_wq,
 					rcu_gp_fqs_check_wake(rsp, &gf), j);
 			rsp->gp_state = RCU_GP_DOING_FQS;
 			/* Locking provides needed memory barriers. */
@@ -2409,6 +2404,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 			return;
 		}
 		WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
+		WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
+			     rcu_preempt_blocked_readers_cgp(rnp));
 		rnp->qsmask &= ~mask;
 		trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
 						 mask, rnp->qsmask, rnp->level,
@@ -2562,85 +2559,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 	rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
 }
 
-/*
- * Send the specified CPU's RCU callbacks to the orphanage.  The
- * specified CPU must be offline, and the caller must hold the
- * ->orphan_lock.
- */
-static void
-rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
-			  struct rcu_node *rnp, struct rcu_data *rdp)
-{
-	lockdep_assert_held(&rsp->orphan_lock);
-
-	/* No-CBs CPUs do not have orphanable callbacks. */
-	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
-		return;
-
-	/*
-	 * Orphan the callbacks.  First adjust the counts.  This is safe
-	 * because _rcu_barrier() excludes CPU-hotplug operations, so it
-	 * cannot be running now.  Thus no memory barrier is required.
-	 */
-	rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
-	rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
-
-	/*
-	 * Next, move those callbacks still needing a grace period to
-	 * the orphanage, where some other CPU will pick them up.
-	 * Some of the callbacks might have gone partway through a grace
-	 * period, but that is too bad.  They get to start over because we
-	 * cannot assume that grace periods are synchronized across CPUs.
-	 */
-	rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
-
-	/*
-	 * Then move the ready-to-invoke callbacks to the orphanage,
-	 * where some other CPU will pick them up.  These will not be
-	 * required to pass though another grace period: They are done.
-	 */
-	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
-
-	/* Finally, disallow further callbacks on this CPU.  */
-	rcu_segcblist_disable(&rdp->cblist);
-}
-
-/*
- * Adopt the RCU callbacks from the specified rcu_state structure's
- * orphanage.  The caller must hold the ->orphan_lock.
- */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
-{
-	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
-
-	lockdep_assert_held(&rsp->orphan_lock);
-
-	/* No-CBs CPUs are handled specially. */
-	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
-	    rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
-		return;
-
-	/* Do the accounting first. */
-	rdp->n_cbs_adopted += rsp->orphan_done.len;
-	if (rsp->orphan_done.len_lazy != rsp->orphan_done.len)
-		rcu_idle_count_callbacks_posted();
-	rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
-
-	/*
-	 * We do not need a memory barrier here because the only way we
-	 * can get here if there is an rcu_barrier() in flight is if
-	 * we are the task doing the rcu_barrier().
-	 */
-
-	/* First adopt the ready-to-invoke callbacks, then the done ones. */
-	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
-	WARN_ON_ONCE(rsp->orphan_done.head);
-	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
-	WARN_ON_ONCE(rsp->orphan_pend.head);
-	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
-		     !rcu_segcblist_n_cbs(&rdp->cblist));
-}
-
 /*
  * Trace the fact that this CPU is going offline.
  */
@@ -2704,14 +2622,12 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
 
 /*
  * The CPU has been completely removed, and some other CPU is reporting
- * this fact from process context.  Do the remainder of the cleanup,
- * including orphaning the outgoing CPU's RCU callbacks, and also
- * adopting them.  There can only be one CPU hotplug operation at a time,
- * so no other CPU can be attempting to update rcu_cpu_kthread_task.
+ * this fact from process context.  Do the remainder of the cleanup.
+ * There can only be one CPU hotplug operation at a time, so no need for
+ * explicit locking.
  */
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
-	unsigned long flags;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
@@ -2720,18 +2636,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
 	/* Adjust any no-longer-needed kthreads. */
 	rcu_boost_kthread_setaffinity(rnp, -1);
-
-	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
-	raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
-	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-	rcu_adopt_orphan_cbs(rsp, flags);
-	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
-
-	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
-		  !rcu_segcblist_empty(&rdp->cblist),
-		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
-		  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
-		  rcu_segcblist_first_cb(&rdp->cblist));
 }
 
 /*
@@ -3569,10 +3473,11 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
 	struct rcu_state *rsp = rdp->rsp;
 
 	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
-		_rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence);
+		_rcu_barrier_trace(rsp, TPS("LastCB"), -1,
+				   rsp->barrier_sequence);
 		complete(&rsp->barrier_completion);
 	} else {
-		_rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence);
+		_rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence);
 	}
 }
 
@@ -3584,14 +3489,15 @@ static void rcu_barrier_func(void *type)
 	struct rcu_state *rsp = type;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
-	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
+	_rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence);
 	rdp->barrier_head.func = rcu_barrier_callback;
 	debug_rcu_head_queue(&rdp->barrier_head);
 	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
 		atomic_inc(&rsp->barrier_cpu_count);
 	} else {
 		debug_rcu_head_unqueue(&rdp->barrier_head);
-		_rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence);
+		_rcu_barrier_trace(rsp, TPS("IRQNQ"), -1,
+				   rsp->barrier_sequence);
 	}
 }
 
@@ -3605,14 +3511,15 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	struct rcu_data *rdp;
 	unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);
 
-	_rcu_barrier_trace(rsp, "Begin", -1, s);
+	_rcu_barrier_trace(rsp, TPS("Begin"), -1, s);
 
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
 	mutex_lock(&rsp->barrier_mutex);
 
 	/* Did someone else do our work for us? */
 	if (rcu_seq_done(&rsp->barrier_sequence, s)) {
-		_rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence);
+		_rcu_barrier_trace(rsp, TPS("EarlyExit"), -1,
+				   rsp->barrier_sequence);
 		smp_mb(); /* caller's subsequent code after above check. */
 		mutex_unlock(&rsp->barrier_mutex);
 		return;
@@ -3620,7 +3527,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
 	/* Mark the start of the barrier operation. */
 	rcu_seq_start(&rsp->barrier_sequence);
-	_rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence);
+	_rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence);
 
 	/*
 	 * Initialize the count to one rather than to zero in order to
@@ -3643,10 +3550,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
 		rdp = per_cpu_ptr(rsp->rda, cpu);
 		if (rcu_is_nocb_cpu(cpu)) {
 			if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
-				_rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
+				_rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu,
 						   rsp->barrier_sequence);
 			} else {
-				_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+				_rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu,
 						   rsp->barrier_sequence);
 				smp_mb__before_atomic();
 				atomic_inc(&rsp->barrier_cpu_count);
@@ -3654,11 +3561,11 @@ static void _rcu_barrier(struct rcu_state *rsp)
 					   rcu_barrier_callback, rsp, cpu, 0);
 			}
 		} else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
-			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
+			_rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu,
 					   rsp->barrier_sequence);
 			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
 		} else {
-			_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
+			_rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu,
 					   rsp->barrier_sequence);
 		}
 	}
@@ -3675,7 +3582,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	wait_for_completion(&rsp->barrier_completion);
 
 	/* Mark the end of the barrier operation. */
-	_rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence);
+	_rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence);
 	rcu_seq_end(&rsp->barrier_sequence);
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
@@ -3777,8 +3684,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	 */
 	rnp = rdp->mynode;
 	raw_spin_lock_rcu_node(rnp);		/* irqs already disabled. */
-	if (!rdp->beenonline)
-		WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
 	rdp->beenonline = true;	 /* We have now been online. */
 	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
 	rdp->completed = rnp->completed;
@@ -3882,6 +3787,8 @@ void rcu_cpu_starting(unsigned int cpu)
 {
 	unsigned long flags;
 	unsigned long mask;
+	int nbits;
+	unsigned long oldmask;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	struct rcu_state *rsp;
@@ -3892,9 +3799,15 @@ void rcu_cpu_starting(unsigned int cpu)
 		mask = rdp->grpmask;
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		rnp->qsmaskinitnext |= mask;
+		oldmask = rnp->expmaskinitnext;
 		rnp->expmaskinitnext |= mask;
+		oldmask ^= rnp->expmaskinitnext;
+		nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
+		/* Allow lockless access for expedited grace periods. */
+		smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
+	smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -3937,6 +3850,50 @@ void rcu_report_dead(unsigned int cpu)
 	for_each_rcu_flavor(rsp)
 		rcu_cleanup_dying_idle_cpu(cpu, rsp);
 }
+
+/* Migrate the dead CPU's callbacks to the current CPU. */
+static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_data *my_rdp;
+	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+	if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
+		return;  /* No callbacks to migrate. */
+
+	local_irq_save(flags);
+	my_rdp = this_cpu_ptr(rsp->rda);
+	if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {
+		local_irq_restore(flags);
+		return;
+	}
+	raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
+	rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
+	rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
+	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
+	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
+		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
+	raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
+	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
+		  !rcu_segcblist_empty(&rdp->cblist),
+		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
+		  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
+		  rcu_segcblist_first_cb(&rdp->cblist));
+}
+
+/*
+ * The outgoing CPU has just passed through the dying-idle state,
+ * and we are being invoked from the CPU that was IPIed to continue the
+ * offline operation.  We need to migrate the outgoing CPU's callbacks.
+ */
+void rcutree_migrate_callbacks(int cpu)
+{
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rcu_migrate_callbacks(cpu, rsp);
+}
 #endif
 
 /*
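
One detail worth calling out in the rcu_cpu_starting() hunk above: the number of newly arrived CPUs is computed by XORing the old and new ->expmaskinitnext masks and counting the set bits, and the updated ->ncpus is then published with smp_store_release() so that the smp_load_acquire() added to tree_exp.h below can read it locklessly.  A minimal illustration of the XOR/popcount step, with GCC's __builtin_popcountl standing in for bitmap_weight():

#include <stdio.h>

int main(void)
{
	unsigned long oldmask = 0x0fUL;		/* CPUs 0-3 already accounted for */
	unsigned long mask    = 0x30UL;		/* incoming CPUs 4 and 5 */
	unsigned long newmask = oldmask | mask;
	int nbits;

	/* Bits that differ between old and new are exactly the new arrivals. */
	nbits = __builtin_popcountl(oldmask ^ newmask);
	printf("newly onlined CPUs: %d\n", nbits);	/* prints 2 */
	return 0;
}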

+ 3 - 12
kernel/rcu/tree.h

@@ -219,8 +219,6 @@ struct rcu_data {
 					/* qlen at last check for QS forcing */
 	unsigned long	n_cbs_invoked;	/* count of RCU cbs invoked. */
 	unsigned long	n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
-	unsigned long   n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
-	unsigned long   n_cbs_adopted;  /* RCU cbs adopted from dying CPU */
 	unsigned long	n_force_qs_snap;
 					/* did other CPU force QS recently? */
 	long		blimit;		/* Upper limit on a processed batch */
@@ -268,7 +266,9 @@ struct rcu_data {
 	struct rcu_head **nocb_follower_tail;
 	struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
 	struct task_struct *nocb_kthread;
+	raw_spinlock_t nocb_lock;	/* Guard following pair of fields. */
 	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
+	struct timer_list nocb_timer;	/* Enforce finite deferral. */
 
 	/* The following fields are used by the leader, hence own cacheline. */
 	struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
@@ -350,15 +350,6 @@ struct rcu_state {
 
 	/* End of fields guarded by root rcu_node's lock. */
 
-	raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
-						/* Protect following fields. */
-	struct rcu_cblist orphan_pend;		/* Orphaned callbacks that */
-						/*  need a grace period. */
-	struct rcu_cblist orphan_done;		/* Orphaned callbacks that */
-						/*  are ready to invoke. */
-						/* (Contains counts.) */
-	/* End of fields guarded by orphan_lock. */
-
 	struct mutex barrier_mutex;		/* Guards barrier fields. */
 	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */
 	struct completion barrier_completion;	/* Wake at barrier end. */
@@ -495,7 +486,7 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 			    bool lazy, unsigned long flags);
-static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
+static bool rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
 				      struct rcu_data *rdp,
 				      unsigned long flags);
 static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);

+ 1 - 1
kernel/rcu/tree_exp.h

@@ -73,7 +73,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
 	unsigned long flags;
 	unsigned long mask;
 	unsigned long oldmask;
-	int ncpus = READ_ONCE(rsp->ncpus);
+	int ncpus = smp_load_acquire(&rsp->ncpus); /* Order against locking. */
 	struct rcu_node *rnp;
 	struct rcu_node *rnp_up;
 

+ 140 - 98
kernel/rcu/tree_plugin.h

@@ -180,6 +180,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 	struct task_struct *t = current;
 
 	lockdep_assert_held(&rnp->lock);
+	WARN_ON_ONCE(rdp->mynode != rnp);
+	WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
 
 	/*
 	 * Decide where to queue the newly blocked task.  In theory,
@@ -261,6 +263,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 		rnp->gp_tasks = &t->rcu_node_entry;
 	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
 		rnp->exp_tasks = &t->rcu_node_entry;
+	WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
+		     !(rnp->qsmask & rdp->grpmask));
+	WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
+		     !(rnp->expmask & rdp->grpmask));
 	raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */
 
 	/*
@@ -482,6 +488,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 		rnp = t->rcu_blocked_node;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
 		WARN_ON_ONCE(rnp != t->rcu_blocked_node);
+		WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
 		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
 		empty_exp = sync_rcu_preempt_exp_done(rnp);
 		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -495,10 +502,10 @@ void rcu_read_unlock_special(struct task_struct *t)
 		if (&t->rcu_node_entry == rnp->exp_tasks)
 			rnp->exp_tasks = np;
 		if (IS_ENABLED(CONFIG_RCU_BOOST)) {
-			if (&t->rcu_node_entry == rnp->boost_tasks)
-				rnp->boost_tasks = np;
 			/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
 			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
+			if (&t->rcu_node_entry == rnp->boost_tasks)
+				rnp->boost_tasks = np;
 		}
 
 		/*
@@ -636,10 +643,17 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
  */
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
+	struct task_struct *t;
+
 	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
 	WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
-	if (rcu_preempt_has_tasks(rnp))
+	if (rcu_preempt_has_tasks(rnp)) {
 		rnp->gp_tasks = rnp->blkd_tasks.next;
+		t = container_of(rnp->gp_tasks, struct task_struct,
+				 rcu_node_entry);
+		trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
+						rnp->gpnum, t->pid);
+	}
 	WARN_ON_ONCE(rnp->qsmask);
 }
 
@@ -1788,22 +1802,61 @@ bool rcu_is_nocb_cpu(int cpu)
 }
 
 /*
- * Kick the leader kthread for this NOCB group.
+ * Kick the leader kthread for this NOCB group.  Caller holds ->nocb_lock
+ * and this function releases it.
  */
-static void wake_nocb_leader(struct rcu_data *rdp, bool force)
+static void __wake_nocb_leader(struct rcu_data *rdp, bool force,
+			       unsigned long flags)
+	__releases(rdp->nocb_lock)
 {
 	struct rcu_data *rdp_leader = rdp->nocb_leader;
 
-	if (!READ_ONCE(rdp_leader->nocb_kthread))
+	lockdep_assert_held(&rdp->nocb_lock);
+	if (!READ_ONCE(rdp_leader->nocb_kthread)) {
+		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 		return;
-	if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
+	}
+	if (rdp_leader->nocb_leader_sleep || force) {
 		/* Prior smp_mb__after_atomic() orders against prior enqueue. */
 		WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
+		del_timer(&rdp->nocb_timer);
+		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 		smp_mb(); /* ->nocb_leader_sleep before swake_up(). */
 		swake_up(&rdp_leader->nocb_wq);
+	} else {
+		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 	}
 }
 
+/*
+ * Kick the leader kthread for this NOCB group, but caller has not
+ * acquired locks.
+ */
+static void wake_nocb_leader(struct rcu_data *rdp, bool force)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+	__wake_nocb_leader(rdp, force, flags);
+}
+
+/*
+ * Arrange to wake the leader kthread for this NOCB group at some
+ * future time when it is safe to do so.
+ */
+static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype,
+				   const char *reason)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+	if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
+		mod_timer(&rdp->nocb_timer, jiffies + 1);
+	WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
+	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, reason);
+	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+}
+
 /*
  * Does the specified CPU need an RCU callback for the specified flavor
  * of rcu_barrier()?
@@ -1891,11 +1944,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeEmpty"));
 		} else {
-			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE);
-			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
-			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
-			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
-					    TPS("WakeEmptyIsDeferred"));
+			wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
+					       TPS("WakeEmptyIsDeferred"));
 		}
 		rdp->qlen_last_fqs_check = 0;
 	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
@@ -1905,11 +1955,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeOvf"));
 		} else {
-			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_FORCE);
-			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
-			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
-			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
-					    TPS("WakeOvfIsDeferred"));
+			wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
+					       TPS("WakeOvfIsDeferred"));
 		}
 		rdp->qlen_last_fqs_check = LONG_MAX / 2;
 	} else {
@@ -1961,30 +2008,19 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
  * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
  * not a no-CBs CPU.
  */
-static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
+static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
 						     struct rcu_data *rdp,
 						     unsigned long flags)
 {
-	long ql = rsp->orphan_done.len;
-	long qll = rsp->orphan_done.len_lazy;
-
-	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_nocb_adopt_orphan_cbs() invoked with irqs enabled!!!");
 	if (!rcu_is_nocb_cpu(smp_processor_id()))
-		return false;
-
-	/* First, enqueue the donelist, if any.  This preserves CB ordering. */
-	if (rsp->orphan_done.head) {
-		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done),
-					rcu_cblist_tail(&rsp->orphan_done),
-					ql, qll, flags);
-	}
-	if (rsp->orphan_pend.head) {
-		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend),
-					rcu_cblist_tail(&rsp->orphan_pend),
-					ql, qll, flags);
-	}
-	rcu_cblist_init(&rsp->orphan_done);
-	rcu_cblist_init(&rsp->orphan_pend);
+		return false; /* Not NOCBs CPU, caller must migrate CBs. */
+	__call_rcu_nocb_enqueue(my_rdp, rcu_segcblist_head(&rdp->cblist),
+				rcu_segcblist_tail(&rdp->cblist),
+				rcu_segcblist_n_cbs(&rdp->cblist),
+				rcu_segcblist_n_lazy_cbs(&rdp->cblist), flags);
+	rcu_segcblist_init(&rdp->cblist);
+	rcu_segcblist_disable(&rdp->cblist);
 	return true;
 }
 
@@ -2031,6 +2067,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 static void nocb_leader_wait(struct rcu_data *my_rdp)
 {
 	bool firsttime = true;
+	unsigned long flags;
 	bool gotcbs;
 	struct rcu_data *rdp;
 	struct rcu_head **tail;
@@ -2039,13 +2076,17 @@ wait_again:
 
 	/* Wait for callbacks to appear. */
 	if (!rcu_nocb_poll) {
-		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
+		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep"));
 		swait_event_interruptible(my_rdp->nocb_wq,
 				!READ_ONCE(my_rdp->nocb_leader_sleep));
-		/* Memory barrier handled by smp_mb() calls below and repoll. */
+		raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
+		my_rdp->nocb_leader_sleep = true;
+		WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+		del_timer(&my_rdp->nocb_timer);
+		raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
 	} else if (firsttime) {
 		firsttime = false; /* Don't drown trace log with "Poll"! */
-		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Poll");
+		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Poll"));
 	}
 
 	/*
@@ -2054,7 +2095,7 @@ wait_again:
 	 * nocb_gp_head, where they await a grace period.
 	 */
 	gotcbs = false;
-	smp_mb(); /* wakeup before ->nocb_head reads. */
+	smp_mb(); /* wakeup and _sleep before ->nocb_head reads. */
 	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
 		rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
 		if (!rdp->nocb_gp_head)
@@ -2066,56 +2107,41 @@ wait_again:
 		gotcbs = true;
 	}
 
-	/*
-	 * If there were no callbacks, sleep a bit, rescan after a
-	 * memory barrier, and go retry.
-	 */
+	/* No callbacks?  Sleep a bit if polling, and go retry.  */
 	if (unlikely(!gotcbs)) {
-		if (!rcu_nocb_poll)
-			trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
-					    "WokeEmpty");
 		WARN_ON(signal_pending(current));
-		schedule_timeout_interruptible(1);
-
-		/* Rescan in case we were a victim of memory ordering. */
-		my_rdp->nocb_leader_sleep = true;
-		smp_mb();  /* Ensure _sleep true before scan. */
-		for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower)
-			if (READ_ONCE(rdp->nocb_head)) {
-				/* Found CB, so short-circuit next wait. */
-				my_rdp->nocb_leader_sleep = false;
-				break;
-			}
+		if (rcu_nocb_poll) {
+			schedule_timeout_interruptible(1);
+		} else {
+			trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
+					    TPS("WokeEmpty"));
+		}
 		goto wait_again;
 	}
 
 	/* Wait for one grace period. */
 	rcu_nocb_wait_gp(my_rdp);
 
-	/*
-	 * We left ->nocb_leader_sleep unset to reduce cache thrashing.
-	 * We set it now, but recheck for new callbacks while
-	 * traversing our follower list.
-	 */
-	my_rdp->nocb_leader_sleep = true;
-	smp_mb(); /* Ensure _sleep true before scan of ->nocb_head. */
-
 	/* Each pass through the following loop wakes a follower, if needed. */
 	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
-		if (READ_ONCE(rdp->nocb_head))
+		if (!rcu_nocb_poll &&
+		    READ_ONCE(rdp->nocb_head) &&
+		    READ_ONCE(my_rdp->nocb_leader_sleep)) {
+			raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
 			my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
+			raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
+		}
 		if (!rdp->nocb_gp_head)
 			continue; /* No CBs, so no need to wake follower. */
 
 		/* Append callbacks to follower's "done" list. */
-		tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail);
+		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+		tail = rdp->nocb_follower_tail;
+		rdp->nocb_follower_tail = rdp->nocb_gp_tail;
 		*tail = rdp->nocb_gp_head;
-		smp_mb__after_atomic(); /* Store *tail before wakeup. */
+		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 		if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
-			/*
-			 * List was empty, wake up the follower.
-			 * Memory barriers supplied by atomic_long_add().
-			 */
+			/* List was empty, so wake up the follower.  */
 			swake_up(&rdp->nocb_wq);
 		}
 	}
@@ -2131,28 +2157,16 @@ wait_again:
  */
 static void nocb_follower_wait(struct rcu_data *rdp)
 {
-	bool firsttime = true;
-
 	for (;;) {
-		if (!rcu_nocb_poll) {
-			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
-					    "FollowerSleep");
-			swait_event_interruptible(rdp->nocb_wq,
-						 READ_ONCE(rdp->nocb_follower_head));
-		} else if (firsttime) {
-			/* Don't drown trace log with "Poll"! */
-			firsttime = false;
-			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "Poll");
-		}
+		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep"));
+		swait_event_interruptible(rdp->nocb_wq,
+					 READ_ONCE(rdp->nocb_follower_head));
 		if (smp_load_acquire(&rdp->nocb_follower_head)) {
 			/* ^^^ Ensure CB invocation follows _head test. */
 			return;
 		}
-		if (!rcu_nocb_poll)
-			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
-					    "WokeEmpty");
 		WARN_ON(signal_pending(current));
-		schedule_timeout_interruptible(1);
+		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeEmpty"));
 	}
 }
 
@@ -2165,6 +2179,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
 static int rcu_nocb_kthread(void *arg)
 {
 	int c, cl;
+	unsigned long flags;
 	struct rcu_head *list;
 	struct rcu_head *next;
 	struct rcu_head **tail;
@@ -2179,11 +2194,14 @@ static int rcu_nocb_kthread(void *arg)
 			nocb_follower_wait(rdp);
 
 		/* Pull the ready-to-invoke callbacks onto local list. */
-		list = READ_ONCE(rdp->nocb_follower_head);
+		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+		list = rdp->nocb_follower_head;
+		rdp->nocb_follower_head = NULL;
+		tail = rdp->nocb_follower_tail;
+		rdp->nocb_follower_tail = &rdp->nocb_follower_head;
+		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 		BUG_ON(!list);
-		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
-		WRITE_ONCE(rdp->nocb_follower_head, NULL);
-		tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
+		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeNonEmpty"));
 
 		/* Each pass through the following loop invokes a callback. */
 		trace_rcu_batch_start(rdp->rsp->name,
@@ -2226,18 +2244,39 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
 }
 
 /* Do a deferred wakeup of rcu_nocb_kthread(). */
-static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
 {
+	unsigned long flags;
 	int ndw;
 
-	if (!rcu_nocb_need_deferred_wakeup(rdp))
+	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+	if (!rcu_nocb_need_deferred_wakeup(rdp)) {
+		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 		return;
+	}
 	ndw = READ_ONCE(rdp->nocb_defer_wakeup);
 	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
-	wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE);
+	__wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
 	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
 }
 
+/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
+static void do_nocb_deferred_wakeup_timer(unsigned long x)
+{
+	do_nocb_deferred_wakeup_common((struct rcu_data *)x);
+}
+
+/*
+ * Do a deferred wakeup of rcu_nocb_kthread() from fastpath.
+ * This means we do an inexact common-case check.  Note that if
+ * we miss, ->nocb_timer will eventually clean things up.
+ */
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+	if (rcu_nocb_need_deferred_wakeup(rdp))
+		do_nocb_deferred_wakeup_common(rdp);
+}
+
 void __init rcu_init_nohz(void)
 {
 	int cpu;
@@ -2287,6 +2326,9 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 	rdp->nocb_tail = &rdp->nocb_head;
 	init_swait_queue_head(&rdp->nocb_wq);
 	rdp->nocb_follower_tail = &rdp->nocb_follower_head;
+	raw_spin_lock_init(&rdp->nocb_lock);
+	setup_timer(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer,
+		    (unsigned long)rdp);
 }
 
 /*
@@ -2459,7 +2501,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 	return false;
 }
 
-static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
+static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
 						     struct rcu_data *rdp,
 						     unsigned long flags)
 {
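
The NOCB rework above replaces the open-coded deferred-wakeup handling with a ->nocb_lock plus a ->nocb_timer backstop: the fast path may still perform the deferred wakeup itself, but if it never gets the chance, the timer fires about a jiffy later and does it (see wake_nocb_leader_defer() and do_nocb_deferred_wakeup_timer()).  Below is a loose userspace analogue of the backstop idea using a POSIX one-shot timer; every name in it is illustrative rather than a kernel or rcutorture interface.  On older glibc the POSIX timer functions may need linking with -lrt.

#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Hypothetical deferred-wakeup flag, standing in for ->nocb_defer_wakeup. */
static volatile sig_atomic_t defer_pending;

/* Analogue of do_nocb_deferred_wakeup_timer(). */
static void deferred_wakeup(union sigval sv)
{
	(void)sv;
	if (defer_pending) {
		defer_pending = 0;
		printf("backstop timer performed the deferred wakeup\n");
	}
}

int main(void)
{
	timer_t timerid;
	struct sigevent sev = { 0 };
	struct itimerspec its = { 0 };

	sev.sigev_notify = SIGEV_THREAD;
	sev.sigev_notify_function = deferred_wakeup;
	timer_create(CLOCK_MONOTONIC, &sev, &timerid);

	/* The fast path would normally clear defer_pending itself; if it
	 * never runs, the timer guarantees the wakeup happens anyway. */
	defer_pending = 1;
	its.it_value.tv_nsec = 4 * 1000 * 1000;	/* roughly one 250HZ jiffy */
	timer_settime(timerid, 0, &its, NULL);

	sleep(1);
	timer_delete(timerid);
	return 0;
}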

+ 17 - 1
kernel/rcu/update.c

@@ -568,7 +568,7 @@ static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
 static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
 
 /* Track exiting tasks in order to allow them to be waited for. */
-DEFINE_SRCU(tasks_rcu_exit_srcu);
+DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
 
 /* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
 #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
@@ -875,6 +875,22 @@ static void rcu_spawn_tasks_kthread(void)
 	mutex_unlock(&rcu_tasks_kthread_mutex);
 }
 
+/* Do the srcu_read_lock() for the above synchronize_srcu().  */
+void exit_tasks_rcu_start(void)
+{
+	preempt_disable();
+	current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
+	preempt_enable();
+}
+
+/* Do the srcu_read_unlock() for the above synchronize_srcu().  */
+void exit_tasks_rcu_finish(void)
+{
+	preempt_disable();
+	__srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
+	preempt_enable();
+}
+
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
 #ifndef CONFIG_TINY_RCU

+ 1 - 0
kernel/sched/Makefile

@@ -25,3 +25,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq.o
 obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
+obj-$(CONFIG_MEMBARRIER) += membarrier.o

+ 4 - 7
kernel/sched/completion.c

@@ -300,6 +300,8 @@ EXPORT_SYMBOL(try_wait_for_completion);
  */
 bool completion_done(struct completion *x)
 {
+	unsigned long flags;
+
 	if (!READ_ONCE(x->done))
 		return false;
 
@@ -307,14 +309,9 @@ bool completion_done(struct completion *x)
 	 * If ->done, we need to wait for complete() to release ->wait.lock
 	 * otherwise we can end up freeing the completion before complete()
 	 * is done referencing it.
-	 *
-	 * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
-	 * the loads of ->done and ->wait.lock such that we cannot observe
-	 * the lock before complete() acquires it while observing the ->done
-	 * after it's acquired the lock.
 	 */
-	smp_rmb();
-	spin_unlock_wait(&x->wait.lock);
+	spin_lock_irqsave(&x->wait.lock, flags);
+	spin_unlock_irqrestore(&x->wait.lock, flags);
 	return true;
 }
 EXPORT_SYMBOL(completion_done);
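
The completion_done() change above trades the smp_rmb()/spin_unlock_wait() combination for a plain lock/unlock of ->wait.lock: once ->done has been observed, briefly acquiring the lock guarantees that any in-flight complete() has finished referencing the structure before the caller is told it may go away.  Here is a small pthread sketch of that wait-out-the-critical-section idiom; the structure and function names are invented for illustration.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct completion_like {
	pthread_mutex_t lock;
	bool done;
};

/* Analogue of complete(): set ->done and keep using the object under the lock. */
static void *completer(void *arg)
{
	struct completion_like *x = arg;

	pthread_mutex_lock(&x->lock);
	__atomic_store_n(&x->done, true, __ATOMIC_RELEASE);
	/* ... the completer may still reference *x here ... */
	pthread_mutex_unlock(&x->lock);
	return NULL;
}

int main(void)
{
	struct completion_like obj = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.done = false,
	};
	pthread_t tid;

	pthread_create(&tid, NULL, completer, &obj);

	/* Analogue of the patched completion_done(). */
	while (!__atomic_load_n(&obj.done, __ATOMIC_ACQUIRE))
		;	/* spin until ->done is observed */
	/*
	 * ->done is set, but the completer may still be inside its
	 * critical section.  Acquiring and immediately releasing the
	 * lock waits that out, exactly as the lock/unlock pair on
	 * ->wait.lock does in the hunk above.
	 */
	pthread_mutex_lock(&obj.lock);
	pthread_mutex_unlock(&obj.lock);
	printf("safe to let the caller free the completion now\n");

	pthread_join(tid, NULL);
	return 0;
}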

+ 34 - 4
kernel/sched/core.c

@@ -951,8 +951,13 @@ struct migration_arg {
 static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
 				 struct task_struct *p, int dest_cpu)
 {
-	if (unlikely(!cpu_active(dest_cpu)))
-		return rq;
+	if (p->flags & PF_KTHREAD) {
+		if (unlikely(!cpu_online(dest_cpu)))
+			return rq;
+	} else {
+		if (unlikely(!cpu_active(dest_cpu)))
+			return rq;
+	}
 
 	/* Affinity changed (again). */
 	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
@@ -2635,6 +2640,16 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	prev_state = prev->state;
 	vtime_task_switch(prev);
 	perf_event_task_sched_in(prev, current);
+	/*
+	 * The membarrier system call requires a full memory barrier
+	 * after storing to rq->curr, before going back to user-space.
+	 *
+	 * TODO: This smp_mb__after_unlock_lock can go away if PPC ends
+	 * up adding a full barrier to switch_mm(), or we should figure
+	 * out if a smp_mb__after_unlock_lock is really the proper API
+	 * to use.
+	 */
+	smp_mb__after_unlock_lock();
 	finish_lock_switch(rq, prev);
 	finish_arch_post_lock_switch();
 
@@ -3324,6 +3339,21 @@ static void __sched notrace __schedule(bool preempt)
 	if (likely(prev != next)) {
 		rq->nr_switches++;
 		rq->curr = next;
+		/*
+		 * The membarrier system call requires each architecture
+		 * to have a full memory barrier after updating
+		 * rq->curr, before returning to user-space. For TSO
+		 * (e.g. x86), the architecture must provide its own
+		 * barrier in switch_mm(). For weakly ordered machines
+		 * for which spin_unlock() acts as a full memory
+		 * barrier, finish_lock_switch() in common code takes
+		 * care of this barrier. For weakly ordered machines for
+		 * which spin_unlock() acts as a RELEASE barrier (only
+		 * arm64 and PowerPC), arm64 has a full barrier in
+		 * switch_to(), and PowerPC has
+		 * smp_mb__after_unlock_lock() before
+		 * finish_lock_switch().
+		 */
 		++*switch_count;
 
 		trace_sched_switch(preempt, prev, next);
@@ -3352,8 +3382,8 @@ void __noreturn do_task_dead(void)
 	 * To avoid it, we have to wait for releasing tsk->pi_lock which
 	 * is held by try_to_wake_up()
 	 */
-	smp_mb();
-	raw_spin_unlock_wait(&current->pi_lock);
+	raw_spin_lock_irq(&current->pi_lock);
+	raw_spin_unlock_irq(&current->pi_lock);
 
 	/* Causes final put_task_struct in finish_task_switch(): */
 	__set_current_state(TASK_DEAD);

+ 152 - 0
kernel/sched/membarrier.c

@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * membarrier system call
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/syscalls.h>
+#include <linux/membarrier.h>
+#include <linux/tick.h>
+#include <linux/cpumask.h>
+
+#include "sched.h"	/* for cpu_rq(). */
+
+/*
+ * Bitmask made from a "or" of all commands within enum membarrier_cmd,
+ * except MEMBARRIER_CMD_QUERY.
+ */
+#define MEMBARRIER_CMD_BITMASK	\
+	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
+
+static void ipi_mb(void *info)
+{
+	smp_mb();	/* IPIs should be serializing but paranoid. */
+}
+
+static void membarrier_private_expedited(void)
+{
+	int cpu;
+	bool fallback = false;
+	cpumask_var_t tmpmask;
+
+	if (num_online_cpus() == 1)
+		return;
+
+	/*
+	 * Matches memory barriers around rq->curr modification in
+	 * scheduler.
+	 */
+	smp_mb();	/* system call entry is not a mb. */
+
+	/*
+	 * Expedited membarrier commands guarantee that they won't
+	 * block, hence the GFP_NOWAIT allocation flag and fallback
+	 * implementation.
+	 */
+	if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
+		/* Fallback for OOM. */
+		fallback = true;
+	}
+
+	cpus_read_lock();
+	for_each_online_cpu(cpu) {
+		struct task_struct *p;
+
+		/*
+		 * Skipping the current CPU is OK even though we can be
+		 * migrated at any point. The current CPU, at the point
+		 * where we read raw_smp_processor_id(), is ensured to
+		 * be in program order with respect to the caller
+		 * thread. Therefore, we can skip this CPU from the
+		 * iteration.
+		 */
+		if (cpu == raw_smp_processor_id())
+			continue;
+		rcu_read_lock();
+		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		if (p && p->mm == current->mm) {
+			if (!fallback)
+				__cpumask_set_cpu(cpu, tmpmask);
+			else
+				smp_call_function_single(cpu, ipi_mb, NULL, 1);
+		}
+		rcu_read_unlock();
+	}
+	if (!fallback) {
+		smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+		free_cpumask_var(tmpmask);
+	}
+	cpus_read_unlock();
+
+	/*
+	 * Memory barrier on the caller thread _after_ we finished
+	 * waiting for the last IPI. Matches memory barriers around
+	 * rq->curr modification in scheduler.
+	 */
+	smp_mb();	/* exit from system call is not a mb */
+}
+
+/**
+ * sys_membarrier - issue memory barriers on a set of threads
+ * @cmd:   Takes command values defined in enum membarrier_cmd.
+ * @flags: Currently needs to be 0. For future extensions.
+ *
+ * If this system call is not implemented, -ENOSYS is returned. If the
+ * command specified does not exist, not available on the running
+ * kernel, or if the command argument is invalid, this system call
+ * returns -EINVAL. For a given command, with flags argument set to 0,
+ * this system call is guaranteed to always return the same value until
+ * reboot.
+ *
+ * All memory accesses performed in program order from each targeted thread
+ * are guaranteed to be ordered with respect to sys_membarrier(). If we use
+ * the semantic "barrier()" to represent a compiler barrier forcing memory
+ * accesses to be performed in program order across the barrier, and
+ * smp_mb() to represent explicit memory barriers forcing full memory
+ * ordering across the barrier, we have the following ordering table for
+ * each pair of barrier(), sys_membarrier() and smp_mb():
+ *
+ * The pair ordering is detailed as (O: ordered, X: not ordered):
+ *
+ *                        barrier()   smp_mb() sys_membarrier()
+ *        barrier()          X           X            O
+ *        smp_mb()           X           O            O
+ *        sys_membarrier()   O           O            O
+ */
+SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
+{
+	if (unlikely(flags))
+		return -EINVAL;
+	switch (cmd) {
+	case MEMBARRIER_CMD_QUERY:
+	{
+		int cmd_mask = MEMBARRIER_CMD_BITMASK;
+
+		if (tick_nohz_full_enabled())
+			cmd_mask &= ~MEMBARRIER_CMD_SHARED;
+		return cmd_mask;
+	}
+	case MEMBARRIER_CMD_SHARED:
+		/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
+		if (tick_nohz_full_enabled())
+			return -EINVAL;
+		if (num_online_cpus() > 1)
+			synchronize_sched();
+		return 0;
+	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
+		membarrier_private_expedited();
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
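
From user space the new command is reached through the membarrier() system call.  The sketch below assumes a kernel containing this series; note that later kernels additionally require MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED before the private expedited command may be used, which is why the failure case is reported rather than ignored.  It queries the supported command mask and then issues the strongest barrier available, going through a raw syscall() since a libc wrapper may not be available.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	int supported = membarrier(MEMBARRIER_CMD_QUERY, 0);

	if (supported < 0) {
		printf("membarrier() unavailable: %s\n", strerror(errno));
		return 1;
	}
	printf("supported command mask: %#x\n", supported);

	if (supported & MEMBARRIER_CMD_PRIVATE_EXPEDITED) {
		/* IPI-based barrier covering only this process's threads. */
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0))
			printf("private expedited failed: %s\n", strerror(errno));
		else
			printf("private expedited barrier completed\n");
	} else if (supported & MEMBARRIER_CMD_SHARED) {
		/* Falls back to synchronize_sched() in the kernel. */
		if (membarrier(MEMBARRIER_CMD_SHARED, 0) == 0)
			printf("shared barrier completed\n");
	}
	return 0;
}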

+ 2 - 6
kernel/task_work.c

@@ -96,20 +96,16 @@ void task_work_run(void)
 		 * work->func() can do task_work_add(), do not set
 		 * work_exited unless the list is empty.
 		 */
+		raw_spin_lock_irq(&task->pi_lock);
 		do {
 			work = READ_ONCE(task->task_works);
 			head = !work && (task->flags & PF_EXITING) ?
 				&work_exited : NULL;
 		} while (cmpxchg(&task->task_works, work, head) != work);
+		raw_spin_unlock_irq(&task->pi_lock);
 
 		if (!work)
 			break;
-		/*
-		 * Synchronize with task_work_cancel(). It can't remove
-		 * the first entry == work, cmpxchg(task_works) should
-		 * fail, but it can play with *work and other entries.
-		 */
-		raw_spin_unlock_wait(&task->pi_lock);
 
 		do {
 			next = work->next;

+ 1 - 1
kernel/torture.c

@@ -117,7 +117,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
 				 torture_type, cpu);
 		(*n_offl_successes)++;
 		delta = jiffies - starttime;
-		sum_offl += delta;
+		*sum_offl += delta;
 		if (*min_offl < 0) {
 			*min_offl = delta;
 			*max_offl = delta;
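
The torture_offline() fix above is a classic out-parameter bug: without the dereference, sum_offl += delta advanced the function's local copy of the pointer instead of adding to the caller's accumulator, so the caller's running offline-time total was never updated.  A tiny standalone illustration of the intended idiom, with hypothetical names:

#include <stdio.h>

/* Accumulate one sample into counters owned by the caller. */
static void record_offline(long delta, long *sum_offl, long *min_offl, long *max_offl)
{
	*sum_offl += delta;	/* the dereference updates the caller's total */
	if (*min_offl < 0 || delta < *min_offl)
		*min_offl = delta;
	if (delta > *max_offl)
		*max_offl = delta;
}

int main(void)
{
	long sum = 0, min = -1, max = 0;

	record_offline(10, &sum, &min, &max);
	record_offline(4, &sum, &min, &max);
	printf("sum=%ld min=%ld max=%ld\n", sum, min, max);	/* sum=14 min=4 max=10 */
	return 0;
}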

+ 29 - 23
net/netfilter/nf_conntrack_core.c

@@ -96,19 +96,26 @@ static struct conntrack_gc_work conntrack_gc_work;
 
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
 {
+	/* 1) Acquire the lock */
 	spin_lock(lock);
-	while (unlikely(nf_conntrack_locks_all)) {
-		spin_unlock(lock);
 
-		/*
-		 * Order the 'nf_conntrack_locks_all' load vs. the
-		 * spin_unlock_wait() loads below, to ensure
-		 * that 'nf_conntrack_locks_all_lock' is indeed held:
-		 */
-		smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
-		spin_unlock_wait(&nf_conntrack_locks_all_lock);
-		spin_lock(lock);
-	}
+	/* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
+	 * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
+	 */
+	if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
+		return;
+
+	/* fast path failed, unlock */
+	spin_unlock(lock);
+
+	/* Slow path 1) get global lock */
+	spin_lock(&nf_conntrack_locks_all_lock);
+
+	/* Slow path 2) get the lock we want */
+	spin_lock(lock);
+
+	/* Slow path 3) release the global lock */
+	spin_unlock(&nf_conntrack_locks_all_lock);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
@@ -149,28 +156,27 @@ static void nf_conntrack_all_lock(void)
 	int i;
 
 	spin_lock(&nf_conntrack_locks_all_lock);
-	nf_conntrack_locks_all = true;
 
-	/*
-	 * Order the above store of 'nf_conntrack_locks_all' against
-	 * the spin_unlock_wait() loads below, such that if
-	 * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
-	 * we must observe nf_conntrack_locks[] held:
-	 */
-	smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
+	nf_conntrack_locks_all = true;
 
 	for (i = 0; i < CONNTRACK_LOCKS; i++) {
-		spin_unlock_wait(&nf_conntrack_locks[i]);
+		spin_lock(&nf_conntrack_locks[i]);
+
+		/* This spin_unlock provides the "release" to ensure that
+		 * nf_conntrack_locks_all==true is visible to everyone that
+		 * acquired spin_lock(&nf_conntrack_locks[]).
+		 */
+		spin_unlock(&nf_conntrack_locks[i]);
 	}
 }
 
 static void nf_conntrack_all_unlock(void)
 {
-	/*
-	 * All prior stores must be complete before we clear
+	/* All prior stores must be complete before we clear
 	 * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
 	 * might observe the false value but not the entire
-	 * critical section:
+	 * critical section.
+	 * It pairs with the smp_load_acquire() in nf_conntrack_lock()
 	 */
 	smp_store_release(&nf_conntrack_locks_all, false);
 	spin_unlock(&nf_conntrack_locks_all_lock);
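
The reworked nf_conntrack locking drops spin_unlock_wait() in favour of a fast path that takes only the per-bucket lock plus an acquire load of nf_conntrack_locks_all, and a slow path that briefly queues behind the global lock; the global locker publishes the flag by locking and unlocking every bucket and later clears it with a release store.  The following userspace model of that choreography uses pthread mutexes and GCC atomic builtins; it is a sketch of the pattern, not the netfilter code itself, and the single-threaded main() merely exercises the call sequence.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NLOCKS 4

static pthread_mutex_t bucket_lock[NLOCKS];
static pthread_mutex_t all_lock = PTHREAD_MUTEX_INITIALIZER;
static bool locks_all;		/* set under all_lock, cleared with a release store */

/* Per-bucket acquisition, mirroring the patched nf_conntrack_lock(). */
static void bucket_lock_one(int i)
{
	pthread_mutex_lock(&bucket_lock[i]);
	/* Fast path: no global locker is active. */
	if (!__atomic_load_n(&locks_all, __ATOMIC_ACQUIRE))
		return;
	/* Slow path: drop the bucket lock and queue behind the global lock. */
	pthread_mutex_unlock(&bucket_lock[i]);
	pthread_mutex_lock(&all_lock);
	pthread_mutex_lock(&bucket_lock[i]);
	pthread_mutex_unlock(&all_lock);
}

static void bucket_unlock_one(int i)
{
	pthread_mutex_unlock(&bucket_lock[i]);
}

/* Exclude all per-bucket holders, mirroring nf_conntrack_all_lock(). */
static void bucket_lock_all(void)
{
	int i;

	pthread_mutex_lock(&all_lock);
	__atomic_store_n(&locks_all, true, __ATOMIC_RELAXED);
	/* Locking and unlocking each bucket drains existing holders and,
	 * via the unlock's release semantics, publishes locks_all to the
	 * next acquirer of that bucket. */
	for (i = 0; i < NLOCKS; i++) {
		pthread_mutex_lock(&bucket_lock[i]);
		pthread_mutex_unlock(&bucket_lock[i]);
	}
}

/* Mirror of nf_conntrack_all_unlock(). */
static void bucket_unlock_all(void)
{
	__atomic_store_n(&locks_all, false, __ATOMIC_RELEASE);
	pthread_mutex_unlock(&all_lock);
}

int main(void)
{
	int i;

	for (i = 0; i < NLOCKS; i++)
		pthread_mutex_init(&bucket_lock[i], NULL);

	bucket_lock_one(2);
	bucket_unlock_one(2);
	bucket_lock_all();
	bucket_unlock_all();
	printf("lock choreography completed\n");
	return 0;
}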

+ 61 - 0
tools/testing/selftests/rcutorture/bin/config_override.sh

@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# config_override.sh base override
+#
+# Combines base and override, removing any Kconfig options from base
+# that conflict with any in override, concatenating what remains and
+# sending the result to standard output.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2017
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+base=$1
+if test -r $base
+then
+	:
+else
+	echo Base file $base unreadable!!!
+	exit 1
+fi
+
+override=$2
+if test -r $override
+then
+	:
+else
+	echo Override file $override unreadable!!!
+	exit 1
+fi
+
+T=/tmp/config_override.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' |
+	awk '
+	{
+		if (last)
+			print last " |";
+		last = $0;
+	}
+	END {
+		if (last)
+			print last;
+	}' > $T/script
+sh $T/script < $base
+cat $override

+ 26 - 1
tools/testing/selftests/rcutorture/bin/functions.sh

@@ -66,8 +66,33 @@ configfrag_boot_params () {
 
 # configfrag_boot_cpus bootparam-string config-fragment-file config-cpus
 #
-# Decreases number of CPUs based on any maxcpus= boot parameters specified.
+# Decreases number of CPUs based on any nr_cpus= boot parameters specified.
 configfrag_boot_cpus () {
+	local bootargs="`configfrag_boot_params "$1" "$2"`"
+	local nr_cpus
+	if echo "${bootargs}" | grep -q 'nr_cpus=[0-9]'
+	then
+		nr_cpus="`echo "${bootargs}" | sed -e 's/^.*nr_cpus=\([0-9]*\).*$/\1/'`"
+		if test "$3" -gt "$nr_cpus"
+		then
+			echo $nr_cpus
+		else
+			echo $3
+		fi
+	else
+		echo $3
+	fi
+}
+
+# configfrag_boot_maxcpus bootparam-string config-fragment-file config-cpus
+#
+# Decreases number of CPUs based on any maxcpus= boot parameters specified.
+# This allows tests where additional CPUs come online later during the
+# test run.  However, the torture parameters will be set based on the
+# number of CPUs initially present, so the scripting should schedule
+# test runs based on the maxcpus= boot parameter controlling the initial
+# number of CPUs instead of on the ultimate number of CPUs.
+configfrag_boot_maxcpus () {
 	local bootargs="`configfrag_boot_params "$1" "$2"`"
 	local maxcpus
 	if echo "${bootargs}" | grep -q 'maxcpus=[0-9]'

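For reference, the rule configfrag_boot_cpus implements above is simply min(configured CPUs, N) when the boot arguments contain nr_cpus=N, otherwise the configured count unchanged. A hypothetical C analogue of that rule (not part of the scripts, function name made up):

#include <stdio.h>
#include <string.h>

static int clamp_cpus(const char *bootargs, int config_cpus)
{
	const char *p = strstr(bootargs, "nr_cpus=");
	int nr_cpus;

	/* Only clamp when nr_cpus=N is present and smaller than the config. */
	if (p && sscanf(p, "nr_cpus=%d", &nr_cpus) == 1 && nr_cpus < config_cpus)
		return nr_cpus;
	return config_cpus;
}

int main(void)
{
	printf("%d\n", clamp_cpus("maxcpus=8 nr_cpus=43", 16));  /* prints 16 */
	printf("%d\n", clamp_cpus("nr_cpus=4", 16));              /* prints 4  */
	return 0;
}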
+ 2 - 9
tools/testing/selftests/rcutorture/bin/kvm-build.sh

@@ -2,7 +2,7 @@
 #
 # Build a kvm-ready Linux kernel from the tree in the current directory.
 #
-# Usage: kvm-build.sh config-template build-dir more-configs
+# Usage: kvm-build.sh config-template build-dir
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -34,24 +34,17 @@ then
 	echo "kvm-build.sh :$builddir: Not a writable directory, cannot build into it"
 	exit 1
 fi
-moreconfigs=${3}
-if test -z "$moreconfigs" -o ! -r "$moreconfigs"
-then
-	echo "kvm-build.sh :$moreconfigs: Not a readable file"
-	exit 1
-fi
 
 T=/tmp/test-linux.sh.$$
 trap 'rm -rf $T' 0
 mkdir $T
 
-grep -v 'CONFIG_[A-Z]*_TORTURE_TEST=' < ${config_template} > $T/config
+cp ${config_template} $T/config
 cat << ___EOF___ >> $T/config
 CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD"
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_CONSOLE=y
 ___EOF___
-cat $moreconfigs >> $T/config
 
 configinit.sh $T/config O=$builddir
 retval=$?

+ 29 - 29
tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh

@@ -40,7 +40,7 @@
 
 T=/tmp/kvm-test-1-run.sh.$$
 trap 'rm -rf $T' 0
-touch $T
+mkdir $T
 
 . $KVM/bin/functions.sh
 . $CONFIGFRAG/ver_functions.sh
@@ -60,37 +60,33 @@ then
 	echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it"
 	exit 1
 fi
-cp $config_template $resdir/ConfigFragment
 echo ' ---' `date`: Starting build
 echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
+touch $resdir/ConfigFragment.input $resdir/ConfigFragment
 if test -r "$config_dir/CFcommon"
 then
-	cat < $config_dir/CFcommon >> $T
+	echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input
+	cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input
+	config_override.sh $config_dir/CFcommon $config_template > $T/Kc1
+	grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment
+else
+	cp $config_template $T/Kc1
 fi
-# Optimizations below this point
-# CONFIG_USB=n
-# CONFIG_SECURITY=n
-# CONFIG_NFS_FS=n
-# CONFIG_SOUND=n
-# CONFIG_INPUT_JOYSTICK=n
-# CONFIG_INPUT_TABLET=n
-# CONFIG_INPUT_TOUCHSCREEN=n
-# CONFIG_INPUT_MISC=n
-# CONFIG_INPUT_MOUSE=n
-# # CONFIG_NET=n # disables console access, so accept the slower build.
-# CONFIG_SCSI=n
-# CONFIG_ATA=n
-# CONFIG_FAT_FS=n
-# CONFIG_MSDOS_FS=n
-# CONFIG_VFAT_FS=n
-# CONFIG_ISO9660_FS=n
-# CONFIG_QUOTA=n
-# CONFIG_HID=n
-# CONFIG_CRYPTO=n
-# CONFIG_PCCARD=n
-# CONFIG_PCMCIA=n
-# CONFIG_CARDBUS=n
-# CONFIG_YENTA=n
+echo " --- $config_template" >> $resdir/ConfigFragment.input
+cat $config_template >> $resdir/ConfigFragment.input
+grep '#CHECK#' $config_template >> $resdir/ConfigFragment
+if test -n "$TORTURE_KCONFIG_ARG"
+then
+	echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline
+	echo " --- --kconfig argument" >> $resdir/ConfigFragment.input
+	cat $T/cmdline >> $resdir/ConfigFragment.input
+	config_override.sh $T/Kc1 $T/cmdline > $T/Kc2
+	# Note that "#CHECK#" is not permitted on commandline.
+else
+	cp $T/Kc1 $T/Kc2
+fi
+cat $T/Kc2 >> $resdir/ConfigFragment
+
 base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
 if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
 then
@@ -100,7 +96,9 @@ then
 	KERNEL=$base_resdir/${BOOT_IMAGE##*/} # use the last component of ${BOOT_IMAGE}
 	ln -s $base_resdir/Make*.out $resdir  # for kvm-recheck.sh
 	ln -s $base_resdir/.config $resdir  # for kvm-recheck.sh
-elif kvm-build.sh $config_template $builddir $T
+	# Arch-independent indicator
+	touch $resdir/builtkernel
+elif kvm-build.sh $T/Kc2 $builddir
 then
 	# Had to build a kernel for this test.
 	QEMU="`identify_qemu $builddir/vmlinux`"
@@ -112,6 +110,8 @@ then
 	then
 		cp $builddir/$BOOT_IMAGE $resdir
 		KERNEL=$resdir/${BOOT_IMAGE##*/}
+		# Arch-independent indicator
+		touch $resdir/builtkernel
 	else
 		echo No identifiable boot image, not running KVM, see $resdir.
 		echo Do the torture scripts know about your architecture?
@@ -149,7 +149,7 @@ fi
 
 # Generate -smp qemu argument.
 qemu_args="-enable-kvm -nographic $qemu_args"
-cpu_count=`configNR_CPUS.sh $config_template`
+cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
 cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
 vcpus=`identify_qemu_vcpus`
 if test $cpu_count -gt $vcpus

+ 27 - 7
tools/testing/selftests/rcutorture/bin/kvm.sh

@@ -41,6 +41,7 @@ PATH=${KVM}/bin:$PATH; export PATH
 TORTURE_DEFCONFIG=defconfig
 TORTURE_BOOT_IMAGE=""
 TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
+TORTURE_KCONFIG_ARG=""
 TORTURE_KMAKE_ARG=""
 TORTURE_SHUTDOWN_GRACE=180
 TORTURE_SUITE=rcu
@@ -65,6 +66,7 @@ usage () {
 	echo "       --duration minutes"
 	echo "       --interactive"
 	echo "       --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
+	echo "       --kconfig Kconfig-options"
 	echo "       --kmake-arg kernel-make-arguments"
 	echo "       --mac nn:nn:nn:nn:nn:nn"
 	echo "       --no-initrd"
@@ -129,6 +131,11 @@ do
 		jitter="$2"
 		shift
 		;;
+	--kconfig)
+		checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
+		TORTURE_KCONFIG_ARG="$2"
+		shift
+		;;
 	--kmake-arg)
 		checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
 		TORTURE_KMAKE_ARG="$2"
@@ -205,6 +212,7 @@ do
 	then
 		cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
 		cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+		cpu_count=`configfrag_boot_maxcpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
 		for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
 		do
 			echo $CF1 $cpu_count >> $T/cfgcpu
@@ -275,6 +283,7 @@ TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
 TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
 TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
 TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
+TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
 TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
 TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
 TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
@@ -324,6 +333,7 @@ function dump(first, pastlast, batchnum)
 {
 	print "echo ----Start batch " batchnum ": `date`";
 	print "echo ----Start batch " batchnum ": `date` >> " rd "/log";
+	print "needqemurun="
 	jn=1
 	for (j = first; j < pastlast; j++) {
 		builddir=KVM "/b" jn
@@ -359,10 +369,11 @@ function dump(first, pastlast, batchnum)
 	for (j = 1; j < jn; j++) {
 		builddir=KVM "/b" j
 		print "rm -f " builddir ".ready"
-		print "if test -z \"$TORTURE_BUILDONLY\""
+		print "if test -f \"" rd cfr[j] "/builtkernel\""
 		print "then"
-		print "\techo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date`";
-		print "\techo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date` >> " rd "/log";
+		print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date`";
+		print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` >> " rd "/log";
+		print "\tneedqemurun=1"
 		print "fi"
 	}
 	njitter = 0;
@@ -377,13 +388,22 @@ function dump(first, pastlast, batchnum)
 		njitter = 0;
 		print "echo Build-only run, so suppressing jitter >> " rd "/log"
 	}
-	for (j = 0; j < njitter; j++)
-		print "jitter.sh " j " " dur " " ja[2] " " ja[3] "&"
-	print "wait"
-	print "if test -z \"$TORTURE_BUILDONLY\""
+	if (TORTURE_BUILDONLY) {
+		print "needqemurun="
+	}
+	print "if test -n \"$needqemurun\""
 	print "then"
+	print "\techo ---- Starting kernels. `date`";
+	print "\techo ---- Starting kernels. `date` >> " rd "/log";
+	for (j = 0; j < njitter; j++)
+		print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
+	print "\twait"
 	print "\techo ---- All kernel runs complete. `date`";
 	print "\techo ---- All kernel runs complete. `date` >> " rd "/log";
+	print "else"
+	print "\twait"
+	print "\techo ---- No kernel runs. `date`";
+	print "\techo ---- No kernel runs. `date` >> " rd "/log";
 	print "fi"
 	for (j = 1; j < jn; j++) {
 		builddir=KVM "/b" j

+ 1 - 1
tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot

@@ -1 +1 @@
-rcutorture.torture_type=rcu_busted
+rcutorture.torture_type=busted

+ 0 - 1
tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot

@@ -1 +0,0 @@
-rcutorture.torture_type=srcud

+ 2 - 1
tools/testing/selftests/rcutorture/configs/rcu/SRCU-u

@@ -4,6 +4,7 @@ CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
 #CHECK#CONFIG_TINY_SRCU=y
 CONFIG_RCU_TRACE=n
-CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_PREEMPT_COUNT=n

+ 1 - 1
tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot

@@ -1,4 +1,4 @@
-rcutorture.torture_type=rcu_bh maxcpus=8
+rcutorture.torture_type=rcu_bh maxcpus=8 nr_cpus=43
 rcutree.gp_preinit_delay=3
 rcutree.gp_init_delay=3
 rcutree.gp_cleanup_delay=3

+ 1 - 1
tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt

@@ -69,11 +69,11 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE
 CONFIG_PREEMPT_RCU
 CONFIG_TREE_RCU
 CONFIG_TINY_RCU
+CONFIG_TASKS_RCU
 
 	These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
 
 CONFIG_SRCU
-CONFIG_TASKS_RCU
 
 	Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.