9 年之前 · 1c19b68a27
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -97,7 +97,7 @@ between any two lock-classes:
 
				    <hardirq-safe>   ->  <hardirq-unsafe>
			
 
				    <softirq-safe>   ->  <softirq-unsafe>
			
 
				 
			
 
				-The first rule comes from the fact the a hardirq-safe lock could be
			
 
				+The first rule comes from the fact that a hardirq-safe lock could be
			
 
				 taken by a hardirq context, interrupting a hardirq-unsafe lock - and
			
 
				 thus could result in a lock inversion deadlock. Likewise, a softirq-safe
			
 
				 lock could be taken by an softirq context, interrupting a softirq-unsafe
			
@@ -220,7 +220,7 @@ calculated, which hash is unique for every lock chain. The hash value,
 
				 when the chain is validated for the first time, is then put into a hash
			
 
				 table, which hash-table can be checked in a lockfree manner. If the
			
 
				 locking chain occurs again later on, the hash table tells us that we
			
 
				-dont have to validate the chain again.
			
 
				+don't have to validate the chain again.
			
 
				 
			
 
				 Troubleshooting:
			
 
				 ----------------
			
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -4,8 +4,40 @@
 
				 
			
 
				 By: David Howells <dhowells@redhat.com>
			
 
				     Paul E. McKenney <paulmck@linux.vnet.ibm.com>
			
 
				+    Will Deacon <will.deacon@arm.com>
			
 
				+    Peter Zijlstra <peterz@infradead.org>
			
 
				 
			
 
				-Contents:
			
 
				+==========
			
 
				+DISCLAIMER
			
 
				+==========
			
 
				+
			
 
				+This document is not a specification; it is intentionally (for the sake of
			
 
				+brevity) and unintentionally (due to being human) incomplete. This document is
			
 
				+meant as a guide to using the various memory barriers provided by Linux, but
			
 
				+in case of any doubt (and there are many) please ask.
			
 
				+
			
 
				+To repeat, this document is not a specification of what Linux expects from
			
 
				+hardware.
			
 
				+
			
 
				+The purpose of this document is twofold:
			
 
				+
			
 
				+ (1) to specify the minimum functionality that one can rely on for any
			
 
				+     particular barrier, and
			
 
				+
			
 
				+ (2) to provide a guide as to how to use the barriers that are available.
			
 
				+
			
 
				+Note that an architecture can provide more than the minimum requirement
			
 
				+for any particular barrier, but if the architecture provides less than
			
 
				+that, that architecture is incorrect.
			
 
				+
			
 
				+Note also that it is possible that a barrier may be a no-op for an
			
 
				+architecture because the way that arch works renders an explicit barrier
			
 
				+unnecessary in that case.
			
 
				+
			
 
				+
			
 
				+========
			
 
				+CONTENTS
			
 
				+========
			
 
				 
			
 
				  (*) Abstract memory access model.
			
 
				 
			
@@ -31,15 +63,15 @@ Contents:
 
				 
			
 
				  (*) Implicit kernel memory barriers.
			
 
				 
			
 
				-     - Locking functions.
			
 
				+     - Lock acquisition functions.
			
 
				      - Interrupt disabling functions.
			
 
				      - Sleep and wake-up functions.
			
 
				      - Miscellaneous functions.
			
 
				 
			
 
				- (*) Inter-CPU locking barrier effects.
			
 
				+ (*) Inter-CPU acquiring barrier effects.
			
 
				 
			
 
				-     - Locks vs memory accesses.
			
 
				-     - Locks vs I/O accesses.
			
 
				+     - Acquires vs memory accesses.
			
 
				+     - Acquires vs I/O accesses.
			
 
				 
			
 
				  (*) Where are memory barriers needed?
			
 
				 
			
@@ -61,6 +93,7 @@ Contents:
 
				  (*) The things CPUs get up to.
			
 
				 
			
 
				      - And then there's the Alpha.
			
 
				+     - Virtual Machine Guests.
			
 
				 
			
 
				  (*) Example uses.
			
 
				 
			
@@ -148,7 +181,7 @@ As a further example, consider this sequence of events:
 
				 
			
 
				 	CPU 1		CPU 2
			
 
				 	===============	===============
			
 
				-	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
			
 
				+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
			
 
				 	B = 4;		Q = P;
			
 
				 	P = &B		D = *Q;
			
 
				 
			
@@ -430,8 +463,9 @@ And a couple of implicit varieties:
 
				      This acts as a one-way permeable barrier.  It guarantees that all memory
			
 
				      operations after the ACQUIRE operation will appear to happen after the
			
 
				      ACQUIRE operation with respect to the other components of the system.
			
 
				-     ACQUIRE operations include LOCK operations and smp_load_acquire()
			
 
				-     operations.
			
 
				+     ACQUIRE operations include LOCK operations and both smp_load_acquire()
			
 
				+     and smp_cond_acquire() operations. The latter builds the necessary ACQUIRE
			
 
				+     semantics from relying on a control dependency and smp_rmb().
			
 
				 
			
 
				      Memory operations that occur before an ACQUIRE operation may appear to
			
 
				      happen after it completes.
			
@@ -464,6 +498,11 @@ And a couple of implicit varieties:
 
				      This means that ACQUIRE acts as a minimal "acquire" operation and
			
 
				      RELEASE acts as a minimal "release" operation.
			
 
				 
			
 
				+A subset of the atomic operations described in atomic_ops.txt have ACQUIRE
			
 
				+and RELEASE variants in addition to fully-ordered and relaxed (no barrier
			
 
				+semantics) definitions.  For compound atomics performing both a load and a
			
 
				+store, ACQUIRE semantics apply only to the load and RELEASE semantics apply
			
 
				+only to the store portion of the operation.
			
 
				 
			
 
				 Memory barriers are only required where there's a possibility of interaction
			
 
				 between two CPUs or between a CPU and a device.  If it can be guaranteed that
			
@@ -517,7 +556,7 @@ following sequence of events:
 
				 
			
 
				 	CPU 1		      CPU 2
			
 
				 	===============	      ===============
			
 
				-	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
			
 
				+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
			
 
				 	B = 4;
			
 
				 	<write barrier>
			
 
				 	WRITE_ONCE(P, &B)
			
@@ -544,7 +583,7 @@ between the address load and the data load:
 
				 
			
 
				 	CPU 1		      CPU 2
			
 
				 	===============	      ===============
			
 
				-	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
			
 
				+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
			
 
				 	B = 4;
			
 
				 	<write barrier>
			
 
				 	WRITE_ONCE(P, &B);
			
@@ -813,9 +852,10 @@ In summary:
 
				       the same variable, then those stores must be ordered, either by
			
 
				       preceding both of them with smp_mb() or by using smp_store_release()
			
 
				       to carry out the stores.  Please note that it is -not- sufficient
			
 
				-      to use barrier() at beginning of each leg of the "if" statement,
			
 
				-      as optimizing compilers do not necessarily respect barrier()
			
 
				-      in this case.
			
 
				+      to use barrier() at beginning of each leg of the "if" statement
			
 
				+      because, as shown by the example above, optimizing compilers can
			
 
				+      destroy the control dependency while respecting the letter of the
			
 
				+      barrier() law.
			
 
				 
			
 
				   (*) Control dependencies require at least one run-time conditional
			
 
				       between the prior load and the subsequent store, and this
			
@@ -1731,15 +1771,15 @@ The Linux kernel has eight basic CPU memory barriers:
 
				 
			
 
				 
			
 
				 All memory barriers except the data dependency barriers imply a compiler
			
 
				-barrier. Data dependencies do not impose any additional compiler ordering.
			
 
				+barrier.  Data dependencies do not impose any additional compiler ordering.
			
 
				 
			
 
				 Aside: In the case of data dependencies, the compiler would be expected
			
 
				 to issue the loads in the correct order (eg. `a[b]` would have to load
			
 
				 the value of b before loading a[b]), however there is no guarantee in
			
 
				 the C specification that the compiler may not speculate the value of b
			
 
				 (eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1)
			
 
				-tmp = a[b]; ). There is also the problem of a compiler reloading b after
			
 
				-having loaded a[b], thus having a newer copy of b than a[b]. A consensus
			
 
				+tmp = a[b]; ).  There is also the problem of a compiler reloading b after
			
 
				+having loaded a[b], thus having a newer copy of b than a[b].  A consensus
			
 
				 has not yet been reached about these problems, however the READ_ONCE()
			
 
				 macro is a good place to start looking.
			
 
				 
			
@@ -1794,6 +1834,7 @@ There are some more advanced barrier functions:
 
				 
			
 
				 
			
 
				  (*) lockless_dereference();
			
 
				+
			
 
				      This can be thought of as a pointer-fetch wrapper around the
			
 
				      smp_read_barrier_depends() data-dependency barrier.
			
 
				 
			
@@ -1858,7 +1899,7 @@ This is a variation on the mandatory write barrier that causes writes to weakly
 
				 ordered I/O regions to be partially ordered.  Its effects may go beyond the
			
 
				 CPU->Hardware interface and actually affect the hardware at some level.
			
 
				 
			
 
				-See the subsection "Locks vs I/O accesses" for more information.
			
 
				+See the subsection "Acquires vs I/O accesses" for more information.
			
 
				 
			
 
				 
			
 
				 ===============================
			
@@ -1873,8 +1914,8 @@ provide more substantial guarantees, but these may not be relied upon outside
 
				 of arch specific code.
			
 
				 
			
 
				 
			
 
				-ACQUIRING FUNCTIONS
			
 
				--------------------
			
 
				+LOCK ACQUISITION FUNCTIONS
			
 
				+--------------------------
			
 
				 
			
 
				 The Linux kernel has a number of locking constructs:
			
 
				 
			
@@ -1895,7 +1936,7 @@ for each construct.  These operations all imply certain barriers:
 
				      Memory operations issued before the ACQUIRE may be completed after
			
 
				      the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
			
 
				      combined with a following ACQUIRE, orders prior stores against
			
 
				-     subsequent loads and stores. Note that this is weaker than smp_mb()!
			
 
				+     subsequent loads and stores.  Note that this is weaker than smp_mb()!
			
 
				      The smp_mb__before_spinlock() primitive is free on many architectures.
			
 
				 
			
 
				  (2) RELEASE operation implication:
			
@@ -2090,9 +2131,9 @@ or:
 
				 	event_indicated = 1;
			
 
				 	wake_up_process(event_daemon);
			
 
				 
			
 
				-A write memory barrier is implied by wake_up() and co. if and only if they wake
			
 
				-something up.  The barrier occurs before the task state is cleared, and so sits
			
 
				-between the STORE to indicate the event and the STORE to set TASK_RUNNING:
			
 
				+A write memory barrier is implied by wake_up() and co. if and only if they
			
 
				+wake something up.  The barrier occurs before the task state is cleared, and so
			
 
				+sits between the STORE to indicate the event and the STORE to set TASK_RUNNING:
			
 
				 
			
 
				 	CPU 1				CPU 2
			
 
				 	===============================	===============================
			
@@ -2206,7 +2247,7 @@ three CPUs; then should the following sequence of events occur:
 
				 
			
 
				 Then there is no guarantee as to what order CPU 3 will see the accesses to *A
			
 
				 through *H occur in, other than the constraints imposed by the separate locks
			
 
				-on the separate CPUs. It might, for example, see:
			
 
				+on the separate CPUs.  It might, for example, see:
			
 
				 
			
 
				 	*E, ACQUIRE M, ACQUIRE Q, *G, *C, *F, *A, *B, RELEASE Q, *D, *H, RELEASE M
			
 
				 
			
@@ -2486,9 +2527,9 @@ The following operations are special locking primitives:
 
				 	clear_bit_unlock();
			
 
				 	__clear_bit_unlock();
			
 
				 
			
 
				-These implement ACQUIRE-class and RELEASE-class operations. These should be used in
			
 
				-preference to other operations when implementing locking primitives, because
			
 
				-their implementations can be optimised on many architectures.
			
 
				+These implement ACQUIRE-class and RELEASE-class operations.  These should be
			
 
				+used in preference to other operations when implementing locking primitives,
			
 
				+because their implementations can be optimised on many architectures.
			
 
				 
			
 
				 [!] Note that special memory barrier primitives are available for these
			
 
				 situations because on some CPUs the atomic instructions used imply full memory
			
@@ -2568,12 +2609,12 @@ explicit barriers are used.
 
				 
			
 
				 Normally this won't be a problem because the I/O accesses done inside such
			
 
				 sections will include synchronous load operations on strictly ordered I/O
			
 
				-registers that form implicit I/O barriers. If this isn't sufficient then an
			
 
				+registers that form implicit I/O barriers.  If this isn't sufficient then an
			
 
				 mmiowb() may need to be used explicitly.
			
 
				 
			
 
				 
			
 
				 A similar situation may occur between an interrupt routine and two routines
			
 
				-running on separate CPUs that communicate with each other. If such a case is
			
 
				+running on separate CPUs that communicate with each other.  If such a case is
			
 
				 likely, then interrupt-disabling locks should be used to guarantee ordering.
			
 
				 
			
 
				 
			
@@ -2587,8 +2628,8 @@ functions:
 
				  (*) inX(), outX():
			
 
				 
			
 
				      These are intended to talk to I/O space rather than memory space, but
			
 
				-     that's primarily a CPU-specific concept. The i386 and x86_64 processors do
			
 
				-     indeed have special I/O space access cycles and instructions, but many
			
 
				+     that's primarily a CPU-specific concept.  The i386 and x86_64 processors
			
 
				+     do indeed have special I/O space access cycles and instructions, but many
			
 
				      CPUs don't have such a concept.
			
 
				 
			
 
				      The PCI bus, amongst others, defines an I/O space concept which - on such
			
@@ -2610,7 +2651,7 @@ functions:
 
				 
			
 
				      Whether these are guaranteed to be fully ordered and uncombined with
			
 
				      respect to each other on the issuing CPU depends on the characteristics
			
 
				-     defined for the memory window through which they're accessing. On later
			
 
				+     defined for the memory window through which they're accessing.  On later
			
 
				      i386 architecture machines, for example, this is controlled by way of the
			
 
				      MTRR registers.
			
 
				 
			
@@ -2635,10 +2676,10 @@ functions:
 
				  (*) readX_relaxed(), writeX_relaxed()
			
 
				 
			
 
				      These are similar to readX() and writeX(), but provide weaker memory
			
 
				-     ordering guarantees. Specifically, they do not guarantee ordering with
			
 
				+     ordering guarantees.  Specifically, they do not guarantee ordering with
			
 
				      respect to normal memory accesses (e.g. DMA buffers) nor do they guarantee
			
 
				-     ordering with respect to LOCK or UNLOCK operations. If the latter is
			
 
				-     required, an mmiowb() barrier can be used. Note that relaxed accesses to
			
 
				+     ordering with respect to LOCK or UNLOCK operations.  If the latter is
			
 
				+     required, an mmiowb() barrier can be used.  Note that relaxed accesses to
			
 
				      the same peripheral are guaranteed to be ordered with respect to each
			
 
				      other.
			
 
				 
			
@@ -3040,8 +3081,9 @@ The Alpha defines the Linux kernel's memory barrier model.
 
				 
			
 
				 See the subsection on "Cache Coherency" above.
			
 
				 
			
 
				+
			
 
				 VIRTUAL MACHINE GUESTS
			
 
				--------------------
			
 
				+----------------------
			
 
				 
			
 
				 Guests running within virtual machines might be affected by SMP effects even if
			
 
				 the guest itself is compiled without SMP support.  This is an artifact of
			
@@ -3050,7 +3092,7 @@ barriers for this use-case would be possible but is often suboptimal.
 
				 
			
 
				 To handle this case optimally, low-level virt_mb() etc macros are available.
			
 
				 These have the same effect as smp_mb() etc when SMP is enabled, but generate
			
 
				-identical code for SMP and non-SMP systems. For example, virtual machine guests
			
 
				+identical code for SMP and non-SMP systems.  For example, virtual machine guests
			
 
				 should use virt_mb() rather than smp_mb() when synchronizing against a
			
 
				 (possibly SMP) host.
			
 
				 
			
@@ -3058,6 +3100,7 @@ These are equivalent to smp_mb() etc counterparts in all other respects,
 
				 in particular, they do not control MMIO effects: to control
			
 
				 MMIO effects, use mandatory barriers.
			
 
				 
			
 
				+
			
 
				 ============
			
 
				 EXAMPLE USES
			
 
				 ============
			
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -560,11 +560,11 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 
				 
			
 
				 /**
			
 
				  * atomic_fetch_or - perform *p |= mask and return old value of *p
			
 
				- * @p: pointer to atomic_t
			
 
				  * @mask: mask to OR on the atomic_t
			
 
				+ * @p: pointer to atomic_t
			
 
				  */
			
 
				 #ifndef atomic_fetch_or
			
 
				-static inline int atomic_fetch_or(atomic_t *p, int mask)
			
 
				+static inline int atomic_fetch_or(int mask, atomic_t *p)
			
 
				 {
			
 
				 	int old, val = atomic_read(p);
			
 
				 
			
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -708,7 +708,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
 
				  * yet. Otherwise we look it up. We cache the result in the lock object
			
 
				  * itself, so actual lookup of the hash should be once per lock object.
			
 
				  */
			
 
				-static inline struct lock_class *
			
 
				+static struct lock_class *
			
 
				 register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
			
 
				 {
			
 
				 	struct lockdep_subclass_key *key;
			
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -75,12 +75,7 @@ struct lock_stress_stats {
 
				 	long n_lock_acquired;
			
 
				 };
			
 
				 
			
 
				-#if defined(MODULE)
			
 
				-#define LOCKTORTURE_RUNNABLE_INIT 1
			
 
				-#else
			
 
				-#define LOCKTORTURE_RUNNABLE_INIT 0
			
 
				-#endif
			
 
				-int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
			
 
				+int torture_runnable = IS_ENABLED(MODULE);
			
 
				 module_param(torture_runnable, int, 0444);
			
 
				 MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
			
 
				 
			
@@ -394,12 +389,12 @@ static void torture_rtmutex_boost(struct torture_random_state *trsp)
 
				 
			
 
				 	if (!rt_task(current)) {
			
 
				 		/*
			
 
				-		 * (1) Boost priority once every ~50k operations. When the
			
 
				+		 * Boost priority once every ~50k operations. When the
			
 
				 		 * task tries to take the lock, the rtmutex it will account
			
 
				 		 * for the new priority, and do any corresponding pi-dance.
			
 
				 		 */
			
 
				-		if (!(torture_random(trsp) %
			
 
				-		      (cxt.nrealwriters_stress * factor))) {
			
 
				+		if (trsp && !(torture_random(trsp) %
			
 
				+			      (cxt.nrealwriters_stress * factor))) {
			
 
				 			policy = SCHED_FIFO;
			
 
				 			param.sched_priority = MAX_RT_PRIO - 1;
			
 
				 		} else /* common case, do nothing */
			
@@ -748,6 +743,15 @@ static void lock_torture_cleanup(void)
 
				 	if (torture_cleanup_begin())
			
 
				 		return;
			
 
				 
			
 
				+	/*
			
 
				+	 * Indicates early cleanup, meaning that the test has not run,
			
 
				+	 * such as when passing bogus args when loading the module. As
			
 
				+	 * such, only perform the underlying torture-specific cleanups,
			
 
				+	 * and avoid anything related to locktorture.
			
 
				+	 */
			
 
				+	if (!cxt.lwsa)
			
 
				+		goto end;
			
 
				+
			
 
				 	if (writer_tasks) {
			
 
				 		for (i = 0; i < cxt.nrealwriters_stress; i++)
			
 
				 			torture_stop_kthread(lock_torture_writer,
			
@@ -776,6 +780,7 @@ static void lock_torture_cleanup(void)
 
				 	else
			
 
				 		lock_torture_print_module_parms(cxt.cur_ops,
			
 
				 						"End of test: SUCCESS");
			
 
				+end:
			
 
				 	torture_cleanup_end();
			
 
				 }
			
 
				 
			
@@ -870,6 +875,7 @@ static int __init lock_torture_init(void)
 
				 			VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
			
 
				 			firsterr = -ENOMEM;
			
 
				 			kfree(cxt.lwsa);
			
 
				+			cxt.lwsa = NULL;
			
 
				 			goto unwind;
			
 
				 		}
			
 
				 
			
@@ -878,6 +884,7 @@ static int __init lock_torture_init(void)
 
				 			cxt.lrsa[i].n_lock_acquired = 0;
			
 
				 		}
			
 
				 	}
			
 
				+
			
 
				 	lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
			
 
				 
			
 
				 	/* Prepare torture context. */
			
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -191,8 +191,6 @@ static ssize_t qstat_write(struct file *file, const char __user *user_buf,
 
				 
			
 
				 		for (i = 0 ; i < qstat_num; i++)
			
 
				 			WRITE_ONCE(ptr[i], 0);
			
 
				-		for (i = 0 ; i < qstat_num; i++)
			
 
				-			WRITE_ONCE(ptr[i], 0);
			
 
				 	}
			
 
				 	return count;
			
 
				 }
			
@@ -214,10 +212,8 @@ static int __init init_qspinlock_stat(void)
 
				 	struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
			
 
				 	int i;
			
 
				 
			
 
				-	if (!d_qstat) {
			
 
				-		pr_warn("Could not create 'qlockstat' debugfs directory\n");
			
 
				-		return 0;
			
 
				-	}
			
 
				+	if (!d_qstat)
			
 
				+		goto out;
			
 
				 
			
 
				 	/*
			
 
				 	 * Create the debugfs files
			
@@ -227,12 +223,20 @@ static int __init init_qspinlock_stat(void)
 
				 	 * performance.
			
 
				 	 */
			
 
				 	for (i = 0; i < qstat_num; i++)
			
 
				-		debugfs_create_file(qstat_names[i], 0400, d_qstat,
			
 
				-				   (void *)(long)i, &fops_qstat);
			
 
				+		if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
			
 
				+					 (void *)(long)i, &fops_qstat))
			
 
				+			goto fail_undo;
			
 
				+
			
 
				+	if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
			
 
				+				 (void *)(long)qstat_reset_cnts, &fops_qstat))
			
 
				+		goto fail_undo;
			
 
				 
			
 
				-	debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
			
 
				-			   (void *)(long)qstat_reset_cnts, &fops_qstat);
			
 
				 	return 0;
			
 
				+fail_undo:
			
 
				+	debugfs_remove_recursive(d_qstat);
			
 
				+out:
			
 
				+	pr_warn("Could not create 'qlockstat' debugfs entries\n");
			
 
				+	return -ENOMEM;
			
 
				 }
			
 
				 fs_initcall(init_qspinlock_stat);
			
 
				 
			
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -262,7 +262,7 @@ static void tick_nohz_dep_set_all(atomic_t *dep,
 
				 {
			
 
				 	int prev;
			
 
				 
			
 
				-	prev = atomic_fetch_or(dep, BIT(bit));
			
 
				+	prev = atomic_fetch_or(BIT(bit), dep);
			
 
				 	if (!prev)
			
 
				 		tick_nohz_full_kick_all();
			
 
				 }
			
@@ -292,7 +292,7 @@ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
 
				 
			
 
				 	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
			
 
				 
			
 
				-	prev = atomic_fetch_or(&ts->tick_dep_mask, BIT(bit));
			
 
				+	prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
			
 
				 	if (!prev) {
			
 
				 		preempt_disable();
			
 
				 		/* Perf needs local kick that is NMI safe */