Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:

 - Add 'cross-release' support to lockdep, which allows APIs like
   completions, where it's not the 'owner' who releases the lock, to be
   tracked. It's all activated automatically under
   CONFIG_PROVE_LOCKING=y.

 - Clean up (restructure) the x86 atomics op implementation to be more
   readable, in preparation for KASAN annotations. (Dmitry Vyukov)

 - Fix static keys (Paolo Bonzini)

 - Add killable versions of down_read() et al (Kirill Tkhai)

 - Rework and fix jump_label locking (Marc Zyngier, Paolo Bonzini)

 - Rework (and fix) tlb_flush_pending() barriers (Peter Zijlstra)

 - Remove smp_mb__before_spinlock() and convert its usages, introduce
   smp_mb__after_spinlock() (Peter Zijlstra)

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (56 commits)
  locking/lockdep/selftests: Fix mixed read-write ABBA tests
  sched/completion: Avoid unnecessary stack allocation for COMPLETION_INITIALIZER_ONSTACK()
  acpi/nfit: Fix COMPLETION_INITIALIZER_ONSTACK() abuse
  locking/pvqspinlock: Relax cmpxchg's to improve performance on some architectures
  smp: Avoid using two cache lines for struct call_single_data
  locking/lockdep: Untangle xhlock history save/restore from task independence
  locking/refcounts, x86/asm: Disable CONFIG_ARCH_HAS_REFCOUNT for the time being
  futex: Remove duplicated code and fix undefined behaviour
  Documentation/locking/atomic: Finish the document...
  locking/lockdep: Fix workqueue crossrelease annotation
  workqueue/lockdep: 'Fix' flush_work() annotation
  locking/lockdep/selftests: Add mixed read-write ABBA tests
  mm, locking/barriers: Clarify tlb_flush_pending() barriers
  locking/lockdep: Make CONFIG_LOCKDEP_CROSSRELEASE and CONFIG_LOCKDEP_COMPLETIONS truly non-interactive
  locking/lockdep: Explicitly initialize wq_barrier::done::map
  locking/lockdep: Rename CONFIG_LOCKDEP_COMPLETE to CONFIG_LOCKDEP_COMPLETIONS
  locking/lockdep: Reword title of LOCKDEP_CROSSRELEASE config
  locking/lockdep: Make CONFIG_LOCKDEP_CROSSRELEASE part of CONFIG_PROVE_LOCKING
  locking/refcounts, x86/asm: Implement fast refcount overflow protection
  locking/lockdep: Fix the rollback and overwrite detection logic in crossrelease
  ...
Linus Torvalds 8 years ago
parent
commit
5f82e71a00
100 changed files with 3385 additions and 1091 deletions
  1. 66 0
      Documentation/atomic_bitops.txt
  2. 242 0
      Documentation/atomic_t.txt
  3. 874 0
      Documentation/locking/crossrelease.txt
  4. 8 93
      Documentation/memory-barriers.txt
  5. 20 0
      Documentation/static-keys.txt
  6. 1 4
      Documentation/translations/ko_KR/memory-barriers.txt
  7. 12 0
      arch/Kconfig
  8. 5 21
      arch/alpha/include/asm/futex.h
  9. 2 0
      arch/arc/include/asm/atomic.h
  10. 5 35
      arch/arc/include/asm/futex.h
  11. 4 22
      arch/arm/include/asm/futex.h
  12. 4 22
      arch/arm64/include/asm/futex.h
  13. 2 9
      arch/arm64/include/asm/spinlock.h
  14. 2 1
      arch/frv/include/asm/futex.h
  15. 4 23
      arch/frv/kernel/futex.c
  16. 2 0
      arch/hexagon/include/asm/atomic.h
  17. 4 34
      arch/hexagon/include/asm/futex.h
  18. 4 21
      arch/ia64/include/asm/futex.h
  19. 2 0
      arch/metag/include/asm/atomic_lock1.h
  20. 4 34
      arch/microblaze/include/asm/futex.h
  21. 4 21
      arch/mips/include/asm/futex.h
  22. 3 3
      arch/mips/kernel/smp.c
  23. 4 35
      arch/openrisc/include/asm/futex.h
  24. 2 0
      arch/parisc/include/asm/atomic.h
  25. 4 22
      arch/parisc/include/asm/futex.h
  26. 0 7
      arch/powerpc/include/asm/barrier.h
  27. 5 21
      arch/powerpc/include/asm/futex.h
  28. 3 0
      arch/powerpc/include/asm/spinlock.h
  29. 5 18
      arch/s390/include/asm/futex.h
  30. 4 22
      arch/sh/include/asm/futex.h
  31. 2 0
      arch/sparc/include/asm/atomic_32.h
  32. 5 21
      arch/sparc/include/asm/futex_64.h
  33. 2 0
      arch/tile/include/asm/atomic_32.h
  34. 5 35
      arch/tile/include/asm/futex.h
  35. 2 0
      arch/x86/Kconfig
  36. 6 0
      arch/x86/include/asm/asm.h
  37. 46 23
      arch/x86/include/asm/atomic.h
  38. 57 24
      arch/x86/include/asm/atomic64_32.h
  39. 47 26
      arch/x86/include/asm/atomic64_64.h
  40. 1 1
      arch/x86/include/asm/cmpxchg.h
  41. 5 35
      arch/x86/include/asm/futex.h
  42. 109 0
      arch/x86/include/asm/refcount.h
  43. 42 0
      arch/x86/mm/extable.c
  44. 5 22
      arch/xtensa/include/asm/futex.h
  45. 1 1
      block/blk-softirq.c
  46. 1 1
      drivers/acpi/nfit/core.c
  47. 1 1
      drivers/block/null_blk.c
  48. 5 1
      drivers/clocksource/arm_arch_timer.c
  49. 5 5
      drivers/cpuidle/coupled.c
  50. 3 2
      drivers/gpu/drm/i915/i915_debugfs.c
  51. 1 1
      drivers/net/ethernet/cavium/liquidio/lio_main.c
  52. 1 1
      drivers/net/ethernet/cavium/liquidio/octeon_droq.h
  53. 2 2
      fs/overlayfs/readdir.c
  54. 11 14
      fs/userfaultfd.c
  55. 2 0
      include/asm-generic/atomic64.h
  56. 9 41
      include/asm-generic/futex.h
  57. 3 0
      include/linux/atomic.h
  58. 1 1
      include/linux/blkdev.h
  59. 45 2
      include/linux/completion.h
  60. 0 6
      include/linux/cpuset.h
  61. 6 1
      include/linux/futex.h
  62. 20 4
      include/linux/irqflags.h
  63. 21 12
      include/linux/jump_label.h
  64. 6 4
      include/linux/kasan-checks.h
  65. 7 0
      include/linux/kernel.h
  66. 149 16
      include/linux/lockdep.h
  67. 65 30
      include/linux/mm_types.h
  68. 1 1
      include/linux/netdevice.h
  69. 4 0
      include/linux/refcount.h
  70. 1 0
      include/linux/rwsem-spinlock.h
  71. 1 0
      include/linux/rwsem.h
  72. 11 1
      include/linux/sched.h
  73. 8 0
      include/linux/sched/mm.h
  74. 6 2
      include/linux/smp.h
  75. 32 9
      include/linux/spinlock.h
  76. 6 1
      init/Kconfig
  77. 7 0
      kernel/cgroup/cpuset.c
  78. 1 0
      kernel/exit.c
  79. 4 0
      kernel/fork.c
  80. 61 0
      kernel/futex.c
  81. 76 28
      kernel/jump_label.c
  82. 817 191
      kernel/locking/lockdep.c
  83. 2 0
      kernel/locking/lockdep_internals.h
  84. 4 0
      kernel/locking/lockdep_proc.c
  85. 0 1
      kernel/locking/lockdep_states.h
  86. 13 0
      kernel/locking/osq_lock.c
  87. 17 7
      kernel/locking/qspinlock_paravirt.h
  88. 29 0
      kernel/locking/rtmutex_common.h
  89. 28 9
      kernel/locking/rwsem-spinlock.c
  90. 30 3
      kernel/locking/rwsem-xadd.c
  91. 12 0
      kernel/panic.c
  92. 11 0
      kernel/sched/completion.c
  93. 2 2
      kernel/sched/core.c
  94. 1 1
      kernel/sched/sched.h
  95. 0 6
      kernel/sched/swait.c
  96. 17 15
      kernel/smp.c
  97. 1 1
      kernel/up.c
  98. 43 10
      kernel/workqueue.c
  99. 18 0
      lib/Kconfig.debug
  100. 121 2
      lib/locking-selftest.c

+ 66 - 0
Documentation/atomic_bitops.txt

@@ -0,0 +1,66 @@
+
+On atomic bitops.
+
+
+While our bitmap_{}() functions are non-atomic, we have a number of operations
+operating on single bits in a bitmap that are atomic.
+
+
+API
+---
+
+The single bit operations are:
+
+Non-RMW ops:
+
+  test_bit()
+
+RMW atomic operations without return value:
+
+  {set,clear,change}_bit()
+  clear_bit_unlock()
+
+RMW atomic operations with return value:
+
+  test_and_{set,clear,change}_bit()
+  test_and_set_bit_lock()
+
+Barriers:
+
+  smp_mb__{before,after}_atomic()
+
+
+All RMW atomic operations have a '__' prefixed variant which is non-atomic.
+
+
+SEMANTICS
+---------
+
+Non-atomic ops:
+
+In particular __clear_bit_unlock() suffers the same issue as atomic_set(),
+which is why the generic version maps to clear_bit_unlock(), see atomic_t.txt.
+
+
+RMW ops:
+
+The test_and_{}_bit() operations return the original value of the bit.
+
+
+ORDERING
+--------
+
+Like with atomic_t, the rule of thumb is:
+
+ - non-RMW operations are unordered;
+
+ - RMW operations that have no return value are unordered;
+
+ - RMW operations that have a return value are fully ordered.
+
+Except for test_and_set_bit_lock() which has ACQUIRE semantics and
+clear_bit_unlock() which has RELEASE semantics.
+
+Since a platform only has a single means of achieving atomic operations
+the same barriers as for atomic_t are used, see atomic_t.txt.
+

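As an aside, here is a minimal, hypothetical sketch (not part of this series; my_flags, my_lock() and my_unlock() are invented names) tying the lock/unlock bitops above to their ordering: test_and_set_bit_lock() has ACQUIRE semantics on success and clear_bit_unlock() has RELEASE semantics, per the ORDERING rules in atomic_bitops.txt.

#include <linux/bitops.h>
#include <asm/processor.h>	/* cpu_relax() */

#define MY_LOCK_BIT	0

static unsigned long my_flags;

static void my_lock(void)
{
	/* Spin until the bit is observed clear and atomically set (ACQUIRE). */
	while (test_and_set_bit_lock(MY_LOCK_BIT, &my_flags))
		cpu_relax();
}

static void my_unlock(void)
{
	/* Clear the bit with RELEASE ordering. */
	clear_bit_unlock(MY_LOCK_BIT, &my_flags);
}
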
+ 242 - 0
Documentation/atomic_t.txt

@@ -0,0 +1,242 @@
+
+On atomic types (atomic_t, atomic64_t and atomic_long_t).
+
+The atomic type provides an interface to the architecture's means of atomic
+RMW operations between CPUs (atomic operations on MMIO are not supported and
+can lead to fatal traps on some platforms).
+
+API
+---
+
+The 'full' API consists of (atomic64_ and atomic_long_ prefixes omitted for
+brevity):
+
+Non-RMW ops:
+
+  atomic_read(), atomic_set()
+  atomic_read_acquire(), atomic_set_release()
+
+
+RMW atomic operations:
+
+Arithmetic:
+
+  atomic_{add,sub,inc,dec}()
+  atomic_{add,sub,inc,dec}_return{,_relaxed,_acquire,_release}()
+  atomic_fetch_{add,sub,inc,dec}{,_relaxed,_acquire,_release}()
+
+
+Bitwise:
+
+  atomic_{and,or,xor,andnot}()
+  atomic_fetch_{and,or,xor,andnot}{,_relaxed,_acquire,_release}()
+
+
+Swap:
+
+  atomic_xchg{,_relaxed,_acquire,_release}()
+  atomic_cmpxchg{,_relaxed,_acquire,_release}()
+  atomic_try_cmpxchg{,_relaxed,_acquire,_release}()
+
+
+Reference count (but please see refcount_t):
+
+  atomic_add_unless(), atomic_inc_not_zero()
+  atomic_sub_and_test(), atomic_dec_and_test()
+
+
+Misc:
+
+  atomic_inc_and_test(), atomic_add_negative()
+  atomic_dec_unless_positive(), atomic_inc_unless_negative()
+
+
+Barriers:
+
+  smp_mb__{before,after}_atomic()
+
+
+
+SEMANTICS
+---------
+
+Non-RMW ops:
+
+The non-RMW ops are (typically) regular LOADs and STOREs and are canonically
+implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and
+smp_store_release() respectively.
+
+The one detail to this is that atomic_set{}() should be observable to the RMW
+ops. That is:
+
+  C atomic-set
+
+  {
+    atomic_set(v, 1);
+  }
+
+  P1(atomic_t *v)
+  {
+    atomic_add_unless(v, 1, 0);
+  }
+
+  P2(atomic_t *v)
+  {
+    atomic_set(v, 0);
+  }
+
+  exists
+  (v=2)
+
+In this case we would expect the atomic_set() from CPU2 to either happen
+before the atomic_add_unless(), in which case that latter one would no-op, or
+_after_ in which case we'd overwrite its result. In no case is "2" a valid
+outcome.
+
+This is typically true on 'normal' platforms, where a regular competing STORE
+will invalidate a LL/SC or fail a CMPXCHG.
+
+The obvious case where this is not so is when we need to implement atomic ops
+with a lock:
+
+  CPU0						CPU1
+
+  atomic_add_unless(v, 1, 0);
+    lock();
+    ret = READ_ONCE(v->counter); // == 1
+						atomic_set(v, 0);
+    if (ret != u)				  WRITE_ONCE(v->counter, 0);
+      WRITE_ONCE(v->counter, ret + 1);
+    unlock();
+
+the typical solution is to then implement atomic_set{}() with atomic_xchg().
+
+
+RMW ops:
+
+These come in various forms:
+
+ - plain operations without return value: atomic_{}()
+
+ - operations which return the modified value: atomic_{}_return()
+
+   these are limited to the arithmetic operations because those are
+   reversible. Bitops are irreversible and therefore the modified value
+   is of dubious utility.
+
+ - operations which return the original value: atomic_fetch_{}()
+
+ - swap operations: xchg(), cmpxchg() and try_cmpxchg()
+
+ - misc; the special purpose operations that are commonly used and would,
+   given the interface, normally be implemented using (try_)cmpxchg loops but
+   are time critical and can, (typically) on LL/SC architectures, be more
+   efficiently implemented.
+
+All these operations are SMP atomic; that is, the operations (for a single
+atomic variable) can be fully ordered and no intermediate state is lost or
+visible.
+
+
+ORDERING  (go read memory-barriers.txt first)
+--------
+
+The rule of thumb:
+
+ - non-RMW operations are unordered;
+
+ - RMW operations that have no return value are unordered;
+
+ - RMW operations that have a return value are fully ordered;
+
+ - RMW operations that are conditional are unordered on FAILURE,
+   otherwise the above rules apply.
+
+Except of course when an operation has an explicit ordering like:
+
+ {}_relaxed: unordered
+ {}_acquire: the R of the RMW (or atomic_read) is an ACQUIRE
+ {}_release: the W of the RMW (or atomic_set)  is a  RELEASE
+
+Where 'unordered' is against other memory locations. Address dependencies are
+not defeated.
+
+Fully ordered primitives are ordered against everything prior and everything
+subsequent. Therefore a fully ordered primitive is like having an smp_mb()
+before and an smp_mb() after the primitive.
+
+
+The barriers:
+
+  smp_mb__{before,after}_atomic()
+
+only apply to the RMW ops and can be used to augment/upgrade the ordering
+inherent to the used atomic op. These barriers provide a full smp_mb().
+
+These helper barriers exist because architectures have varying implicit
+ordering on their SMP atomic primitives. For example our TSO architectures
+provide fully ordered atomics and these barriers are no-ops.
+
+Thus:
+
+  atomic_fetch_add();
+
+is equivalent to:
+
+  smp_mb__before_atomic();
+  atomic_fetch_add_relaxed();
+  smp_mb__after_atomic();
+
+However the atomic_fetch_add() might be implemented more efficiently.
+
+Further, while something like:
+
+  smp_mb__before_atomic();
+  atomic_dec(&X);
+
+is a 'typical' RELEASE pattern, the barrier is strictly stronger than
+a RELEASE. Similarly for something like:
+
+  atomic_inc(&X);
+  smp_mb__after_atomic();
+
+is an ACQUIRE pattern (though very much not typical), but again the barrier is
+strictly stronger than ACQUIRE. As illustrated:
+
+  C strong-acquire
+
+  {
+  }
+
+  P1(int *x, atomic_t *y)
+  {
+    r0 = READ_ONCE(*x);
+    smp_rmb();
+    r1 = atomic_read(y);
+  }
+
+  P2(int *x, atomic_t *y)
+  {
+    atomic_inc(y);
+    smp_mb__after_atomic();
+    WRITE_ONCE(*x, 1);
+  }
+
+  exists
+  (r0=1 /\ r1=0)
+
+This should not happen; but a hypothetical atomic_inc_acquire() --
+(void)atomic_fetch_inc_acquire() for instance -- would allow the outcome,
+since then:
+
+  P1			P2
+
+			t = LL.acq *y (0)
+			t++;
+			*x = 1;
+  r0 = *x (1)
+  RMB
+  r1 = *y (0)
+			SC *y, t;
+
+is allowed.

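To connect the ORDERING rules above to real code, here is a small hedged sketch (invented for illustration, not from this series): atomic_dec_and_test() returns a value and is therefore fully ordered, while plain atomic_dec() is unordered and can be upgraded with smp_mb__before_atomic() where stronger ordering is required.

#include <linux/atomic.h>
#include <linux/slab.h>

struct obj {
	atomic_t refs;
	int payload;
};

static void obj_put(struct obj *o)
{
	/* Value-returning RMW: fully ordered, no extra barrier needed. */
	if (atomic_dec_and_test(&o->refs))
		kfree(o);
}

static void obj_mark_dead_and_drop(struct obj *o)
{
	o->payload = -1;
	/* atomic_dec() is unordered; upgrade it to a full barrier. */
	smp_mb__before_atomic();
	atomic_dec(&o->refs);
}
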
+ 874 - 0
Documentation/locking/crossrelease.txt

@@ -0,0 +1,874 @@
+Crossrelease
+============
+
+Started by Byungchul Park <byungchul.park@lge.com>
+
+Contents:
+
+ (*) Background
+
+     - What causes deadlock
+     - How lockdep works
+
+ (*) Limitation
+
+     - Limit lockdep
+     - Pros from the limitation
+     - Cons from the limitation
+     - Relax the limitation
+
+ (*) Crossrelease
+
+     - Introduce crossrelease
+     - Introduce commit
+
+ (*) Implementation
+
+     - Data structures
+     - How crossrelease works
+
+ (*) Optimizations
+
+     - Avoid duplication
+     - Lockless for hot paths
+
+ (*) APPENDIX A: What lockdep does to work aggressively
+
+ (*) APPENDIX B: How to avoid adding false dependencies
+
+
+==========
+Background
+==========
+
+What causes deadlock
+--------------------
+
+A deadlock occurs when a context is waiting for an event to happen,
+which is impossible because another (or the same) context that can trigger the
+event is also waiting for another (or the same) event to happen, which is
+also impossible for the same reason.
+
+For example:
+
+   A context going to trigger event C is waiting for event A to happen.
+   A context going to trigger event A is waiting for event B to happen.
+   A context going to trigger event B is waiting for event C to happen.
+
+A deadlock occurs when these three wait operations run at the same time,
+because event C cannot be triggered if event A does not happen, which in
+turn cannot be triggered if event B does not happen, which in turn
+cannot be triggered if event C does not happen. After all, no event can
+be triggered since any of them never meets its condition to wake up.
+
+A dependency might exist between two waiters and a deadlock might happen
+due to an incorrect relationship between dependencies. Thus, we must
+define what a dependency is first. A dependency exists between them if:
+
+   1. There are two waiters waiting for each event at a given time.
+   2. The only way to wake up each waiter is to trigger its event.
+   3. Whether one can be woken up depends on whether the other can.
+
+Each wait in the example creates its dependency like:
+
+   Event C depends on event A.
+   Event A depends on event B.
+   Event B depends on event C.
+
+   NOTE: Precisely speaking, a dependency is one between whether a
+   waiter for an event can be woken up and whether another waiter for
+   another event can be woken up. However from now on, we will describe
+   a dependency as if it's one between an event and another event for
+   simplicity.
+
+And they form circular dependencies like:
+
+    -> C -> A -> B -
+   /                \
+   \                /
+    ----------------
+
+   where 'A -> B' means that event A depends on event B.
+
+Such circular dependencies lead to a deadlock since no waiter can meet
+its condition to wake up as described.
+
+CONCLUSION
+
+Circular dependencies cause a deadlock.
+
+
+How lockdep works
+-----------------
+
+Lockdep tries to detect a deadlock by checking dependencies created by
+lock operations, acquire and release. Waiting for a lock corresponds to
+waiting for an event, and releasing a lock corresponds to triggering an
+event in the previous section.
+
+In short, lockdep does:
+
+   1. Detect a new dependency.
+   2. Add the dependency into a global graph.
+   3. Check if that makes dependencies circular.
+   4. Report a deadlock or its possibility if so.
+
+For example, consider a graph built by lockdep that looks like:
+
+   A -> B -
+           \
+            -> E
+           /
+   C -> D -
+
+   where A, B,..., E are different lock classes.
+
+Lockdep will add a dependency into the graph on detection of a new
+dependency. For example, it will add a dependency 'E -> C' when a new
+dependency between lock E and lock C is detected. Then the graph will be:
+
+       A -> B -
+               \
+                -> E -
+               /      \
+    -> C -> D -        \
+   /                   /
+   \                  /
+    ------------------
+
+   where A, B,..., E are different lock classes.
+
+This graph contains a subgraph which demonstrates circular dependencies:
+
+                -> E -
+               /      \
+    -> C -> D -        \
+   /                   /
+   \                  /
+    ------------------
+
+   where C, D and E are different lock classes.
+
+This is the condition under which a deadlock might occur. Lockdep
+reports it on detection after adding a new dependency. This is how
+lockdep works.
+
+CONCLUSION
+
+Lockdep detects a deadlock or its possibility by checking if circular
+dependencies were created after adding each new dependency.
+
+
+==========
+Limitation
+==========
+
+Limit lockdep
+-------------
+
+By limiting lockdep to work only on typical locks, e.g. spin locks and
+mutexes, which are released within the acquire context, the
+implementation becomes simple but its capacity for detection becomes
+limited. Let's check the pros and cons in the next sections.
+
+
+Pros from the limitation
+------------------------
+
+Given the limitation, when acquiring a lock, the locks already in held_locks
+cannot be released if the context cannot acquire the new lock and thus has to
+wait for it, which means all waiters for the locks in held_locks are stuck.
+This is exactly the case that creates dependencies between each lock in
+held_locks and the lock to acquire.
+
+For example:
+
+   CONTEXT X
+   ---------
+   acquire A
+   acquire B /* Add a dependency 'A -> B' */
+   release B
+   release A
+
+   where A and B are different lock classes.
+
+When acquiring lock A, the held_locks of CONTEXT X is empty thus no
+dependency is added. But when acquiring lock B, lockdep detects and adds
+a new dependency 'A -> B' between lock A in the held_locks and lock B.
+They can be simply added whenever acquiring each lock.
+
+And data required by lockdep exists in a local structure, held_locks
+embedded in task_struct. Forcing to access the data within the context,
+lockdep can avoid racy problems without explicit locks while handling
+the local data.
+
+Lastly, lockdep only needs to keep locks currently being held, to build
+a dependency graph. However, relaxing the limitation, it needs to keep
+even locks already released, because a decision whether they created
+dependencies might be long-deferred.
+
+To sum up, we can expect several advantages from the limitation:
+
+   1. Lockdep can easily identify a dependency when acquiring a lock.
+   2. Races are avoidable while accessing local locks in a held_locks.
+   3. Lockdep only needs to keep locks currently being held.
+
+CONCLUSION
+
+Given the limitation, the implementation becomes simple and efficient.
+
+
+Cons from the limitation
+------------------------
+
+Given the limitation, lockdep is applicable only to typical locks. For
+example, page locks for page access or completions for synchronization
+cannot work with lockdep.
+
+Can we detect deadlocks below, under the limitation?
+
+Example 1:
+
+   CONTEXT X	   CONTEXT Y	   CONTEXT Z
+   ---------	   ---------	   ----------
+		   mutex_lock A
+   lock_page B
+		   lock_page B
+				   mutex_lock A /* DEADLOCK */
+				   unlock_page B held by X
+		   unlock_page B
+		   mutex_unlock A
+				   mutex_unlock A
+
+   where A and B are different lock classes.
+
+No, we cannot.
+
+Example 2:
+
+   CONTEXT X		   CONTEXT Y
+   ---------		   ---------
+			   mutex_lock A
+   mutex_lock A
+			   wait_for_complete B /* DEADLOCK */
+   complete B
+			   mutex_unlock A
+   mutex_unlock A
+
+   where A is a lock class and B is a completion variable.
+
+No, we cannot.
+
+CONCLUSION
+
+Given the limitation, lockdep cannot detect a deadlock or its
+possibility caused by page locks or completions.
+
+
+Relax the limitation
+--------------------
+
+Under the limitation, things to create dependencies are limited to
+typical locks. However, synchronization primitives like page locks and
+completions, which are allowed to be released in any context, also
+create dependencies and can cause a deadlock. So lockdep should track
+these locks to do a better job. We have to relax the limitation for
+these locks to work with lockdep.
+
+Detecting dependencies is very important for lockdep to work because
+adding a dependency means adding an opportunity to check whether it
+causes a deadlock. The more dependencies lockdep adds, the more
+thoroughly it works. Thus lockdep has to do its best to detect and add as
+many true dependencies into a graph as possible.
+
+For example, considering only typical locks, lockdep builds a graph like:
+
+   A -> B -
+           \
+            -> E
+           /
+   C -> D -
+
+   where A, B,..., E are different lock classes.
+
+On the other hand, under the relaxation, additional dependencies might
+be created and added. Assuming additional 'FX -> C' and 'E -> GX' are
+added thanks to the relaxation, the graph will be:
+
+         A -> B -
+                 \
+                  -> E -> GX
+                 /
+   FX -> C -> D -
+
+   where A, B,..., E, FX and GX are different lock classes, and a suffix
+   'X' is added on non-typical locks.
+
+The latter graph gives us more chances to check circular dependencies
+than the former. However, it might suffer performance degradation since
+relaxing the limitation, which is what keeps lockdep's design and
+implementation efficient, inevitably introduces some overhead. So lockdep
+should provide two options, strong detection and efficient detection.
+
+Choosing efficient detection:
+
+   Lockdep works with only locks restricted to be released within the
+   acquire context. However, lockdep works efficiently.
+
+Choosing strong detection:
+
+   Lockdep works with all synchronization primitives. However, lockdep
+   suffers performance degradation.
+
+CONCLUSION
+
+Relaxing the limitation, lockdep can add additional dependencies giving
+additional opportunities to check circular dependencies.
+
+
+============
+Crossrelease
+============
+
+Introduce crossrelease
+----------------------
+
+In order to allow lockdep to handle the additional dependencies created by
+locks that might be released in any context, namely 'crosslocks', we have to
+be able to identify the dependencies created by them. The proposed
+'crossrelease' feature provides a way to do that.
+
+Crossrelease feature has to do:
+
+   1. Identify dependencies created by crosslocks.
+   2. Add the dependencies into a dependency graph.
+
+That's all. Once a meaningful dependency is added into graph, then
+lockdep would work with the graph as it did. The most important thing
+crossrelease feature has to do is to correctly identify and add true
+dependencies into the global graph.
+
+A dependency e.g. 'A -> B' can be identified only in the A's release
+context because a decision required to identify the dependency can be
+made only in the release context. That is to decide whether A can be
+released so that a waiter for A can be woken up. It cannot be made in
+other than the A's release context.
+
+This does not matter for typical locks because each acquire context is the same as
+its release context, thus lockdep can decide whether a lock can be
+released in the acquire context. However for crosslocks, lockdep cannot
+make the decision in the acquire context but has to wait until the
+release context is identified.
+
+Therefore, deadlocks caused by crosslocks cannot be detected at the moment
+they happen, because they cannot be identified until the crosslocks are
+released. However, deadlock possibilities can still be detected, and that
+is very valuable. See the 'APPENDIX A' section to see why.
+
+CONCLUSION
+
+Using crossrelease feature, lockdep can work with what might be released
+in any context, namely crosslock.
+
+
+Introduce commit
+----------------
+
+Since crossrelease defers the work adding true dependencies of
+crosslocks until they are actually released, crossrelease has to queue
+all acquisitions which might create dependencies with the crosslocks.
+Then it identifies dependencies using the queued data in batches at a
+proper time. We call it 'commit'.
+
+There are four types of dependencies:
+
+1. TT type: 'typical lock A -> typical lock B'
+
+   Just when acquiring B, lockdep can see it's in the A's release
+   context. So the dependency between A and B can be identified
+   immediately. Commit is unnecessary.
+
+2. TC type: 'typical lock A -> crosslock BX'
+
+   Just when acquiring BX, lockdep can see it's in the A's release
+   context. So the dependency between A and BX can be identified
+   immediately. Commit is unnecessary, too.
+
+3. CT type: 'crosslock AX -> typical lock B'
+
+   When acquiring B, lockdep cannot identify the dependency because
+   there's no way to know if it's in the AX's release context. It has
+   to wait until the decision can be made. Commit is necessary.
+
+4. CC type: 'crosslock AX -> crosslock BX'
+
+   When acquiring BX, lockdep cannot identify the dependency because
+   there's no way to know if it's in the AX's release context. It has
+   to wait until the decision can be made. Commit is necessary.
+   But, handling CC type is not implemented yet. It's a future work.
+
+Lockdep can work without commit for typical locks, but commit step is
+necessary once crosslocks are involved. Introducing commit, lockdep
+performs three steps. What lockdep does in each step is:
+
+1. Acquisition: For typical locks, lockdep does what it originally did
+   and queues the lock so that CT type dependencies can be checked using
+   it at the commit step. For crosslocks, it saves data which will be
+   used at the commit step and increases a reference count for it.
+
+2. Commit: No action is required for typical locks. For crosslocks,
+   lockdep adds CT type dependencies using the data saved at the
+   acquisition step.
+
+3. Release: No changes are required for typical locks. When a crosslock
+   is released, it decreases a reference count for it.
+
+CONCLUSION
+
+Crossrelease introduces commit step to handle dependencies of crosslocks
+in batches at a proper time.
+
+
+==============
+Implementation
+==============
+
+Data structures
+---------------
+
+Crossrelease introduces two main data structures.
+
+1. hist_lock
+
+   This is an array embedded in task_struct, for keeping lock history so
+   that dependencies can be added using them at the commit step. Since
+   it's local data, it can be accessed locklessly in the owner context.
+   The array is filled at the acquisition step and consumed at the
+   commit step. And it's managed in a circular manner.
+
+2. cross_lock
+
+   One per lockdep_map exists. This is for keeping data of crosslocks
+   and used at the commit step.
+
+
+How crossrelease works
+----------------------
+
+The key to how crossrelease works is to defer the necessary work to an
+appropriate point in time and perform it all at once at the commit step.
+Let's take a look with examples step by step, starting from how lockdep
+works without crossrelease for typical locks.
+
+   acquire A /* Push A onto held_locks */
+   acquire B /* Push B onto held_locks and add 'A -> B' */
+   acquire C /* Push C onto held_locks and add 'B -> C' */
+   release C /* Pop C from held_locks */
+   release B /* Pop B from held_locks */
+   release A /* Pop A from held_locks */
+
+   where A, B and C are different lock classes.
+
+   NOTE: This document assumes that readers already understand how
+   lockdep works without crossrelease thus omits details. But there's
+   one thing to note. Lockdep pretends to pop a lock from held_locks
+   when releasing it. But it's subtly different from the original pop
+   operation because lockdep allows entries other than the top to be popped.
+
+In this case, lockdep adds 'the top of held_locks -> the lock to acquire'
+dependency every time it acquires a lock.
+
+After adding 'A -> B', a dependency graph will be:
+
+   A -> B
+
+   where A and B are different lock classes.
+
+And after adding 'B -> C', the graph will be:
+
+   A -> B -> C
+
+   where A, B and C are different lock classes.
+
+Let's perform the commit step even for typical locks to add dependencies.
+Of course, commit step is not necessary for them, however, it would work
+well because this is a more general way.
+
+   acquire A
+   /*
+    * Queue A into hist_locks
+    *
+    * In hist_locks: A
+    * In graph: Empty
+    */
+
+   acquire B
+   /*
+    * Queue B into hist_locks
+    *
+    * In hist_locks: A, B
+    * In graph: Empty
+    */
+
+   acquire C
+   /*
+    * Queue C into hist_locks
+    *
+    * In hist_locks: A, B, C
+    * In graph: Empty
+    */
+
+   commit C
+   /*
+    * Add 'C -> ?'
+    * Answer the following to decide '?'
+    * What has been queued since acquire C: Nothing
+    *
+    * In hist_locks: A, B, C
+    * In graph: Empty
+    */
+
+   release C
+
+   commit B
+   /*
+    * Add 'B -> ?'
+    * Answer the following to decide '?'
+    * What has been queued since acquire B: C
+    *
+    * In hist_locks: A, B, C
+    * In graph: 'B -> C'
+    */
+
+   release B
+
+   commit A
+   /*
+    * Add 'A -> ?'
+    * Answer the following to decide '?'
+    * What has been queued since acquire A: B, C
+    *
+    * In hist_locks: A, B, C
+    * In graph: 'B -> C', 'A -> B', 'A -> C'
+    */
+
+   release A
+
+   where A, B and C are different lock classes.
+
+In this case, dependencies are added at the commit step as described.
+
+After commits for A, B and C, the graph will be:
+
+   A -> B -> C
+
+   where A, B and C are different lock classes.
+
+   NOTE: A dependency 'A -> C' is optimized out.
+
+We can see that the former graph, built without the commit step, is the same
+as the latter graph built using commit steps. Of course the former way
+finishes building the graph earlier, which means we can detect a deadlock or
+its possibility sooner. So the former way would be preferred when possible.
+But we cannot avoid using the latter way for crosslocks.
+
+Let's look at how commit steps work for crosslocks. In this case, the
+commit step is performed only on the crosslock BX for real. And it assumes
+that the BX release context is different from the BX acquire context.
+
+   BX RELEASE CONTEXT		   BX ACQUIRE CONTEXT
+   ------------------		   ------------------
+				   acquire A
+				   /*
+				    * Push A onto held_locks
+				    * Queue A into hist_locks
+				    *
+				    * In held_locks: A
+				    * In hist_locks: A
+				    * In graph: Empty
+				    */
+
+				   acquire BX
+				   /*
+				    * Add 'the top of held_locks -> BX'
+				    *
+				    * In held_locks: A
+				    * In hist_locks: A
+				    * In graph: 'A -> BX'
+				    */
+
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   It must be guaranteed that the following operations are seen after
+   acquiring BX globally. It can be done by things like barrier.
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+   acquire C
+   /*
+    * Push C onto held_locks
+    * Queue C into hist_locks
+    *
+    * In held_locks: C
+    * In hist_locks: C
+    * In graph: 'A -> BX'
+    */
+
+   release C
+   /*
+    * Pop C from held_locks
+    *
+    * In held_locks: Empty
+    * In hist_locks: C
+    * In graph: 'A -> BX'
+    */
+				   acquire D
+				   /*
+				    * Push D onto held_locks
+				    * Queue D into hist_locks
+				    * Add 'the top of held_locks -> D'
+				    *
+				    * In held_locks: A, D
+				    * In hist_locks: A, D
+				    * In graph: 'A -> BX', 'A -> D'
+				    */
+   acquire E
+   /*
+    * Push E onto held_locks
+    * Queue E into hist_locks
+    *
+    * In held_locks: E
+    * In hist_locks: C, E
+    * In graph: 'A -> BX', 'A -> D'
+    */
+
+   release E
+   /*
+    * Pop E from held_locks
+    *
+    * In held_locks: Empty
+    * In hist_locks: D, E
+    * In graph: 'A -> BX', 'A -> D'
+    */
+				   release D
+				   /*
+				    * Pop D from held_locks
+				    *
+				    * In held_locks: A
+				    * In hist_locks: A, D
+				    * In graph: 'A -> BX', 'A -> D'
+				    */
+   commit BX
+   /*
+    * Add 'BX -> ?'
+    * What has been queued since acquire BX: C, E
+    *
+    * In held_locks: Empty
+    * In hist_locks: D, E
+    * In graph: 'A -> BX', 'A -> D',
+    *           'BX -> C', 'BX -> E'
+    */
+
+   release BX
+   /*
+    * In held_locks: Empty
+    * In hist_locks: D, E
+    * In graph: 'A -> BX', 'A -> D',
+    *           'BX -> C', 'BX -> E'
+    */
+				   release A
+				   /*
+				    * Pop A from held_locks
+				    *
+				    * In held_locks: Empty
+				    * In hist_locks: A, D
+				    * In graph: 'A -> BX', 'A -> D',
+				    *           'BX -> C', 'BX -> E'
+				    */
+
+   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
+   added on crosslocks.
+
+Crossrelease considers all acquisitions after acquiring BX as
+candidates which might create dependencies with BX. True dependencies
+will be determined when identifying the release context of BX. Meanwhile,
+all typical locks are queued so that they can be used at the commit step.
+And then two dependencies 'BX -> C' and 'BX -> E' are added at the
+commit step when identifying the release context.
+
+The final graph will be, with crossrelease:
+
+               -> C
+              /
+       -> BX -
+      /       \
+   A -         -> E
+      \
+       -> D
+
+   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
+   added on crosslocks.
+
+However, the final graph will be, without crossrelease:
+
+   A -> D
+
+   where A and D are different lock classes.
+
+The former graph has three more dependencies, 'A -> BX', 'BX -> C' and
+'BX -> E' giving additional opportunities to check if they cause
+deadlocks. This way lockdep can detect a deadlock or its possibility
+caused by crosslocks.
+
+CONCLUSION
+
+We checked how crossrelease works with several examples.
+
+
+=============
+Optimizations
+=============
+
+Avoid duplication
+-----------------
+
+Crossrelease feature uses a cache like what lockdep already uses for
+dependency chains, but this time it's for caching CT type dependencies.
+Once a dependency is cached, the same dependency will never be added again.
+
+
+Lockless for hot paths
+----------------------
+
+To keep all locks for later use at the commit step, crossrelease adopts
+a local array embedded in task_struct, which makes access to the data
+lockless by forcing it to happen only within the owner context. It's
+like how lockdep handles held_locks. A lockless implementation is important
+since typical locks are very frequently acquired and released.
+
+
+==================================================
+APPENDIX A: What lockdep does to work aggressively
+==================================================
+
+A deadlock actually occurs when all wait operations creating circular
+dependencies run at the same time. Even though they don't, a potential
+deadlock exists if the problematic dependencies exist. Thus it's
+meaningful to detect not only an actual deadlock but also its potential
+possibility. The latter is rather more valuable. When a deadlock actually
+occurs, we can identify what happens in the system by some means or other
+even without lockdep. However, there's no way to detect the possibility
+without lockdep unless the whole code is analysed by hand, which is terrible.
+Lockdep does both, and crossrelease only focuses on the latter.
+
+Whether or not a deadlock actually occurs depends on several factors.
+For example, what order contexts are switched in is a factor. Assuming
+circular dependencies exist, a deadlock would occur when contexts are
+switched so that all wait operations creating the dependencies run
+simultaneously. Thus to detect a deadlock possibility even in the case
+that it has not occurred yet, lockdep should consider all possible
+combinations of dependencies, trying to:
+
+1. Use a global dependency graph.
+
+   Lockdep combines all dependencies into one global graph and uses them,
+   regardless of which context generates them or what order contexts are
+   switched in. Only the aggregated dependencies are considered, so they are
+   prone to be circular if a problem exists.
+
+2. Check dependencies between classes instead of instances.
+
+   What actually causes a deadlock are instances of lock. However,
+   lockdep checks dependencies between classes instead of instances.
+   This way lockdep can detect a deadlock which has not happened but
+   might happen in the future with other instances of the same class.
+
+3. Assume all acquisitions lead to waiting.
+
+   Although locks might be acquired without waiting, and it is the waiting
+   that actually creates dependencies, lockdep assumes all acquisitions lead
+   to waiting since it might be true at some time or another.
+
+CONCLUSION
+
+Lockdep detects not only an actual deadlock but also its possibility,
+and the latter is more valuable.
+
+
+==================================================
+APPENDIX B: How to avoid adding false dependencies
+==================================================
+
+Remind what a dependency is. A dependency exists if:
+
+   1. There are two waiters waiting for each event at a given time.
+   2. The only way to wake up each waiter is to trigger its event.
+   3. Whether one can be woken up depends on whether the other can.
+
+For example:
+
+   acquire A
+   acquire B /* A dependency 'A -> B' exists */
+   release B
+   release A
+
+   where A and B are different lock classes.
+
+A dependency 'A -> B' exists since:
+
+   1. A waiter for A and a waiter for B might exist when acquiring B.
+   2. The only way to wake up each is to release what it waits for.
+   3. Whether the waiter for A can be woken up depends on whether the
+      other can. IOW, TASK X cannot release A if it fails to acquire B.
+
+For another example:
+
+   TASK X			   TASK Y
+   ------			   ------
+				   acquire AX
+   acquire B /* A dependency 'AX -> B' exists */
+   release B
+   release AX held by Y
+
+   where AX and B are different lock classes, and a suffix 'X' is added
+   on crosslocks.
+
+Even in this case involving crosslocks, the same rule can be applied. A
+dependency 'AX -> B' exists since:
+
+   1. A waiter for AX and a waiter for B might exist when acquiring B.
+   2. The only way to wake up each is to release what it waits for.
+   3. Whether the waiter for AX can be woken up depends on whether the
+      other can. IOW, TASK X cannot release AX if it fails to acquire B.
+
+Let's take a look at a more complicated example:
+
+   TASK X			   TASK Y
+   ------			   ------
+   acquire B
+   release B
+   fork Y
+				   acquire AX
+   acquire C /* A dependency 'AX -> C' exists */
+   release C
+   release AX held by Y
+
+   where AX, B and C are different lock classes, and a suffix 'X' is
+   added on crosslocks.
+
+Does a dependency 'AX -> B' exist? Nope.
+
+Two waiters are essential to create a dependency. However, waiters for
+AX and B to create 'AX -> B' cannot exist at the same time in this
+example. Thus the dependency 'AX -> B' cannot be created.
+
+It would be ideal if the full set of true dependencies could be considered.
+But we can be sure of nothing but what actually happened. Relying on what
+actually happens at runtime, we can anyway add only true dependencies, though
+they might be a subset of all the true ones. It's similar to how lockdep works
+for typical locks. There might be more true dependencies than what
+lockdep has detected at runtime. Lockdep has no choice but to rely on
+what actually happens. Crossrelease also relies on it.
+
+CONCLUSION
+
+Relying on what actually happens, lockdep can avoid adding false
+dependencies.

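The 'Example 2' deadlock above is exactly the kind of case the new completion annotations are meant to catch. A hypothetical sketch of that scenario (context_x(), context_y() and the variables are invented for illustration):

#include <linux/mutex.h>
#include <linux/completion.h>

static DEFINE_MUTEX(a);
static DECLARE_COMPLETION(b);

static void context_x(void)
{
	mutex_lock(&a);		/* blocks if context_y() already holds 'a' */
	complete(&b);		/* never reached -> context_y() waits forever */
	mutex_unlock(&a);
}

static void context_y(void)
{
	mutex_lock(&a);
	wait_for_completion(&b);	/* DEADLOCK: 'b' needs 'a' to be released */
	mutex_unlock(&a);
}
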
+ 8 - 93
Documentation/memory-barriers.txt

@@ -498,11 +498,11 @@ And a couple of implicit varieties:
     This means that ACQUIRE acts as a minimal "acquire" operation and
     RELEASE acts as a minimal "release" operation.
 
-A subset of the atomic operations described in core-api/atomic_ops.rst have
-ACQUIRE and RELEASE variants in addition to fully-ordered and relaxed (no
-barrier semantics) definitions.  For compound atomics performing both a load
-and a store, ACQUIRE semantics apply only to the load and RELEASE semantics
-apply only to the store portion of the operation.
+A subset of the atomic operations described in atomic_t.txt have ACQUIRE and
+RELEASE variants in addition to fully-ordered and relaxed (no barrier
+semantics) definitions.  For compound atomics performing both a load and a
+store, ACQUIRE semantics apply only to the load and RELEASE semantics apply
+only to the store portion of the operation.
 
 Memory barriers are only required where there's a possibility of interaction
 between two CPUs or between a CPU and a device.  If it can be guaranteed that
@@ -1883,8 +1883,7 @@ There are some more advanced barrier functions:
     This makes sure that the death mark on the object is perceived to be set
     *before* the reference counter is decremented.
 
-     See Documentation/core-api/atomic_ops.rst for more information.  See the
-     "Atomic operations" subsection for information on where to use these.
+     See Documentation/atomic_{t,bitops}.txt for more information.
 
 
  (*) lockless_dereference();
@@ -1989,10 +1988,7 @@ for each construct.  These operations all imply certain barriers:
     ACQUIRE operation has completed.
 
     Memory operations issued before the ACQUIRE may be completed after
-     the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
-     combined with a following ACQUIRE, orders prior stores against
-     subsequent loads and stores.  Note that this is weaker than smp_mb()!
-     The smp_mb__before_spinlock() primitive is free on many architectures.
+     the ACQUIRE operation has completed.
 
  (2) RELEASE operation implication:
 
@@ -2510,88 +2506,7 @@ operations are noted specially as some of them imply full memory barriers and
 some don't, but they're very heavily relied on as a group throughout the
 kernel.
 
-Any atomic operation that modifies some state in memory and returns information
-about the state (old or new) implies an SMP-conditional general memory barrier
-(smp_mb()) on each side of the actual operation (with the exception of
-explicit lock operations, described later).  These include:
-
-	xchg();
-	atomic_xchg();			atomic_long_xchg();
-	atomic_inc_return();		atomic_long_inc_return();
-	atomic_dec_return();		atomic_long_dec_return();
-	atomic_add_return();		atomic_long_add_return();
-	atomic_sub_return();		atomic_long_sub_return();
-	atomic_inc_and_test();		atomic_long_inc_and_test();
-	atomic_dec_and_test();		atomic_long_dec_and_test();
-	atomic_sub_and_test();		atomic_long_sub_and_test();
-	atomic_add_negative();		atomic_long_add_negative();
-	test_and_set_bit();
-	test_and_clear_bit();
-	test_and_change_bit();
-
-	/* when succeeds */
-	cmpxchg();
-	atomic_cmpxchg();		atomic_long_cmpxchg();
-	atomic_add_unless();		atomic_long_add_unless();
-
-These are used for such things as implementing ACQUIRE-class and RELEASE-class
-operations and adjusting reference counters towards object destruction, and as
-such the implicit memory barrier effects are necessary.
-
-
-The following operations are potential problems as they do _not_ imply memory
-barriers, but might be used for implementing such things as RELEASE-class
-operations:
-
-	atomic_set();
-	set_bit();
-	clear_bit();
-	change_bit();
-
-With these the appropriate explicit memory barrier should be used if necessary
-(smp_mb__before_atomic() for instance).
-
-
-The following also do _not_ imply memory barriers, and so may require explicit
-memory barriers under some circumstances (smp_mb__before_atomic() for
-instance):
-
-	atomic_add();
-	atomic_sub();
-	atomic_inc();
-	atomic_dec();
-
-If they're used for statistics generation, then they probably don't need memory
-barriers, unless there's a coupling between statistical data.
-
-If they're used for reference counting on an object to control its lifetime,
-they probably don't need memory barriers because either the reference count
-will be adjusted inside a locked section, or the caller will already hold
-sufficient references to make the lock, and thus a memory barrier unnecessary.
-
-If they're used for constructing a lock of some description, then they probably
-do need memory barriers as a lock primitive generally has to do things in a
-specific order.
-
-Basically, each usage case has to be carefully considered as to whether memory
-barriers are needed or not.
-
-The following operations are special locking primitives:
-
-	test_and_set_bit_lock();
-	clear_bit_unlock();
-	__clear_bit_unlock();
-
-These implement ACQUIRE-class and RELEASE-class operations.  These should be
-used in preference to other operations when implementing locking primitives,
-because their implementations can be optimised on many architectures.
-
-[!] Note that special memory barrier primitives are available for these
-situations because on some CPUs the atomic instructions used imply full memory
-barriers, and so barrier instructions are superfluous in conjunction with them,
-and in such cases the special barrier primitives will be no-ops.
-
-See Documentation/core-api/atomic_ops.rst for more information.
+See Documentation/atomic_t.txt for more information.
 
 
 ACCESSING DEVICES

+ 20 - 0
Documentation/static-keys.txt

@@ -149,6 +149,26 @@ static_branch_inc(), will change the branch back to true. Likewise, if the
 key is initialized false, a 'static_branch_inc()', will change the branch to
 true. And then a 'static_branch_dec()', will again make the branch false.
 
+The state and the reference count can be retrieved with 'static_key_enabled()'
+and 'static_key_count()'.  In general, if you use these functions, they
+should be protected with the same mutex used around the enable/disable
+or increment/decrement function.
+
+Note that switching branches results in some locks being taken,
+particularly the CPU hotplug lock (in order to avoid races against
+CPUs being brought into the kernel whilst the kernel is getting
+patched). Calling the static key API from within a hotplug notifier is
+thus a sure deadlock recipe. In order to still allow use of the
+functionality, the following functions are provided:
+
+	static_key_enable_cpuslocked()
+	static_key_disable_cpuslocked()
+	static_branch_enable_cpuslocked()
+	static_branch_disable_cpuslocked()
+
+These functions are *not* general purpose, and must only be used when
+you really know that you're in the above context, and no other.
+
 Where an array of keys is required, it can be defined as::
 
 	DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);

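A hedged sketch of the *_cpuslocked API from the text above, for a caller that already holds the CPU hotplug lock (the key name and the hotplug callback are invented for illustration):

#include <linux/jump_label.h>
#include <linux/cpuhotplug.h>

static DEFINE_STATIC_KEY_FALSE(my_feature_key);

/* CPU hotplug callback: the hotplug lock is already held here, so the
 * plain static_branch_enable() would deadlock; use the cpuslocked variant. */
static int my_feature_online(unsigned int cpu)
{
	static_branch_enable_cpuslocked(&my_feature_key);
	return 0;
}

static bool my_feature_active(void)
{
	return static_branch_unlikely(&my_feature_key);
}
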
+ 1 - 4
Documentation/translations/ko_KR/memory-barriers.txt

@@ -1956,10 +1956,7 @@ MMIO 쓰기 배리어
      뒤에 완료됩니다.
 
      ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에
-     완료될 수 있습니다.  smp_mb__before_spinlock() 뒤에 ACQUIRE 가 실행되는
-     코드 블록은 블록 앞의 스토어를 블록 뒤의 로드와 스토어에 대해 순서
-     맞춥니다.  이건 smp_mb() 보다 완화된 것임을 기억하세요!  많은 아키텍쳐에서
-     smp_mb__before_spinlock() 은 사실 아무일도 하지 않습니다.
+     완료될 수 있습니다.
 
  (2) RELEASE 오퍼레이션의 영향:
 

+ 12 - 0
arch/Kconfig

@@ -931,6 +931,18 @@ config STRICT_MODULE_RWX
 config ARCH_WANT_RELAX_ORDER
 	bool
 
+config ARCH_HAS_REFCOUNT
+	bool
+	help
+	  An architecture selects this when it has implemented refcount_t
+	  using open coded assembly primitives that provide an optimized
+	  refcount_t implementation, possibly at the expense of some full
+	  refcount state checks of CONFIG_REFCOUNT_FULL=y.
+
+	  The refcount overflow check behavior, however, must be retained.
+	  Catching overflows is the primary security concern for protecting
+	  against bugs in reference counts.
+
 config REFCOUNT_FULL
 	bool "Perform full reference count validation at the expense of speed"
 	help

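The ARCH_HAS_REFCOUNT option above gates an x86 fast path for the generic refcount_t API. A minimal, hypothetical usage sketch (struct session and its helpers are invented): on overflow the count saturates and warns instead of wrapping, which is the security property the help text describes.

#include <linux/refcount.h>
#include <linux/slab.h>

struct session {
	refcount_t users;
};

static void session_get(struct session *s)
{
	refcount_inc(&s->users);	/* warns and saturates on overflow */
}

static void session_put(struct session *s)
{
	if (refcount_dec_and_test(&s->users))
		kfree(s);
}
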
+ 5 - 21
arch/alpha/include/asm/futex.h

@@ -25,18 +25,10 @@
 	:	"r" (uaddr), "r"(oparg)				\
 	:	"memory")
 
-static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 
 	pagefault_disable();
 
@@ -62,17 +54,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 

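The same conversion repeats in every futex diff that follows: the encoded_op decoding and the cmparg comparison were removed from each architecture and moved into common code, with the architectures now only implementing arch_futex_atomic_op_inuser(). Roughly, the common wrapper looks like the following sketch (an approximation for illustration, not the verbatim kernel/futex.c hunk from this merge):

#include <linux/futex.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>

static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
	unsigned int op = (encoded_op >> 28) & 7;
	unsigned int cmp = (encoded_op >> 24) & 15;
	/* sign_extend32() avoids the undefined left-shift of negative values. */
	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
	int oldval, ret;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
		oparg = 1 << oparg;

	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
		return -EFAULT;

	/* The per-arch hook only performs the atomic user-space RMW. */
	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	if (ret)
		return ret;

	switch (cmp) {
	case FUTEX_OP_CMP_EQ: return oldval == cmparg;
	case FUTEX_OP_CMP_NE: return oldval != cmparg;
	case FUTEX_OP_CMP_LT: return oldval <  cmparg;
	case FUTEX_OP_CMP_GE: return oldval >= cmparg;
	case FUTEX_OP_CMP_LE: return oldval <= cmparg;
	case FUTEX_OP_CMP_GT: return oldval >  cmparg;
	default:              return -ENOSYS;
	}
}
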
+ 2 - 0
arch/arc/include/asm/atomic.h

@@ -123,6 +123,8 @@ static inline void atomic_set(atomic_t *v, int i)
 	atomic_ops_unlock(flags);
 }
 
+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 #endif
 
 /*

+ 5 - 35
arch/arc/include/asm/futex.h

@@ -73,20 +73,11 @@
 
 #endif
 
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
-
 #ifndef CONFIG_ARC_HAS_LLSC
 	preempt_disable();	/* to guarantee atomic r-m-w of futex op */
 #endif
@@ -118,30 +109,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	preempt_enable();
 #endif
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 

+ 4 - 22
arch/arm/include/asm/futex.h

@@ -128,20 +128,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 #endif /* !SMP */
 
 static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret, tmp;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 #ifndef CONFIG_SMP
 	preempt_disable();
 #endif
@@ -172,17 +162,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 	preempt_enable();
 #endif
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 

+ 4 - 22
arch/arm64/include/asm/futex.h

@@ -48,20 +48,10 @@ do {									\
 } while (0)
 
 static inline int
-futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (int)(encoded_op << 8) >> 20;
-	int cmparg = (int)(encoded_op << 20) >> 20;
 	int oldval = 0, ret, tmp;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1U << (oparg & 0x1f);
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();
 
 	switch (op) {
@@ -91,17 +81,9 @@ futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 2 - 9
arch/arm64/include/asm/spinlock.h

@@ -310,14 +310,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()

-/*
- * Accesses appearing in program order before a spin_lock() operation
- * can be reordered with accesses inside the critical section, by virtue
- * of arch_spin_lock being constructed using acquire semantics.
- *
- * In cases where this is problematic (e.g. try_to_wake_up), an
- * smp_mb__before_spinlock() can restore the required ordering.
- */
-#define smp_mb__before_spinlock()	smp_mb()
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()	smp_mb()
 
 
 #endif /* __ASM_SPINLOCK_H */

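A minimal usage sketch of the new barrier (not part of this diff): with smp_mb__before_spinlock() removed, the full barrier is now issued after the lock is taken. The surrounding names (p->pi_lock, p->state) are assumed from the scheduler wake-up path, not quoted from this commit:

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	smp_mb__after_spinlock();	/* order earlier stores against the later read of p->state */
	if (!(p->state & state))
		goto out;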
+ 2 - 1
arch/frv/include/asm/futex.h

@@ -7,7 +7,8 @@
 #include <asm/errno.h>
 #include <linux/uaccess.h>

-extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
+extern int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr);
 
 
 static inline int
 futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,

+ 4 - 23
arch/frv/kernel/futex.c

@@ -186,20 +186,10 @@ static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_o
 /*
  * do the futex operations
  */
-int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;

-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();

 	switch (op) {
@@ -225,18 +215,9 @@ int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS; break;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
 
 
 	return ret;

-} /* end futex_atomic_op_inuser() */
+} /* end arch_futex_atomic_op_inuser() */

+ 2 - 0
arch/hexagon/include/asm/atomic.h

@@ -42,6 +42,8 @@ static inline void atomic_set(atomic_t *v, int new)
 	);
 }

+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 /**
  * atomic_read - reads a word, atomically
  * @v: pointer to atomic value

+ 4 - 34
arch/hexagon/include/asm/futex.h

@@ -31,18 +31,9 @@
 
 
 
 
 static inline int
-futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
 
 
 	pagefault_disable();

@@ -72,30 +63,9 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 4 - 21
arch/ia64/include/asm/futex.h

@@ -45,18 +45,9 @@ do {									\
 } while (0)

 static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 
 	pagefault_disable();

@@ -84,17 +75,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 2 - 0
arch/metag/include/asm/atomic_lock1.h

@@ -37,6 +37,8 @@ static inline int atomic_set(atomic_t *v, int i)
 	return i;
 }

+#define atomic_set_release(v, i) atomic_set((v), (i))
+
 #define ATOMIC_OP(op, c_op)						\
 static inline void atomic_##op(int i, atomic_t *v)			\
 {									\

+ 4 - 34
arch/microblaze/include/asm/futex.h

@@ -29,18 +29,9 @@
 })

 static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 
 	pagefault_disable();

@@ -66,30 +57,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 4 - 21
arch/mips/include/asm/futex.h

@@ -83,18 +83,9 @@
 }

 static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 
 	pagefault_disable();

@@ -125,17 +116,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 3 - 3
arch/mips/kernel/smp.c

@@ -648,12 +648,12 @@ EXPORT_SYMBOL(flush_tlb_one);
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST

 static DEFINE_PER_CPU(atomic_t, tick_broadcast_count);
-static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd);
+static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
 
 
 void tick_broadcast(const struct cpumask *mask)
 {
 	atomic_t *count;
-	struct call_single_data *csd;
+	call_single_data_t *csd;
 	int cpu;

 	for_each_cpu(cpu, mask) {
@@ -674,7 +674,7 @@ static void tick_broadcast_callee(void *info)
 
 
 static int __init tick_broadcast_init(void)
 {
-	struct call_single_data *csd;
+	call_single_data_t *csd;
 	int cpu;

 	for (cpu = 0; cpu < NR_CPUS; cpu++) {

+ 4 - 35
arch/openrisc/include/asm/futex.h

@@ -30,20 +30,10 @@
 })

 static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;

-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();

 	switch (op) {
@@ -68,30 +58,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 2 - 0
arch/parisc/include/asm/atomic.h

@@ -65,6 +65,8 @@ static __inline__ void atomic_set(atomic_t *v, int i)
 	_atomic_spin_unlock_irqrestore(v, flags);
 }

+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 static __inline__ int atomic_read(const atomic_t *v)
 {
 	return READ_ONCE((v)->counter);

+ 4 - 22
arch/parisc/include/asm/futex.h

@@ -32,22 +32,12 @@ _futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags)
 }

 static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
 	unsigned long int flags;
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval, ret;
 	u32 tmp;

-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr)))
-		return -EFAULT;
-
 	_futex_spin_lock_irqsave(uaddr, &flags);
 	pagefault_disable();

@@ -85,17 +75,9 @@ out_pagefault_enable:
 	pagefault_enable();
 	_futex_spin_unlock_irqrestore(uaddr, &flags);

-	if (ret == 0) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 0 - 7
arch/powerpc/include/asm/barrier.h

@@ -74,13 +74,6 @@ do {									\
 	___p1;								\
 })

-/*
- * This must resolve to hwsync on SMP for the context switch path.
- * See _switch, and core scheduler context switch memory ordering
- * comments.
- */
-#define smp_mb__before_spinlock()   smp_mb()
-
 #include <asm-generic/barrier.h>

 #endif /* _ASM_POWERPC_BARRIER_H */

+ 5 - 21
arch/powerpc/include/asm/futex.h

@@ -29,18 +29,10 @@
 	: "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
 	: "cr0", "memory")

-static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 
 	pagefault_disable();

@@ -66,17 +58,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 3 - 0
arch/powerpc/include/asm/spinlock.h

@@ -309,5 +309,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_relax(lock)	__rw_yield(lock)
 #define arch_write_relax(lock)	__rw_yield(lock)

+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()   smp_mb()
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_SPINLOCK_H */

+ 5 - 18
arch/s390/include/asm/futex.h

@@ -21,17 +21,12 @@
 		: "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
 		  "m" (*uaddr) : "cc");

-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, newval, ret;
 
 
 	load_kernel_asce();
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
 
 
 	pagefault_disable();
 	switch (op) {
@@ -60,17 +55,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	}
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 4 - 22
arch/sh/include/asm/futex.h

@@ -27,21 +27,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
 }

-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	u32 oparg = (encoded_op << 8) >> 20;
-	u32 cmparg = (encoded_op << 20) >> 20;
 	u32 oldval, newval, prev;
 	int ret;

-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();

 	do {
@@ -80,17 +71,8 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = ((int)oldval < (int)cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = ((int)oldval >= (int)cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = ((int)oldval <= (int)cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = ((int)oldval > (int)cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
 
 
 	return ret;
 }

+ 2 - 0
arch/sparc/include/asm/atomic_32.h

@@ -29,6 +29,8 @@ int atomic_xchg(atomic_t *, int);
 int __atomic_add_unless(atomic_t *, int, int);
 void atomic_set(atomic_t *, int);

+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 #define atomic_read(v)          ACCESS_ONCE((v)->counter)

 #define atomic_add(i, v)	((void)atomic_add_return( (int)(i), (v)))

+ 5 - 21
arch/sparc/include/asm/futex_64.h

@@ -29,22 +29,14 @@
 	: "r" (uaddr), "r" (oparg), "i" (-EFAULT)	\
 	: "memory")

-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret, tem;

-	if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
-		return -EFAULT;
 	if (unlikely((((unsigned long) uaddr) & 0x3UL)))
 		return -EINVAL;

-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
 	pagefault_disable();

 	switch (op) {
@@ -69,17 +61,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 2 - 0
arch/tile/include/asm/atomic_32.h

@@ -101,6 +101,8 @@ static inline void atomic_set(atomic_t *v, int n)
 	_atomic_xchg(&v->counter, n);
 }

+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 /* A 64bit atomic type */

 typedef struct {

+ 5 - 35
arch/tile/include/asm/futex.h

@@ -106,12 +106,9 @@
 	lock = __atomic_hashed_lock((int __force *)uaddr)
 #endif

-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int uninitialized_var(val), ret;

 	__futex_prolog();
@@ -119,12 +116,6 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	/* The 32-bit futex code makes this assumption, so validate it here. */
 	BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));

-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -148,30 +139,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	}
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (val == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (val != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (val < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (val >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (val <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (val > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = val;
+
 	return ret;
 }


+ 2 - 0
arch/x86/Kconfig

@@ -55,6 +55,8 @@ config X86
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_PMEM_API		if X86_64
+	# Causing hangs/crashes, see the commit that added this change for details.
+	select ARCH_HAS_REFCOUNT		if BROKEN
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN

+ 6 - 0
arch/x86/include/asm/asm.h

@@ -74,6 +74,9 @@
 # define _ASM_EXTABLE_EX(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)

+# define _ASM_EXTABLE_REFCOUNT(from, to)			\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+
 # define _ASM_NOKPROBE(entry)					\
 	.pushsection "_kprobe_blacklist","aw" ;			\
 	_ASM_ALIGN ;						\
@@ -123,6 +126,9 @@
 # define _ASM_EXTABLE_EX(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)

+# define _ASM_EXTABLE_REFCOUNT(from, to)			\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif


+ 46 - 23
arch/x86/include/asm/atomic.h

@@ -197,35 +197,56 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	return xchg(&v->counter, new);
 }

-#define ATOMIC_OP(op)							\
-static inline void atomic_##op(int i, atomic_t *v)			\
-{									\
-	asm volatile(LOCK_PREFIX #op"l %1,%0"				\
-			: "+m" (v->counter)				\
-			: "ir" (i)					\
-			: "memory");					\
+static inline void atomic_and(int i, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "andl %1,%0"
+			: "+m" (v->counter)
+			: "ir" (i)
+			: "memory");
+}
+
+static inline int atomic_fetch_and(int i, atomic_t *v)
+{
+	int val = atomic_read(v);
+
+	do { } while (!atomic_try_cmpxchg(v, &val, val & i));
+
+	return val;
 }

-#define ATOMIC_FETCH_OP(op, c_op)					\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
-{									\
-	int val = atomic_read(v);					\
-	do {								\
-	} while (!atomic_try_cmpxchg(v, &val, val c_op i));		\
-	return val;							\
+static inline void atomic_or(int i, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "orl %1,%0"
+			: "+m" (v->counter)
+			: "ir" (i)
+			: "memory");
 }
 }

-	ATOMIC_OP(op)							\
-	ATOMIC_FETCH_OP(op, c_op)
+static inline int atomic_fetch_or(int i, atomic_t *v)
+{
+	int val = atomic_read(v);
 
 
-ATOMIC_OPS(and, &)
-ATOMIC_OPS(or , |)
-ATOMIC_OPS(xor, ^)
+	do { } while (!atomic_try_cmpxchg(v, &val, val | i));
 
 
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP
+	return val;
+}
+
+static inline void atomic_xor(int i, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "xorl %1,%0"
+			: "+m" (v->counter)
+			: "ir" (i)
+			: "memory");
+}
+
+static inline int atomic_fetch_xor(int i, atomic_t *v)
+{
+	int val = atomic_read(v);
+
+	do { } while (!atomic_try_cmpxchg(v, &val, val ^ i));
+
+	return val;
+}
 
 
 /**
 /**
  * __atomic_add_unless - add unless the number is already a given value
 static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c = atomic_read(v);
 	do {
 	do {
 		if (unlikely(c == u))
 			break;
 	} while (!atomic_try_cmpxchg(v, &c, c + a));
 	return c;
 	return c;
 }

+ 57 - 24
arch/x86/include/asm/atomic64_32.h

@@ -312,37 +312,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 #undef alternative_atomic64
 #undef alternative_atomic64
 #undef __alternative_atomic64

-static inline void atomic64_##op(long long i, atomic64_t *v)		\
-{									\
-	long long old, c = 0;						\
-	while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c)		\
-		c = old;						\
+static inline void atomic64_and(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+		c = old;
 }
 }

-static inline long long atomic64_fetch_##op(long long i, atomic64_t *v)	\
-{									\
-	long long old, c = 0;						\
-	while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c)		\
-		c = old;						\
-	return old;							\
+static inline long long atomic64_fetch_and(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+		c = old;
+
+	return old;
 }
 }

+static inline void atomic64_or(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
 
 
-#define atomic64_fetch_sub(i, v)	atomic64_fetch_add(-(i), (v))
+	while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+		c = old;
+}
+
+static inline long long atomic64_fetch_or(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+		c = old;
+
+	return old;
+}
 
 
-#define ATOMIC64_OPS(op, c_op)						\
-	ATOMIC64_OP(op, c_op)						\
-	ATOMIC64_FETCH_OP(op, c_op)
+static inline void atomic64_xor(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+		c = old;
+}
 
 
-ATOMIC64_OPS(and, &)
-ATOMIC64_OPS(or, |)
-ATOMIC64_OPS(xor, ^)
+static inline long long atomic64_fetch_xor(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+		c = old;
+
+	return old;
+}
 
 
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c + i)) != c)
+		c = old;
+
+	return old;
+}
+
+#define atomic64_fetch_sub(i, v)	atomic64_fetch_add(-(i), (v))
 
 
 #endif /* _ASM_X86_ATOMIC64_32_H */
 #endif /* _ASM_X86_ATOMIC64_32_H */
+ 47 - 26
arch/x86/include/asm/atomic64_64.h

@@ -177,7 +177,7 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
 }
 }

 #define atomic64_try_cmpxchg atomic64_try_cmpxchg
+static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
 {
 {
 	return try_cmpxchg(&v->counter, old, new);
 }
@@ -198,7 +198,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
  */
 static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-	long c = atomic64_read(v);
+	s64 c = atomic64_read(v);
 	do {
 		if (unlikely(c == u))
 			return false;
@@ -217,7 +217,7 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
  */
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
-	long dec, c = atomic64_read(v);
+	s64 dec, c = atomic64_read(v);
 	do {
 		dec = c - 1;
 		if (unlikely(dec < 0))
@@ -226,34 +226,55 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }

-#define ATOMIC64_OP(op)							\
-static inline void atomic64_##op(long i, atomic64_t *v)			\
-{									\
-	asm volatile(LOCK_PREFIX #op"q %1,%0"				\
-			: "+m" (v->counter)				\
-			: "er" (i)					\
-			: "memory");					\
+static inline void atomic64_and(long i, atomic64_t *v)
+{
+	asm volatile(LOCK_PREFIX "andq %1,%0"
+			: "+m" (v->counter)
+			: "er" (i)
+			: "memory");
 }
 }

-static inline long atomic64_fetch_##op(long i, atomic64_t *v)		\
-{									\
-	long val = atomic64_read(v);					\
-	do {								\
-	} while (!atomic64_try_cmpxchg(v, &val, val c_op i));		\
-	return val;							\
+static inline long atomic64_fetch_and(long i, atomic64_t *v)
+{
+	s64 val = atomic64_read(v);
+
+	do {
+	} while (!atomic64_try_cmpxchg(v, &val, val & i));
+	return val;
 }
 }

-	ATOMIC64_OP(op)							\
-	ATOMIC64_FETCH_OP(op, c_op)
+static inline void atomic64_or(long i, atomic64_t *v)
+{
+	asm volatile(LOCK_PREFIX "orq %1,%0"
+			: "+m" (v->counter)
+			: "er" (i)
+			: "memory");
+}
 
 
-ATOMIC64_OPS(and, &)
-ATOMIC64_OPS(or, |)
-ATOMIC64_OPS(xor, ^)
+static inline long atomic64_fetch_or(long i, atomic64_t *v)
+{
+	s64 val = atomic64_read(v);
 
 
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP
+	do {
+	} while (!atomic64_try_cmpxchg(v, &val, val | i));
+	return val;
+}
+
+static inline void atomic64_xor(long i, atomic64_t *v)
+{
+	asm volatile(LOCK_PREFIX "xorq %1,%0"
+			: "+m" (v->counter)
+			: "er" (i)
+			: "memory");
+}
+
+static inline long atomic64_fetch_xor(long i, atomic64_t *v)
+{
+	s64 val = atomic64_read(v);
+
+	do {
+	} while (!atomic64_try_cmpxchg(v, &val, val ^ i));
+	return val;
+}
 
 
 #endif /* _ASM_X86_ATOMIC64_64_H */

+ 1 - 1
arch/x86/include/asm/cmpxchg.h

@@ -157,7 +157,7 @@ extern void __add_wrong_size(void)
 #define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
 ({									\
 	bool success;							\
-	__typeof__(_ptr) _old = (_pold);				\
+	__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold);		\
 	__typeof__(*(_ptr)) __old = *_old;				\
 	__typeof__(*(_ptr)) __new = (_new);				\
 	switch (size) {							\

+ 5 - 35
arch/x86/include/asm/futex.h

@@ -41,20 +41,11 @@
 		       "+m" (*uaddr), "=&r" (tem)		\
 		       "+m" (*uaddr), "=&r" (tem)		\
 		     : "r" (oparg), "i" (-EFAULT), "1" (0))
 		     : "r" (oparg), "i" (-EFAULT), "1" (0))
 
 
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret, tem;

-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();

 	switch (op) {
@@ -80,30 +71,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


+ 109 - 0
arch/x86/include/asm/refcount.h

@@ -0,0 +1,109 @@
+#ifndef __ASM_X86_REFCOUNT_H
+#define __ASM_X86_REFCOUNT_H
+/*
+ * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
+ * PaX/grsecurity.
+ */
+#include <linux/refcount.h>
+
+/*
+ * This is the first portion of the refcount error handling, which lives in
+ * .text.unlikely, and is jumped to from the CPU flag check (in the
+ * following macros). This saves the refcount value location into CX for
+ * the exception handler to use (in mm/extable.c), and then triggers the
+ * central refcount exception. The fixup address for the exception points
+ * back to the regular execution flow in .text.
+ */
+#define _REFCOUNT_EXCEPTION				\
+	".pushsection .text.unlikely\n"			\
+	"111:\tlea %[counter], %%" _ASM_CX "\n"		\
+	"112:\t" ASM_UD0 "\n"				\
+	ASM_UNREACHABLE					\
+	".popsection\n"					\
+	"113:\n"					\
+	_ASM_EXTABLE_REFCOUNT(112b, 113b)
+
+/* Trigger refcount exception if refcount result is negative. */
+#define REFCOUNT_CHECK_LT_ZERO				\
+	"js 111f\n\t"					\
+	_REFCOUNT_EXCEPTION
+
+/* Trigger refcount exception if refcount result is zero or negative. */
+#define REFCOUNT_CHECK_LE_ZERO				\
+	"jz 111f\n\t"					\
+	REFCOUNT_CHECK_LT_ZERO
+
+/* Trigger refcount exception unconditionally. */
+#define REFCOUNT_ERROR					\
+	"jmp 111f\n\t"					\
+	_REFCOUNT_EXCEPTION
+
+static __always_inline void refcount_add(unsigned int i, refcount_t *r)
+{
+	asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
+		REFCOUNT_CHECK_LT_ZERO
+		: [counter] "+m" (r->refs.counter)
+		: "ir" (i)
+		: "cc", "cx");
+}
+
+static __always_inline void refcount_inc(refcount_t *r)
+{
+	asm volatile(LOCK_PREFIX "incl %0\n\t"
+		REFCOUNT_CHECK_LT_ZERO
+		: [counter] "+m" (r->refs.counter)
+		: : "cc", "cx");
+}
+
+static __always_inline void refcount_dec(refcount_t *r)
+{
+	asm volatile(LOCK_PREFIX "decl %0\n\t"
+		REFCOUNT_CHECK_LE_ZERO
+		: [counter] "+m" (r->refs.counter)
+		: : "cc", "cx");
+}
+
+static __always_inline __must_check
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+	GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
+				  r->refs.counter, "er", i, "%0", e);
+}
+
+static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+	GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
+				 r->refs.counter, "%0", e);
+}
+
+static __always_inline __must_check
+bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+	int c, result;
+
+	c = atomic_read(&(r->refs));
+	do {
+		if (unlikely(c == 0))
+			return false;
+
+		result = c + i;
+
+		/* Did we try to increment from/to an undesirable state? */
+		if (unlikely(c < 0 || c == INT_MAX || result < c)) {
+			asm volatile(REFCOUNT_ERROR
+				     : : [counter] "m" (r->refs.counter)
+				     : "cc", "cx");
+			break;
+		}
+
+	} while (!atomic_try_cmpxchg(&(r->refs), &c, result));
+
+	return c != 0;
+}
+
+static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+	return refcount_add_not_zero(1, r);
+}
+
+#endif

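The new arch/x86/include/asm/refcount.h above makes the ordinary refcount_t operations trap via UD0 (handled by ex_handler_refcount() below) and saturate at INT_MIN/2 instead of wrapping. A minimal usage sketch; the object type and helpers (struct foo, foo_get, foo_put) are illustrative assumptions, not part of this commit:

	struct foo {
		refcount_t refs;
	};

	static struct foo *foo_get(struct foo *f)
	{
		/* saturates and reports instead of overflowing past INT_MAX */
		if (!refcount_inc_not_zero(&f->refs))
			return NULL;
		return f;
	}

	static void foo_put(struct foo *f)
	{
		if (refcount_dec_and_test(&f->refs))
			kfree(f);
	}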
+ 42 - 0
arch/x86/mm/extable.c

@@ -36,6 +36,48 @@ bool ex_handler_fault(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_fault);

+/*
+ * Handler for UD0 exception following a failed test against the
+ * result of a refcount inc/dec/add/sub.
+ */
+bool ex_handler_refcount(const struct exception_table_entry *fixup,
+			 struct pt_regs *regs, int trapnr)
+{
+	/* First unconditionally saturate the refcount. */
+	*(int *)regs->cx = INT_MIN / 2;
+
+	/*
+	 * Strictly speaking, this reports the fixup destination, not
+	 * the fault location, and not the actually overflowing
+	 * instruction, which is the instruction before the "js", but
+	 * since that instruction could be a variety of lengths, just
+	 * report the location after the overflow, which should be close
+	 * enough for finding the overflow, as it's at least back in
+	 * the function, having returned from .text.unlikely.
+	 */
+	regs->ip = ex_fixup_addr(fixup);
+
+	/*
+	 * This function has been called because either a negative refcount
+	 * value was seen by any of the refcount functions, or a zero
+	 * refcount value was seen by refcount_dec().
+	 *
+	 * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result
+	 * wrapped around) will be set. Additionally, seeing the refcount
+	 * reach 0 will set ZF (Zero Flag: result was zero). In each of
+	 * these cases we want a report, since it's a boundary condition.
+	 *
+	 */
+	if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) {
+		bool zero = regs->flags & X86_EFLAGS_ZF;
+
+		refcount_error_report(regs, zero ? "hit zero" : "overflow");
+	}
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_refcount);
+
 bool ex_handler_ext(const struct exception_table_entry *fixup,
 		   struct pt_regs *regs, int trapnr)
 {

+ 5 - 22
arch/xtensa/include/asm/futex.h

@@ -44,18 +44,10 @@
 	: "r" (uaddr), "I" (-EFAULT), "r" (oparg)	\
 	: "memory")

-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 
 #if !XCHAL_HAVE_S32C1I
 	return -ENOSYS;
@@ -89,19 +81,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (ret)
-		return ret;
+	if (!ret)
+		*oval = oldval;
 
 
-	switch (cmp) {
-	case FUTEX_OP_CMP_EQ: return (oldval == cmparg);
-	case FUTEX_OP_CMP_NE: return (oldval != cmparg);
-	case FUTEX_OP_CMP_LT: return (oldval < cmparg);
-	case FUTEX_OP_CMP_GE: return (oldval >= cmparg);
-	case FUTEX_OP_CMP_LE: return (oldval <= cmparg);
-	case FUTEX_OP_CMP_GT: return (oldval > cmparg);
-	}
-
-	return -ENOSYS;
+	return ret;
 }

 static inline int

+ 1 - 1
block/blk-softirq.c

@@ -60,7 +60,7 @@ static void trigger_softirq(void *data)
 static int raise_blk_irq(int cpu, struct request *rq)
 {
 	if (cpu_online(cpu)) {
-		struct call_single_data *data = &rq->csd;
+		call_single_data_t *data = &rq->csd;
 
 
 		data->func = trigger_softirq;
 		data->info = rq;

+ 1 - 1
drivers/acpi/nfit/core.c

@@ -2884,7 +2884,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 	 * need to be interruptible while waiting.
 	 */
 	INIT_WORK_ONSTACK(&flush.work, flush_probe);
-	COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
+	init_completion(&flush.cmp);
 	queue_work(nfit_wq, &flush.work);
 	mutex_unlock(&acpi_desc->init_mutex);


+ 1 - 1
drivers/block/null_blk.c

@@ -13,7 +13,7 @@
 struct nullb_cmd {
 	struct list_head list;
 	struct llist_node ll_list;
-	struct call_single_data csd;
+	call_single_data_t csd;
 	struct request *rq;
 	struct bio *bio;
 	unsigned int tag;

+ 5 - 1
drivers/clocksource/arm_arch_timer.c

@@ -455,7 +455,11 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa
 			per_cpu(timer_unstable_counter_workaround, i) = wa;
 	}

-	static_branch_enable(&arch_timer_read_ool_enabled);
+	/*
+	 * Use the locked version, as we're called from the CPU
+	 * hotplug framework. Otherwise, we end-up in deadlock-land.
+	 */
+	static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled);
 
 
 	/*
 	 * Don't use the vdso fastpath if errata require using the

+ 5 - 5
drivers/cpuidle/coupled.c

@@ -119,13 +119,13 @@ struct cpuidle_coupled {
 
 
 #define CPUIDLE_COUPLED_NOT_IDLE	(-1)

-static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
+static DEFINE_PER_CPU(call_single_data_t, cpuidle_coupled_poke_cb);
 
 
 /*
  * The cpuidle_coupled_poke_pending mask is used to avoid calling
- * __smp_call_function_single with the per cpu call_single_data struct already
+ * __smp_call_function_single with the per cpu call_single_data_t struct already
  * in use.  This prevents a deadlock where two cpus are waiting for each others
- * call_single_data struct to be available
+ * call_single_data_t struct to be available
  */
 static cpumask_t cpuidle_coupled_poke_pending;

@@ -339,7 +339,7 @@ static void cpuidle_coupled_handle_poke(void *info)
  */
 static void cpuidle_coupled_poke(int cpu)
 {
-	struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
+	call_single_data_t *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
 
 
 	if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
 		smp_call_function_single_async(cpu, csd);
@@ -651,7 +651,7 @@ int cpuidle_coupled_register_device(struct cpuidle_device *dev)
 {
 	int cpu;
 	struct cpuidle_device *other_dev;
-	struct call_single_data *csd;
+	call_single_data_t *csd;
 	struct cpuidle_coupled *coupled;

 	if (cpumask_empty(&dev->coupled_cpus))

+ 3 - 2
drivers/gpu/drm/i915/i915_debugfs.c

@@ -28,6 +28,7 @@
 
 
 #include <linux/debugfs.h>
 #include <linux/sort.h>
+#include <linux/sched/mm.h>
 #include "intel_drv.h"

 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
@@ -4305,7 +4306,7 @@ i915_drop_caches_set(void *data, u64 val)
 		mutex_unlock(&dev->struct_mutex);
 	}

-	lockdep_set_current_reclaim_state(GFP_KERNEL);
+	fs_reclaim_acquire(GFP_KERNEL);
 	if (val & DROP_BOUND)
 		i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND);

@@ -4314,7 +4315,7 @@ i915_drop_caches_set(void *data, u64 val)
 
 
 	if (val & DROP_SHRINK_ALL)
 		i915_gem_shrink_all(dev_priv);
-	lockdep_clear_current_reclaim_state();
+	fs_reclaim_release(GFP_KERNEL);
 
 
 	if (val & DROP_FREED) {
 		synchronize_rcu();

+ 1 - 1
drivers/net/ethernet/cavium/liquidio/lio_main.c

@@ -2468,7 +2468,7 @@ static void liquidio_napi_drv_callback(void *arg)
 	if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) {
 		napi_schedule_irqoff(&droq->napi);
 	} else {
-		struct call_single_data *csd = &droq->csd;
+		call_single_data_t *csd = &droq->csd;
 
 
 		csd->func = napi_schedule_wrapper;
 		csd->info = &droq->napi;

+ 1 - 1
drivers/net/ethernet/cavium/liquidio/octeon_droq.h

@@ -328,7 +328,7 @@ struct octeon_droq {
 
 
 	u32 cpu_id;

-	struct call_single_data csd;
+	call_single_data_t csd;
 };

 #define OCT_DROQ_SIZE   (sizeof(struct octeon_droq))

+ 2 - 2
fs/overlayfs/readdir.c

@@ -446,14 +446,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
 
 
 			ovl_path_upper(dentry, &upperpath);
 			realfile = ovl_path_open(&upperpath, O_RDONLY);
-			smp_mb__before_spinlock();
+
 			inode_lock(inode);
 			if (!od->upperfile) {
 				if (IS_ERR(realfile)) {
 					inode_unlock(inode);
 					return PTR_ERR(realfile);
 				}
-				od->upperfile = realfile;
+				smp_store_release(&od->upperfile, realfile);
 			} else {
 				/* somebody has beaten us to it */
 				if (!IS_ERR(realfile))

+ 11 - 14
fs/userfaultfd.c

@@ -109,27 +109,24 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
 		goto out;
 	WRITE_ONCE(uwq->waken, true);
 	/*
-	 * The implicit smp_mb__before_spinlock in try_to_wake_up()
-	 * renders uwq->waken visible to other CPUs before the task is
-	 * waken.
+	 * The Program-Order guarantees provided by the scheduler
+	 * ensure uwq->waken is visible before the task is woken.
 	 */
 	ret = wake_up_state(wq->private, mode);
-	if (ret)
+	if (ret) {
 		/*
 		 * Wake only once, autoremove behavior.
 		 *
-		 * After the effect of list_del_init is visible to the
-		 * other CPUs, the waitqueue may disappear from under
-		 * us, see the !list_empty_careful() in
-		 * handle_userfault(). try_to_wake_up() has an
-		 * implicit smp_mb__before_spinlock, and the
-		 * wq->private is read before calling the extern
-		 * function "wake_up_state" (which in turns calls
-		 * try_to_wake_up). While the spin_lock;spin_unlock;
-		 * wouldn't be enough, the smp_mb__before_spinlock is
-		 * enough to avoid an explicit smp_mb() here.
+		 * After the effect of list_del_init is visible to the other
+		 * CPUs, the waitqueue may disappear from under us, see the
+		 * !list_empty_careful() in handle_userfault().
+		 *
+		 * try_to_wake_up() has an implicit smp_mb(), and the
+		 * wq->private is read before calling the extern function
+		 * "wake_up_state" (which in turns calls try_to_wake_up).
 		 */
 		list_del_init(&wq->entry);
+	}
 out:
 	return ret;
 }

+ 2 - 0
include/asm-generic/atomic64.h

@@ -21,6 +21,8 @@ typedef struct {
 extern long long atomic64_read(const atomic64_t *v);
 extern void	 atomic64_set(atomic64_t *v, long long i);

+#define atomic64_set_release(v, i)	atomic64_set((v), (i))
+
 #define ATOMIC64_OP(op)							\
 extern void	 atomic64_##op(long long a, atomic64_t *v);


+ 9 - 41
include/asm-generic/futex.h

@@ -13,7 +13,7 @@
  */

 /**
- * futex_atomic_op_inuser() - Atomic arithmetic operation with constant
+ * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
  *			  argument and comparison of the previous
  *			  futex value with another constant.
  *
@@ -25,18 +25,11 @@
  * <0 - On error
  */
 static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval, ret;
 	u32 tmp;

-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
 	preempt_disable();
 	pagefault_disable();

@@ -74,17 +67,9 @@ out_pagefault_enable:
 	pagefault_enable();
 	preempt_enable();

-	if (ret == 0) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (ret == 0)
+		*oval = oldval;
+
 	return ret;
 }

@@ -126,18 +111,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 
 
 #else
 static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 
 	pagefault_disable();

@@ -153,17 +129,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 
 
 	pagefault_enable();

-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }


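With every architecture now only performing the atomic operation and handing the old value back through *oval, the encoded-op decoding and the final comparison live in a single shared caller in kernel/futex.c. A sketch of roughly what that shared futex_atomic_op_inuser() looks like after this series (reconstructed for illustration, not quoted from the commit; the sign_extend32() decoding is the assumed fix for the undefined shift behaviour):

	static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
	{
		unsigned int op = (encoded_op >> 28) & 7;
		unsigned int cmp = (encoded_op >> 24) & 15;
		int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
		int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
		int oldval, ret;

		if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
			oparg = 1 << oparg;

		if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
			return -EFAULT;

		/* arch code does only the atomic r-m-w and reports the old value */
		ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
		if (ret)
			return ret;

		switch (cmp) {
		case FUTEX_OP_CMP_EQ: return oldval == cmparg;
		case FUTEX_OP_CMP_NE: return oldval != cmparg;
		case FUTEX_OP_CMP_LT: return oldval < cmparg;
		case FUTEX_OP_CMP_GE: return oldval >= cmparg;
		case FUTEX_OP_CMP_LE: return oldval <= cmparg;
		case FUTEX_OP_CMP_GT: return oldval > cmparg;
		default: return -ENOSYS;
		}
	}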
+ 3 - 0
include/linux/atomic.h

@@ -38,6 +38,9 @@
  * Besides, if an arch has a special barrier for acquire/release, it could
  * implement its own __atomic_op_* and use the same framework for building
  * variants
+ *
+ * If an architecture overrides __atomic_op_acquire() it will probably want
+ * to define smp_mb__after_spinlock().
  */
 #ifndef __atomic_op_acquire
 #define __atomic_op_acquire(op, args...)				\

+ 1 - 1
include/linux/blkdev.h

@@ -134,7 +134,7 @@ typedef __u32 __bitwise req_flags_t;
 struct request {
 	struct list_head queuelist;
 	union {
-		struct call_single_data csd;
+		call_single_data_t csd;
 		u64 fifo_time;
 	};


+ 45 - 2
include/linux/completion.h

@@ -9,6 +9,9 @@
  */

 #include <linux/wait.h>
+#ifdef CONFIG_LOCKDEP_COMPLETIONS
+#include <linux/lockdep.h>
+#endif
 
 
 /*
  * struct completion - structure used to maintain state for a "completion"
@@ -25,13 +28,53 @@
 struct completion {
 	unsigned int done;
 	wait_queue_head_t wait;
+#ifdef CONFIG_LOCKDEP_COMPLETIONS
+	struct lockdep_map_cross map;
+#endif
 };

+#ifdef CONFIG_LOCKDEP_COMPLETIONS
+static inline void complete_acquire(struct completion *x)
+{
+	lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_);
+}
+
+static inline void complete_release(struct completion *x)
+{
+	lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_);
+}
+
+static inline void complete_release_commit(struct completion *x)
+{
+	lock_commit_crosslock((struct lockdep_map *)&x->map);
+}
+
+#define init_completion(x)						\
+do {									\
+	static struct lock_class_key __key;				\
+	lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map,	\
+			"(complete)" #x,				\
+			&__key, 0);					\
+	__init_completion(x);						\
+} while (0)
+#else
+#define init_completion(x) __init_completion(x)
+static inline void complete_acquire(struct completion *x) {}
+static inline void complete_release(struct completion *x) {}
+static inline void complete_release_commit(struct completion *x) {}
+#endif
+
+#ifdef CONFIG_LOCKDEP_COMPLETIONS
+#define COMPLETION_INITIALIZER(work) \
+	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \
+	STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) }
+#else
 #define COMPLETION_INITIALIZER(work) \
 	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+#endif
 
 
 #define COMPLETION_INITIALIZER_ONSTACK(work) \
-	({ init_completion(&work); work; })
+	(*({ init_completion(&work); &work; }))
 
 
 /**
  * DECLARE_COMPLETION - declare and initialize a completion structure
@@ -70,7 +113,7 @@ struct completion {
  * This inline function will initialize a dynamically created completion
  * structure.
  */
-static inline void init_completion(struct completion *x)
+static inline void __init_completion(struct completion *x)
 {
 	x->done = 0;
 	init_waitqueue_head(&x->wait);

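To see what the new lockdep map buys, consider the pattern crossrelease is meant to catch; a hypothetical sketch (the wait_for_completion()/complete() hooks are wired up elsewhere in this series):

	DECLARE_COMPLETION_ONSTACK(done);	/* init_completion() registers the map */

	mutex_lock(&A);
	wait_for_completion(&done);		/* complete_acquire() records the wait */
	mutex_unlock(&A);

	/* meanwhile, in the context that signals: */
	mutex_lock(&A);				/* blocks forever: A is held by the waiter */
	complete(&done);			/* complete() releases the crosslock */
	mutex_unlock(&A);

Because the releaser is not the acquirer, classic lockdep cannot connect 'done' to A; the crosslock commit step adds that dependency and reports the A -> done -> A cycle.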
+ 0 - 6
include/linux/cpuset.h

@@ -37,12 +37,6 @@ static inline bool cpusets_enabled(void)
 	return static_branch_unlikely(&cpusets_enabled_key);
 }

-static inline int nr_cpusets(void)
-{
-	/* jump label reference count + the top-level cpuset */
-	return static_key_count(&cpusets_enabled_key.key) + 1;
-}
-
 static inline void cpuset_inc(void)
 {
 	static_branch_inc(&cpusets_pre_enable_key);

+ 6 - 1
include/linux/futex.h

@@ -54,7 +54,6 @@ union futex_key {

 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
-extern void exit_pi_state_list(struct task_struct *curr);
 #ifdef CONFIG_HAVE_FUTEX_CMPXCHG
 #define futex_cmpxchg_enabled 1
 #else
@@ -64,8 +63,14 @@ extern int futex_cmpxchg_enabled;
 static inline void exit_robust_list(struct task_struct *curr)
 {
 }
+#endif
+
+#ifdef CONFIG_FUTEX_PI
+extern void exit_pi_state_list(struct task_struct *curr);
+#else
 static inline void exit_pi_state_list(struct task_struct *curr)
 {
 }
 #endif
+
 #endif

+ 20 - 4
include/linux/irqflags.h

@@ -23,10 +23,26 @@
 # define trace_softirq_context(p)	((p)->softirq_context)
 # define trace_hardirqs_enabled(p)	((p)->hardirqs_enabled)
 # define trace_softirqs_enabled(p)	((p)->softirqs_enabled)
-# define trace_hardirq_enter()	do { current->hardirq_context++; } while (0)
-# define trace_hardirq_exit()	do { current->hardirq_context--; } while (0)
-# define lockdep_softirq_enter()	do { current->softirq_context++; } while (0)
-# define lockdep_softirq_exit()	do { current->softirq_context--; } while (0)
+# define trace_hardirq_enter()			\
+do {						\
+	current->hardirq_context++;		\
+	crossrelease_hist_start(XHLOCK_HARD);	\
+} while (0)
+# define trace_hardirq_exit()			\
+do {						\
+	current->hardirq_context--;		\
+	crossrelease_hist_end(XHLOCK_HARD);	\
+} while (0)
+# define lockdep_softirq_enter()		\
+do {						\
+	current->softirq_context++;		\
+	crossrelease_hist_start(XHLOCK_SOFT);	\
+} while (0)
+# define lockdep_softirq_exit()			\
+do {						\
+	current->softirq_context--;		\
+	crossrelease_hist_end(XHLOCK_SOFT);	\
+} while (0)
 # define INIT_TRACE_IRQFLAGS	.softirqs_enabled = 1,
 #else
 # define trace_hardirqs_on()		do { } while (0)

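The new hooks bracket each interrupt context with its own crossrelease history window; roughly, for a hypothetical interrupt path:

	trace_hardirq_enter();	/* hardirq_context++, open the XHLOCK_HARD window */
	handle_irq(desc);	/* locks acquired here are recorded in that window */
	trace_hardirq_exit();	/* close the window and roll its xhlock entries back */

This keeps lock history gathered inside an irq from being committed against crosslocks released after the irq has returned.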
+ 21 - 12
include/linux/jump_label.h

@@ -163,6 +163,8 @@ extern void jump_label_apply_nops(struct module *mod);
 extern int static_key_count(struct static_key *key);
 extern void static_key_enable(struct static_key *key);
 extern void static_key_disable(struct static_key *key);
+extern void static_key_enable_cpuslocked(struct static_key *key);
+extern void static_key_disable_cpuslocked(struct static_key *key);

 /*
  * We should be using ATOMIC_INIT() for initializing .enabled, but
@@ -234,24 +236,29 @@ static inline int jump_label_apply_nops(struct module *mod)

 static inline void static_key_enable(struct static_key *key)
 {
-	int count = static_key_count(key);
-
-	WARN_ON_ONCE(count < 0 || count > 1);
+	STATIC_KEY_CHECK_USE();

-	if (!count)
-		static_key_slow_inc(key);
+	if (atomic_read(&key->enabled) != 0) {
+		WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
+		return;
+	}
+	atomic_set(&key->enabled, 1);
 }

 static inline void static_key_disable(struct static_key *key)
 {
-	int count = static_key_count(key);
-
-	WARN_ON_ONCE(count < 0 || count > 1);
+	STATIC_KEY_CHECK_USE();

-	if (count)
-		static_key_slow_dec(key);
+	if (atomic_read(&key->enabled) != 1) {
+		WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
+		return;
+	}
+	atomic_set(&key->enabled, 0);
 }

+#define static_key_enable_cpuslocked(k)		static_key_enable((k))
+#define static_key_disable_cpuslocked(k)	static_key_disable((k))
+
 #define STATIC_KEY_INIT_TRUE	{ .enabled = ATOMIC_INIT(1) }
 #define STATIC_KEY_INIT_FALSE	{ .enabled = ATOMIC_INIT(0) }

@@ -413,8 +420,10 @@ extern bool ____wrong_branch_error(void);
  * Normal usage; boolean enable/disable.
  */

-#define static_branch_enable(x)		static_key_enable(&(x)->key)
-#define static_branch_disable(x)	static_key_disable(&(x)->key)
+#define static_branch_enable(x)			static_key_enable(&(x)->key)
+#define static_branch_disable(x)		static_key_disable(&(x)->key)
+#define static_branch_enable_cpuslocked(x)	static_key_enable_cpuslocked(&(x)->key)
+#define static_branch_disable_cpuslocked(x)	static_key_disable_cpuslocked(&(x)->key)

 #endif /* __ASSEMBLY__ */


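The _cpuslocked variants exist for callers that already hold the CPU hotplug read lock; a sketch with a hypothetical key:

	static DEFINE_STATIC_KEY_FALSE(my_feature_key);

	cpus_read_lock();
	if (feature_wanted)
		static_branch_enable_cpuslocked(&my_feature_key);
	cpus_read_unlock();

Plain static_branch_enable() here would take cpus_read_lock() a second time (text patching itself acquires it), which lockdep flags and which can deadlock against a pending hotplug writer.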
+ 6 - 4
include/linux/kasan-checks.h

@@ -2,11 +2,13 @@
 #define _LINUX_KASAN_CHECKS_H

 #ifdef CONFIG_KASAN
-void kasan_check_read(const void *p, unsigned int size);
-void kasan_check_write(const void *p, unsigned int size);
+void kasan_check_read(const volatile void *p, unsigned int size);
+void kasan_check_write(const volatile void *p, unsigned int size);
 #else
-static inline void kasan_check_read(const void *p, unsigned int size) { }
-static inline void kasan_check_write(const void *p, unsigned int size) { }
+static inline void kasan_check_read(const volatile void *p, unsigned int size)
+{ }
+static inline void kasan_check_write(const volatile void *p, unsigned int size)
+{ }
 #endif

 #endif

+ 7 - 0
include/linux/kernel.h

@@ -277,6 +277,13 @@ extern int oops_may_print(void);
 void do_exit(long error_code) __noreturn;
 void complete_and_exit(struct completion *, long) __noreturn;

+#ifdef CONFIG_ARCH_HAS_REFCOUNT
+void refcount_error_report(struct pt_regs *regs, const char *err);
+#else
+static inline void refcount_error_report(struct pt_regs *regs, const char *err)
+{ }
+#endif
+
 /* Internal, do not use. */
 int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
 int __must_check _kstrtol(const char *s, unsigned int base, long *res);

+ 149 - 16
include/linux/lockdep.h

@@ -18,6 +18,8 @@ extern int lock_stat;

 #define MAX_LOCKDEP_SUBCLASSES		8UL

+#include <linux/types.h>
+
 #ifdef CONFIG_LOCKDEP

 #include <linux/linkage.h>
@@ -29,7 +31,7 @@ extern int lock_stat;
  * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
  * the total number of states... :-(
  */
-#define XXX_LOCK_USAGE_STATES		(1+3*4)
+#define XXX_LOCK_USAGE_STATES		(1+2*4)

 /*
  * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
@@ -155,6 +157,12 @@ struct lockdep_map {
 	int				cpu;
 	unsigned long			ip;
 #endif
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+	/*
+	 * Whether it's a crosslock.
+	 */
+	int				cross;
+#endif
 };

 static inline void lockdep_copy_map(struct lockdep_map *to,
@@ -258,8 +266,95 @@ struct held_lock {
 	unsigned int hardirqs_off:1;
 	unsigned int references:12;					/* 32 bits */
 	unsigned int pin_count;
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+	/*
+	 * Generation id.
+	 *
+	 * A value of cross_gen_id will be stored when holding this,
+	 * which is globally increased whenever each crosslock is held.
+	 */
+	unsigned int gen_id;
+#endif
+};
+
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+#define MAX_XHLOCK_TRACE_ENTRIES 5
+
+/*
+ * This is for keeping locks waiting for commit so that true dependencies
+ * can be added at commit step.
+ */
+struct hist_lock {
+	/*
+	 * Id for each entry in the ring buffer. This is used to
+	 * decide whether the ring buffer was overwritten or not.
+	 *
+	 * For example,
+	 *
+	 *           |<----------- hist_lock ring buffer size ------->|
+	 *           pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii
+	 * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii.......................
+	 *
+	 *           where 'p' represents an acquisition in process
+	 *           context, 'i' represents an acquisition in irq
+	 *           context.
+	 *
+	 * In this example, the ring buffer was overwritten by
+	 * acquisitions in irq context; that should be detected on
+	 * rollback or commit.
+	 */
+	unsigned int hist_id;
+
+	/*
+	 * Seperate stack_trace data. This will be used at commit step.
+	 */
+	struct stack_trace	trace;
+	unsigned long		trace_entries[MAX_XHLOCK_TRACE_ENTRIES];
+
+	/*
+	 * Separate hlock instance. This will be used at commit step.
+	 *
+	 * TODO: Use a smaller data structure containing only necessary
+	 * data. However, we should make lockdep code able to handle the
+	 * smaller one first.
+	 */
+	struct held_lock	hlock;
+};
+
+/*
+ * To initialize a lock as crosslock, lockdep_init_map_crosslock() should
+ * be called instead of lockdep_init_map().
+ */
+struct cross_lock {
+	/*
+	 * When more than one acquisition of crosslocks are overlapped,
+	 * we have to perform commit for them based on cross_gen_id of
+	 * the first acquisition, which allows us to add more true
+	 * dependencies.
+	 *
+	 * Moreover, when no acquisition of a crosslock is in progress,
+	 * we should not perform commit because the lock might not exist
+	 * any more, which might cause incorrect memory access. So we
+	 * have to track the number of acquisitions of a crosslock.
+	 */
+	int nr_acquire;
+
+	/*
+	 * Separate hlock instance. This will be used at commit step.
+	 *
+	 * TODO: Use a smaller data structure containing only necessary
+	 * data. However, we should make lockdep code able to handle the
+	 * smaller one first.
+	 */
+	struct held_lock	hlock;
 };

+struct lockdep_map_cross {
+	struct lockdep_map map;
+	struct cross_lock xlock;
+};
+#endif
+
 /*
  * Initialization, self-test and debugging-output methods:
  */
@@ -281,13 +376,6 @@ extern void lockdep_on(void);
 extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
 			     struct lock_class_key *key, int subclass);

-/*
- * To initialize a lockdep_map statically use this macro.
- * Note that _name must not be NULL.
- */
-#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
-	{ .name = (_name), .key = (void *)(_key), }
-
 /*
  * Reinitialize a lock key - for cases where there is special locking or
  * special initialization of locks so that the validator gets the scope
@@ -363,10 +451,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,

 extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);

-extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
-extern void lockdep_clear_current_reclaim_state(void);
-extern void lockdep_trace_alloc(gfp_t mask);
-
 struct pin_cookie { unsigned int val; };

 #define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
@@ -375,7 +459,7 @@ extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
 extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
 extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);

-# define INIT_LOCKDEP				.lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
+# define INIT_LOCKDEP				.lockdep_recursion = 0,

 #define lockdep_depth(tsk)	(debug_locks ? (tsk)->lockdep_depth : 0)

@@ -416,9 +500,6 @@ static inline void lockdep_on(void)
 # define lock_downgrade(l, i)			do { } while (0)
 # define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
-# define lockdep_set_current_reclaim_state(g)	do { } while (0)
-# define lockdep_clear_current_reclaim_state()	do { } while (0)
-# define lockdep_trace_alloc(g)			do { } while (0)
 # define lockdep_info()				do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
@@ -467,6 +548,58 @@ struct pin_cookie { };

 #endif /* !LOCKDEP */

+enum xhlock_context_t {
+	XHLOCK_HARD,
+	XHLOCK_SOFT,
+	XHLOCK_CTX_NR,
+};
+
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+extern void lockdep_init_map_crosslock(struct lockdep_map *lock,
+				       const char *name,
+				       struct lock_class_key *key,
+				       int subclass);
+extern void lock_commit_crosslock(struct lockdep_map *lock);
+
+/*
+ * What we essentially have to initialize is 'nr_acquire'. Other members
+ * will be initialized in add_xlock().
+ */
+#define STATIC_CROSS_LOCK_INIT() \
+	{ .nr_acquire = 0,}
+
+#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .map.name = (_name), .map.key = (void *)(_key), \
+	  .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), }
+
+/*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .name = (_name), .key = (void *)(_key), .cross = 0, }
+
+extern void crossrelease_hist_start(enum xhlock_context_t c);
+extern void crossrelease_hist_end(enum xhlock_context_t c);
+extern void lockdep_invariant_state(bool force);
+extern void lockdep_init_task(struct task_struct *task);
+extern void lockdep_free_task(struct task_struct *task);
+#else /* !CROSSRELEASE */
+#define lockdep_init_map_crosslock(m, n, k, s) do {} while (0)
+/*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .name = (_name), .key = (void *)(_key), }
+
+static inline void crossrelease_hist_start(enum xhlock_context_t c) {}
+static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
+static inline void lockdep_invariant_state(bool force) {}
+static inline void lockdep_init_task(struct task_struct *task) {}
+static inline void lockdep_free_task(struct task_struct *task) {}
+#endif /* CROSSRELEASE */
+
 #ifdef CONFIG_LOCK_STAT

 extern void lock_contended(struct lockdep_map *lock, unsigned long ip);

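A statically initialized crosslock map would then look like this hypothetical sketch, using the initializers added above:

	static struct lock_class_key my_xkey;
	static struct lockdep_map_cross my_xmap =
		STATIC_CROSS_LOCKDEP_MAP_INIT("my_xmap", &my_xkey);

The .map.cross = 1 field is what later steers lock_acquire() into the crossrelease path, and .xlock.nr_acquire starts at 0 so no commit runs before the first acquisition.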
+ 65 - 30
include/linux/mm_types.h

@@ -526,26 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 extern void tlb_finish_mmu(struct mmu_gather *tlb,
 				unsigned long start, unsigned long end);

-/*
- * Memory barriers to keep this state in sync are graciously provided by
- * the page table locks, outside of which no page table modifications happen.
- * The barriers are used to ensure the order between tlb_flush_pending updates,
- * which happen while the lock is not taken, and the PTE updates, which happen
- * while the lock is taken, are serialized.
- */
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
-{
-	return atomic_read(&mm->tlb_flush_pending) > 0;
-}
-
-/*
- * Returns true if there are two above TLB batching threads in parallel.
- */
-static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
-{
-	return atomic_read(&mm->tlb_flush_pending) > 1;
-}
-
 static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
 	atomic_set(&mm->tlb_flush_pending, 0);
@@ -554,27 +534,82 @@ static inline void init_tlb_flush_pending(struct mm_struct *mm)
 static inline void inc_tlb_flush_pending(struct mm_struct *mm)
 {
 	atomic_inc(&mm->tlb_flush_pending);
-
 	/*
-	 * Guarantee that the tlb_flush_pending increase does not leak into the
-	 * critical section updating the page tables
+	 * The only time this value is relevant is when there are indeed pages
+	 * to flush. And we'll only flush pages after changing them, which
+	 * requires the PTL.
+	 *
+	 * So the ordering here is:
+	 *
+	 *	atomic_inc(&mm->tlb_flush_pending);
+	 *	spin_lock(&ptl);
+	 *	...
+	 *	set_pte_at();
+	 *	spin_unlock(&ptl);
+	 *
+	 *				spin_lock(&ptl)
+	 *				mm_tlb_flush_pending();
+	 *				....
+	 *				spin_unlock(&ptl);
+	 *
+	 *	flush_tlb_range();
+	 *	atomic_dec(&mm->tlb_flush_pending);
+	 *
+	 * Where the increment is constrained by the PTL unlock, it thus
+	 * ensures that the increment is visible if the PTE modification is
+	 * visible. After all, if there is no PTE modification, nobody cares
+	 * about TLB flushes either.
+	 *
+	 * This very much relies on users (mm_tlb_flush_pending() and
+	 * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
+	 * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
+	 * locks (PPC) the unlock of one doesn't order against the lock of
+	 * another PTL.
+	 *
+	 * The decrement is ordered by the flush_tlb_range(), such that
+	 * mm_tlb_flush_pending() will not return false unless all flushes have
+	 * completed.
 	 */
-	smp_mb__before_spinlock();
 }

-/* Clearing is done after a TLB flush, which also provides a barrier. */
 static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
 	/*
-	 * Guarantee that the tlb_flush_pending does not not leak into the
-	 * critical section, since we must order the PTE change and changes to
-	 * the pending TLB flush indication. We could have relied on TLB flush
-	 * as a memory barrier, but this behavior is not clearly documented.
+	 * See inc_tlb_flush_pending().
+	 *
+	 * This cannot be smp_mb__before_atomic() because smp_mb() simply does
+	 * not order against TLB invalidate completion, which is what we need.
+	 *
+	 * Therefore we must rely on tlb_flush_*() to guarantee order.
 	 */
-	smp_mb__before_atomic();
 	atomic_dec(&mm->tlb_flush_pending);
 }

+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+	/*
+	 * Must be called after having acquired the PTL; orders against that
+	 * PTLs release and therefore ensures that if we observe the modified
+	 * PTE we must also observe the increment from inc_tlb_flush_pending().
+	 *
+	 * That is, it only guarantees to return true if there is a flush
+	 * pending for _this_ PTL.
+	 */
+	return atomic_read(&mm->tlb_flush_pending);
+}
+
+static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+{
+	/*
+	 * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
+	 * for which there is a TLB flush pending in order to guarantee
+	 * we've seen both that PTE modification and the increment.
+	 *
+	 * (no requirement on actually still holding the PTL, that is irrelevant)
+	 */
+	return atomic_read(&mm->tlb_flush_pending) > 1;
+}
+
 struct vm_fault;

 struct vm_special_mapping {

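Putting the pieces together, the batching pattern the new comments describe looks roughly like this (simplified; not a literal call chain):

	inc_tlb_flush_pending(mm);		/* made visible by the PTL unlock below */

	spin_lock(ptl);
	set_pte_at(mm, addr, ptep, entry);	/* PTE change under the PTL */
	spin_unlock(ptl);			/* orders the increment before the PTE */

	flush_tlb_range(vma, start, end);	/* completion orders the decrement */
	dec_tlb_flush_pending(mm);

A reader that takes the same PTL and sees the modified PTE is therefore guaranteed to see tlb_flush_pending > 0 via mm_tlb_flush_pending().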
+ 1 - 1
include/linux/netdevice.h

@@ -2774,7 +2774,7 @@ struct softnet_data {
 	unsigned int		input_queue_head ____cacheline_aligned_in_smp;

 	/* Elements below can be accessed between CPUs for RPS/RFS */
-	struct call_single_data	csd ____cacheline_aligned_in_smp;
+	call_single_data_t	csd ____cacheline_aligned_in_smp;
 	struct softnet_data	*rps_ipi_next;
 	unsigned int		cpu;
 	unsigned int		input_queue_tail;

+ 4 - 0
include/linux/refcount.h

@@ -53,6 +53,9 @@ extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
 extern __must_check bool refcount_dec_and_test(refcount_t *r);
 extern void refcount_dec(refcount_t *r);
 #else
+# ifdef CONFIG_ARCH_HAS_REFCOUNT
+#  include <asm/refcount.h>
+# else
 static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 {
 	return atomic_add_unless(&r->refs, i, 0);
@@ -87,6 +90,7 @@ static inline void refcount_dec(refcount_t *r)
 {
 	atomic_dec(&r->refs);
 }
+# endif /* !CONFIG_ARCH_HAS_REFCOUNT */
 #endif /* CONFIG_REFCOUNT_FULL */

 extern __must_check bool refcount_dec_if_one(refcount_t *r);

+ 1 - 0
include/linux/rwsem-spinlock.h

@@ -32,6 +32,7 @@ struct rw_semaphore {
 #define RWSEM_UNLOCKED_VALUE		0x00000000

 extern void __down_read(struct rw_semaphore *sem);
+extern int __must_check __down_read_killable(struct rw_semaphore *sem);
 extern int __down_read_trylock(struct rw_semaphore *sem);
 extern void __down_write(struct rw_semaphore *sem);
 extern int __must_check __down_write_killable(struct rw_semaphore *sem);

+ 1 - 0
include/linux/rwsem.h

@@ -44,6 +44,7 @@ struct rw_semaphore {
 };

 extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);

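rwsem_down_read_failed_killable() is the slow path behind the down_read_killable() this series introduces; caller-side, the point is the error return, as in this sketch:

	if (down_read_killable(&mm->mmap_sem))
		return -EINTR;		/* a fatal signal arrived while sleeping */
	/* ... read-side critical section ... */
	up_read(&mm->mmap_sem);

Unlike down_read(), a task stuck here can still be killed, so unkillable D-state hangs on the rwsem become recoverable.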
+ 11 - 1
include/linux/sched.h

@@ -847,7 +847,17 @@ struct task_struct {
 	int				lockdep_depth;
 	unsigned int			lockdep_recursion;
 	struct held_lock		held_locks[MAX_LOCK_DEPTH];
-	gfp_t				lockdep_reclaim_gfp;
+#endif
+
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+#define MAX_XHLOCKS_NR 64UL
+	struct hist_lock *xhlocks; /* Crossrelease history locks */
+	unsigned int xhlock_idx;
+	/* For restoring at history boundaries */
+	unsigned int xhlock_idx_hist[XHLOCK_CTX_NR];
+	unsigned int hist_id;
+	/* For overwrite check at each context exit */
+	unsigned int hist_id_save[XHLOCK_CTX_NR];
 #endif

 #ifdef CONFIG_UBSAN

+ 8 - 0
include/linux/sched/mm.h

@@ -167,6 +167,14 @@ static inline gfp_t current_gfp_context(gfp_t flags)
 	return flags;
 }

+#ifdef CONFIG_LOCKDEP
+extern void fs_reclaim_acquire(gfp_t gfp_mask);
+extern void fs_reclaim_release(gfp_t gfp_mask);
+#else
+static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
+static inline void fs_reclaim_release(gfp_t gfp_mask) { }
+#endif
+
 static inline unsigned int memalloc_noio_save(void)
 {
 	unsigned int flags = current->flags & PF_MEMALLOC_NOIO;

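These replace the old lockdep_trace_alloc() machinery removed above: instead of a per-task reclaim gfp mask, the allocator takes a fictitious "fs_reclaim" lockdep map around direct reclaim; roughly (a sketch, simplified from the page allocator):

	fs_reclaim_acquire(gfp_mask);	/* no-op unless the mask allows fs reclaim */
	/* ... direct reclaim may recurse into filesystems here ... */
	fs_reclaim_release(gfp_mask);

Any lock held across an allocation that can reclaim is thereby ordered against every lock taken inside reclaim, with no special-case lockdep code.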
+ 6 - 2
include/linux/smp.h

@@ -14,13 +14,17 @@
 #include <linux/llist.h>

 typedef void (*smp_call_func_t)(void *info);
-struct call_single_data {
+struct __call_single_data {
 	struct llist_node llist;
 	smp_call_func_t func;
 	void *info;
 	unsigned int flags;
 };

+/* Use __aligned() to avoid to use 2 cache lines for 1 csd */
+typedef struct __call_single_data call_single_data_t
+	__aligned(sizeof(struct __call_single_data));
+
 /* total number of cpus in this system (may exceed NR_CPUS) */
 extern unsigned int total_cpus;

@@ -48,7 +52,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 		smp_call_func_t func, void *info, bool wait,
 		gfp_t gfp_flags);

-int smp_call_function_single_async(int cpu, struct call_single_data *csd);
+int smp_call_function_single_async(int cpu, call_single_data_t *csd);

 #ifdef CONFIG_SMP


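On 64-bit, struct __call_single_data is 32 bytes, so the typedef's __aligned(32) guarantees a csd never straddles two 64-byte cache lines; usage is unchanged apart from the type name, e.g. this hypothetical sketch:

	static call_single_data_t my_csd = {
		.func = my_func,	/* hypothetical callback */
		.info = &my_data,
	};

	smp_call_function_single_async(target_cpu, &my_csd);

Statically declared csd's get the alignment for free; the caveat this series deals with elsewhere is embedded csd's, whose containing struct's alignment is now forced up as well.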
+ 32 - 9
include/linux/spinlock.h

@@ -118,16 +118,39 @@ do {								\
 #endif

 /*
- * Despite its name it doesn't necessarily has to be a full barrier.
- * It should only guarantee that a STORE before the critical section
- * can not be reordered with LOADs and STOREs inside this section.
- * spin_lock() is the one-way barrier, this LOAD can not escape out
- * of the region. So the default implementation simply ensures that
- * a STORE can not move into the critical section, smp_wmb() should
- * serialize it with another STORE done by spin_lock().
+ * This barrier must provide two things:
+ *
+ *   - it must guarantee a STORE before the spin_lock() is ordered against a
+ *     LOAD after it, see the comments at its two usage sites.
+ *
+ *   - it must ensure the critical section is RCsc.
+ *
+ * The latter is important for cases where we observe values written by other
+ * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ *
+ * CPU0			CPU1			CPU2
+ *
+ *			for (;;) {
+ *			  if (READ_ONCE(X))
+ *			    break;
+ *			}
+ * X=1
+ *			<sched-out>
+ *						<sched-in>
+ *						r = X;
+ *
+ * without transitivity it could be that CPU1 observes X!=0 and breaks the
+ * loop, we get migrated, and CPU2 then sees X==0.
+ *
+ * Since most load-store architectures implement ACQUIRE with an smp_mb() after
+ * the LL/SC loop, they need no further barriers. Similarly all our TSO
+ * architectures imply an smp_mb() for each atomic instruction and equally don't
+ * need more.
+ *
+ * Architectures that can implement ACQUIRE better need to take care.
  */
-#ifndef smp_mb__before_spinlock
-#define smp_mb__before_spinlock()	smp_wmb()
+#ifndef smp_mb__after_spinlock
+#define smp_mb__after_spinlock()	do { } while (0)
 #endif

 #ifdef CONFIG_DEBUG_SPINLOCK

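A minimal sketch of the documented guarantee (names illustrative; this mirrors how the scheduler uses it):

	WRITE_ONCE(X, 1);		/* STORE before the lock */
	spin_lock(&s);
	smp_mb__after_spinlock();	/* upgrade the ACQUIRE to a full barrier */
	r = READ_ONCE(Y);		/* this LOAD can no longer pass the STORE */

Without the barrier, the lock's ACQUIRE only orders accesses after the lock itself; the STORE before spin_lock() could still be reordered with LOADs inside the critical section.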
+ 6 - 1
init/Kconfig

@@ -1275,12 +1275,17 @@ config BASE_FULL
 config FUTEX
 	bool "Enable futex support" if EXPERT
 	default y
-	select RT_MUTEXES
+	imply RT_MUTEXES
 	help
 	  Disabling this option will cause the kernel to be built without
 	  support for "fast userspace mutexes".  The resulting kernel may not
 	  run glibc-based applications correctly.

+config FUTEX_PI
+	bool
+	depends on FUTEX && RT_MUTEXES
+	default y
+
 config HAVE_FUTEX_CMPXCHG
 	bool
 	depends on FUTEX

+ 7 - 0
kernel/cgroup/cpuset.c

@@ -577,6 +577,13 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
 	rcu_read_unlock();
 }

+/* Must be called with cpuset_mutex held.  */
+static inline int nr_cpusets(void)
+{
+	/* jump label reference count + the top-level cpuset */
+	return static_key_count(&cpusets_enabled_key.key) + 1;
+}
+
 /*
  * generate_sched_domains()
  *

+ 1 - 0
kernel/exit.c

@@ -918,6 +918,7 @@ void __noreturn do_exit(long code)
 	exit_rcu();
 	exit_tasks_rcu_finish();

+	lockdep_free_task(tsk);
 	do_task_dead();
 }
 EXPORT_SYMBOL_GPL(do_exit);

+ 4 - 0
kernel/fork.c

@@ -484,6 +484,8 @@ void __init fork_init(void)
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
 			  NULL, free_vm_stack_cache);
 #endif
+
+	lockdep_init_task(&init_task);
 }

 int __weak arch_dup_task_struct(struct task_struct *dst,
@@ -1700,6 +1702,7 @@ static __latent_entropy struct task_struct *copy_process(
 	p->lockdep_depth = 0; /* no locks held yet */
 	p->curr_chain_key = 0;
 	p->lockdep_recursion = 0;
+	lockdep_init_task(p);
 #endif

 #ifdef CONFIG_DEBUG_MUTEXES
@@ -1958,6 +1961,7 @@ bad_fork_cleanup_audit:
 bad_fork_cleanup_perf:
 	perf_event_free_task(p);
 bad_fork_cleanup_policy:
+	lockdep_free_task(p);
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_threadgroup_lock:

+ 61 - 0
kernel/futex.c

@@ -876,6 +876,8 @@ static struct task_struct *futex_find_get_task(pid_t pid)
 	return p;
 }

+#ifdef CONFIG_FUTEX_PI
+
 /*
  * This task is holding PI mutexes at exit time => bad.
  * Kernel cleans up PI-state, but userspace is likely hosed.
@@ -933,6 +935,8 @@ void exit_pi_state_list(struct task_struct *curr)
 	raw_spin_unlock_irq(&curr->pi_lock);
 }

+#endif
+
 /*
  * We need to check the following states:
  *
@@ -1547,6 +1551,45 @@ out:
 	return ret;
 }

+static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
+{
+	unsigned int op =	  (encoded_op & 0x70000000) >> 28;
+	unsigned int cmp =	  (encoded_op & 0x0f000000) >> 24;
+	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 12);
+	int cmparg = sign_extend32(encoded_op & 0x00000fff, 12);
+	int oldval, ret;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
+		if (oparg < 0 || oparg > 31)
+			return -EINVAL;
+		oparg = 1 << oparg;
+	}
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
+	if (ret)
+		return ret;
+
+	switch (cmp) {
+	case FUTEX_OP_CMP_EQ:
+		return oldval == cmparg;
+	case FUTEX_OP_CMP_NE:
+		return oldval != cmparg;
+	case FUTEX_OP_CMP_LT:
+		return oldval < cmparg;
+	case FUTEX_OP_CMP_GE:
+		return oldval >= cmparg;
+	case FUTEX_OP_CMP_LE:
+		return oldval <= cmparg;
+	case FUTEX_OP_CMP_GT:
+		return oldval > cmparg;
+	default:
+		return -ENOSYS;
+	}
+}
+
 /*
  * Wake up all waiters hashed on the physical page that is mapped
  * to this virtual address:
@@ -1800,6 +1843,15 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 	struct futex_q *this, *next;
 	DEFINE_WAKE_Q(wake_q);

+	/*
+	 * When PI is not supported: return -ENOSYS if requeue_pi is true,
+	 * consequently the compiler knows requeue_pi is always false past
+	 * this point which will optimize away all the conditional code
+	 * further down.
+	 */
+	if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
+		return -ENOSYS;
+
 	if (requeue_pi) {
 		/*
 		 * Requeue PI only works on two distinct uaddrs. This
@@ -2595,6 +2647,9 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 	struct futex_q q = futex_q_init;
 	int res, ret;

+	if (!IS_ENABLED(CONFIG_FUTEX_PI))
+		return -ENOSYS;
+
 	if (refill_pi_state_cache())
 		return -ENOMEM;

@@ -2774,6 +2829,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 	struct futex_q *top_waiter;
 	int ret;

+	if (!IS_ENABLED(CONFIG_FUTEX_PI))
+		return -ENOSYS;
+
 retry:
 	if (get_user(uval, uaddr))
 		return -EFAULT;
@@ -2984,6 +3042,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 	struct futex_q q = futex_q_init;
 	int res, ret;

+	if (!IS_ENABLED(CONFIG_FUTEX_PI))
+		return -ENOSYS;
+
 	if (uaddr == uaddr2)
 		return -EINVAL;


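The decode in the new futex_atomic_op_inuser() mirrors the userspace encoding of FUTEX_OP (from include/uapi/linux/futex.h):

	#define FUTEX_OP(op, oparg, cmp, cmparg) \
		(((op & 0xf) << 28) | ((cmp & 0xf) << 24) | \
		 ((oparg & 0xfff) << 12) | (cmparg & 0xfff))

	/* e.g. atomically add 1 to *uaddr and report whether the old value was > 0: */
	encoded_op = FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_GT, 0);

Consolidating this into kernel/futex.c removes the near-identical copy every architecture used to carry in its asm/futex.h, as the arch diffs earlier in this merge show.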
+ 76 - 28
kernel/jump_label.c

@@ -79,29 +79,7 @@ int static_key_count(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_count);

-void static_key_enable(struct static_key *key)
-{
-	int count = static_key_count(key);
-
-	WARN_ON_ONCE(count < 0 || count > 1);
-
-	if (!count)
-		static_key_slow_inc(key);
-}
-EXPORT_SYMBOL_GPL(static_key_enable);
-
-void static_key_disable(struct static_key *key)
-{
-	int count = static_key_count(key);
-
-	WARN_ON_ONCE(count < 0 || count > 1);
-
-	if (count)
-		static_key_slow_dec(key);
-}
-EXPORT_SYMBOL_GPL(static_key_disable);
-
-void static_key_slow_inc(struct static_key *key)
+static void static_key_slow_inc_cpuslocked(struct static_key *key)
 {
 	int v, v1;

@@ -125,24 +103,87 @@ void static_key_slow_inc(struct static_key *key)
 			return;
 	}

-	cpus_read_lock();
 	jump_label_lock();
 	if (atomic_read(&key->enabled) == 0) {
 		atomic_set(&key->enabled, -1);
 		jump_label_update(key);
-		atomic_set(&key->enabled, 1);
+		/*
+		 * Ensure that if the above cmpxchg loop observes our positive
+		 * value, it must also observe all the text changes.
+		 */
+		atomic_set_release(&key->enabled, 1);
 	} else {
 		atomic_inc(&key->enabled);
 	}
 	jump_label_unlock();
+}
+
+void static_key_slow_inc(struct static_key *key)
+{
+	cpus_read_lock();
+	static_key_slow_inc_cpuslocked(key);
 	cpus_read_unlock();
 }
 EXPORT_SYMBOL_GPL(static_key_slow_inc);

-static void __static_key_slow_dec(struct static_key *key,
-		unsigned long rate_limit, struct delayed_work *work)
+void static_key_enable_cpuslocked(struct static_key *key)
+{
+	STATIC_KEY_CHECK_USE();
+
+	if (atomic_read(&key->enabled) > 0) {
+		WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
+		return;
+	}
+
+	jump_label_lock();
+	if (atomic_read(&key->enabled) == 0) {
+		atomic_set(&key->enabled, -1);
+		jump_label_update(key);
+		/*
+		 * See static_key_slow_inc().
+		 */
+		atomic_set_release(&key->enabled, 1);
+	}
+	jump_label_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_enable_cpuslocked);
+
+void static_key_enable(struct static_key *key)
+{
+	cpus_read_lock();
+	static_key_enable_cpuslocked(key);
+	cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_enable);
+
+void static_key_disable_cpuslocked(struct static_key *key)
+{
+	STATIC_KEY_CHECK_USE();
+
+	if (atomic_read(&key->enabled) != 1) {
+		WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
+		return;
+	}
+
+	jump_label_lock();
+	if (atomic_cmpxchg(&key->enabled, 1, 0))
+		jump_label_update(key);
+	jump_label_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_disable_cpuslocked);
+
+void static_key_disable(struct static_key *key)
 {
 	cpus_read_lock();
+	static_key_disable_cpuslocked(key);
+	cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_disable);
+
+static void static_key_slow_dec_cpuslocked(struct static_key *key,
+					   unsigned long rate_limit,
+					   struct delayed_work *work)
+{
 	/*
 	 * The negative count check is valid even when a negative
 	 * key->enabled is in use by static_key_slow_inc(); a
@@ -153,7 +194,6 @@ static void __static_key_slow_dec(struct static_key *key,
 	if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
 		WARN(atomic_read(&key->enabled) < 0,
 		     "jump label: negative count!\n");
-		cpus_read_unlock();
 		return;
 	}

@@ -164,6 +204,14 @@ static void __static_key_slow_dec(struct static_key *key,
 		jump_label_update(key);
 	}
 	jump_label_unlock();
+}
+
+static void __static_key_slow_dec(struct static_key *key,
+				  unsigned long rate_limit,
+				  struct delayed_work *work)
+{
+	cpus_read_lock();
+	static_key_slow_dec_cpuslocked(key, rate_limit, work);
 	cpus_read_unlock();
 }


+ 817 - 191
kernel/locking/lockdep.c

@@ -58,6 +58,10 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/lock.h>

+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+#include <linux/slab.h>
+#endif
+
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
 module_param(prove_locking, int, 0644);
@@ -344,14 +348,12 @@ EXPORT_SYMBOL(lockdep_on);
 #if VERBOSE
 # define HARDIRQ_VERBOSE	1
 # define SOFTIRQ_VERBOSE	1
-# define RECLAIM_VERBOSE	1
 #else
 # define HARDIRQ_VERBOSE	0
 # define SOFTIRQ_VERBOSE	0
-# define RECLAIM_VERBOSE	0
 #endif

-#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE
+#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
 /*
  * Quick filtering for interesting events:
  */
@@ -726,6 +728,18 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
 	return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
 }

+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+static void cross_init(struct lockdep_map *lock, int cross);
+static int cross_lock(struct lockdep_map *lock);
+static int lock_acquire_crosslock(struct held_lock *hlock);
+static int lock_release_crosslock(struct lockdep_map *lock);
+#else
+static inline void cross_init(struct lockdep_map *lock, int cross) {}
+static inline int cross_lock(struct lockdep_map *lock) { return 0; }
+static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; }
+static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; }
+#endif
+
 /*
  * Register a lock's class in the hash-table, if the class is not present
  * yet. Otherwise we look it up. We cache the result in the lock object
@@ -1125,22 +1139,41 @@ print_circular_lock_scenario(struct held_lock *src,
 		printk(KERN_CONT "\n\n");
 	}

-	printk(" Possible unsafe locking scenario:\n\n");
-	printk("       CPU0                    CPU1\n");
-	printk("       ----                    ----\n");
-	printk("  lock(");
-	__print_lock_name(target);
-	printk(KERN_CONT ");\n");
-	printk("                               lock(");
-	__print_lock_name(parent);
-	printk(KERN_CONT ");\n");
-	printk("                               lock(");
-	__print_lock_name(target);
-	printk(KERN_CONT ");\n");
-	printk("  lock(");
-	__print_lock_name(source);
-	printk(KERN_CONT ");\n");
-	printk("\n *** DEADLOCK ***\n\n");
+	if (cross_lock(tgt->instance)) {
+		printk(" Possible unsafe locking scenario by crosslock:\n\n");
+		printk("       CPU0                    CPU1\n");
+		printk("       ----                    ----\n");
+		printk("  lock(");
+		__print_lock_name(parent);
+		printk(KERN_CONT ");\n");
+		printk("  lock(");
+		__print_lock_name(target);
+		printk(KERN_CONT ");\n");
+		printk("                               lock(");
+		__print_lock_name(source);
+		printk(KERN_CONT ");\n");
+		printk("                               unlock(");
+		__print_lock_name(target);
+		printk(KERN_CONT ");\n");
+		printk("\n *** DEADLOCK ***\n\n");
+	} else {
+		printk(" Possible unsafe locking scenario:\n\n");
+		printk("       CPU0                    CPU1\n");
+		printk("       ----                    ----\n");
+		printk("  lock(");
+		__print_lock_name(target);
+		printk(KERN_CONT ");\n");
+		printk("                               lock(");
+		__print_lock_name(parent);
+		printk(KERN_CONT ");\n");
+		printk("                               lock(");
+		__print_lock_name(target);
+		printk(KERN_CONT ");\n");
+		printk("  lock(");
+		__print_lock_name(source);
+		printk(KERN_CONT ");\n");
+		printk("\n *** DEADLOCK ***\n\n");
+	}
 }

 /*
@@ -1165,7 +1198,12 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
 	pr_warn("%s/%d is trying to acquire lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(check_src);
-	pr_warn("\nbut task is already holding lock:\n");
+
+	if (cross_lock(check_tgt->instance))
+		pr_warn("\nbut now in release context of a crosslock acquired at the following:\n");
+	else
+		pr_warn("\nbut task is already holding lock:\n");
+
 	print_lock(check_tgt);
 	pr_warn("\nwhich lock already depends on the new lock.\n\n");
 	pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
@@ -1183,7 +1221,8 @@ static inline int class_equal(struct lock_list *entry, void *data)
 static noinline int print_circular_bug(struct lock_list *this,
 				struct lock_list *target,
 				struct held_lock *check_src,
-				struct held_lock *check_tgt)
+				struct held_lock *check_tgt,
+				struct stack_trace *trace)
 {
 	struct task_struct *curr = current;
 	struct lock_list *parent;
@@ -1193,7 +1232,9 @@ static noinline int print_circular_bug(struct lock_list *this,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;

-	if (!save_trace(&this->trace))
+	if (cross_lock(check_tgt->instance))
+		this->trace = *trace;
+	else if (!save_trace(&this->trace))
 		return 0;

 	depth = get_lock_depth(target);
@@ -1309,6 +1350,19 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
 	return result;
 }

+static noinline int
+check_redundant(struct lock_list *root, struct lock_class *target,
+		struct lock_list **target_entry)
+{
+	int result;
+
+	debug_atomic_inc(nr_redundant_checks);
+
+	result = __bfs_forwards(root, target, class_equal, target_entry);
+
+	return result;
+}
+
 #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
 /*
  * Forwards and backwards subgraph searching, for the purposes of
@@ -1784,6 +1838,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
 		if (nest)
 			return 2;

+		if (cross_lock(prev->instance))
+			continue;
+
 		return print_deadlock_bug(curr, prev, next);
 	}
 	return 1;
@@ -1813,20 +1870,13 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-	       struct held_lock *next, int distance, int *stack_saved)
+	       struct held_lock *next, int distance, struct stack_trace *trace,
+	       int (*save)(struct stack_trace *trace))
 {
 	struct lock_list *entry;
 	int ret;
 	struct lock_list this;
 	struct lock_list *uninitialized_var(target_entry);
-	/*
-	 * Static variable, serialized by the graph_lock().
-	 *
-	 * We use this static variable to save the stack trace in case
-	 * we call into this function multiple times due to encountering
-	 * trylocks in the held lock stack.
-	 */
-	static struct stack_trace trace;

 	/*
 	 * Prove that the new <prev> -> <next> dependency would not
@@ -1841,7 +1891,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	this.parent = NULL;
 	ret = check_noncircular(&this, hlock_class(prev), &target_entry);
 	if (unlikely(!ret))
-		return print_circular_bug(&this, target_entry, next, prev);
+		return print_circular_bug(&this, target_entry, next, prev, trace);
 	else if (unlikely(ret < 0))
 		return print_bfs_bug(ret);

@@ -1870,15 +1920,26 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 		if (entry->class == hlock_class(next)) {
 			if (distance == 1)
 				entry->distance = 1;
-			return 2;
+			return 1;
 		}
 	}

-	if (!*stack_saved) {
-		if (!save_trace(&trace))
-			return 0;
-		*stack_saved = 1;
+	/*
+	 * Is the <prev> -> <next> link redundant?
+	 */
+	this.class = hlock_class(prev);
+	this.parent = NULL;
+	ret = check_redundant(&this, hlock_class(next), &target_entry);
+	if (!ret) {
+		debug_atomic_inc(nr_redundant);
+		return 2;
 	}
+	if (ret < 0)
+		return print_bfs_bug(ret);
+
+
+	if (save && !save(trace))
+		return 0;

 	/*
 	 * Ok, all validations passed, add the new lock
@@ -1886,14 +1947,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	 */
 	ret = add_lock_to_list(hlock_class(next),
 			       &hlock_class(prev)->locks_after,
-			       next->acquire_ip, distance, &trace);
+			       next->acquire_ip, distance, trace);

 	if (!ret)
 		return 0;

 	ret = add_lock_to_list(hlock_class(prev),
 			       &hlock_class(next)->locks_before,
-			       next->acquire_ip, distance, &trace);
+			       next->acquire_ip, distance, trace);
 	if (!ret)
 		return 0;

@@ -1901,8 +1962,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	 * Debugging printouts:
 	 */
 	if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
-		/* We drop graph lock, so another thread can overwrite trace. */
-		*stack_saved = 0;
 		graph_unlock();
 		printk("\n new dependency: ");
 		print_lock_name(hlock_class(prev));
@@ -1910,9 +1969,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 		print_lock_name(hlock_class(next));
 		printk(KERN_CONT "\n");
 		dump_stack();
-		return graph_lock();
+		if (!graph_lock())
+			return 0;
 	}
-	return 1;
+	return 2;
 }

 /*
@@ -1925,8 +1985,9 @@ static int
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
 	int depth = curr->lockdep_depth;
-	int stack_saved = 0;
 	struct held_lock *hlock;
+	struct stack_trace trace;
+	int (*save)(struct stack_trace *trace) = save_trace;

 	/*
 	 * Debugging checks.
@@ -1947,21 +2008,36 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		int distance = curr->lockdep_depth - depth + 1;
 		hlock = curr->held_locks + depth - 1;
 		/*
-		 * Only non-recursive-read entries get new dependencies
-		 * added:
+		 * Only non-crosslock entries get new dependencies added.
+		 * Crosslock entries will be added by commit later:
 		 */
-		if (hlock->read != 2 && hlock->check) {
-			if (!check_prev_add(curr, hlock, next,
-						distance, &stack_saved))
-				return 0;
+		if (!cross_lock(hlock->instance)) {
 			/*
-			 * Stop after the first non-trylock entry,
-			 * as non-trylock entries have added their
-			 * own direct dependencies already, so this
-			 * lock is connected to them indirectly:
+			 * Only non-recursive-read entries get new dependencies
+			 * added:
 			 */
-			if (!hlock->trylock)
-				break;
+			if (hlock->read != 2 && hlock->check) {
+				int ret = check_prev_add(curr, hlock, next,
+							 distance, &trace, save);
+				if (!ret)
+					return 0;
+
+				/*
+				 * Stop saving stack_trace if save_trace() was
+				 * called at least once:
+				 */
+				if (save && ret == 2)
+					save = NULL;
+
+				/*
+				 * Stop after the first non-trylock entry,
+				 * as non-trylock entries have added their
+				 * own direct dependencies already, so this
+				 * lock is connected to them indirectly:
+				 */
+				if (!hlock->trylock)
+					break;
+			}
 		}
 		depth--;
 		/*
@@ -2126,19 +2202,26 @@ static int check_no_collision(struct task_struct *curr,
 }

 /*
- * Look up a dependency chain. If the key is not present yet then
- * add it and return 1 - in this case the new dependency chain is
- * validated. If the key is already hashed, return 0.
- * (On return with 1 graph_lock is held.)
+ * This is for building a chain between just two different classes,
+ * instead of adding a new hlock upon current, which is done by
+ * add_chain_cache().
+ *
+ * This can be called in any context with two classes, while
+ * add_chain_cache() must be done within the lock owner's context
+ * since it uses hlock which might be racy in another context.
  */
-static inline int lookup_chain_cache(struct task_struct *curr,
-				     struct held_lock *hlock,
-				     u64 chain_key)
+static inline int add_chain_cache_classes(unsigned int prev,
+					  unsigned int next,
+					  unsigned int irq_context,
+					  u64 chain_key)
 {
-	struct lock_class *class = hlock_class(hlock);
 	struct hlist_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
-	int i, j;
+
+	/*
+	 * Allocate a new chain entry from the static array, and add
+	 * it to the hash:
+	 */

 	/*
 	 * We might need to take the graph lock, ensure we've got IRQs
@@ -2147,43 +2230,76 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	 */
 	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
 		return 0;
+
+	if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
+		if (!debug_locks_off_graph_unlock())
+			return 0;
+
+		print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
+		dump_stack();
+		return 0;
+	}
+
+	chain = lock_chains + nr_lock_chains++;
+	chain->chain_key = chain_key;
+	chain->irq_context = irq_context;
+	chain->depth = 2;
+	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
+		chain->base = nr_chain_hlocks;
+		nr_chain_hlocks += chain->depth;
+		chain_hlocks[chain->base] = prev - 1;
+		chain_hlocks[chain->base + 1] = next - 1;
+	}
+#ifdef CONFIG_DEBUG_LOCKDEP
 	/*
-	 * We can walk it lock-free, because entries only get added
-	 * to the hash:
+	 * Important for check_no_collision().
 	 */
-	hlist_for_each_entry_rcu(chain, hash_head, entry) {
-		if (chain->chain_key == chain_key) {
-cache_hit:
-			debug_atomic_inc(chain_lookup_hits);
-			if (!check_no_collision(curr, hlock, chain))
-				return 0;
-
-			if (very_verbose(class))
-				printk("\nhash chain already cached, key: "
-					"%016Lx tail class: [%p] %s\n",
-					(unsigned long long)chain_key,
-					class->key, class->name);
+	else {
+		if (!debug_locks_off_graph_unlock())
 			return 0;
-		}
+
+		print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
+		dump_stack();
+		return 0;
 	}
-	if (very_verbose(class))
-		printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n",
-			(unsigned long long)chain_key, class->key, class->name);
+#endif
+
+	hlist_add_head_rcu(&chain->entry, hash_head);
+	debug_atomic_inc(chain_lookup_misses);
+	inc_chains();
+
+	return 1;
+}
+
+/*
+ * Adds a dependency chain into the chain hashtable. Must be called with
+ * graph_lock held.
+ *
+ * Return 0 on failure, with graph_lock released.
+ * Return 1 on success, with graph_lock still held.
+ */
+static inline int add_chain_cache(struct task_struct *curr,
+				  struct held_lock *hlock,
+				  u64 chain_key)
+{
+	struct lock_class *class = hlock_class(hlock);
+	struct hlist_head *hash_head = chainhashentry(chain_key);
+	struct lock_chain *chain;
+	int i, j;
+
 	/*
 	/*
 	 * Allocate a new chain entry from the static array, and add
 	 * Allocate a new chain entry from the static array, and add
 	 * it to the hash:
 	 * it to the hash:
 	 */
 	 */
-	if (!graph_lock())
-		return 0;
+
 	/*
 	/*
-	 * We have to walk the chain again locked - to avoid duplicates:
+	 * We might need to take the graph lock, ensure we've got IRQs
+	 * disabled to make this an IRQ-safe lock.. for recursion reasons
+	 * lockdep won't complain about its own locking errors.
 	 */
 	 */
-	hlist_for_each_entry(chain, hash_head, entry) {
-		if (chain->chain_key == chain_key) {
-			graph_unlock();
-			goto cache_hit;
-		}
-	}
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return 0;
+
 	if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
 	if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
 		if (!debug_locks_off_graph_unlock())
 		if (!debug_locks_off_graph_unlock())
 			return 0;
 			return 0;
@@ -2235,6 +2351,78 @@ cache_hit:
 	return 1;
 	return 1;
 }
 }
 
 
+/*
+ * Look up a dependency chain.
+ */
+static inline struct lock_chain *lookup_chain_cache(u64 chain_key)
+{
+	struct hlist_head *hash_head = chainhashentry(chain_key);
+	struct lock_chain *chain;
+
+	/*
+	 * We can walk it lock-free, because entries only get added
+	 * to the hash:
+	 */
+	hlist_for_each_entry_rcu(chain, hash_head, entry) {
+		if (chain->chain_key == chain_key) {
+			debug_atomic_inc(chain_lookup_hits);
+			return chain;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * If the key is not yet present in the dependency chain cache then
+ * add it and return 1 - in this case the new dependency chain is
+ * validated. If the key is already hashed, return 0.
+ * (On return with 1 graph_lock is held.)
+ */
+static inline int lookup_chain_cache_add(struct task_struct *curr,
+					 struct held_lock *hlock,
+					 u64 chain_key)
+{
+	struct lock_class *class = hlock_class(hlock);
+	struct lock_chain *chain = lookup_chain_cache(chain_key);
+
+	if (chain) {
+cache_hit:
+		if (!check_no_collision(curr, hlock, chain))
+			return 0;
+
+		if (very_verbose(class)) {
+			printk("\nhash chain already cached, key: "
+					"%016Lx tail class: [%p] %s\n",
+					(unsigned long long)chain_key,
+					class->key, class->name);
+		}
+
+		return 0;
+	}
+
+	if (very_verbose(class)) {
+		printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n",
+			(unsigned long long)chain_key, class->key, class->name);
+	}
+
+	if (!graph_lock())
+		return 0;
+
+	/*
+	 * We have to walk the chain again locked - to avoid duplicates:
+	 */
+	chain = lookup_chain_cache(chain_key);
+	if (chain) {
+		graph_unlock();
+		goto cache_hit;
+	}
+
+	if (!add_chain_cache(curr, hlock, chain_key))
+		return 0;
+
+	return 1;
+}
+
 static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
 static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
 		struct held_lock *hlock, int chain_head, u64 chain_key)
 		struct held_lock *hlock, int chain_head, u64 chain_key)
 {
 {
@@ -2245,11 +2433,11 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
 	 *
 	 *
 	 * We look up the chain_key and do the O(N^2) check and update of
 	 * We look up the chain_key and do the O(N^2) check and update of
 	 * the dependencies only if this is a new dependency chain.
 	 * the dependencies only if this is a new dependency chain.
-	 * (If lookup_chain_cache() returns with 1 it acquires
+	 * (If lookup_chain_cache_add() returns with 1 it acquires
 	 * graph_lock for us)
 	 * graph_lock for us)
 	 */
 	 */
 	if (!hlock->trylock && hlock->check &&
 	if (!hlock->trylock && hlock->check &&
-	    lookup_chain_cache(curr, hlock, chain_key)) {
+	    lookup_chain_cache_add(curr, hlock, chain_key)) {
 		/*
 		/*
 		 * Check whether last held lock:
 		 * Check whether last held lock:
 		 *
 		 *
@@ -2277,14 +2465,17 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
 		 * Add dependency only if this lock is not the head
 		 * Add dependency only if this lock is not the head
 		 * of the chain, and if it's not a secondary read-lock:
 		 * of the chain, and if it's not a secondary read-lock:
 		 */
 		 */
-		if (!chain_head && ret != 2)
+		if (!chain_head && ret != 2) {
 			if (!check_prevs_add(curr, hlock))
 			if (!check_prevs_add(curr, hlock))
 				return 0;
 				return 0;
+		}
+
 		graph_unlock();
 		graph_unlock();
-	} else
-		/* after lookup_chain_cache(): */
+	} else {
+		/* after lookup_chain_cache_add(): */
 		if (unlikely(!debug_locks))
 		if (unlikely(!debug_locks))
 			return 0;
 			return 0;
+	}
 
 
 	return 1;
 	return 1;
 }
 }
@@ -2567,14 +2758,6 @@ static int SOFTIRQ_verbose(struct lock_class *class)
 	return 0;
 	return 0;
 }
 }
 
 
-static int RECLAIM_FS_verbose(struct lock_class *class)
-{
-#if RECLAIM_VERBOSE
-	return class_filter(class);
-#endif
-	return 0;
-}
-
 #define STRICT_READ_CHECKS	1
 #define STRICT_READ_CHECKS	1
 
 
 static int (*state_verbose_f[])(struct lock_class *class) = {
 static int (*state_verbose_f[])(struct lock_class *class) = {
@@ -2870,57 +3053,6 @@ void trace_softirqs_off(unsigned long ip)
 		debug_atomic_inc(redundant_softirqs_off);
 		debug_atomic_inc(redundant_softirqs_off);
 }
 }
 
 
-static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
-{
-	struct task_struct *curr = current;
-
-	if (unlikely(!debug_locks))
-		return;
-
-	gfp_mask = current_gfp_context(gfp_mask);
-
-	/* no reclaim without waiting on it */
-	if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
-		return;
-
-	/* this guy won't enter reclaim */
-	if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
-		return;
-
-	/* We're only interested __GFP_FS allocations for now */
-	if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS))
-		return;
-
-	/*
-	 * Oi! Can't be having __GFP_FS allocations with IRQs disabled.
-	 */
-	if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
-		return;
-
-	/* Disable lockdep if explicitly requested */
-	if (gfp_mask & __GFP_NOLOCKDEP)
-		return;
-
-	mark_held_locks(curr, RECLAIM_FS);
-}
-
-static void check_flags(unsigned long flags);
-
-void lockdep_trace_alloc(gfp_t gfp_mask)
-{
-	unsigned long flags;
-
-	if (unlikely(current->lockdep_recursion))
-		return;
-
-	raw_local_irq_save(flags);
-	check_flags(flags);
-	current->lockdep_recursion = 1;
-	__lockdep_trace_alloc(gfp_mask, flags);
-	current->lockdep_recursion = 0;
-	raw_local_irq_restore(flags);
-}
-
 static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
 static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
 {
 {
 	/*
 	/*
@@ -2966,22 +3098,6 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
 		}
 		}
 	}
 	}
 
 
-	/*
-	 * We reuse the irq context infrastructure more broadly as a general
-	 * context checking code. This tests GFP_FS recursion (a lock taken
-	 * during reclaim for a GFP_FS allocation is held over a GFP_FS
-	 * allocation).
-	 */
-	if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) {
-		if (hlock->read) {
-			if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
-					return 0;
-		} else {
-			if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
-					return 0;
-		}
-	}
-
 	return 1;
 	return 1;
 }
 }
 
 
@@ -3040,10 +3156,6 @@ static inline int separate_irq_context(struct task_struct *curr,
 	return 0;
 	return 0;
 }
 }
 
 
-void lockdep_trace_alloc(gfp_t gfp_mask)
-{
-}
-
 #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
 #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
 
 
 /*
 /*
@@ -3116,7 +3228,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 /*
 /*
  * Initialize a lock instance's lock-class mapping info:
  * Initialize a lock instance's lock-class mapping info:
  */
  */
-void lockdep_init_map(struct lockdep_map *lock, const char *name,
+static void __lockdep_init_map(struct lockdep_map *lock, const char *name,
 		      struct lock_class_key *key, int subclass)
 		      struct lock_class_key *key, int subclass)
 {
 {
 	int i;
 	int i;
@@ -3174,8 +3286,25 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 		raw_local_irq_restore(flags);
 		raw_local_irq_restore(flags);
 	}
 	}
 }
 }
+
+void lockdep_init_map(struct lockdep_map *lock, const char *name,
+		      struct lock_class_key *key, int subclass)
+{
+	cross_init(lock, 0);
+	__lockdep_init_map(lock, name, key, subclass);
+}
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
 
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name,
+		      struct lock_class_key *key, int subclass)
+{
+	cross_init(lock, 1);
+	__lockdep_init_map(lock, name, key, subclass);
+}
+EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock);
+#endif
+
 struct lock_class_key __lockdep_no_validate__;
 struct lock_class_key __lockdep_no_validate__;
 EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 
 
@@ -3231,6 +3360,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	int chain_head = 0;
 	int chain_head = 0;
 	int class_idx;
 	int class_idx;
 	u64 chain_key;
 	u64 chain_key;
+	int ret;
 
 
 	if (unlikely(!debug_locks))
 	if (unlikely(!debug_locks))
 		return 0;
 		return 0;
@@ -3279,7 +3409,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 
 
 	class_idx = class - lock_classes + 1;
 	class_idx = class - lock_classes + 1;
 
 
-	if (depth) {
+	/* TODO: nest_lock is not implemented for crosslock yet. */
+	if (depth && !cross_lock(lock)) {
 		hlock = curr->held_locks + depth - 1;
 		hlock = curr->held_locks + depth - 1;
 		if (hlock->class_idx == class_idx && nest_lock) {
 		if (hlock->class_idx == class_idx && nest_lock) {
 			if (hlock->references) {
 			if (hlock->references) {
@@ -3367,6 +3498,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
 	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
 		return 0;
 		return 0;
 
 
+	ret = lock_acquire_crosslock(hlock);
+	/*
+	 * 2 means normal acquire operations are needed. Otherwise, it's
+	 * ok just to return with '0:fail, 1:success'.
+	 */
+	if (ret != 2)
+		return ret;
+
 	curr->curr_chain_key = chain_key;
 	curr->curr_chain_key = chain_key;
 	curr->lockdep_depth++;
 	curr->lockdep_depth++;
 	check_chain_key(curr);
 	check_chain_key(curr);
@@ -3604,11 +3743,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
 	struct task_struct *curr = current;
 	struct task_struct *curr = current;
 	struct held_lock *hlock;
 	struct held_lock *hlock;
 	unsigned int depth;
 	unsigned int depth;
-	int i;
+	int ret, i;
 
 
 	if (unlikely(!debug_locks))
 	if (unlikely(!debug_locks))
 		return 0;
 		return 0;
 
 
+	ret = lock_release_crosslock(lock);
+	/*
+	 * 2 means normal release operations are needed. Otherwise, it's
+	 * ok just to return with '0:fail, 1:success'.
+	 */
+	if (ret != 2)
+		return ret;
+
 	depth = curr->lockdep_depth;
 	depth = curr->lockdep_depth;
 	/*
 	/*
 	 * So we're all set to release this lock.. wait what lock? We don't
 	 * So we're all set to release this lock.. wait what lock? We don't
@@ -3952,18 +4099,6 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
 }
 }
 EXPORT_SYMBOL_GPL(lock_unpin_lock);
 EXPORT_SYMBOL_GPL(lock_unpin_lock);
 
 
-void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
-{
-	current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask);
-}
-EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state);
-
-void lockdep_clear_current_reclaim_state(void)
-{
-	current->lockdep_reclaim_gfp = 0;
-}
-EXPORT_SYMBOL_GPL(lockdep_clear_current_reclaim_state);
-
 #ifdef CONFIG_LOCK_STAT
 #ifdef CONFIG_LOCK_STAT
 static int
 static int
 print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
 print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
@@ -4484,6 +4619,12 @@ asmlinkage __visible void lockdep_sys_exit(void)
 				curr->comm, curr->pid);
 				curr->comm, curr->pid);
 		lockdep_print_held_locks(curr);
 		lockdep_print_held_locks(curr);
 	}
 	}
+
+	/*
+	 * The lock history for each syscall should be independent. So wipe the
+	 * slate clean on return to userspace.
+	 */
+	lockdep_invariant_state(false);
 }
 }
 
 
 void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
@@ -4532,3 +4673,488 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 	dump_stack();
 	dump_stack();
 }
 }
 EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
 EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
+
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+
+/*
+ * Crossrelease works by recording a lock history for each thread and
+ * connecting those historic locks that were taken after the
+ * wait_for_completion() in the complete() context.
+ *
+ * Task-A				Task-B
+ *
+ *					mutex_lock(&A);
+ *					mutex_unlock(&A);
+ *
+ * wait_for_completion(&C);
+ *   lock_acquire_crosslock();
+ *     atomic_inc_return(&cross_gen_id);
+ *                                |
+ *				  |	mutex_lock(&B);
+ *				  |	mutex_unlock(&B);
+ *                                |
+ *				  |	complete(&C);
+ *				  `--	  lock_commit_crosslock();
+ *
+ * Which will then add a dependency between B and C.
+ */
+
+#define xhlock(i)         (current->xhlocks[(i) % MAX_XHLOCKS_NR])
+
+/*
+ * Whenever a crosslock is held, cross_gen_id will be increased.
+ */
+static atomic_t cross_gen_id; /* Can be wrapped */
+
+/*
+ * Make an entry of the ring buffer invalid.
+ */
+static inline void invalidate_xhlock(struct hist_lock *xhlock)
+{
+	/*
+	 * Normally, xhlock->hlock.instance must be !NULL.
+	 */
+	xhlock->hlock.instance = NULL;
+}
+
+/*
+ * Lock history stacks; we have 2 nested lock history stacks:
+ *
+ *   HARD(IRQ)
+ *   SOFT(IRQ)
+ *
+ * The thing is that once we complete a HARD/SOFT IRQ the future task locks
+ * should not depend on any of the locks observed while running the IRQ.  So
+ * what we do is rewind the history buffer and erase all our knowledge of that
+ * temporal event.
+ */
+
+void crossrelease_hist_start(enum xhlock_context_t c)
+{
+	struct task_struct *cur = current;
+
+	if (!cur->xhlocks)
+		return;
+
+	cur->xhlock_idx_hist[c] = cur->xhlock_idx;
+	cur->hist_id_save[c]    = cur->hist_id;
+}
+
+void crossrelease_hist_end(enum xhlock_context_t c)
+{
+	struct task_struct *cur = current;
+
+	if (cur->xhlocks) {
+		unsigned int idx = cur->xhlock_idx_hist[c];
+		struct hist_lock *h = &xhlock(idx);
+
+		cur->xhlock_idx = idx;
+
+		/* Check if the ring was overwritten. */
+		if (h->hist_id != cur->hist_id_save[c])
+			invalidate_xhlock(h);
+	}
+}
+
+/*
+ * lockdep_invariant_state() is used to annotate independence inside a task, to
+ * make one task look like multiple independent 'tasks'.
+ *
+ * Take for instance workqueues; each work is independent of the last. The
+ * completion of a future work does not depend on the completion of a past work
+ * (in general). Therefore we must not carry that (lock) dependency across
+ * works.
+ *
+ * This is true for many things; pretty much all kthreads fall into this
+ * pattern, where they have an invariant state and future completions do not
+ * depend on past completions. It's just that since they all have the 'same'
+ * form -- the kthread does the same over and over -- it doesn't typically
+ * matter.
+ *
+ * The same is true for system-calls, once a system call is completed (we've
+ * returned to userspace) the next system call does not depend on the lock
+ * history of the previous system call.
+ *
+ * The key property for independence, this invariant state, is that it must be
+ * a point where we hold no locks and have no history. Because if we were to
+ * hold locks, the restore at _end() would not necessarily recover its history
+ * entry. Similarly, independence by definition means it does not depend on
+ * prior state.
+ */
+void lockdep_invariant_state(bool force)
+{
+	/*
+	 * We call this at an invariant point, no current state, no history.
+	 * Verify the former, enforce the latter.
+	 */
+	WARN_ON_ONCE(!force && current->lockdep_depth);
+	invalidate_xhlock(&xhlock(current->xhlock_idx));
+}
+
+static int cross_lock(struct lockdep_map *lock)
+{
+	return lock ? lock->cross : 0;
+}
+
+/*
+ * This is needed to decide the relationship between wrapable variables.
+ */
+static inline int before(unsigned int a, unsigned int b)
+{
+	return (int)(a - b) < 0;
+}
+
+static inline struct lock_class *xhlock_class(struct hist_lock *xhlock)
+{
+	return hlock_class(&xhlock->hlock);
+}
+
+static inline struct lock_class *xlock_class(struct cross_lock *xlock)
+{
+	return hlock_class(&xlock->hlock);
+}
+
+/*
+ * Should we check a dependency with previous one?
+ */
+static inline int depend_before(struct held_lock *hlock)
+{
+	return hlock->read != 2 && hlock->check && !hlock->trylock;
+}
+
+/*
+ * Should we check a dependency with next one?
+ */
+static inline int depend_after(struct held_lock *hlock)
+{
+	return hlock->read != 2 && hlock->check;
+}
+
+/*
+ * Check if the xhlock is valid, which would be false if,
+ *
+ *    1. It has not been used since initialization, or
+ *    2. It got invalidated.
+ *
+ * Remember that hist_lock is implemented as a ring buffer.
+ */
+static inline int xhlock_valid(struct hist_lock *xhlock)
+{
+	/*
+	 * xhlock->hlock.instance must be !NULL.
+	 */
+	return !!xhlock->hlock.instance;
+}
+
+/*
+ * Record a hist_lock entry.
+ *
+ * Irq disable is only required.
+ */
+static void add_xhlock(struct held_lock *hlock)
+{
+	unsigned int idx = ++current->xhlock_idx;
+	struct hist_lock *xhlock = &xhlock(idx);
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+	/*
+	 * This can be done locklessly because they are all task-local
+	 * state, we must however ensure IRQs are disabled.
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
+#endif
+
+	/* Initialize hist_lock's members */
+	xhlock->hlock = *hlock;
+	xhlock->hist_id = ++current->hist_id;
+
+	xhlock->trace.nr_entries = 0;
+	xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES;
+	xhlock->trace.entries = xhlock->trace_entries;
+	xhlock->trace.skip = 3;
+	save_stack_trace(&xhlock->trace);
+}
+
+static inline int same_context_xhlock(struct hist_lock *xhlock)
+{
+	return xhlock->hlock.irq_context == task_irq_context(current);
+}
+
+/*
+ * This should be lockless as far as possible because this would be
+ * called very frequently.
+ */
+static void check_add_xhlock(struct held_lock *hlock)
+{
+	/*
+	 * Record a hist_lock, but only if acquisitions ahead
+	 * could depend on the held_lock. For example, if the held_lock
+	 * is a trylock then acquisitions ahead never depend on it.
+	 * In that case, we don't need to record it. Just return.
+	 */
+	if (!current->xhlocks || !depend_before(hlock))
+		return;
+
+	add_xhlock(hlock);
+}
+
+/*
+ * For crosslock.
+ */
+static int add_xlock(struct held_lock *hlock)
+{
+	struct cross_lock *xlock;
+	unsigned int gen_id;
+
+	if (!graph_lock())
+		return 0;
+
+	xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock;
+
+	/*
+	 * When acquisitions for a crosslock are overlapped, we use
+	 * nr_acquire to perform commit for them, based on cross_gen_id
+	 * of the first acquisition, which allows us to add additional
+	 * dependencies.
+	 *
+	 * Moreover, when no acquisition of a crosslock is in progress,
+	 * we should not perform commit because the lock might not exist
+	 * any more, which might cause incorrect memory access. So we
+	 * have to track the number of acquisitions of a crosslock.
+	 *
+	 * depend_after() is necessary to initialize only the first
+	 * valid xlock so that the xlock can be used on its commit.
+	 */
+	if (xlock->nr_acquire++ && depend_after(&xlock->hlock))
+		goto unlock;
+
+	gen_id = (unsigned int)atomic_inc_return(&cross_gen_id);
+	xlock->hlock = *hlock;
+	xlock->hlock.gen_id = gen_id;
+unlock:
+	graph_unlock();
+	return 1;
+}
+
+/*
+ * Called for both normal and crosslock acquires. Normal locks will be
+ * pushed on the hist_lock queue. Cross locks will record state and
+ * stop regular lock_acquire() to avoid being placed on the held_lock
+ * stack.
+ *
+ * Return: 0 - failure;
+ *         1 - crosslock, done;
+ *         2 - normal lock, continue to held_lock[] ops.
+ */
+static int lock_acquire_crosslock(struct held_lock *hlock)
+{
+	/*
+	 *	CONTEXT 1		CONTEXT 2
+	 *	---------		---------
+	 *	lock A (cross)
+	 *	X = atomic_inc_return(&cross_gen_id)
+	 *	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 *				Y = atomic_read_acquire(&cross_gen_id)
+	 *				lock B
+	 *
+	 * atomic_read_acquire() is for ordering between A and B,
+	 * IOW, A happens before B when CONTEXT 2 sees Y >= X.
+	 *
+	 * Pairs with atomic_inc_return() in add_xlock().
+	 */
+	hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id);
+
+	if (cross_lock(hlock->instance))
+		return add_xlock(hlock);
+
+	check_add_xhlock(hlock);
+	return 2;
+}
+
+static int copy_trace(struct stack_trace *trace)
+{
+	unsigned long *buf = stack_trace + nr_stack_trace_entries;
+	unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
+	unsigned int nr = min(max_nr, trace->nr_entries);
+
+	trace->nr_entries = nr;
+	memcpy(buf, trace->entries, nr * sizeof(trace->entries[0]));
+	trace->entries = buf;
+	nr_stack_trace_entries += nr;
+
+	if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
+		if (!debug_locks_off_graph_unlock())
+			return 0;
+
+		print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
+		dump_stack();
+
+		return 0;
+	}
+
+	return 1;
+}
+
+static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock)
+{
+	unsigned int xid, pid;
+	u64 chain_key;
+
+	xid = xlock_class(xlock) - lock_classes;
+	chain_key = iterate_chain_key((u64)0, xid);
+	pid = xhlock_class(xhlock) - lock_classes;
+	chain_key = iterate_chain_key(chain_key, pid);
+
+	if (lookup_chain_cache(chain_key))
+		return 1;
+
+	if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context,
+				chain_key))
+		return 0;
+
+	if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1,
+			    &xhlock->trace, copy_trace))
+		return 0;
+
+	return 1;
+}
+
+static void commit_xhlocks(struct cross_lock *xlock)
+{
+	unsigned int cur = current->xhlock_idx;
+	unsigned int prev_hist_id = xhlock(cur).hist_id;
+	unsigned int i;
+
+	if (!graph_lock())
+		return;
+
+	if (xlock->nr_acquire) {
+		for (i = 0; i < MAX_XHLOCKS_NR; i++) {
+			struct hist_lock *xhlock = &xhlock(cur - i);
+
+			if (!xhlock_valid(xhlock))
+				break;
+
+			if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id))
+				break;
+
+			if (!same_context_xhlock(xhlock))
+				break;
+
+			/*
+			 * Filter out the cases where the ring buffer was
+			 * overwritten and the current entry has a bigger
+			 * hist_id than the previous one, which is impossible
+			 * otherwise:
+			 */
+			if (unlikely(before(prev_hist_id, xhlock->hist_id)))
+				break;
+
+			prev_hist_id = xhlock->hist_id;
+
+			/*
+			 * commit_xhlock() returns 0 with graph_lock already
+			 * released if fail.
+			 */
+			if (!commit_xhlock(xlock, xhlock))
+				return;
+		}
+	}
+
+	graph_unlock();
+}
+
+void lock_commit_crosslock(struct lockdep_map *lock)
+{
+	struct cross_lock *xlock;
+	unsigned long flags;
+
+	if (unlikely(!debug_locks || current->lockdep_recursion))
+		return;
+
+	if (!current->xhlocks)
+		return;
+
+	/*
+	 * Commit hist_locks against the cross_lock, but only if
+	 * the cross_lock could depend on acquisitions after it.
+	 *
+	 * For example, if the cross_lock does not have the 'check' flag
+	 * then we don't need to check dependencies and commit for that.
+	 * Just skip it. In that case, of course, the cross_lock does
+	 * not depend on acquisitions ahead, either.
+	 *
+	 * WARNING: Don't do that in add_xlock() in advance. When an
+	 * acquisition context is different from the commit context,
+	 * an invalid (skipped) cross_lock might be accessed.
+	 */
+	if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock))
+		return;
+
+	raw_local_irq_save(flags);
+	check_flags(flags);
+	current->lockdep_recursion = 1;
+	xlock = &((struct lockdep_map_cross *)lock)->xlock;
+	commit_xhlocks(xlock);
+	current->lockdep_recursion = 0;
+	raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lock_commit_crosslock);
+
+/*
+ * Return: 0 - failure;
+ *         1 - crosslock, done;
+ *         2 - normal lock, continue to held_lock[] ops.
+ */
+static int lock_release_crosslock(struct lockdep_map *lock)
+{
+	if (cross_lock(lock)) {
+		if (!graph_lock())
+			return 0;
+		((struct lockdep_map_cross *)lock)->xlock.nr_acquire--;
+		graph_unlock();
+		return 1;
+	}
+	return 2;
+}
+
+static void cross_init(struct lockdep_map *lock, int cross)
+{
+	if (cross)
+		((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0;
+
+	lock->cross = cross;
+
+	/*
+	 * Crossrelease assumes that the ring buffer size of xhlocks
+	 * is a power of 2. So enforce it at build time.
+	 */
+	BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1));
+}
+
+void lockdep_init_task(struct task_struct *task)
+{
+	int i;
+
+	task->xhlock_idx = UINT_MAX;
+	task->hist_id = 0;
+
+	for (i = 0; i < XHLOCK_CTX_NR; i++) {
+		task->xhlock_idx_hist[i] = UINT_MAX;
+		task->hist_id_save[i] = 0;
+	}
+
+	task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR,
+				GFP_KERNEL);
+}
+
+void lockdep_free_task(struct task_struct *task)
+{
+	if (task->xhlocks) {
+		void *tmp = task->xhlocks;
+		/* Disable crossrelease for current */
+		task->xhlocks = NULL;
+		kfree(tmp);
+	}
+}
+#endif

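The crossrelease implementation above leans on two small idioms: a free-running generation counter compared with the wrap-safe before() helper, and a history ring whose size is a power of 2 so the index in the xhlock() macro can simply wrap. The following standalone user-space sketch is not part of the patch; names and the ring size are illustrative, but it shows why both idioms keep working when the counters overflow:

/*
 * Standalone illustration of two idioms used by the crossrelease code:
 * a wrap-safe generation comparison and power-of-2 ring indexing.
 * Not kernel code; build with: cc -o ring ring.c && ./ring
 */
#include <stdio.h>
#include <limits.h>

#define RING_NR		64			/* a power of 2, like MAX_XHLOCKS_NR */
#define ring_slot(idx)	((idx) % RING_NR)	/* reduces to (idx) & (RING_NR - 1)  */

/* Wrap-safe "a happened before b" for free-running unsigned counters. */
static int before(unsigned int a, unsigned int b)
{
	return (int)(a - b) < 0;
}

int main(void)
{
	unsigned int gen = UINT_MAX - 1;	/* generation counter about to wrap  */
	unsigned int idx = UINT_MAX;		/* ring index, like task->xhlock_idx */

	/* The ordering test stays correct across the wrap ... */
	printf("before(%u, %u) = %d\n", gen, gen + 3u, before(gen, gen + 3u));	/* 1 */
	printf("before(%u, %u) = %d\n", gen + 3u, gen, before(gen + 3u, gen));	/* 0 */

	/* ... and consecutive indices keep landing in consecutive ring slots. */
	printf("slots: %u %u %u\n", ring_slot(idx), ring_slot(idx + 1), ring_slot(idx + 2));
	return 0;
}

This is also why cross_init() above insists at build time that MAX_XHLOCKS_NR is a power of 2: the modulo in xhlock() then costs a single mask.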
+ 2 - 0
kernel/locking/lockdep_internals.h

@@ -143,6 +143,8 @@ struct lockdep_stats {
 	int	redundant_softirqs_on;
 	int	redundant_softirqs_on;
 	int	redundant_softirqs_off;
 	int	redundant_softirqs_off;
 	int	nr_unused_locks;
 	int	nr_unused_locks;
+	int	nr_redundant_checks;
+	int	nr_redundant;
 	int	nr_cyclic_checks;
 	int	nr_cyclic_checks;
 	int	nr_cyclic_check_recursions;
 	int	nr_cyclic_check_recursions;
 	int	nr_find_usage_forwards_checks;
 	int	nr_find_usage_forwards_checks;

+ 4 - 0
kernel/locking/lockdep_proc.c

@@ -201,6 +201,10 @@ static void lockdep_stats_debug_show(struct seq_file *m)
 		debug_atomic_read(chain_lookup_hits));
 		debug_atomic_read(chain_lookup_hits));
 	seq_printf(m, " cyclic checks:                 %11llu\n",
 	seq_printf(m, " cyclic checks:                 %11llu\n",
 		debug_atomic_read(nr_cyclic_checks));
 		debug_atomic_read(nr_cyclic_checks));
+	seq_printf(m, " redundant checks:              %11llu\n",
+		debug_atomic_read(nr_redundant_checks));
+	seq_printf(m, " redundant links:               %11llu\n",
+		debug_atomic_read(nr_redundant));
 	seq_printf(m, " find-mask forwards checks:     %11llu\n",
 	seq_printf(m, " find-mask forwards checks:     %11llu\n",
 		debug_atomic_read(nr_find_usage_forwards_checks));
 		debug_atomic_read(nr_find_usage_forwards_checks));
 	seq_printf(m, " find-mask backwards checks:    %11llu\n",
 	seq_printf(m, " find-mask backwards checks:    %11llu\n",

+ 0 - 1
kernel/locking/lockdep_states.h

@@ -6,4 +6,3 @@
  */
  */
 LOCKDEP_STATE(HARDIRQ)
 LOCKDEP_STATE(HARDIRQ)
 LOCKDEP_STATE(SOFTIRQ)
 LOCKDEP_STATE(SOFTIRQ)
-LOCKDEP_STATE(RECLAIM_FS)

+ 13 - 0
kernel/locking/osq_lock.c

@@ -109,6 +109,19 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 
 
 	prev = decode_cpu(old);
 	prev = decode_cpu(old);
 	node->prev = prev;
 	node->prev = prev;
+
+	/*
+	 * osq_lock()			unqueue
+	 *
+	 * node->prev = prev		osq_wait_next()
+	 * WMB				MB
+	 * prev->next = node		next->prev = prev // unqueue-C
+	 *
+	 * Here 'node->prev' and 'next->prev' are the same variable and we need
+	 * to ensure these stores happen in-order to avoid corrupting the list.
+	 */
+	smp_wmb();
+
 	WRITE_ONCE(prev->next, node);
 	WRITE_ONCE(prev->next, node);
 
 
 	/*
 	/*

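The smp_wmb() added above is a store-store ordering: node->prev must be visible before the node is published through prev->next, because the unqueue path reaches ->prev by following ->next. A rough user-space analogue of the same publish pattern, using C11 atomics instead of the kernel primitives (illustrative only):

/* User-space sketch of the publish ordering above (illustration only). */
#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

struct node {
	struct node *prev;
	_Atomic(struct node *) next;
};

static void publish(struct node *prev, struct node *node)
{
	node->prev = prev;				/* node->prev = prev            */
	atomic_thread_fence(memory_order_release);	/* rough analogue of smp_wmb()  */
	atomic_store_explicit(&prev->next, node,	/* WRITE_ONCE(prev->next, node) */
			      memory_order_relaxed);
}

int main(void)
{
	static struct node a, b;

	publish(&a, &b);

	/* Whoever observes a.next == &b is also guaranteed to see b.prev == &a. */
	assert(atomic_load_explicit(&a.next, memory_order_acquire) == &b);
	assert(b.prev == &a);
	return 0;
}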
+ 17 - 7
kernel/locking/qspinlock_paravirt.h

@@ -72,7 +72,7 @@ static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
 	struct __qspinlock *l = (void *)lock;
 	struct __qspinlock *l = (void *)lock;
 
 
 	if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
 	if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
-	    (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
+	    (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
 		qstat_inc(qstat_pv_lock_stealing, true);
 		qstat_inc(qstat_pv_lock_stealing, true);
 		return true;
 		return true;
 	}
 	}
@@ -101,16 +101,16 @@ static __always_inline void clear_pending(struct qspinlock *lock)
 
 
 /*
 /*
  * The pending bit check in pv_queued_spin_steal_lock() isn't a memory
  * The pending bit check in pv_queued_spin_steal_lock() isn't a memory
- * barrier. Therefore, an atomic cmpxchg() is used to acquire the lock
- * just to be sure that it will get it.
+ * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
+ * lock just to be sure that it will get it.
  */
  */
 static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 {
 {
 	struct __qspinlock *l = (void *)lock;
 	struct __qspinlock *l = (void *)lock;
 
 
 	return !READ_ONCE(l->locked) &&
 	return !READ_ONCE(l->locked) &&
-	       (cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL)
-			== _Q_PENDING_VAL);
+	       (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL,
+				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
 }
 }
 #else /* _Q_PENDING_BITS == 8 */
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
 static __always_inline void set_pending(struct qspinlock *lock)
@@ -138,7 +138,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 		 */
 		 */
 		old = val;
 		old = val;
 		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
 		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg(&lock->val, old, new);
+		val = atomic_cmpxchg_acquire(&lock->val, old, new);
 
 
 		if (val == old)
 		if (val == old)
 			return 1;
 			return 1;
@@ -362,8 +362,18 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * observe its next->locked value and advance itself.
 	 * observe its next->locked value and advance itself.
 	 *
 	 *
 	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
 	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
+	 *
+	 * The write to next->locked in arch_mcs_spin_unlock_contended()
+	 * must be ordered before the read of pn->state in the cmpxchg()
+	 * below for the code to work correctly. To guarantee full ordering
+	 * irrespective of the success or failure of the cmpxchg(),
+	 * a relaxed version with explicit barrier is used. The control
+	 * dependency will order the reading of pn->state before any
+	 * subsequent writes.
 	 */
 	 */
-	if (cmpxchg(&pn->state, vcpu_halted, vcpu_hashed) != vcpu_halted)
+	smp_mb__before_atomic();
+	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
+	    != vcpu_halted)
 		return;
 		return;
 
 
 	/*
 	/*

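The cmpxchg() -> cmpxchg_acquire() relaxations above rely on the fact that taking a lock only needs acquire semantics on the successful compare-and-swap; the full barrier implied by cmpxchg() is wasted work on architectures where it is more expensive. A minimal user-space trylock built on the same idea with C11 atomics (illustrative, not the qspinlock code itself):

/* Minimal trylock using an acquire compare-and-swap (illustration only). */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int lock_word;	/* 0 = unlocked, 1 = locked */

static bool trylock(void)
{
	int expected = 0;

	/*
	 * Success needs acquire ordering so the critical section cannot
	 * float above the lock acquisition; failure can stay relaxed
	 * because a failed trylock publishes nothing.
	 */
	return atomic_compare_exchange_strong_explicit(&lock_word, &expected, 1,
						       memory_order_acquire,
						       memory_order_relaxed);
}

static void unlock(void)
{
	atomic_store_explicit(&lock_word, 0, memory_order_release);
}

int main(void)
{
	assert(trylock());	/* first attempt succeeds        */
	assert(!trylock());	/* second attempt fails: held    */
	unlock();
	assert(trylock());	/* available again after release */
	return 0;
}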
+ 29 - 0
kernel/locking/rtmutex_common.h

@@ -40,6 +40,9 @@ struct rt_mutex_waiter {
 /*
 /*
  * Various helpers to access the waiters-tree:
  * Various helpers to access the waiters-tree:
  */
  */
+
+#ifdef CONFIG_RT_MUTEXES
+
 static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
 static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
 {
 {
 	return !RB_EMPTY_ROOT(&lock->waiters);
 	return !RB_EMPTY_ROOT(&lock->waiters);
@@ -69,6 +72,32 @@ task_top_pi_waiter(struct task_struct *p)
 			pi_tree_entry);
 			pi_tree_entry);
 }
 }
 
 
+#else
+
+static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
+{
+	return false;
+}
+
+static inline struct rt_mutex_waiter *
+rt_mutex_top_waiter(struct rt_mutex *lock)
+{
+	return NULL;
+}
+
+static inline int task_has_pi_waiters(struct task_struct *p)
+{
+	return false;
+}
+
+static inline struct rt_mutex_waiter *
+task_top_pi_waiter(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif
+
 /*
 /*
  * lock->owner state tracking:
  * lock->owner state tracking:
  */
  */

+ 28 - 9
kernel/locking/rwsem-spinlock.c

@@ -126,7 +126,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem)
 /*
 /*
  * get a read lock on the semaphore
  * get a read lock on the semaphore
  */
  */
-void __sched __down_read(struct rw_semaphore *sem)
+int __sched __down_read_common(struct rw_semaphore *sem, int state)
 {
 {
 	struct rwsem_waiter waiter;
 	struct rwsem_waiter waiter;
 	unsigned long flags;
 	unsigned long flags;
@@ -140,8 +140,6 @@ void __sched __down_read(struct rw_semaphore *sem)
 		goto out;
 		goto out;
 	}
 	}
 
 
-	set_current_state(TASK_UNINTERRUPTIBLE);
-
 	/* set up my own style of waitqueue */
 	/* set up my own style of waitqueue */
 	waiter.task = current;
 	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_READ;
 	waiter.type = RWSEM_WAITING_FOR_READ;
@@ -149,20 +147,41 @@ void __sched __down_read(struct rw_semaphore *sem)
 
 
 	list_add_tail(&waiter.list, &sem->wait_list);
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 
-	/* we don't need to touch the semaphore struct anymore */
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
 	/* wait to be given the lock */
 	/* wait to be given the lock */
 	for (;;) {
 	for (;;) {
 		if (!waiter.task)
 		if (!waiter.task)
 			break;
 			break;
+		if (signal_pending_state(state, current))
+			goto out_nolock;
+		set_current_state(state);
+		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 		schedule();
 		schedule();
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		raw_spin_lock_irqsave(&sem->wait_lock, flags);
 	}
 	}
 
 
-	__set_current_state(TASK_RUNNING);
+	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
  out:
  out:
-	;
+	return 0;
+
+out_nolock:
+	/*
+	 * We didn't take the lock, which means there is a writer that is
+	 * either the owner or the first waiter of the sem. If it's a waiter,
+	 * it will be woken by the current owner. No need to wake anybody.
+	 */
+	list_del(&waiter.list);
+	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+	return -EINTR;
+}
+
+void __sched __down_read(struct rw_semaphore *sem)
+{
+	__down_read_common(sem, TASK_UNINTERRUPTIBLE);
+}
+
+int __sched __down_read_killable(struct rw_semaphore *sem)
+{
+	return __down_read_common(sem, TASK_KILLABLE);
 }
 }
 
 
 /*
 /*

+ 30 - 3
kernel/locking/rwsem-xadd.c

@@ -221,8 +221,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 /*
 /*
  * Wait for the read lock to be granted
  * Wait for the read lock to be granted
  */
  */
-__visible
-struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
+static inline struct rw_semaphore __sched *
+__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 {
 {
 	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
 	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
 	struct rwsem_waiter waiter;
 	struct rwsem_waiter waiter;
@@ -255,17 +255,44 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 
 
 	/* wait to be given the lock */
 	/* wait to be given the lock */
 	while (true) {
 	while (true) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		set_current_state(state);
 		if (!waiter.task)
 		if (!waiter.task)
 			break;
 			break;
+		if (signal_pending_state(state, current)) {
+			raw_spin_lock_irq(&sem->wait_lock);
+			if (waiter.task)
+				goto out_nolock;
+			raw_spin_unlock_irq(&sem->wait_lock);
+			break;
+		}
 		schedule();
 		schedule();
 	}
 	}
 
 
 	__set_current_state(TASK_RUNNING);
 	__set_current_state(TASK_RUNNING);
 	return sem;
 	return sem;
+out_nolock:
+	list_del(&waiter.list);
+	if (list_empty(&sem->wait_list))
+		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
+	raw_spin_unlock_irq(&sem->wait_lock);
+	__set_current_state(TASK_RUNNING);
+	return ERR_PTR(-EINTR);
+}
+
+__visible struct rw_semaphore * __sched
+rwsem_down_read_failed(struct rw_semaphore *sem)
+{
+	return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
 }
 }
 EXPORT_SYMBOL(rwsem_down_read_failed);
 EXPORT_SYMBOL(rwsem_down_read_failed);
 
 
+__visible struct rw_semaphore * __sched
+rwsem_down_read_failed_killable(struct rw_semaphore *sem)
+{
+	return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(rwsem_down_read_failed_killable);
+
 /*
 /*
  * This function must be called with the sem->wait_lock held to prevent
  * This function must be called with the sem->wait_lock held to prevent
  * race conditions between checking the rwsem wait list and setting the
  * race conditions between checking the rwsem wait list and setting the

+ 12 - 0
kernel/panic.c

@@ -26,6 +26,7 @@
 #include <linux/nmi.h>
 #include <linux/nmi.h>
 #include <linux/console.h>
 #include <linux/console.h>
 #include <linux/bug.h>
 #include <linux/bug.h>
+#include <linux/ratelimit.h>
 
 
 #define PANIC_TIMER_STEP 100
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
 #define PANIC_BLINK_SPD 18
@@ -601,6 +602,17 @@ EXPORT_SYMBOL(__stack_chk_fail);
 
 
 #endif
 #endif
 
 
+#ifdef CONFIG_ARCH_HAS_REFCOUNT
+void refcount_error_report(struct pt_regs *regs, const char *err)
+{
+	WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n",
+		err, (void *)instruction_pointer(regs),
+		current->comm, task_pid_nr(current),
+		from_kuid_munged(&init_user_ns, current_uid()),
+		from_kuid_munged(&init_user_ns, current_euid()));
+}
+#endif
+
 core_param(panic, panic_timeout, int, 0644);
 core_param(panic, panic_timeout, int, 0644);
 core_param(pause_on_oops, pause_on_oops, int, 0644);
 core_param(pause_on_oops, pause_on_oops, int, 0644);
 core_param(panic_on_warn, panic_on_warn, int, 0644);
 core_param(panic_on_warn, panic_on_warn, int, 0644);

+ 11 - 0
kernel/sched/completion.c

@@ -32,6 +32,12 @@ void complete(struct completion *x)
 	unsigned long flags;
 	unsigned long flags;
 
 
 	spin_lock_irqsave(&x->wait.lock, flags);
 	spin_lock_irqsave(&x->wait.lock, flags);
+
+	/*
+	 * Perform commit of crossrelease here.
+	 */
+	complete_release_commit(x);
+
 	if (x->done != UINT_MAX)
 	if (x->done != UINT_MAX)
 		x->done++;
 		x->done++;
 	__wake_up_locked(&x->wait, TASK_NORMAL, 1);
 	__wake_up_locked(&x->wait, TASK_NORMAL, 1);
@@ -99,9 +105,14 @@ __wait_for_common(struct completion *x,
 {
 {
 	might_sleep();
 	might_sleep();
 
 
+	complete_acquire(x);
+
 	spin_lock_irq(&x->wait.lock);
 	spin_lock_irq(&x->wait.lock);
 	timeout = do_wait_for_common(x, action, timeout, state);
 	timeout = do_wait_for_common(x, action, timeout, state);
 	spin_unlock_irq(&x->wait.lock);
 	spin_unlock_irq(&x->wait.lock);
+
+	complete_release(x);
+
 	return timeout;
 	return timeout;
 }
 }
 
 

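The complete_acquire()/complete_release() hooks added here are what allow crossrelease to treat a completion like a lock and report the classic inversion: waiting for a completion while holding a lock that the completing side also needs. Translated to user space with a condition variable standing in for the completion (names are hypothetical, and the program deadlocks by construction, which is precisely the bug class being annotated):

/*
 * The lock-vs-completion inversion that crossrelease is meant to catch,
 * in user-space form. WARNING: deadlocks by design; build with cc -pthread.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t A = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done_cond = PTHREAD_COND_INITIALIZER;
static bool done;

static void *worker(void *arg)
{
	pthread_mutex_lock(&A);			/* blocks forever: main holds A       */
	pthread_mutex_unlock(&A);

	pthread_mutex_lock(&done_lock);		/* the complete() side, never reached */
	done = true;
	pthread_cond_signal(&done_cond);
	pthread_mutex_unlock(&done_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&A);			/* hold A across the wait ...         */
	pthread_create(&t, NULL, worker, NULL);

	pthread_mutex_lock(&done_lock);
	while (!done)				/* ... wait_for_completion() analogue */
		pthread_cond_wait(&done_cond, &done_lock);
	pthread_mutex_unlock(&done_lock);
	pthread_mutex_unlock(&A);

	pthread_join(t, NULL);
	return 0;
}

Ordinary lockdep cannot see this because the 'release' of the completion happens in another task; recording the waiter's lock history and committing it at complete() time is what closes that gap.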
+ 2 - 2
kernel/sched/core.c

@@ -1972,8 +1972,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * reordered with p->state check below. This pairs with mb() in
 	 * reordered with p->state check below. This pairs with mb() in
 	 * set_current_state() the waiting thread does.
 	 * set_current_state() the waiting thread does.
 	 */
 	 */
-	smp_mb__before_spinlock();
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	smp_mb__after_spinlock();
 	if (!(p->state & state))
 	if (!(p->state & state))
 		goto out;
 		goto out;
 
 
@@ -3296,8 +3296,8 @@ static void __sched notrace __schedule(bool preempt)
 	 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
 	 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
 	 * done by the caller to avoid the race with signal_wake_up().
 	 * done by the caller to avoid the race with signal_wake_up().
 	 */
 	 */
-	smp_mb__before_spinlock();
 	rq_lock(rq, &rf);
 	rq_lock(rq, &rf);
+	smp_mb__after_spinlock();
 
 
 	/* Promote REQ to ACT */
 	/* Promote REQ to ACT */
 	rq->clock_update_flags <<= 1;
 	rq->clock_update_flags <<= 1;

+ 1 - 1
kernel/sched/sched.h

@@ -769,7 +769,7 @@ struct rq {
 #ifdef CONFIG_SCHED_HRTICK
 #ifdef CONFIG_SCHED_HRTICK
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 	int hrtick_csd_pending;
 	int hrtick_csd_pending;
-	struct call_single_data hrtick_csd;
+	call_single_data_t hrtick_csd;
 #endif
 #endif
 	struct hrtimer hrtick_timer;
 	struct hrtimer hrtick_timer;
 #endif
 #endif

+ 0 - 6
kernel/sched/swait.c

@@ -33,9 +33,6 @@ void swake_up(struct swait_queue_head *q)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 
 
-	if (!swait_active(q))
-		return;
-
 	raw_spin_lock_irqsave(&q->lock, flags);
 	raw_spin_lock_irqsave(&q->lock, flags);
 	swake_up_locked(q);
 	swake_up_locked(q);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
@@ -51,9 +48,6 @@ void swake_up_all(struct swait_queue_head *q)
 	struct swait_queue *curr;
 	struct swait_queue *curr;
 	LIST_HEAD(tmp);
 	LIST_HEAD(tmp);
 
 
-	if (!swait_active(q))
-		return;
-
 	raw_spin_lock_irq(&q->lock);
 	raw_spin_lock_irq(&q->lock);
 	list_splice_init(&q->task_list, &tmp);
 	list_splice_init(&q->task_list, &tmp);
 	while (!list_empty(&tmp)) {
 	while (!list_empty(&tmp)) {

+ 17 - 15
kernel/smp.c

@@ -28,7 +28,7 @@ enum {
 };
 };
 
 
 struct call_function_data {
 struct call_function_data {
-	struct call_single_data	__percpu *csd;
+	call_single_data_t	__percpu *csd;
 	cpumask_var_t		cpumask;
 	cpumask_var_t		cpumask;
 	cpumask_var_t		cpumask_ipi;
 	cpumask_var_t		cpumask_ipi;
 };
 };
@@ -51,7 +51,7 @@ int smpcfd_prepare_cpu(unsigned int cpu)
 		free_cpumask_var(cfd->cpumask);
 		free_cpumask_var(cfd->cpumask);
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
-	cfd->csd = alloc_percpu(struct call_single_data);
+	cfd->csd = alloc_percpu(call_single_data_t);
 	if (!cfd->csd) {
 	if (!cfd->csd) {
 		free_cpumask_var(cfd->cpumask);
 		free_cpumask_var(cfd->cpumask);
 		free_cpumask_var(cfd->cpumask_ipi);
 		free_cpumask_var(cfd->cpumask_ipi);
@@ -103,12 +103,12 @@ void __init call_function_init(void)
  * previous function call. For multi-cpu calls its even more interesting
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  * as we'll have to ensure no other cpu is observing our csd.
  */
  */
-static __always_inline void csd_lock_wait(struct call_single_data *csd)
+static __always_inline void csd_lock_wait(call_single_data_t *csd)
 {
 {
 	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
 	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
 }
 }
 
 
-static __always_inline void csd_lock(struct call_single_data *csd)
+static __always_inline void csd_lock(call_single_data_t *csd)
 {
 {
 	csd_lock_wait(csd);
 	csd_lock_wait(csd);
 	csd->flags |= CSD_FLAG_LOCK;
 	csd->flags |= CSD_FLAG_LOCK;
@@ -116,12 +116,12 @@ static __always_inline void csd_lock(struct call_single_data *csd)
 	/*
 	/*
 	 * prevent CPU from reordering the above assignment
 	 * prevent CPU from reordering the above assignment
 	 * to ->flags with any subsequent assignments to other
 	 * to ->flags with any subsequent assignments to other
-	 * fields of the specified call_single_data structure:
+	 * fields of the specified call_single_data_t structure:
 	 */
 	 */
 	smp_wmb();
 	smp_wmb();
 }
 }
 
 
-static __always_inline void csd_unlock(struct call_single_data *csd)
+static __always_inline void csd_unlock(call_single_data_t *csd)
 {
 {
 	WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
 	WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
 
 
@@ -131,14 +131,14 @@ static __always_inline void csd_unlock(struct call_single_data *csd)
 	smp_store_release(&csd->flags, 0);
 	smp_store_release(&csd->flags, 0);
 }
 }
 
 
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
+static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
 
 /*
 /*
- * Insert a previously allocated call_single_data element
+ * Insert a previously allocated call_single_data_t element
  * for execution on the given CPU. data must already have
  * for execution on the given CPU. data must already have
  * ->func, ->info, and ->flags set.
  * ->func, ->info, and ->flags set.
  */
  */
-static int generic_exec_single(int cpu, struct call_single_data *csd,
+static int generic_exec_single(int cpu, call_single_data_t *csd,
 			       smp_call_func_t func, void *info)
 			       smp_call_func_t func, void *info)
 {
 {
 	if (cpu == smp_processor_id()) {
 	if (cpu == smp_processor_id()) {
@@ -210,7 +210,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 {
 {
 	struct llist_head *head;
 	struct llist_head *head;
 	struct llist_node *entry;
 	struct llist_node *entry;
-	struct call_single_data *csd, *csd_next;
+	call_single_data_t *csd, *csd_next;
 	static bool warned;
 	static bool warned;
 
 
 	WARN_ON(!irqs_disabled());
 	WARN_ON(!irqs_disabled());
@@ -268,8 +268,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 			     int wait)
 			     int wait)
 {
 {
-	struct call_single_data *csd;
-	struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS };
+	call_single_data_t *csd;
+	call_single_data_t csd_stack = {
+		.flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
+	};
 	int this_cpu;
 	int this_cpu;
 	int err;
 	int err;
 
 
@@ -321,7 +323,7 @@ EXPORT_SYMBOL(smp_call_function_single);
  * NOTE: Be careful, there is unfortunately no current debugging facility to
  * NOTE: Be careful, there is unfortunately no current debugging facility to
  * validate the correctness of this serialization.
  * validate the correctness of this serialization.
  */
  */
-int smp_call_function_single_async(int cpu, struct call_single_data *csd)
+int smp_call_function_single_async(int cpu, call_single_data_t *csd)
 {
 {
 	int err = 0;
 	int err = 0;
 
 
@@ -444,7 +446,7 @@ void smp_call_function_many(const struct cpumask *mask,
 
 
 	cpumask_clear(cfd->cpumask_ipi);
 	cpumask_clear(cfd->cpumask_ipi);
 	for_each_cpu(cpu, cfd->cpumask) {
 	for_each_cpu(cpu, cfd->cpumask) {
-		struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
+		call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
 
 
 		csd_lock(csd);
 		csd_lock(csd);
 		if (wait)
 		if (wait)
@@ -460,7 +462,7 @@ void smp_call_function_many(const struct cpumask *mask,
 
 
 	if (wait) {
 	if (wait) {
 		for_each_cpu(cpu, cfd->cpumask) {
 		for_each_cpu(cpu, cfd->cpumask) {
-			struct call_single_data *csd;
+			call_single_data_t *csd;
 
 
 			csd = per_cpu_ptr(cfd->csd, cpu);
 			csd = per_cpu_ptr(cfd->csd, cpu);
 			csd_lock_wait(csd);
 			csd_lock_wait(csd);

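The struct call_single_data -> call_single_data_t conversion exists so the type can carry an alignment equal to its own size, which guarantees a single csd never straddles two cache lines. The standalone sketch below shows the underlying idea; the field layout and the 64-byte line size are illustrative assumptions, the real definition lives in the kernel headers:

/*
 * Why aligning a 32-byte csd to 32 bytes matters: an unaligned object of
 * that size can straddle two 64-byte cache lines. Illustration only.
 */
#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>

struct csd_example {			/* roughly the csd layout (32 bytes on 64-bit) */
	void *llist_next;
	void (*func)(void *info);
	void *info;
	unsigned int flags;
};

/* Same trick as call_single_data_t: align the type to its own size. */
typedef struct csd_example csd_example_t
	__attribute__((aligned(sizeof(struct csd_example))));

static int same_cache_line(const void *p, size_t size)
{
	uintptr_t start = (uintptr_t)p;
	uintptr_t end = start + size - 1;

	return (start / 64) == (end / 64);	/* assume 64-byte cache lines */
}

int main(void)
{
	static csd_example_t csd[3];		/* e.g. a per-CPU style array */
	int i;

	printf("sizeof=%zu alignof=%zu\n", sizeof(csd[0]), alignof(csd_example_t));
	for (i = 0; i < 3; i++)
		printf("csd[%d] fits in one cache line: %d\n",
		       i, same_cache_line(&csd[i], sizeof(csd[i])));
	return 0;
}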
+ 1 - 1
kernel/up.c

@@ -23,7 +23,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 }
 }
 EXPORT_SYMBOL(smp_call_function_single);
 EXPORT_SYMBOL(smp_call_function_single);
 
 
-int smp_call_function_single_async(int cpu, struct call_single_data *csd)
+int smp_call_function_single_async(int cpu, call_single_data_t *csd)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 
 

+ 43 - 10
kernel/workqueue.c

@@ -2091,8 +2091,30 @@ __acquires(&pool->lock)
 
 
 	spin_unlock_irq(&pool->lock);
 	spin_unlock_irq(&pool->lock);
 
 
-	lock_map_acquire_read(&pwq->wq->lockdep_map);
+	lock_map_acquire(&pwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
 	lock_map_acquire(&lockdep_map);
+	/*
+	 * Strictly speaking we should mark the invariant state without holding
+	 * any locks, that is, before these two lock_map_acquire()'s.
+	 *
+	 * However, that would result in:
+	 *
+	 *   A(W1)
+	 *   WFC(C)
+	 *		A(W1)
+	 *		C(C)
+	 *
+	 * Which would create W1->C->W1 dependencies, even though there is no
+	 * actual deadlock possible. There are two solutions, using a
+	 * read-recursive acquire on the work(queue) 'locks', but this will then
+	 * hit the lockdep limitation on recursive locks, or simply discard
+	 * these locks.
+	 *
+	 * AFAICT there is no possible deadlock scenario between the
+	 * flush_work() and complete() primitives (except for single-threaded
+	 * workqueues), so hiding them isn't a problem.
+	 */
+	lockdep_invariant_state(true);
 	trace_workqueue_execute_start(work);
 	trace_workqueue_execute_start(work);
 	worker->current_func(work);
 	worker->current_func(work);
 	/*
 	/*
@@ -2474,7 +2496,16 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 	 */
 	 */
 	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
 	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
-	init_completion(&barr->done);
+
+	/*
+	 * Explicitly init the crosslock for wq_barrier::done, make its lock
+	 * key a subkey of the corresponding work. As a result we won't
+	 * build a dependency between wq_barrier::done and unrelated work.
+	 */
+	lockdep_init_map_crosslock((struct lockdep_map *)&barr->done.map,
+				   "(complete)wq_barr::done",
+				   target->lockdep_map.key, 1);
+	__init_completion(&barr->done);
 	barr->task = current;
 	barr->task = current;
 
 
 	/*
 	/*
@@ -2815,16 +2846,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 	spin_unlock_irq(&pool->lock);
 	spin_unlock_irq(&pool->lock);
 
 
 	/*
 	/*
-	 * If @max_active is 1 or rescuer is in use, flushing another work
-	 * item on the same workqueue may lead to deadlock.  Make sure the
-	 * flusher is not running on the same workqueue by verifying write
-	 * access.
+	 * Force a lock recursion deadlock when using flush_work() inside a
+	 * single-threaded or rescuer equipped workqueue.
+	 *
+	 * For single threaded workqueues the deadlock happens when the work
+	 * is after the work issuing the flush_work(). For rescuer equipped
+	 * workqueues the deadlock happens when the rescuer stalls, blocking
+	 * forward progress.
 	 */
 	 */
-	if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)
+	if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) {
 		lock_map_acquire(&pwq->wq->lockdep_map);
 		lock_map_acquire(&pwq->wq->lockdep_map);
-	else
-		lock_map_acquire_read(&pwq->wq->lockdep_map);
-	lock_map_release(&pwq->wq->lockdep_map);
+		lock_map_release(&pwq->wq->lockdep_map);
+	}
 
 
 	return true;
 	return true;
 already_gone:
 already_gone:

+ 18 - 0
lib/Kconfig.debug

@@ -1091,6 +1091,8 @@ config PROVE_LOCKING
 	select DEBUG_MUTEXES
 	select DEBUG_MUTEXES
 	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select DEBUG_LOCK_ALLOC
 	select DEBUG_LOCK_ALLOC
+	select LOCKDEP_CROSSRELEASE
+	select LOCKDEP_COMPLETIONS
 	select TRACE_IRQFLAGS
 	select TRACE_IRQFLAGS
 	default n
 	default n
 	help
 	help
@@ -1160,6 +1162,22 @@ config LOCK_STAT
 	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
 	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
 	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
 	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
 
 
+config LOCKDEP_CROSSRELEASE
+	bool
+	help
+	 This makes lockdep work for crosslocks, which are locks allowed to
+	 be released in a different context from the acquisition context.
+	 Normally a lock must be released in the context that acquired it.
+	 However, relaxing this constraint lets synchronization primitives
+	 such as page locks or completions use the lock correctness
+	 detector, lockdep.
+
+config LOCKDEP_COMPLETIONS
+	bool
+	help
+	 A deadlock caused by wait_for_completion() and complete() can be
+	 detected by lockdep using the crossrelease feature.
+
 config DEBUG_LOCKDEP
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
 	depends on DEBUG_KERNEL && LOCKDEP

+ 121 - 2
lib/locking-selftest.c

@@ -362,6 +362,103 @@ static void rsem_AA3(void)
 	RSL(X2); // this one should fail
 	RSL(X2); // this one should fail
 }
 }
 
 
+/*
+ * read_lock(A)
+ * spin_lock(B)
+ *		spin_lock(B)
+ *		write_lock(A)
+ */
+static void rlock_ABBA1(void)
+{
+	RL(X1);
+	L(Y1);
+	U(Y1);
+	RU(X1);
+
+	L(Y1);
+	WL(X1);
+	WU(X1);
+	U(Y1); // should fail
+}
+
+static void rwsem_ABBA1(void)
+{
+	RSL(X1);
+	ML(Y1);
+	MU(Y1);
+	RSU(X1);
+
+	ML(Y1);
+	WSL(X1);
+	WSU(X1);
+	MU(Y1); // should fail
+}
+
+/*
+ * read_lock(A)
+ * spin_lock(B)
+ *		spin_lock(B)
+ *		read_lock(A)
+ */
+static void rlock_ABBA2(void)
+{
+	RL(X1);
+	L(Y1);
+	U(Y1);
+	RU(X1);
+
+	L(Y1);
+	RL(X1);
+	RU(X1);
+	U(Y1); // should NOT fail
+}
+
+static void rwsem_ABBA2(void)
+{
+	RSL(X1);
+	ML(Y1);
+	MU(Y1);
+	RSU(X1);
+
+	ML(Y1);
+	RSL(X1);
+	RSU(X1);
+	MU(Y1); // should fail
+}
+
+
+/*
+ * write_lock(A)
+ * spin_lock(B)
+ *		spin_lock(B)
+ *		write_lock(A)
+ */
+static void rlock_ABBA3(void)
+{
+	WL(X1);
+	L(Y1);
+	U(Y1);
+	WU(X1);
+
+	L(Y1);
+	WL(X1);
+	WU(X1);
+	U(Y1); // should fail
+}
+
+static void rwsem_ABBA3(void)
+{
+	WSL(X1);
+	ML(Y1);
+	MU(Y1);
+	WSU(X1);
+
+	ML(Y1);
+	WSL(X1);
+	WSU(X1);
+	MU(Y1); // should fail
+}
+
 /*
 /*
  * ABBA deadlock:
  * ABBA deadlock:
  */
  */
@@ -1056,8 +1153,6 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 	if (debug_locks != expected) {
 	if (debug_locks != expected) {
 		unexpected_testcase_failures++;
 		unexpected_testcase_failures++;
 		pr_cont("FAILED|");
 		pr_cont("FAILED|");
-
-		dump_stack();
 	} else {
 	} else {
 		testcase_successes++;
 		testcase_successes++;
 		pr_cont("  ok  |");
 		pr_cont("  ok  |");
@@ -1933,6 +2028,30 @@ void locking_selftest(void)
 	dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
 	dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
 	pr_cont("\n");
 	pr_cont("\n");
 
 
+	print_testname("mixed read-lock/lock-write ABBA");
+	pr_cont("             |");
+	dotest(rlock_ABBA1, FAILURE, LOCKTYPE_RWLOCK);
+	/*
+	 * Lockdep does indeed fail here, but there's nothing we can do about
+	 * that now.  Don't kill lockdep for it.
+	 */
+	unexpected_testcase_failures--;
+
+	pr_cont("             |");
+	dotest(rwsem_ABBA1, FAILURE, LOCKTYPE_RWSEM);
+
+	print_testname("mixed read-lock/lock-read ABBA");
+	pr_cont("             |");
+	dotest(rlock_ABBA2, SUCCESS, LOCKTYPE_RWLOCK);
+	pr_cont("             |");
+	dotest(rwsem_ABBA2, FAILURE, LOCKTYPE_RWSEM);
+
+	print_testname("mixed write-lock/lock-write ABBA");
+	pr_cont("             |");
+	dotest(rlock_ABBA3, FAILURE, LOCKTYPE_RWLOCK);
+	pr_cont("             |");
+	dotest(rwsem_ABBA3, FAILURE, LOCKTYPE_RWSEM);
+
 	printk("  --------------------------------------------------------------------------\n");
 	printk("  --------------------------------------------------------------------------\n");
 
 
 	/*
 	/*

Some files were not shown because the diff is too large