@@ -743,10 +743,58 @@ void exit_pi_state_list(struct task_struct *curr)
 	raw_spin_unlock_irq(&curr->pi_lock);
 }
 
+/*
+ * We need to check the following states:
+ *
+ *      Waiter | pi_state | pi->owner | uTID      | uODIED | ?
+ *
+ * [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid
+ * [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid
+ *
+ * [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid
+ *
+ * [4]  Found  | Found    | NULL      | 0         | 1      | Valid
+ * [5]  Found  | Found    | NULL      | >0        | 1      | Invalid
+ *
+ * [6]  Found  | Found    | task      | 0         | 1      | Valid
+ *
+ * [7]  Found  | Found    | NULL      | Any       | 0      | Invalid
+ *
+ * [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid
+ * [9]  Found  | Found    | task      | 0         | 0      | Invalid
+ * [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid
+ *
+ * [1]  Indicates that the kernel can acquire the futex atomically. We
+ *      came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
+ *
+ * [2]  Valid, if TID does not belong to a kernel thread. If no matching
+ *      thread is found then it indicates that the owner TID has died.
+ *
+ * [3]  Invalid. The waiter is queued on a non PI futex
+ *
+ * [4]  Valid state after exit_robust_list(), which sets the user space
+ *      value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
+ *
+ * [5]  The user space value got manipulated between exit_robust_list()
+ *      and exit_pi_state_list()
+ *
+ * [6]  Valid state after exit_pi_state_list() which sets the new owner in
+ *      the pi_state but cannot access the user space value.
+ *
+ * [7]  pi_state->owner can only be NULL when the OWNER_DIED bit is set.
+ *
+ * [8]  Owner and user space value match
+ *
+ * [9]  There is no transient state which sets the user space TID to 0
+ *      except exit_robust_list(), but this is indicated by the
+ *      FUTEX_OWNER_DIED bit. See [4]
+ *
+ * [10] There is no transient state which leaves owner and user space
+ *      TID out of sync.
+ */
 static int
 lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
-		union futex_key *key, struct futex_pi_state **ps,
-		struct task_struct *task)
+		union futex_key *key, struct futex_pi_state **ps)
 {
 	struct futex_pi_state *pi_state = NULL;
 	struct futex_q *this, *next;
@@ -756,12 +804,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	plist_for_each_entry_safe(this, next, &hb->chain, list) {
 		if (match_futex(&this->key, key)) {
 			/*
-			 * Another waiter already exists - bump up
-			 * the refcount and return its pi_state:
+			 * Sanity check the waiter before increasing
+			 * the refcount and attaching to it.
 			 */
 			pi_state = this->pi_state;
 			/*
-			 * Userspace might have messed up non-PI and PI futexes
+			 * Userspace might have messed up non-PI and
+			 * PI futexes [3]
 			 */
 			if (unlikely(!pi_state))
 				return -EINVAL;
@@ -769,44 +818,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 			WARN_ON(!atomic_read(&pi_state->refcount));
 
 			/*
-			 * When pi_state->owner is NULL then the owner died
-			 * and another waiter is on the fly. pi_state->owner
-			 * is fixed up by the task which acquires
-			 * pi_state->rt_mutex.
-			 *
-			 * We do not check for pid == 0 which can happen when
-			 * the owner died and robust_list_exit() cleared the
-			 * TID.
+			 * Handle the owner died case:
 			 */
-			if (pid && pi_state->owner) {
+			if (uval & FUTEX_OWNER_DIED) {
 				/*
-				 * Bail out if user space manipulated the
-				 * futex value.
+				 * exit_pi_state_list sets owner to NULL and
+				 * wakes the topmost waiter. The task which
+				 * acquires the pi_state->rt_mutex will fixup
+				 * owner.
 				 */
-				if (pid != task_pid_vnr(pi_state->owner))
+				if (!pi_state->owner) {
+					/*
+					 * No pi state owner, but the user
+					 * space TID is not 0. Inconsistent
+					 * state. [5]
+					 */
+					if (pid)
+						return -EINVAL;
+					/*
+					 * Take a ref on the state and
+					 * return. [4]
+					 */
+					goto out_state;
+				}
+
+				/*
+				 * If TID is 0, then either the dying owner
+				 * has not yet executed exit_pi_state_list()
+				 * or some waiter acquired the rtmutex in the
+				 * pi state, but did not yet fixup the TID in
+				 * user space.
+				 *
+				 * Take a ref on the state and return. [6]
+				 */
+				if (!pid)
+					goto out_state;
+			} else {
+				/*
+				 * If the owner died bit is not set,
+				 * then the pi_state must have an
+				 * owner. [7]
+				 */
+				if (!pi_state->owner)
 					return -EINVAL;
 			}
 
 			/*
-			 * Protect against a corrupted uval. If uval
-			 * is 0x80000000 then pid is 0 and the waiter
-			 * bit is set. So the deadlock check in the
-			 * calling code has failed and we did not fall
-			 * into the check above due to !pid.
+			 * Bail out if user space manipulated the
+			 * futex value. If pi state exists then the
+			 * owner TID must be the same as the user
+			 * space TID. [9/10]
 			 */
-			if (task && pi_state->owner == task)
-				return -EDEADLK;
+			if (pid != task_pid_vnr(pi_state->owner))
+				return -EINVAL;
 
+		out_state:
 			atomic_inc(&pi_state->refcount);
 			*ps = pi_state;
-
 			return 0;
 		}
 	}
 
 	/*
 	 * We are the first waiter - try to look up the real owner and attach
-	 * the new pi_state to it, but bail out when TID = 0
+	 * the new pi_state to it, but bail out when TID = 0 [1]
 	 */
 	if (!pid)
 		return -ESRCH;
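
As a reading aid for the table above: rows [4], [5] and [7] cover the case where a pi_state exists but pi_state->owner is NULL, and they differ only in the owner-died bit and the TID field of the user space value. A stand-alone restatement of that case in plain C, for illustration only and not part of the patch (the helper name is made up; the constants come from the futex uapi header):

#include <linux/futex.h>	/* FUTEX_OWNER_DIED, FUTEX_TID_MASK */
#include <stdbool.h>
#include <stdint.h>

/*
 * pi_state exists, pi_state->owner == NULL: only consistent when the
 * owner-died bit is set and the TID field of the user space value has
 * already been cleared by exit_robust_list().
 */
static bool ownerless_pi_state_value_ok(uint32_t uval)
{
	if (!(uval & FUTEX_OWNER_DIED))
		return false;			/* row [7]: invalid */
	return (uval & FUTEX_TID_MASK) == 0;	/* row [4] valid, row [5] invalid */
}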
@@ -839,6 +914,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 		return ret;
 	}
 
+	/*
+	 * No existing pi state. First waiter. [2]
+	 */
 	pi_state = alloc_pi_state();
 
 	/*
@@ -910,10 +988,18 @@ retry:
 		return -EDEADLK;
 
 	/*
-	 * Surprise - we got the lock. Just return to userspace:
+	 * Surprise - we got the lock, but we do not trust user space at all.
 	 */
-	if (unlikely(!curval))
-		return 1;
+	if (unlikely(!curval)) {
+		/*
+		 * We verify whether there is kernel state for this
+		 * futex. If not, we can safely assume that the 0 ->
+		 * TID transition is correct. If state exists, we do
+		 * not bother to fixup the user space state as it was
+		 * corrupted already.
+		 */
+		return futex_top_waiter(hb, key) ? -EINVAL : 1;
+	}
 
 	uval = curval;
 
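
For background on the comment above: the 0 -> TID transition is the user space fast path of the PI futex protocol - an uncontended lock is taken by atomically changing the futex word from 0 to the caller's TID, and the kernel is only entered on contention. A minimal user space sketch of that protocol, purely as an illustration and not part of the patch (the function name is made up and error handling is omitted):

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Illustration: the uncontended path is a 0 -> TID cmpxchg in user space. */
static void pi_futex_lock(uint32_t *uaddr)
{
	uint32_t expected = 0;
	uint32_t tid = (uint32_t)syscall(SYS_gettid);

	if (__atomic_compare_exchange_n(uaddr, &expected, tid, 0,
					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
		return;		/* 0 -> TID succeeded, no kernel involvement */

	/* Contended: let the kernel queue us and apply priority inheritance. */
	syscall(SYS_futex, uaddr, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}

With the hunk above, a futex word that reads 0 while the kernel already has a waiter queued on it is treated as corrupted and rejected with -EINVAL instead of being reported as acquired.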
@@ -951,7 +1037,7 @@ retry:
 	 * We dont have the lock. Look up the PI state (or create it if
 	 * we are the first waiter):
 	 */
-	ret = lookup_pi_state(uval, hb, key, ps, task);
+	ret = lookup_pi_state(uval, hb, key, ps);
 
 	if (unlikely(ret)) {
 		switch (ret) {
@@ -1044,6 +1130,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	struct task_struct *new_owner;
 	struct futex_pi_state *pi_state = this->pi_state;
 	u32 uninitialized_var(curval), newval;
+	int ret = 0;
 
 	if (!pi_state)
 		return -EINVAL;
@@ -1067,23 +1154,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 		new_owner = this->task;
 
 	/*
-	 * We pass it to the next owner. (The WAITERS bit is always
-	 * kept enabled while there is PI state around. We must also
-	 * preserve the owner died bit.)
+	 * We pass it to the next owner. The WAITERS bit is always
+	 * kept enabled while there is PI state around. We cleanup the
+	 * owner died bit, because we are the owner.
 	 */
-	if (!(uval & FUTEX_OWNER_DIED)) {
-		int ret = 0;
-
-		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
+	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
-		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
-			ret = -EFAULT;
-		else if (curval != uval)
-			ret = -EINVAL;
-		if (ret) {
-			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
-			return ret;
-		}
+	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
+		ret = -EFAULT;
+	else if (curval != uval)
+		ret = -EINVAL;
+	if (ret) {
+		raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+		return ret;
 	}
 
 	raw_spin_lock_irq(&pi_state->owner->pi_lock);
@@ -1441,6 +1524,13 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 	struct futex_q *this, *next;
 
 	if (requeue_pi) {
+		/*
+		 * Requeue PI only works on two distinct uaddrs. This
+		 * check is only valid for private futexes. See below.
+		 */
+		if (uaddr1 == uaddr2)
+			return -EINVAL;
+
 		/*
 		 * requeue_pi requires a pi_state, try to allocate it now
 		 * without any locks in case it fails.
@@ -1479,6 +1569,15 @@ retry:
 	if (unlikely(ret != 0))
 		goto out_put_key1;
 
+	/*
+	 * The check above which compares uaddrs is not sufficient for
+	 * shared futexes. We need to compare the keys:
+	 */
+	if (requeue_pi && match_futex(&key1, &key2)) {
+		ret = -EINVAL;
+		goto out_put_keys;
+	}
+
 	hb1 = hash_futex(&key1);
 	hb2 = hash_futex(&key2);
 
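
Taken together, the pointer check in the previous hunk and the key check here refuse any attempt to requeue a PI futex onto itself, whether the two addresses are literally equal or merely resolve to the same shared futex. Purely to illustrate the call shape that is now rejected - not part of the patch, and assuming the usual futex(2) raw syscall argument layout where the timeout slot carries nr_requeue:

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static uint32_t futex_word;	/* one futex word used for both arguments */

static long requeue_onto_self(void)
{
	/*
	 * FUTEX_CMP_REQUEUE_PI: wake at most 1 waiter, requeue at most 1,
	 * and require *uaddr1 to still contain the expected value (val3).
	 * With uaddr1 == uaddr2 this now fails with EINVAL up front.
	 */
	return syscall(SYS_futex, &futex_word, FUTEX_CMP_REQUEUE_PI,
		       1, (unsigned long)1, &futex_word, futex_word);
}

Before these checks, such a call could proceed past this point of futex_requeue().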
@@ -1544,7 +1643,7 @@ retry_private:
 			 * rereading and handing potential crap to
 			 * lookup_pi_state.
 			 */
-			ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL);
+			ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
 		}
 
 		switch (ret) {
@@ -2327,9 +2426,10 @@ retry:
 	/*
 	 * To avoid races, try to do the TID -> 0 atomic transition
 	 * again. If it succeeds then we can return without waking
-	 * anyone else up:
+	 * anyone else up. We only try this if neither the waiters nor
+	 * the owner died bit is set.
 	 */
-	if (!(uval & FUTEX_OWNER_DIED) &&
+	if (!(uval & ~FUTEX_TID_MASK) &&
 	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
 		goto pi_faulted;
 	/*
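
The widened guard is a plain bit test: FUTEX_TID_MASK covers the TID portion of the futex word, so uval & ~FUTEX_TID_MASK is non-zero as soon as either FUTEX_WAITERS or FUTEX_OWNER_DIED is set, and the unlocked fast path is now attempted only for a plain TID value. A small worked example with the uapi constants, as an illustration only and not part of the patch:

#include <linux/futex.h>	/* FUTEX_WAITERS, FUTEX_OWNER_DIED, FUTEX_TID_MASK */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t plain   = 1234;			/* just a TID          */
	uint32_t waiters = 1234 | FUTEX_WAITERS;	/* TID plus waiters    */
	uint32_t died    = 1234 | FUTEX_OWNER_DIED;	/* TID plus owner died */

	/* Only the first value may take the TID -> 0 fast path. */
	printf("%d %d %d\n",
	       !(plain   & ~FUTEX_TID_MASK),	/* prints 1 */
	       !(waiters & ~FUTEX_TID_MASK),	/* prints 0 */
	       !(died    & ~FUTEX_TID_MASK));	/* prints 0 */
	return 0;
}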
@@ -2359,11 +2459,9 @@ retry:
 	/*
 	 * No waiters - kernel unlocks the futex:
 	 */
-	if (!(uval & FUTEX_OWNER_DIED)) {
-		ret = unlock_futex_pi(uaddr, uval);
-		if (ret == -EFAULT)
-			goto pi_faulted;
-	}
+	ret = unlock_futex_pi(uaddr, uval);
+	if (ret == -EFAULT)
+		goto pi_faulted;
 
 out_unlock:
 	spin_unlock(&hb->lock);
@@ -2525,6 +2623,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 	if (ret)
 		goto out_key2;
 
+	/*
+	 * The check above which compares uaddrs is not sufficient for
+	 * shared futexes. We need to compare the keys:
+	 */
+	if (match_futex(&q.key, &key2)) {
+		ret = -EINVAL;
+		goto out_put_keys;
+	}
+
 	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
 	futex_wait_queue_me(hb, &q, to);
 