@@ -45,9 +45,10 @@ struct ptr_ring {
 };
 
 /* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). If ring is ever resized, callers must hold
- * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold
- * producer_lock, the next call to __ptr_ring_produce may fail.
+ * for example cpu_relax().
+ *
+ * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
+ * see e.g. ptr_ring_full.
  */
 static inline bool __ptr_ring_full(struct ptr_ring *r)
 {
@@ -113,7 +114,7 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
         /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
         smp_wmb();
 
-        r->queue[r->producer++] = ptr;
+        WRITE_ONCE(r->queue[r->producer++], ptr);
         if (unlikely(r->producer >= r->size))
                 r->producer = 0;
         return 0;
@@ -169,32 +170,36 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
         return ret;
 }
 
-/* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). Callers must take consumer_lock
- * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
- * If ring is never resized, and if the pointer is merely
- * tested, there's no need to take the lock - see e.g. __ptr_ring_empty.
- * However, if called outside the lock, and if some other CPU
- * consumes ring entries at the same time, the value returned
- * is not guaranteed to be correct.
- * In this case - to avoid incorrectly detecting the ring
- * as empty - the CPU consuming the ring entries is responsible
- * for either consuming all ring entries until the ring is empty,
- * or synchronizing with some other CPU and causing it to
- * execute __ptr_ring_peek and/or consume the ring enteries
- * after the synchronization point.
- */
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
         if (likely(r->size))
-                return r->queue[r->consumer_head];
+                return READ_ONCE(r->queue[r->consumer_head]);
         return NULL;
 }
 
-/* See __ptr_ring_peek above for locking rules. */
+/*
+ * Test ring empty status without taking any locks.
+ *
+ * NB: This is only safe to call if ring is never resized.
+ *
+ * However, if some other CPU consumes ring entries at the same time, the value
+ * returned is not guaranteed to be correct.
+ *
+ * In this case - to avoid incorrectly detecting the ring
+ * as empty - the CPU consuming the ring entries is responsible
+ * for either consuming all ring entries until the ring is empty,
+ * or synchronizing with some other CPU and causing it to
+ * re-test __ptr_ring_empty and/or consume the ring entries
+ * after the synchronization point.
+ *
+ * Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax().
+ */
 static inline bool __ptr_ring_empty(struct ptr_ring *r)
 {
-        return !__ptr_ring_peek(r);
+        if (likely(r->size))
+                return !r->queue[READ_ONCE(r->consumer_head)];
+        return true;
 }
 
 static inline bool ptr_ring_empty(struct ptr_ring *r)
@@ -248,22 +253,28 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r)
         /* Fundamentally, what we want to do is update consumer
          * index and zero out the entry so producer can reuse it.
          * Doing it naively at each consume would be as simple as:
-         *       r->queue[r->consumer++] = NULL;
-         *       if (unlikely(r->consumer >= r->size))
-         *               r->consumer = 0;
+         *       consumer = r->consumer;
+         *       r->queue[consumer++] = NULL;
+         *       if (unlikely(consumer >= r->size))
+         *               consumer = 0;
+         *       r->consumer = consumer;
          * but that is suboptimal when the ring is full as producer is writing
          * out new entries in the same cache line. Defer these updates until a
          * batch of entries has been consumed.
          */
-        int head = r->consumer_head++;
+        /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
+         * to work correctly.
+         */
+        int consumer_head = r->consumer_head;
+        int head = consumer_head++;
 
         /* Once we have processed enough entries invalidate them in
          * the ring all at once so producer can reuse their space in the ring.
          * We also do this when we reach end of the ring - not mandatory
          * but helps keep the implementation simple.
          */
-        if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
-                     r->consumer_head >= r->size)) {
+        if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
+                     consumer_head >= r->size)) {
                 /* Zero out entries in the reverse order: this way we touch the
                  * cache line that producer might currently be reading the last;
                  * producer won't make progress and touch other cache lines
@@ -271,12 +282,14 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r)
                  */
                 while (likely(head >= r->consumer_tail))
                         r->queue[head--] = NULL;
-                r->consumer_tail = r->consumer_head;
+                r->consumer_tail = consumer_head;
         }
-        if (unlikely(r->consumer_head >= r->size)) {
-                r->consumer_head = 0;
+        if (unlikely(consumer_head >= r->size)) {
+                consumer_head = 0;
                 r->consumer_tail = 0;
         }
+        /* matching READ_ONCE in __ptr_ring_empty for lockless tests */
+        WRITE_ONCE(r->consumer_head, consumer_head);
 }
 
 static inline void *__ptr_ring_consume(struct ptr_ring *r)
@@ -453,12 +466,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
 
 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
 {
-        /* Allocate an extra dummy element at end of ring to avoid consumer head
-         * or produce head access past the end of the array. Possible when
-         * producer/consumer operations and __ptr_ring_peek operations run in
-         * parallel.
-         */
-        return kcalloc(size + 1, sizeof(void *), gfp);
+        return kcalloc(size, sizeof(void *), gfp);
 }
 
 static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
@@ -532,7 +540,9 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
                         goto done;
                 }
                 r->queue[head] = batch[--n];
-                r->consumer_tail = head;
+                r->consumer_tail = head;
+                /* matching READ_ONCE in __ptr_ring_empty for lockless tests */
+                WRITE_ONCE(r->consumer_head, head);
         }
 
 done:
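
For reference, a minimal usage sketch of the lockless __ptr_ring_empty() contract introduced above. This is not part of the patch: the helpers drain_ring() and ring_has_work() are hypothetical, and the entries are assumed to be kmalloc'ed.

#include <linux/ptr_ring.h>
#include <linux/slab.h>

/* Consumer side: ptr_ring_consume() takes consumer_lock and, via
 * __ptr_ring_discard_one(), publishes every new consumer_head with
 * WRITE_ONCE(). Draining until the ring is empty satisfies the rule in
 * the __ptr_ring_empty() comment for CPUs that test emptiness locklessly.
 */
static void drain_ring(struct ptr_ring *r)
{
        void *entry;

        while ((entry = ptr_ring_consume(r)))
                kfree(entry);
}

/* Lockless side: only tests emptiness and never dereferences an entry.
 * The READ_ONCE(r->consumer_head) in __ptr_ring_empty() pairs with the
 * consumer's WRITE_ONCE(), so the index used here is always in range.
 */
static bool ring_has_work(struct ptr_ring *r)
{
        return !__ptr_ring_empty(r);
}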
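
Similarly, a sketch of the producer-side ordering that the WRITE_ONCE() added in __ptr_ring_produce() relies on: the entry is fully initialized before it is published, the smp_wmb() inside __ptr_ring_produce() orders those writes, and __ptr_ring_peek() now reads the slot with READ_ONCE(). Again not part of the patch; struct item and produce_item() are hypothetical.

#include <linux/ptr_ring.h>
#include <linux/slab.h>

struct item {
        int val;
};

static int produce_item(struct ptr_ring *r, int val)
{
        struct item *it = kmalloc(sizeof(*it), GFP_KERNEL);

        if (!it)
                return -ENOMEM;
        /* Initialize the payload before publication; the smp_wmb() in
         * __ptr_ring_produce() orders this store before the WRITE_ONCE()
         * that makes the pointer visible to consumers.
         */
        it->val = val;

        /* Takes producer_lock; returns -ENOSPC if the ring is full. */
        return ptr_ring_produce(r, it);
}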