@@ -399,6 +399,17 @@ struct rb_irq_work {
bool wakeup_full;
};

+/*
+ * Structure to hold event state and handle nested events.
+ */
+struct rb_event_info {
+ u64 ts;
+ u64 delta;
+ unsigned long length;
+ struct buffer_page *tail_page;
+ int add_timestamp;
+};
+
/*
* Used for which event context the event is in.
* NMI = 0
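The new rb_event_info bundles the per-event state (timestamp, delta, length, tail page, add_timestamp flag) that the rest of this patch hands down the reserve path as a single pointer instead of a growing argument list. A minimal user-space sketch of that pattern only; the names are stand-ins, and the 27-bit delta limit is an assumption taken from the comment later in this patch:

/*
 * Illustrative sketch, not kernel code: the per-event state is filled
 * in once and passed to the helpers as one pointer, mirroring how
 * rb_reserve_next_event() uses the new struct rb_event_info.
 */
#include <stdint.h>
#include <stdio.h>

struct event_info {
	uint64_t ts;
	uint64_t delta;
	unsigned long length;
	int add_timestamp;
};

#define DELTA_BITS 27	/* assumed: the event header stores a 27-bit delta */

/* stand-in for __rb_reserve_next(): consumes the bundled state */
static void reserve_next(const struct event_info *info)
{
	printf("len=%lu delta=%llu add_timestamp=%d\n",
	       info->length,
	       (unsigned long long)info->delta,
	       info->add_timestamp);
}

int main(void)
{
	uint64_t write_stamp = 1000;		/* timestamp of the previous event */
	struct event_info info = { .length = 16 };

	info.ts = 5000;				/* "now", from the trace clock */
	info.delta = info.ts - write_stamp;
	if (info.delta >= (1ULL << DELTA_BITS))
		info.add_timestamp = 1;		/* would need a TIME_EXTEND event */

	reserve_next(&info);
	return 0;
}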
@@ -1876,73 +1887,6 @@ rb_event_index(struct ring_buffer_event *event)
return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
}

-static inline int
-rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- unsigned long addr = (unsigned long)event;
- unsigned long index;
-
- index = rb_event_index(event);
- addr &= PAGE_MASK;
-
- return cpu_buffer->commit_page->page == (void *)addr &&
- rb_commit_index(cpu_buffer) == index;
-}
-
-static void
-rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
-{
- unsigned long max_count;
-
- /*
- * We only race with interrupts and NMIs on this CPU.
- * If we own the commit event, then we can commit
- * all others that interrupted us, since the interruptions
- * are in stack format (they finish before they come
- * back to us). This allows us to do a simple loop to
- * assign the commit to the tail.
- */
- again:
- max_count = cpu_buffer->nr_pages * 100;
-
- while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
- if (RB_WARN_ON(cpu_buffer, !(--max_count)))
- return;
- if (RB_WARN_ON(cpu_buffer,
- rb_is_reader_page(cpu_buffer->tail_page)))
- return;
- local_set(&cpu_buffer->commit_page->page->commit,
- rb_page_write(cpu_buffer->commit_page));
- rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
- cpu_buffer->write_stamp =
- cpu_buffer->commit_page->page->time_stamp;
- /* add barrier to keep gcc from optimizing too much */
- barrier();
- }
- while (rb_commit_index(cpu_buffer) !=
- rb_page_write(cpu_buffer->commit_page)) {
-
- local_set(&cpu_buffer->commit_page->page->commit,
- rb_page_write(cpu_buffer->commit_page));
- RB_WARN_ON(cpu_buffer,
- local_read(&cpu_buffer->commit_page->page->commit) &
- ~RB_WRITE_MASK);
- barrier();
- }
-
- /* again, keep gcc from optimizing */
- barrier();
-
- /*
- * If an interrupt came in just after the first while loop
- * and pushed the tail page forward, we will be left with
- * a dangling commit that will never go forward.
- */
- if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
- goto again;
-}
-
static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
@@ -1968,64 +1912,6 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
iter->head = 0;
}

-/* Slow path, do not inline */
-static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
-{
- event->type_len = RINGBUF_TYPE_TIME_EXTEND;
-
- /* Not the first event on the page? */
- if (rb_event_index(event)) {
- event->time_delta = delta & TS_MASK;
- event->array[0] = delta >> TS_SHIFT;
- } else {
- /* nope, just zero it */
- event->time_delta = 0;
- event->array[0] = 0;
- }
-
- return skip_time_extend(event);
-}
-
-/**
- * rb_update_event - update event type and data
- * @event: the event to update
- * @type: the type of event
- * @length: the size of the event field in the ring buffer
- *
- * Update the type and data fields of the event. The length
- * is the actual size that is written to the ring buffer,
- * and with this, we can determine what to place into the
- * data field.
- */
-static void
-rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event, unsigned length,
- int add_timestamp, u64 delta)
-{
- /* Only a commit updates the timestamp */
- if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
- delta = 0;
-
- /*
- * If we need to add a timestamp, then we
- * add it to the start of the resevered space.
- */
- if (unlikely(add_timestamp)) {
- event = rb_add_time_stamp(event, delta);
- length -= RB_LEN_TIME_EXTEND;
- delta = 0;
- }
-
- event->time_delta = delta;
- length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
- event->type_len = 0;
- event->array[0] = length;
- } else
- event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
-}
-
/*
* rb_handle_head_page - writer hit the head page
*
@@ -2184,29 +2070,13 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
return 0;
}

-static unsigned rb_calculate_event_length(unsigned length)
-{
- struct ring_buffer_event event; /* Used only for sizeof array */
-
- /* zero length can cause confusions */
- if (!length)
- length++;
-
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
- length += sizeof(event.array[0]);
-
- length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ARCH_ALIGNMENT);
-
- return length;
-}
-
static inline void
rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
- struct buffer_page *tail_page,
- unsigned long tail, unsigned long length)
+ unsigned long tail, struct rb_event_info *info)
{
+ struct buffer_page *tail_page = info->tail_page;
struct ring_buffer_event *event;
+ unsigned long length = info->length;

/*
* Only the event that crossed the page boundary
@@ -2276,13 +2146,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
*/
static noinline struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
- unsigned long length, unsigned long tail,
- struct buffer_page *tail_page, u64 ts)
+ unsigned long tail, struct rb_event_info *info)
{
+ struct buffer_page *tail_page = info->tail_page;
struct buffer_page *commit_page = cpu_buffer->commit_page;
struct ring_buffer *buffer = cpu_buffer->buffer;
struct buffer_page *next_page;
int ret;
+ u64 ts;

next_page = tail_page;
@@ -2368,74 +2239,120 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,

out_again:

- rb_reset_tail(cpu_buffer, tail_page, tail, length);
+ rb_reset_tail(cpu_buffer, tail, info);

/* fail and let the caller try again */
return ERR_PTR(-EAGAIN);

out_reset:
/* reset write */
- rb_reset_tail(cpu_buffer, tail_page, tail, length);
+ rb_reset_tail(cpu_buffer, tail, info);

return NULL;
}

-static struct ring_buffer_event *
-__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
- unsigned long length, u64 ts,
- u64 delta, int add_timestamp)
+/* Slow path, do not inline */
+static noinline struct ring_buffer_event *
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
{
- struct buffer_page *tail_page;
- struct ring_buffer_event *event;
- unsigned long tail, write;
+ event->type_len = RINGBUF_TYPE_TIME_EXTEND;

- /*
- * If the time delta since the last event is too big to
- * hold in the time field of the event, then we append a
- * TIME EXTEND event ahead of the data event.
- */
- if (unlikely(add_timestamp))
- length += RB_LEN_TIME_EXTEND;
+ /* Not the first event on the page? */
+ if (rb_event_index(event)) {
+ event->time_delta = delta & TS_MASK;
+ event->array[0] = delta >> TS_SHIFT;
+ } else {
+ /* nope, just zero it */
+ event->time_delta = 0;
+ event->array[0] = 0;
+ }
+
+ return skip_time_extend(event);
+}

- tail_page = cpu_buffer->tail_page;
- write = local_add_return(length, &tail_page->write);
+static inline int rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event);

- /* set write to only the index of the write */
- write &= RB_WRITE_MASK;
- tail = write - length;
+/**
+ * rb_update_event - update event type and data
+ * @event: the event to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+ * Update the type and data fields of the event. The length
+ * is the actual size that is written to the ring buffer,
+ * and with this, we can determine what to place into the
+ * data field.
+ */
+static void
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event,
+ struct rb_event_info *info)
+{
+ unsigned length = info->length;
+ u64 delta = info->delta;
+
+ /* Only a commit updates the timestamp */
+ if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+ delta = 0;

/*
- * If this is the first commit on the page, then it has the same
- * timestamp as the page itself.
+ * If we need to add a timestamp, then we
+ * add it to the start of the resevered space.
*/
- if (!tail)
+ if (unlikely(info->add_timestamp)) {
+ event = rb_add_time_stamp(event, delta);
+ length -= RB_LEN_TIME_EXTEND;
delta = 0;
+ }

- /* See if we shot pass the end of this buffer page */
- if (unlikely(write > BUF_PAGE_SIZE))
- return rb_move_tail(cpu_buffer, length, tail,
- tail_page, ts);
+ event->time_delta = delta;
+ length -= RB_EVNT_HDR_SIZE;
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+ event->type_len = 0;
+ event->array[0] = length;
+ } else
+ event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+}

- /* We reserved something on the buffer */
+static unsigned rb_calculate_event_length(unsigned length)
+{
+ struct ring_buffer_event event; /* Used only for sizeof array */

- event = __rb_page_index(tail_page, tail);
- kmemcheck_annotate_bitfield(event, bitfield);
- rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
+ /* zero length can cause confusions */
+ if (!length)
+ length++;

- local_inc(&tail_page->entries);
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
+ length += sizeof(event.array[0]);
+
+ length += RB_EVNT_HDR_SIZE;
+ length = ALIGN(length, RB_ARCH_ALIGNMENT);

/*
- * If this is the first commit on the page, then update
- * its timestamp.
+ * In case the time delta is larger than the 27 bits for it
+ * in the header, we need to add a timestamp. If another
+ * event comes in when trying to discard this one to increase
+ * the length, then the timestamp will be added in the allocated
+ * space of this event. If length is bigger than the size needed
+ * for the TIME_EXTEND, then padding has to be used. The events
+ * length must be either RB_LEN_TIME_EXTEND, or greater than or equal
+ * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding.
+ * As length is a multiple of 4, we only need to worry if it
+ * is 12 (RB_LEN_TIME_EXTEND + 4).
*/
- if (!tail)
- tail_page->page->time_stamp = ts;
+ if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
+ length += RB_ALIGNMENT;

- /* account for these added bytes */
- local_add(length, &cpu_buffer->entries_bytes);
+ return length;
+}

- return event;
+#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline bool sched_clock_stable(void)
+{
+ return true;
}
+#endif

static inline int
rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
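The length bump added at the end of rb_calculate_event_length() covers the case spelled out in the comment: a 12-byte reservation could not later be replaced by an 8-byte time extend plus a padding event, whose minimum size is also 8 bytes. A stand-alone model of the small-event path only; the header size, alignment and time-extend length below are assumptions matching that comment:

/*
 * Worked example of the length bump, user-space sketch only.
 * ALIGNMENT = 4 and LEN_TIME_EXTEND = 8 follow the comment above;
 * EVNT_HDR_SIZE = 4 is assumed for illustration.
 */
#include <stdio.h>

#define ALIGNMENT	4
#define LEN_TIME_EXTEND	8
#define EVNT_HDR_SIZE	4

static unsigned calc_len(unsigned length)
{
	if (!length)
		length++;
	length += EVNT_HDR_SIZE;
	length = (length + ALIGNMENT - 1) & ~(ALIGNMENT - 1);	/* ALIGN() */

	/* 12 bytes cannot be split into a time extend (8) plus padding (>= 8) */
	if (length == LEN_TIME_EXTEND + ALIGNMENT)
		length += ALIGNMENT;
	return length;
}

int main(void)
{
	/* an 8-byte payload would come out at 12, so it is padded to 16 */
	printf("8 -> %u\n", calc_len(8));	/* prints 8 -> 16 */
	/* a 12-byte payload already rounds to 16 and is left alone */
	printf("12 -> %u\n", calc_len(12));	/* prints 12 -> 16 */
	return 0;
}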
@@ -2483,22 +2400,75 @@ static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
local_inc(&cpu_buffer->commits);
}

-static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static void
+rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{
- unsigned long commits;
-
- if (RB_WARN_ON(cpu_buffer,
- !local_read(&cpu_buffer->committing)))
- return;
+ unsigned long max_count;

+ /*
+ * We only race with interrupts and NMIs on this CPU.
+ * If we own the commit event, then we can commit
+ * all others that interrupted us, since the interruptions
+ * are in stack format (they finish before they come
+ * back to us). This allows us to do a simple loop to
+ * assign the commit to the tail.
+ */
again:
- commits = local_read(&cpu_buffer->commits);
- /* synchronize with interrupts */
- barrier();
- if (local_read(&cpu_buffer->committing) == 1)
- rb_set_commit_to_write(cpu_buffer);
-
- local_dec(&cpu_buffer->committing);
+ max_count = cpu_buffer->nr_pages * 100;
+
+ while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
+ if (RB_WARN_ON(cpu_buffer, !(--max_count)))
+ return;
+ if (RB_WARN_ON(cpu_buffer,
+ rb_is_reader_page(cpu_buffer->tail_page)))
+ return;
+ local_set(&cpu_buffer->commit_page->page->commit,
+ rb_page_write(cpu_buffer->commit_page));
+ rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
+ /* add barrier to keep gcc from optimizing too much */
+ barrier();
+ }
+ while (rb_commit_index(cpu_buffer) !=
+ rb_page_write(cpu_buffer->commit_page)) {
+
+ local_set(&cpu_buffer->commit_page->page->commit,
+ rb_page_write(cpu_buffer->commit_page));
+ RB_WARN_ON(cpu_buffer,
+ local_read(&cpu_buffer->commit_page->page->commit) &
+ ~RB_WRITE_MASK);
+ barrier();
+ }
+
+ /* again, keep gcc from optimizing */
+ barrier();
+
+ /*
+ * If an interrupt came in just after the first while loop
+ * and pushed the tail page forward, we will be left with
+ * a dangling commit that will never go forward.
+ */
+ if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+ goto again;
+}
+
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ unsigned long commits;
+
+ if (RB_WARN_ON(cpu_buffer,
+ !local_read(&cpu_buffer->committing)))
+ return;
+
+ again:
+ commits = local_read(&cpu_buffer->commits);
+ /* synchronize with interrupts */
+ barrier();
+ if (local_read(&cpu_buffer->committing) == 1)
+ rb_set_commit_to_write(cpu_buffer);
+
+ local_dec(&cpu_buffer->committing);

/* synchronize with interrupts */
barrier();
@@ -2515,91 +2485,94 @@ static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
}
}

-static struct ring_buffer_event *
-rb_reserve_next_event(struct ring_buffer *buffer,
- struct ring_buffer_per_cpu *cpu_buffer,
- unsigned long length)
+static inline void rb_event_discard(struct ring_buffer_event *event)
{
- struct ring_buffer_event *event;
- u64 ts, delta;
- int nr_loops = 0;
- int add_timestamp;
- u64 diff;
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);

- rb_start_commit(cpu_buffer);
+ /* array[0] holds the actual length for the discarded event */
+ event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+ event->type_len = RINGBUF_TYPE_PADDING;
+ /* time delta must be non zero */
+ if (!event->time_delta)
+ event->time_delta = 1;
+}

-#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
- /*
- * Due to the ability to swap a cpu buffer from a buffer
- * it is possible it was swapped before we committed.
- * (committing stops a swap). We check for it here and
- * if it happened, we have to fail the write.
- */
- barrier();
- if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
- local_dec(&cpu_buffer->committing);
- local_dec(&cpu_buffer->commits);
- return NULL;
- }
-#endif
+static inline int
+rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ unsigned long addr = (unsigned long)event;
+ unsigned long index;

- length = rb_calculate_event_length(length);
- again:
- add_timestamp = 0;
- delta = 0;
+ index = rb_event_index(event);
+ addr &= PAGE_MASK;
+
+ return cpu_buffer->commit_page->page == (void *)addr &&
+ rb_commit_index(cpu_buffer) == index;
+}
+
+static void
+rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ u64 delta;

/*
- * We allow for interrupts to reenter here and do a trace.
- * If one does, it will cause this original code to loop
- * back here. Even with heavy interrupts happening, this
- * should only happen a few times in a row. If this happens
- * 1000 times in a row, there must be either an interrupt
- * storm or we have something buggy.
- * Bail!
+ * The event first in the commit queue updates the
+ * time stamp.
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
- goto out_fail;
+ if (rb_event_is_commit(cpu_buffer, event)) {
+ /*
+ * A commit event that is first on a page
+ * updates the write timestamp with the page stamp
+ */
+ if (!rb_event_index(event))
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
+ else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ delta = event->array[0];
+ delta <<= TS_SHIFT;
+ delta += event->time_delta;
+ cpu_buffer->write_stamp += delta;
+ } else
+ cpu_buffer->write_stamp += event->time_delta;
+ }
+}

- ts = rb_time_stamp(cpu_buffer->buffer);
- diff = ts - cpu_buffer->write_stamp;
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ local_inc(&cpu_buffer->entries);
+ rb_update_write_stamp(cpu_buffer, event);
+ rb_end_commit(cpu_buffer);
+}

- /* make sure this diff is calculated here */
- barrier();
+static __always_inline void
+rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+{
+ bool pagebusy;

- /* Did the write stamp get updated already? */
- if (likely(ts >= cpu_buffer->write_stamp)) {
- delta = diff;
- if (unlikely(test_time_stamp(delta))) {
- int local_clock_stable = 1;
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
- local_clock_stable = sched_clock_stable();
-#endif
- WARN_ONCE(delta > (1ULL << 59),
- KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
- (unsigned long long)delta,
- (unsigned long long)ts,
- (unsigned long long)cpu_buffer->write_stamp,
- local_clock_stable ? "" :
- "If you just came from a suspend/resume,\n"
- "please switch to the trace global clock:\n"
- " echo global > /sys/kernel/debug/tracing/trace_clock\n");
- add_timestamp = 1;
- }
+ if (buffer->irq_work.waiters_pending) {
+ buffer->irq_work.waiters_pending = false;
+ /* irq_work_queue() supplies it's own memory barriers */
+ irq_work_queue(&buffer->irq_work.work);
}

- event = __rb_reserve_next(cpu_buffer, length, ts,
- delta, add_timestamp);
- if (unlikely(PTR_ERR(event) == -EAGAIN))
- goto again;
-
- if (!event)
- goto out_fail;
+ if (cpu_buffer->irq_work.waiters_pending) {
+ cpu_buffer->irq_work.waiters_pending = false;
+ /* irq_work_queue() supplies it's own memory barriers */
+ irq_work_queue(&cpu_buffer->irq_work.work);
+ }

- return event;
+ pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;

- out_fail:
- rb_end_commit(cpu_buffer);
- return NULL;
+ if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
+ cpu_buffer->irq_work.wakeup_full = true;
+ cpu_buffer->irq_work.full_waiters_pending = false;
+ /* irq_work_queue() supplies it's own memory barriers */
+ irq_work_queue(&cpu_buffer->irq_work.work);
+ }
}

/*
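When a time-extend event is the one being committed, rb_update_write_stamp() above rebuilds the full delta from the two places rb_add_time_stamp() stored it: the low bits in time_delta and the high bits in array[0]. A stand-alone round-trip of that split, assuming the 27-bit TS_SHIFT implied by the "27 bits" comment earlier in this patch:

/*
 * User-space sketch of the delta split/rebuild; TS_SHIFT = 27 is an
 * assumption taken from the comment in rb_calculate_event_length().
 */
#include <stdint.h>
#include <stdio.h>

#define TS_SHIFT 27
#define TS_MASK  ((1ULL << TS_SHIFT) - 1)

int main(void)
{
	uint64_t delta = (3ULL << 30) + 12345;	/* too big for 27 bits */

	/* what rb_add_time_stamp() stores in the event */
	uint32_t time_delta = delta & TS_MASK;
	uint32_t array0 = delta >> TS_SHIFT;

	/* what rb_update_write_stamp() reconstructs */
	uint64_t rebuilt = ((uint64_t)array0 << TS_SHIFT) + time_delta;

	printf("match: %d\n", rebuilt == delta);	/* prints match: 1 */
	return 0;
}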
@@ -2671,6 +2644,178 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->current_context &= cpu_buffer->current_context - 1;
}

+/**
+ * ring_buffer_unlock_commit - commit a reserved
+ * @buffer: The buffer to commit to
+ * @event: The event pointer to commit.
+ *
+ * This commits the data to the ring buffer, and releases any locks held.
+ *
+ * Must be paired with ring_buffer_lock_reserve.
+ */
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu = raw_smp_processor_id();
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ rb_commit(cpu_buffer, event);
+
+ rb_wakeups(buffer, cpu_buffer);
+
+ trace_recursive_unlock(cpu_buffer);
+
+ preempt_enable_notrace();
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
+
+static noinline void
+rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_event_info *info)
+{
+ WARN_ONCE(info->delta > (1ULL << 59),
+ KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
+ (unsigned long long)info->delta,
+ (unsigned long long)info->ts,
+ (unsigned long long)cpu_buffer->write_stamp,
+ sched_clock_stable() ? "" :
+ "If you just came from a suspend/resume,\n"
+ "please switch to the trace global clock:\n"
+ " echo global > /sys/kernel/debug/tracing/trace_clock\n");
+ info->add_timestamp = 1;
+}
+
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_event_info *info)
+{
+ struct ring_buffer_event *event;
+ struct buffer_page *tail_page;
+ unsigned long tail, write;
+
+ /*
+ * If the time delta since the last event is too big to
+ * hold in the time field of the event, then we append a
+ * TIME EXTEND event ahead of the data event.
+ */
+ if (unlikely(info->add_timestamp))
+ info->length += RB_LEN_TIME_EXTEND;
+
+ tail_page = info->tail_page = cpu_buffer->tail_page;
+ write = local_add_return(info->length, &tail_page->write);
+
+ /* set write to only the index of the write */
+ write &= RB_WRITE_MASK;
+ tail = write - info->length;
+
+ /*
+ * If this is the first commit on the page, then it has the same
+ * timestamp as the page itself.
+ */
+ if (!tail)
+ info->delta = 0;
+
+ /* See if we shot pass the end of this buffer page */
+ if (unlikely(write > BUF_PAGE_SIZE))
+ return rb_move_tail(cpu_buffer, tail, info);
+
+ /* We reserved something on the buffer */
+
+ event = __rb_page_index(tail_page, tail);
+ kmemcheck_annotate_bitfield(event, bitfield);
+ rb_update_event(cpu_buffer, event, info);
+
+ local_inc(&tail_page->entries);
+
+ /*
+ * If this is the first commit on the page, then update
+ * its timestamp.
+ */
+ if (!tail)
+ tail_page->page->time_stamp = info->ts;
+
+ /* account for these added bytes */
+ local_add(info->length, &cpu_buffer->entries_bytes);
+
+ return event;
+}
+
+static struct ring_buffer_event *
+rb_reserve_next_event(struct ring_buffer *buffer,
+ struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned long length)
+{
+ struct ring_buffer_event *event;
+ struct rb_event_info info;
+ int nr_loops = 0;
+ u64 diff;
+
+ rb_start_commit(cpu_buffer);
+
+#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
+ /*
+ * Due to the ability to swap a cpu buffer from a buffer
+ * it is possible it was swapped before we committed.
+ * (committing stops a swap). We check for it here and
+ * if it happened, we have to fail the write.
+ */
+ barrier();
+ if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
+ local_dec(&cpu_buffer->committing);
+ local_dec(&cpu_buffer->commits);
+ return NULL;
+ }
+#endif
+
+ info.length = rb_calculate_event_length(length);
+ again:
+ info.add_timestamp = 0;
+ info.delta = 0;
+
+ /*
+ * We allow for interrupts to reenter here and do a trace.
+ * If one does, it will cause this original code to loop
+ * back here. Even with heavy interrupts happening, this
+ * should only happen a few times in a row. If this happens
+ * 1000 times in a row, there must be either an interrupt
+ * storm or we have something buggy.
+ * Bail!
+ */
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
+ goto out_fail;
+
+ info.ts = rb_time_stamp(cpu_buffer->buffer);
+ diff = info.ts - cpu_buffer->write_stamp;
+
+ /* make sure this diff is calculated here */
+ barrier();
+
+ /* Did the write stamp get updated already? */
+ if (likely(info.ts >= cpu_buffer->write_stamp)) {
+ info.delta = diff;
+ if (unlikely(test_time_stamp(info.delta)))
+ rb_handle_timestamp(cpu_buffer, &info);
+ }
+
+ event = __rb_reserve_next(cpu_buffer, &info);
+
+ if (unlikely(PTR_ERR(event) == -EAGAIN))
+ goto again;
+
+ if (!event)
+ goto out_fail;
+
+ return event;
+
+ out_fail:
+ rb_end_commit(cpu_buffer);
+ return NULL;
+}
+
/**
* ring_buffer_lock_reserve - reserve a part of the buffer
* @buffer: the ring buffer to reserve from
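__rb_reserve_next() keeps the established return convention: a valid pointer on success, NULL on hard failure, and ERR_PTR(-EAGAIN) from rb_move_tail() when the writer crossed a page boundary and rb_reserve_next_event() should recompute the timestamp and retry. A minimal user-space sketch of that convention; ERR_PTR()/PTR_ERR() here are simplified stand-ins for the kernel helpers:

#include <errno.h>
#include <stdio.h>

/* simplified stand-ins for the kernel's ERR_PTR()/PTR_ERR() */
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))

static char slot[16];		/* pretend reserved space */
static int attempts;

static void *try_reserve(void)
{
	/* first attempt "races" with a page move and asks for a retry */
	if (attempts++ == 0)
		return ERR_PTR(-EAGAIN);
	return slot;
}

int main(void)
{
	void *event;

again:
	event = try_reserve();
	if (PTR_ERR(event) == -EAGAIN)
		goto again;		/* recompute state and retry */
	if (!event)
		return 1;		/* hard failure */

	printf("reserved after %d attempts\n", attempts);	/* prints 2 */
	return 0;
}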
@@ -2729,111 +2874,6 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
}
EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);

-static void
-rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- u64 delta;
-
- /*
- * The event first in the commit queue updates the
- * time stamp.
- */
- if (rb_event_is_commit(cpu_buffer, event)) {
- /*
- * A commit event that is first on a page
- * updates the write timestamp with the page stamp
- */
- if (!rb_event_index(event))
- cpu_buffer->write_stamp =
- cpu_buffer->commit_page->page->time_stamp;
- else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
- cpu_buffer->write_stamp += delta;
- } else
- cpu_buffer->write_stamp += event->time_delta;
- }
-}
-
-static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- local_inc(&cpu_buffer->entries);
- rb_update_write_stamp(cpu_buffer, event);
- rb_end_commit(cpu_buffer);
-}
-
-static __always_inline void
-rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
-{
- bool pagebusy;
-
- if (buffer->irq_work.waiters_pending) {
- buffer->irq_work.waiters_pending = false;
- /* irq_work_queue() supplies it's own memory barriers */
- irq_work_queue(&buffer->irq_work.work);
- }
-
- if (cpu_buffer->irq_work.waiters_pending) {
- cpu_buffer->irq_work.waiters_pending = false;
- /* irq_work_queue() supplies it's own memory barriers */
- irq_work_queue(&cpu_buffer->irq_work.work);
- }
-
- pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
-
- if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
- cpu_buffer->irq_work.wakeup_full = true;
- cpu_buffer->irq_work.full_waiters_pending = false;
- /* irq_work_queue() supplies it's own memory barriers */
- irq_work_queue(&cpu_buffer->irq_work.work);
- }
-}
-
-/**
- * ring_buffer_unlock_commit - commit a reserved
- * @buffer: The buffer to commit to
- * @event: The event pointer to commit.
- *
- * This commits the data to the ring buffer, and releases any locks held.
- *
- * Must be paired with ring_buffer_lock_reserve.
- */
-int ring_buffer_unlock_commit(struct ring_buffer *buffer,
- struct ring_buffer_event *event)
-{
- struct ring_buffer_per_cpu *cpu_buffer;
- int cpu = raw_smp_processor_id();
-
- cpu_buffer = buffer->buffers[cpu];
-
- rb_commit(cpu_buffer, event);
-
- rb_wakeups(buffer, cpu_buffer);
-
- trace_recursive_unlock(cpu_buffer);
-
- preempt_enable_notrace();
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
-
-static inline void rb_event_discard(struct ring_buffer_event *event)
-{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
- event = skip_time_extend(event);
-
- /* array[0] holds the actual length for the discarded event */
- event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
- event->type_len = RINGBUF_TYPE_PADDING;
- /* time delta must be non zero */
- if (!event->time_delta)
- event->time_delta = 1;
-}
-
/*
* Decrement the entries to the page that an event is on.
* The event does not even need to exist, only the pointer
|