@@ -67,6 +67,7 @@
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
 	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
+	.seq = SEQCNT_ZERO(hrtimer_bases.seq),
 	.clock_base =
 	{
 		{
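The new .seq initializer relies on companion header changes that are not part of this hunk: struct hrtimer_cpu_base must have gained a seqcount plus a pointer to the timer whose callback is currently executing. A minimal sketch of the assumed layout, with field names taken from their uses later in this patch and all unrelated members elided:

	struct hrtimer_cpu_base {
		raw_spinlock_t			lock;
		seqcount_t			seq;		/* guards ->state vs. ->running, read by hrtimer_active() */
		struct hrtimer			*running;	/* timer whose callback is executing, or NULL */
		/* ... */
		struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
	};
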
@@ -110,6 +111,18 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
  */
 #ifdef CONFIG_SMP
 
+/*
+ * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
+ * such that hrtimer_callback_running() can unconditionally dereference
+ * timer->base->cpu_base
+ */
+static struct hrtimer_cpu_base migration_cpu_base = {
+	.seq = SEQCNT_ZERO(migration_cpu_base),
+	.clock_base = { { .cpu_base = &migration_cpu_base, }, },
+};
+
+#define migration_base	migration_cpu_base.clock_base[0]
+
 /*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
  * means that all timers which are tied to this base via timer->base are
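The dummy base exists so that a timer parked during migration still has a fully valid timer->base->cpu_base chain, which the comment says hrtimer_callback_running() may now dereference unconditionally. That helper lives in the header rather than in this file, but given the ->running field used below it plausibly reduces to something like:

	/* sketch: no NULL check required, migration_base is a real base */
	static inline bool hrtimer_callback_running(struct hrtimer *timer)
	{
		return timer->base->cpu_base->running == timer;
	}
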
@@ -119,8 +132,8 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
  * be found on the lists/queues.
  *
  * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * possible to set timer->base = &migration_base and drop the lock: the timer
+ * remains locked.
  */
 static
 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
@@ -130,7 +143,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 
 	for (;;) {
 		base = timer->base;
-		if (likely(base != NULL)) {
+		if (likely(base != &migration_base)) {
 			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
@@ -194,8 +207,8 @@ again:
 		if (unlikely(hrtimer_callback_running(timer)))
 			return base;
 
-		/* See the comment in lock_timer_base() */
-		timer->base = NULL;
+		/* See the comment in lock_hrtimer_base() */
+		timer->base = &migration_base;
 		raw_spin_unlock(&base->cpu_base->lock);
 		raw_spin_lock(&new_base->cpu_base->lock);
 
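Together with the lock_hrtimer_base() hunk above, this implements the classic migration dance: park the timer on the dummy base, drop the old lock, take the new one, then publish the real base. Condensed into a sketch (error paths and event reprogramming elided):

	/* move 'timer' from 'base' to 'new_base' without holding both locks */
	timer->base = &migration_base;	/* concurrent lockers now spin in lock_hrtimer_base() */
	raw_spin_unlock(&base->cpu_base->lock);
	raw_spin_lock(&new_base->cpu_base->lock);
	timer->base = new_base;		/* lockers can proceed against the new base */

Any concurrent lock_hrtimer_base() caller that observes &migration_base simply goes around its for (;;) loop again until the new base becomes visible.
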
@@ -838,11 +851,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
 
 	base->cpu_base->active_bases |= 1 << base->index;
 
-	/*
-	 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
-	 * state of a possibly running callback.
-	 */
-	timer->state |= HRTIMER_STATE_ENQUEUED;
+	timer->state = HRTIMER_STATE_ENQUEUED;
 
 	return timerqueue_add(&base->active, &timer->node);
 }
@@ -907,14 +916,9 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
 		timer_stats_hrtimer_clear_start_info(timer);
 		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
 
-		if (!restart) {
-			/*
-			 * We must preserve the CALLBACK state flag here,
-			 * otherwise we could move the timer base in
-			 * switch_hrtimer_base.
-			 */
-			state &= HRTIMER_STATE_CALLBACK;
-		}
+		if (!restart)
+			state = HRTIMER_STATE_INACTIVE;
+
 		__remove_hrtimer(timer, base, state, reprogram);
 		return 1;
 	}
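Both of the hunks above shed their bit-preserving logic for the same reason: with the callback phase tracked in cpu_base->running instead of in timer->state, plain assignments suffice. Assuming the same patch also drops HRTIMER_STATE_CALLBACK from the header, the remaining timer states would be just:

	#define HRTIMER_STATE_INACTIVE	0x00	/* not queued (callback may still be running) */
	#define HRTIMER_STATE_ENQUEUED	0x01	/* queued on one of the clock base rbtrees */
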
@@ -1115,6 +1119,51 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 }
 EXPORT_SYMBOL_GPL(hrtimer_init);
 
+/*
+ * A timer is active when it is enqueued into the rbtree or the
+ * callback function is running or it's in the state of being migrated
+ * to another cpu.
+ *
+ * It is important for this function to not return a false negative.
+ */
+bool hrtimer_active(const struct hrtimer *timer)
+{
+	struct hrtimer_cpu_base *cpu_base;
+	unsigned int seq;
+
+	do {
+		cpu_base = READ_ONCE(timer->base->cpu_base);
+		seq = raw_read_seqcount_begin(&cpu_base->seq);
+
+		if (timer->state != HRTIMER_STATE_INACTIVE ||
+		    cpu_base->running == timer)
+			return true;
+
+	} while (read_seqcount_retry(&cpu_base->seq, seq) ||
+		 cpu_base != READ_ONCE(timer->base->cpu_base));
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(hrtimer_active);
+
+/*
+ * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
+ * distinct sections:
+ *
+ *  - queued:	the timer is queued
+ *  - callback:	the timer is being run
+ *  - post:	the timer is inactive or (re)queued
+ *
+ * On the read side we ensure we observe timer->state and cpu_base->running
+ * from the same section; if anything changed while we looked at it, we retry.
+ * This includes timer->base changing, because sequence numbers alone are
+ * insufficient for that.
+ *
+ * The sequence numbers are required because otherwise we could still observe
+ * a false negative if the read side got smeared over multiple consecutive
+ * __run_hrtimer() invocations.
+ */
+
 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 			  struct hrtimer_clock_base *base,
 			  struct hrtimer *timer, ktime_t *now)
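A concrete interleaving makes the comment's three sections visible. Assuming raw_write_seqcount_barrier() bumps the count twice (with a write barrier in between) and the count starts at an even value S, the values visible to a reader are:

	/*
	 * writer (__run_hrtimer)		timer->state	cpu_base->running	seq
	 *
	 * [queued]				ENQUEUED	NULL			S
	 *	cpu_base->running = timer;
	 *	raw_write_seqcount_barrier();					->	S+2
	 *	timer->state = INACTIVE;
	 * [callback]				INACTIVE	timer			S+2
	 *	fn(timer);	(may re-enqueue the timer)
	 *	raw_write_seqcount_barrier();					->	S+4
	 *	cpu_base->running = NULL;
	 * [post]				INACTIVE or ENQUEUED	NULL		S+4
	 */

Without the sequence check, a reader could sample timer->state == INACTIVE inside [callback] and cpu_base->running == NULL after the callback finished, which is exactly the false negative the comment warns about; the seq mismatch forces such a smeared reader to retry.
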
@@ -1122,10 +1171,21 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
 
-	WARN_ON(!irqs_disabled());
+	lockdep_assert_held(&cpu_base->lock);
 
 	debug_deactivate(timer);
-	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+	cpu_base->running = timer;
+
+	/*
+	 * Separate the ->running assignment from the ->state assignment.
+	 *
+	 * As with a regular write barrier, this ensures the read side in
+	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * timer->state == INACTIVE.
+	 */
+	raw_write_seqcount_barrier(&cpu_base->seq);
+
+	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
 	timer_stats_account_hrtimer(timer);
 	fn = timer->function;
 
@@ -1141,7 +1201,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	raw_spin_lock(&cpu_base->lock);
 
 	/*
-	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
+	 * Note: We clear the running state after enqueue_hrtimer and
 	 * we do not reprogramm the event hardware. Happens either in
 	 * hrtimer_start_range_ns() or in hrtimer_interrupt()
 	 *
@@ -1153,9 +1213,17 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	    !(timer->state & HRTIMER_STATE_ENQUEUED))
 		enqueue_hrtimer(timer, base);
 
-	WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
+	/*
+	 * Separate the ->running assignment from the ->state assignment.
+	 *
+	 * As with a regular write barrier, this ensures the read side in
+	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * timer->state == INACTIVE.
+	 */
+	raw_write_seqcount_barrier(&cpu_base->seq);
 
-	timer->state &= ~HRTIMER_STATE_CALLBACK;
+	WARN_ON_ONCE(cpu_base->running != timer);
+	cpu_base->running = NULL;
 }
 
 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
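
Why false negatives matter: a caller may use hrtimer_active() to decide when it is safe to free the structure containing the timer while the callback may still be in flight. A usage sketch (my_data and the waiter loop are illustrative, not from this patch; it assumes the callback has already been prevented from re-arming the timer, otherwise the loop need not terminate):

	struct my_data {
		struct hrtimer	timer;
		/* ... */
	};

	static void my_data_free(struct my_data *d)
	{
		/*
		 * Wait until the timer is neither queued nor running on
		 * any CPU; only then may the enclosing object go away.
		 */
		while (hrtimer_active(&d->timer))
			cpu_relax();
		kfree(d);
	}

Had hrtimer_active() been able to return a false negative between the ->state and ->running updates in __run_hrtimer(), this loop could exit while the callback is still executing and free the memory under it; the seqcount retry closes exactly that window.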