|
@@ -203,6 +203,7 @@ struct timer_base {
|
|
|
bool migration_enabled;
|
|
|
bool nohz_active;
|
|
|
bool is_idle;
|
|
|
+ bool must_forward_clk;
|
|
|
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
|
|
|
struct hlist_head vectors[WHEEL_SIZE];
|
|
|
} ____cacheline_aligned;
|
|
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
|
|
|
|
|
|
static inline void forward_timer_base(struct timer_base *base)
|
|
|
{
|
|
|
- unsigned long jnow = READ_ONCE(jiffies);
|
|
|
+ unsigned long jnow;
|
|
|
|
|
|
/*
|
|
|
- * We only forward the base when it's idle and we have a delta between
|
|
|
- * base clock and jiffies.
|
|
|
+ * We only forward the base when we are idle or have just come out of
|
|
|
+ * idle (must_forward_clk logic), and have a delta between base clock
|
|
|
+ * and jiffies. In the common case, run_timers will take care of it.
|
|
|
*/
|
|
|
- if (!base->is_idle || (long) (jnow - base->clk) < 2)
|
|
|
+ if (likely(!base->must_forward_clk))
|
|
|
+ return;
|
|
|
+
|
|
|
+ jnow = READ_ONCE(jiffies);
|
|
|
+ base->must_forward_clk = base->is_idle;
|
|
|
+ if ((long)(jnow - base->clk) < 2)
|
|
|
return;
|
|
|
|
|
|
/*
|
|
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
|
|
|
* same array bucket then just return:
|
|
|
*/
|
|
|
if (timer_pending(timer)) {
|
|
|
+ /*
|
|
|
+ * The downside of this optimization is that it can result in
|
|
|
+ * larger granularity than you would get from adding a new
|
|
|
+ * timer with this expiry.
|
|
|
+ */
|
|
|
if (timer->expires == expires)
|
|
|
return 1;
|
|
|
|
|
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
|
|
|
* dequeue/enqueue dance.
|
|
|
*/
|
|
|
base = lock_timer_base(timer, &flags);
|
|
|
+ forward_timer_base(base);
|
|
|
|
|
|
clk = base->clk;
|
|
|
idx = calc_wheel_index(expires, clk);
|
|
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
|
|
|
}
|
|
|
} else {
|
|
|
base = lock_timer_base(timer, &flags);
|
|
|
+ forward_timer_base(base);
|
|
|
}
|
|
|
|
|
|
ret = detach_if_pending(timer, base, false);
|
|
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
|
|
|
raw_spin_lock(&base->lock);
|
|
|
WRITE_ONCE(timer->flags,
|
|
|
(timer->flags & ~TIMER_BASEMASK) | base->cpu);
|
|
|
+ forward_timer_base(base);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /* Try to forward a stale timer base clock */
|
|
|
- forward_timer_base(base);
|
|
|
-
|
|
|
timer->expires = expires;
|
|
|
/*
|
|
|
* If 'idx' was calculated above and the base time did not advance
|
|
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
|
|
|
WRITE_ONCE(timer->flags,
|
|
|
(timer->flags & ~TIMER_BASEMASK) | cpu);
|
|
|
}
|
|
|
+ forward_timer_base(base);
|
|
|
|
|
|
debug_activate(timer, timer->expires);
|
|
|
internal_add_timer(base, timer);
|
|
@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
|
|
|
if (!is_max_delta)
|
|
|
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
|
|
|
/*
|
|
|
- * If we expect to sleep more than a tick, mark the base idle:
|
|
|
+ * If we expect to sleep more than a tick, mark the base idle.
|
|
|
+ * Also the tick is stopped so any added timer must forward
|
|
|
+ * the base clk itself to keep granularity small. This idle
|
|
|
+ * logic is only maintained for the BASE_STD base, deferrable
|
|
|
+ * timers may still see large granularity skew (by design).
|
|
|
*/
|
|
|
- if ((expires - basem) > TICK_NSEC)
|
|
|
+ if ((expires - basem) > TICK_NSEC) {
|
|
|
+ base->must_forward_clk = true;
|
|
|
base->is_idle = true;
|
|
|
+ }
|
|
|
}
|
|
|
raw_spin_unlock(&base->lock);
|
|
|
|
|
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
|
|
|
{
|
|
|
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
|
|
|
|
|
|
+ /*
|
|
|
+ * must_forward_clk must be cleared before running timers so that any
|
|
|
+ * timer functions that call mod_timer will not try to forward the
|
|
|
+ * base. idle trcking / clock forwarding logic is only used with
|
|
|
+ * BASE_STD timers.
|
|
|
+ *
|
|
|
+ * The deferrable base does not do idle tracking at all, so we do
|
|
|
+ * not forward it. This can result in very large variations in
|
|
|
+ * granularity for deferrable timers, but they can be deferred for
|
|
|
+ * long periods due to idle.
|
|
|
+ */
|
|
|
+ base->must_forward_clk = false;
|
|
|
+
|
|
|
__run_timers(base);
|
|
|
if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
|
|
|
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
|