@@ -73,27 +73,6 @@
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
-static void update_rq_clock_task(struct rq *rq, s64 delta);
-
-void update_rq_clock(struct rq *rq)
-{
-	s64 delta;
-
-	lockdep_assert_held(&rq->lock);
-
-	if (rq->clock_update_flags & RQCF_ACT_SKIP)
-		return;
-
-#ifdef CONFIG_SCHED_DEBUG
-	rq->clock_update_flags |= RQCF_UPDATED;
-#endif
-	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
-	if (delta < 0)
-		return;
-	rq->clock += delta;
-	update_rq_clock_task(rq, delta);
-}
-
 /*
  * Debugging: various feature bits
  */
@@ -218,6 +197,84 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 	}
 }
 
+/*
+ * RQ-clock updating methods:
+ */
+
+static void update_rq_clock_task(struct rq *rq, s64 delta)
+{
+/*
+ * In theory, the compile should just see 0 here, and optimize out the call
+ * to sched_rt_avg_update. But I don't trust it...
+ */
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+	s64 steal = 0, irq_delta = 0;
+#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
+
+	/*
+	 * Since irq_time is only updated on {soft,}irq_exit, we might run into
+	 * this case when a previous update_rq_clock() happened inside a
+	 * {soft,}irq region.
+	 *
+	 * When this happens, we stop ->clock_task and only update the
+	 * prev_irq_time stamp to account for the part that fit, so that a next
+	 * update will consume the rest. This ensures ->clock_task is
+	 * monotonic.
+	 *
+	 * It does however cause some slight miss-attribution of {soft,}irq
+	 * time, a more accurate solution would be to update the irq_time using
+	 * the current rq->clock timestamp, except that would require using
+	 * atomic ops.
+	 */
+	if (irq_delta > delta)
+		irq_delta = delta;
+
+	rq->prev_irq_time += irq_delta;
+	delta -= irq_delta;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	if (static_key_false((&paravirt_steal_rq_enabled))) {
+		steal = paravirt_steal_clock(cpu_of(rq));
+		steal -= rq->prev_steal_time_rq;
+
+		if (unlikely(steal > delta))
+			steal = delta;
+
+		rq->prev_steal_time_rq += steal;
+		delta -= steal;
+	}
+#endif
+
+	rq->clock_task += delta;
+
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
+		sched_rt_avg_update(rq, irq_delta + steal);
+#endif
+}
+
+void update_rq_clock(struct rq *rq)
+{
+	s64 delta;
+
+	lockdep_assert_held(&rq->lock);
+
+	if (rq->clock_update_flags & RQCF_ACT_SKIP)
+		return;
+
+#ifdef CONFIG_SCHED_DEBUG
+	rq->clock_update_flags |= RQCF_UPDATED;
+#endif
+	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+	if (delta < 0)
+		return;
+	rq->clock += delta;
+	update_rq_clock_task(rq, delta);
+}
+
+
 #ifdef CONFIG_SCHED_HRTICK
 /*
  * Use HR-timers to deliver accurate preemption points.
@@ -767,60 +824,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 	dequeue_task(rq, p, flags);
 }
 
-static void update_rq_clock_task(struct rq *rq, s64 delta)
-{
-/*
- * In theory, the compile should just see 0 here, and optimize out the call
- * to sched_rt_avg_update. But I don't trust it...
- */
-#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
-	s64 steal = 0, irq_delta = 0;
-#endif
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
-
-	/*
-	 * Since irq_time is only updated on {soft,}irq_exit, we might run into
-	 * this case when a previous update_rq_clock() happened inside a
-	 * {soft,}irq region.
-	 *
-	 * When this happens, we stop ->clock_task and only update the
-	 * prev_irq_time stamp to account for the part that fit, so that a next
-	 * update will consume the rest. This ensures ->clock_task is
-	 * monotonic.
-	 *
-	 * It does however cause some slight miss-attribution of {soft,}irq
-	 * time, a more accurate solution would be to update the irq_time using
-	 * the current rq->clock timestamp, except that would require using
-	 * atomic ops.
-	 */
-	if (irq_delta > delta)
-		irq_delta = delta;
-
-	rq->prev_irq_time += irq_delta;
-	delta -= irq_delta;
-#endif
-#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
-	if (static_key_false((&paravirt_steal_rq_enabled))) {
-		steal = paravirt_steal_clock(cpu_of(rq));
-		steal -= rq->prev_steal_time_rq;
-
-		if (unlikely(steal > delta))
-			steal = delta;
-
-		rq->prev_steal_time_rq += steal;
-		delta -= steal;
-	}
-#endif
-
-	rq->clock_task += delta;
-
-#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
-	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
-		sched_rt_avg_update(rq, irq_delta + steal);
-#endif
-}
-
 void sched_set_stop_task(int cpu, struct task_struct *stop)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };