Эх сурвалжийг харах

intel_pstate: Avoid extra invocation of intel_pstate_sample()

The initialization of intel_pstate for a given CPU involves populating
the fields of its struct cpudata that represent the previous sample,
but currently that is done in a problematic way.

Namely, intel_pstate_init_cpu() makes an extra call to
intel_pstate_sample() so it reads the current register values that
will be used to populate the "previous sample" record during the
next invocation of intel_pstate_sample().  However, after commit
a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with utilization
update callbacks) that doesn't work for last_sample_time, because
the time value is passed to intel_pstate_sample() as an argument now.
Passing 0 to it from intel_pstate_init_cpu() is problematic, because
that causes cpu->last_sample_time == 0 to be visible in
get_target_pstate_use_performance() (and hence the extra
cpu->last_sample_time > 0 check in there) and effectively allows
the first invocation of intel_pstate_sample() from
intel_pstate_update_util() to happen immediately after the
initialization which may lead to a significant "turn on"
effect in the governor algorithm.

To mitigate that issue, rework the initialization to avoid the
extra intel_pstate_sample() call from intel_pstate_init_cpu().
Instead, make intel_pstate_sample() return false if it has been
called with cpu->sample.time equal to zero, which will make
intel_pstate_update_util() skip the sample in that case, and
reset cpu->sample.time from intel_pstate_set_update_util_hook()
to make the algorithm start properly every time the hook is set.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Rafael J. Wysocki 9 жил өмнө
parent
commit
febce40feb

+ 15 - 6
drivers/cpufreq/intel_pstate.c

@@ -910,7 +910,14 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
 	cpu->prev_aperf = aperf;
 	cpu->prev_mperf = mperf;
 	cpu->prev_tsc = tsc;
-	return true;
+	/*
+	 * First time this function is invoked in a given cycle, all of the
+	 * previous sample data fields are equal to zero or stale and they must
+	 * be populated with meaningful numbers for things to work, so assume
+	 * that sample.time will always be reset before setting the utilization
+	 * update hook and make the caller skip the sample then.
+	 */
+	return !!cpu->last_sample_time;
 }
 
 static inline int32_t get_avg_frequency(struct cpudata *cpu)
@@ -984,8 +991,7 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	 * enough period of time to adjust our busyness.
 	 */
 	duration_ns = cpu->sample.time - cpu->last_sample_time;
-	if ((s64)duration_ns > pid_params.sample_rate_ns * 3
-	    && cpu->last_sample_time > 0) {
+	if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
 		sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
 				      int_tofp(duration_ns));
 		core_busy = mul_fp(core_busy, sample_ratio);
@@ -1100,7 +1106,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 	intel_pstate_get_cpu_pstates(cpu);
 
 	intel_pstate_busy_pid_reset(cpu);
-	intel_pstate_sample(cpu, 0);
 
 	cpu->update_util.func = intel_pstate_update_util;
 
@@ -1121,9 +1126,13 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
 	return get_avg_frequency(cpu);
 }
 
-static void intel_pstate_set_update_util_hook(unsigned int cpu)
+static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
-	cpufreq_set_update_util_data(cpu, &all_cpu_data[cpu]->update_util);
+	struct cpudata *cpu = all_cpu_data[cpu_num];
+
+	/* Prevent intel_pstate_update_util() from using stale data. */
+	cpu->sample.time = 0;
+	cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
 }
 
 static void intel_pstate_clear_update_util_hook(unsigned int cpu)