@@ -221,6 +221,11 @@ struct global_params {
  *			preference/bias
  * @epp_saved:		Saved EPP/EPB during system suspend or CPU offline
  *			operation
+ * @hwp_req_cached:	Cached value of the last HWP Request MSR
+ * @hwp_cap_cached:	Cached value of the last HWP Capabilities MSR
+ * @last_io_update:	Last time when IO wake flag was set
+ * @sched_flags:	Store scheduler flags for possible cross CPU update
+ * @hwp_boost_min:	Last HWP boosted min performance
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -253,6 +258,11 @@ struct cpudata {
 	s16 epp_policy;
 	s16 epp_default;
 	s16 epp_saved;
+	u64 hwp_req_cached;
+	u64 hwp_cap_cached;
+	u64 last_io_update;
+	unsigned int sched_flags;
+	u32 hwp_boost_min;
 };
 
 static struct cpudata **all_cpu_data;
@@ -285,6 +295,7 @@ static struct pstate_funcs pstate_funcs __read_mostly;
 
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
+static bool hwp_boost __read_mostly;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
@@ -689,6 +700,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
 	u64 cap;
 
 	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+	WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
 	if (global.no_turbo)
 		*current_max = HWP_GUARANTEED_PERF(cap);
 	else
@@ -763,6 +775,7 @@ update_epp:
 		intel_pstate_set_epb(cpu, epp);
 	}
 skip_epp:
+	WRITE_ONCE(cpu_data->hwp_req_cached, value);
 	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
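The value cached here is the full IA32_HWP_REQUEST image; the boost path added later in this series rewrites only its minimum-performance field and leaves the other request fields untouched. As a rough sketch of the layout that the boost code relies on (illustrative helpers only, not part of the patch; the masks mirror the ones used further below):

	/* Illustrative only: IA32_HWP_REQUEST fields the boost code touches. */
	#include <stdint.h>

	#define HWP_REQ_MIN_PERF(req)	((uint32_t)((req) & 0xff))	  /* bits  7:0 */
	#define HWP_REQ_MAX_PERF(req)	((uint32_t)(((req) >> 8) & 0xff)) /* bits 15:8 */

	/* Raise only the min field, keeping max/desired/EPP intact. */
	static inline uint64_t hwp_req_with_min(uint64_t req, uint32_t new_min)
	{
		return (req & ~0xffULL) | new_min;
	}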
@@ -1020,6 +1033,30 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 	return count;
 }
 
+static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
+				struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", hwp_boost);
+}
+
+static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b,
+				       const char *buf, size_t count)
+{
+	unsigned int input;
+	int ret;
+
+	ret = kstrtouint(buf, 10, &input);
+	if (ret)
+		return ret;
+
+	mutex_lock(&intel_pstate_driver_lock);
+	hwp_boost = !!input;
+	intel_pstate_update_policies();
+	mutex_unlock(&intel_pstate_driver_lock);
+
+	return count;
+}
+
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
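Once the attribute is registered (see the next two hunks), the knob shows up in the intel_pstate sysfs directory and dynamic boost can be flipped at run time. A minimal userspace sketch, assuming the usual /sys/devices/system/cpu/intel_pstate/hwp_dynamic_boost path (equivalent to a shell echo into that file):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Write "1" to enable HWP dynamic boost, "0" to disable it. */
	static int set_hwp_dynamic_boost(int enable)
	{
		const char *path = "/sys/devices/system/cpu/intel_pstate/hwp_dynamic_boost";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return -1;
		}
		if (write(fd, enable ? "1" : "0", 1) != 1)
			perror("write");
		close(fd);
		return 0;
	}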
@@ -1029,6 +1066,7 @@ define_one_global_rw(max_perf_pct);
 define_one_global_rw(min_perf_pct);
 define_one_global_ro(turbo_pct);
 define_one_global_ro(num_pstates);
+define_one_global_rw(hwp_dynamic_boost);
 
 static struct attribute *intel_pstate_attributes[] = {
 	&status.attr,
@@ -1069,6 +1107,11 @@ static void __init intel_pstate_sysfs_expose_params(void)
 	rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
 	WARN_ON(rc);
 
+	if (hwp_active) {
+		rc = sysfs_create_file(intel_pstate_kobject,
+				       &hwp_dynamic_boost.attr);
+		WARN_ON(rc);
+	}
 }
 
 /************************** sysfs end ************************/
@@ -1381,6 +1424,116 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	intel_pstate_set_min_pstate(cpu);
 }
 
+/*
+ * A long hold time will keep high perf limits for a long time,
+ * which negatively impacts perf/watt for some workloads,
+ * like specpower. 3ms is based on experiments on some
+ * workloads.
+ */
+static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
+
+static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
+{
+	u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
+	u32 max_limit = (hwp_req & 0xff00) >> 8;
+	u32 min_limit = (hwp_req & 0xff);
+	u32 boost_level1;
+
+	/*
+	 * Cases to consider (User changes via sysfs or boot time):
+	 * If, P0 (Turbo max) = P1 (Guaranteed max) = min:
+	 *	No boost, return.
+	 * If, P0 (Turbo max) > P1 (Guaranteed max) = min:
+	 *	Should result in one level boost only for P0.
+	 * If, P0 (Turbo max) = P1 (Guaranteed max) > min:
+	 *	Should result in two level boost:
+	 *		(min + p1)/2 and P1.
+	 * If, P0 (Turbo max) > P1 (Guaranteed max) > min:
+	 *	Should result in three level boost:
+	 *		(min + p1)/2, P1 and P0.
+	 */
+
+	/* If max and min are equal or already at max, nothing to boost */
+	if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
+		return;
+
+	if (!cpu->hwp_boost_min)
+		cpu->hwp_boost_min = min_limit;
+
+	/* level at halfway mark between min and guaranteed */
+	boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
+
+	if (cpu->hwp_boost_min < boost_level1)
+		cpu->hwp_boost_min = boost_level1;
+	else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+		cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
+	else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
+		 max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+		cpu->hwp_boost_min = max_limit;
+	else
+		return;
+
+	hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
+	wrmsrl(MSR_HWP_REQUEST, hwp_req);
+	cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
+{
+	if (cpu->hwp_boost_min) {
+		bool expired;
+
+		/* Check if we are idle for hold time to boost down */
+		expired = time_after64(cpu->sample.time, cpu->last_update +
+				       hwp_boost_hold_time_ns);
+		if (expired) {
+			wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
+			cpu->hwp_boost_min = 0;
+		}
+	}
+	cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
+						      u64 time)
+{
+	cpu->sample.time = time;
+
+	if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
+		bool do_io = false;
+
+		cpu->sched_flags = 0;
+		/*
+		 * Set iowait_boost flag and update time. Since IO WAIT flag
+		 * is set all the time, we can't just conclude that there is
+		 * some IO bound activity scheduled on this CPU from just one
+		 * occurrence. Only when it is seen in at least two consecutive
+		 * ticks do we treat the CPU as a boost candidate.
+		 */
+		if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
+			do_io = true;
+
+		cpu->last_io_update = time;
+
+		if (do_io)
+			intel_pstate_hwp_boost_up(cpu);
+
+	} else {
+		intel_pstate_hwp_boost_down(cpu);
+	}
+}
+
+static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
+						u64 time, unsigned int flags)
+{
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+
+	cpu->sched_flags |= flags;
+
+	if (smp_processor_id() == cpu->cpu)
+		intel_pstate_update_util_hwp_local(cpu, time);
+}
+
 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
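When turbo, guaranteed and min all differ, the case analysis in intel_pstate_hwp_boost_up() produces a three-step ladder. A small self-contained model of that ladder, using made-up example ratios (min = 10, guaranteed = 24, max = 38); it is only a sketch of the logic above, not driver code:

	#include <stdio.h>
	#include <stdint.h>

	/* Model of the boost ladder: returns the next boosted min ratio. */
	static uint32_t next_boost_min(uint32_t cur, uint32_t min, uint32_t guar, uint32_t max)
	{
		uint32_t level1 = (guar + min) / 2;

		if (min == max || cur >= max)
			return cur;		/* nothing to boost */
		if (!cur)
			cur = min;
		if (cur < level1)
			return level1;		/* halfway to guaranteed */
		if (cur < guar)
			return guar;		/* guaranteed ratio */
		if (cur == guar && max != guar)
			return max;		/* turbo max */
		return cur;
	}

	int main(void)
	{
		uint32_t min = 10, guar = 24, max = 38, cur = 0;

		/* Prints 17, 24, 38 for the example ratios. */
		for (int i = 0; i < 3; i++) {
			cur = next_boost_min(cur, min, guar, max);
			printf("boost level %d -> min ratio %u\n", i + 1, cur);
		}
		return 0;
	}

Each IO wakeup seen within two consecutive ticks climbs one rung, and intel_pstate_hwp_boost_down() restores the cached HWP request once the 3 ms hold time expires without a further boost.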
@@ -1641,6 +1794,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
 	{}
 };
 
+static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
+	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
+	{}
+};
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
@@ -1671,6 +1830,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
+
+		id = x86_match_cpu(intel_pstate_hwp_boost_ids);
+		if (id)
+			hwp_boost = true;
 	}
 
 	intel_pstate_get_cpu_pstates(cpu);
@@ -1684,7 +1847,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
-	if (hwp_active)
+	if (hwp_active && !hwp_boost)
 		return;
 
 	if (cpu->update_util_set)
@@ -1693,7 +1856,9 @@
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-				     intel_pstate_update_util);
+				     (hwp_active ?
+				     intel_pstate_update_util_hwp :
+				     intel_pstate_update_util));
 	cpu->update_util_set = true;
 }
 
@@ -1805,8 +1970,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		intel_pstate_set_update_util_hook(policy->cpu);
 	}
 
-	if (hwp_active)
+	if (hwp_active) {
+		/*
+		 * If hwp_boost was active before and it has been
+		 * turned off dynamically, the update util hook must
+		 * be cleared again here.
+		 */
+		if (!hwp_boost)
+			intel_pstate_clear_update_util_hook(policy->cpu);
 		intel_pstate_hwp_set(policy->cpu);
+	}
 
 	mutex_unlock(&intel_pstate_limits_lock);
 