Browse Source

Merge back earlier cpufreq material for v4.4.

Rafael J. Wysocki 9 years ago
parent
commit
7855e10294

+ 3 - 0
Documentation/kernel-parameters.txt

@@ -1546,6 +1546,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 		hwp_only
 			Only load intel_pstate on systems which support
 			hardware P state control (HWP) if available.
+		no_acpi
+			Don't use the limits specified by the ACPI processor
+			performance control objects _PSS and _PPC.

 	intremap=	[X86-64, Intel-IOMMU]
 			on	enable Interrupt Remapping (default)

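For reference, the new keyword rides on the existing intel_pstate= boot parameter like the flags already documented above; a usage sketch (not part of the patch):

	intel_pstate=no_acpi

Each keyword is matched with a plain strcmp() in intel_pstate_setup(), visible at the end of the intel_pstate.c hunks below.
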
+ 7 - 0
arch/x86/include/asm/msr-index.h

@@ -206,6 +206,13 @@
 #define MSR_GFX_PERF_LIMIT_REASONS	0x000006B0
 #define MSR_RING_PERF_LIMIT_REASONS	0x000006B1

+/* Config TDP MSRs */
+#define MSR_CONFIG_TDP_NOMINAL		0x00000648
+#define MSR_CONFIG_TDP_LEVEL1		0x00000649
+#define MSR_CONFIG_TDP_LEVEL2		0x0000064A
+#define MSR_CONFIG_TDP_CONTROL		0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO	0x0000064C
+
 /* Hardware P state interface */
 #define MSR_PPERF			0x0000064e
 #define MSR_PERF_LIMIT_REASONS		0x0000064f

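As a hedged illustration of how the new MSRs fit together (not part of the patch): core_get_max_pstate() in the intel_pstate.c hunks below reads MSR_CONFIG_TDP_CONTROL to find the active TDP level, and the per-level ratio MSRs sit consecutively after MSR_CONFIG_TDP_NOMINAL, so the level value is added to that base address. A minimal sketch, assuming the low two bits of the control MSR select the level and the low byte of the level MSR holds the ratio:

	/* Sketch only: mirrors the lookup pattern used later in this commit. */
	static int config_tdp_base_ratio(void)
	{
		u64 tdp_ctrl, tdp_ratio;

		if (rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl))
			return -EIO;		/* MSR may not be implemented */

		/* levels 0/1/2 select 0x648/0x649/0x64A */
		if (rdmsrl_safe(MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3),
				&tdp_ratio))
			return -EIO;

		return tdp_ratio & 0xff;	/* assumed: ratio in the low byte */
	}
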
+ 1 - 0
drivers/cpufreq/Kconfig.x86

@@ -5,6 +5,7 @@
 config X86_INTEL_PSTATE
        bool "Intel P state control"
        depends on X86
+       select ACPI_PROCESSOR if ACPI
        help
           This driver provides a P state for Intel core processors.
 	  The driver implements an internal governor and will become

+ 0 - 7
drivers/cpufreq/cpufreq.c

@@ -843,18 +843,11 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,

 	down_write(&policy->rwsem);

-	/* Updating inactive policies is invalid, so avoid doing that. */
-	if (unlikely(policy_is_inactive(policy))) {
-		ret = -EBUSY;
-		goto unlock_policy_rwsem;
-	}
-
 	if (fattr->store)
 		ret = fattr->store(policy, buf, count);
 	else
 		ret = -EIO;

-unlock_policy_rwsem:
 	up_write(&policy->rwsem);
 unlock:
 	put_online_cpus();

+ 18 - 13
drivers/cpufreq/cpufreq_conservative.c

@@ -23,6 +23,19 @@

 static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info);

+static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy,
+				   unsigned int event);
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_conservative = {
+	.name			= "conservative",
+	.governor		= cs_cpufreq_governor_dbs,
+	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
+	.owner			= THIS_MODULE,
+};
+
 static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
 					   struct cpufreq_policy *policy)
 {
@@ -119,12 +132,14 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	struct cpufreq_freqs *freq = data;
 	struct cs_cpu_dbs_info_s *dbs_info =
 					&per_cpu(cs_cpu_dbs_info, freq->cpu);
-	struct cpufreq_policy *policy;
+	struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu);
-	if (!dbs_info->enable)
+	if (!policy)
 		return 0;

-	policy = dbs_info->cdbs.shared->policy;
+	/* policy isn't governed by conservative governor */
+	if (policy->governor != &cpufreq_gov_conservative)
+		return 0;

 	/*
 	 * we only care if our internally tracked freq moves outside the 'valid'
@@ -367,16 +382,6 @@ static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy,
 	return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event);
 }

-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
-static
-#endif
-struct cpufreq_governor cpufreq_gov_conservative = {
-	.name			= "conservative",
-	.governor		= cs_cpufreq_governor_dbs,
-	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
-	.owner			= THIS_MODULE,
-};
-
 static int __init cpufreq_gov_dbs_init(void)
 {
 	return cpufreq_register_governor(&cpufreq_gov_conservative);

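The point of hoisting the cpufreq_gov_conservative definition above dbs_cpufreq_notifier() is that the notifier now identifies its policies by comparing policy->governor against the governor's address, which must be in scope at that point; only the callback still needs a forward declaration. The shape of the change, reduced to a self-contained sketch with stand-in types (not the kernel's definitions):

	struct policy;
	struct governor {
		const char *name;
		int (*run)(struct policy *p, unsigned int event);
	};

	/* Forward-declare only the callback... */
	static int cs_run(struct policy *p, unsigned int event);

	/* ...so the object can be defined before code that takes its address. */
	static struct governor gov_conservative = {
		.name = "conservative",
		.run  = cs_run,
	};

	static int is_ours(const struct governor *active)
	{
		return active == &gov_conservative;	/* the notifier's new test */
	}

	static int cs_run(struct policy *p, unsigned int event)
	{
		(void)p;
		return (int)event;
	}
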
+ 1 - 11
drivers/cpufreq/cpufreq_governor.c

@@ -463,7 +463,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 			cdata->get_cpu_dbs_info_s(cpu);

 		cs_dbs_info->down_skip = 0;
-		cs_dbs_info->enable = 1;
 		cs_dbs_info->requested_freq = policy->cur;
 	} else {
 		struct od_ops *od_ops = cdata->gov_ops;
@@ -482,9 +481,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 static int cpufreq_governor_stop(struct cpufreq_policy *policy,
 				 struct dbs_data *dbs_data)
 {
-	struct common_dbs_data *cdata = dbs_data->cdata;
-	unsigned int cpu = policy->cpu;
-	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
+	struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu);
 	struct cpu_common_dbs_info *shared = cdbs->shared;

 	/* State should be equivalent to START */
@@ -493,13 +490,6 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy,

 	gov_cancel_work(dbs_data, policy);

-	if (cdata->governor == GOV_CONSERVATIVE) {
-		struct cs_cpu_dbs_info_s *cs_dbs_info =
-			cdata->get_cpu_dbs_info_s(cpu);
-
-		cs_dbs_info->enable = 0;
-	}
-
 	shared->policy = NULL;
 	mutex_destroy(&shared->timer_mutex);
 	return 0;

+ 0 - 1
drivers/cpufreq/cpufreq_governor.h

@@ -170,7 +170,6 @@ struct cs_cpu_dbs_info_s {
 	struct cpu_dbs_info cdbs;
 	unsigned int down_skip;
 	unsigned int requested_freq;
-	unsigned int enable:1;
 };

 /* Per policy Governors sysfs tunables */

+ 45 - 5
drivers/cpufreq/imx6q-cpufreq.c

@@ -30,6 +30,10 @@ static struct clk *pll1_sw_clk;
 static struct clk *step_clk;
 static struct clk *pll2_pfd2_396m_clk;

+/* clk used by i.MX6UL */
+static struct clk *pll2_bus_clk;
+static struct clk *secondary_sel_clk;
+
 static struct device *cpu_dev;
 static bool free_opp;
 static struct cpufreq_frequency_table *freq_table;
@@ -91,16 +95,36 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
 	 * The setpoints are selected per PLL/PDF frequencies, so we need to
 	 * reprogram PLL for frequency scaling.  The procedure of reprogramming
 	 * PLL1 is as below.
-	 *
+	 * For i.MX6UL, there is a secondary clk mux, so the CPU frequency
+	 * change flow differs slightly from other i.MX6 SoCs.
+	 * The CPU frequency change flow for i.MX6 (except i.MX6UL) is:
 	 *  - Enable pll2_pfd2_396m_clk and reparent pll1_sw_clk to it
 	 *  - Reprogram pll1_sys_clk and reparent pll1_sw_clk back to it
 	 *  - Disable pll2_pfd2_396m_clk
 	 */
-	clk_set_parent(step_clk, pll2_pfd2_396m_clk);
-	clk_set_parent(pll1_sw_clk, step_clk);
-	if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) {
-		clk_set_rate(pll1_sys_clk, new_freq * 1000);
+	if (of_machine_is_compatible("fsl,imx6ul")) {
+		/*
+		 * When changing pll1_sw_clk's parent to pll1_sys_clk, the
+		 * CPU may run at higher than 528 MHz, which will make the
+		 * system unstable if the voltage is lower than the voltage
+		 * for 528 MHz, so halve the CPU frequency first.
+		 */
+		clk_set_rate(arm_clk, (old_freq >> 1) * 1000);
 		clk_set_parent(pll1_sw_clk, pll1_sys_clk);
+		if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk))
+			clk_set_parent(secondary_sel_clk, pll2_bus_clk);
+		else
+			clk_set_parent(secondary_sel_clk, pll2_pfd2_396m_clk);
+		clk_set_parent(step_clk, secondary_sel_clk);
+		clk_set_parent(pll1_sw_clk, step_clk);
+	} else {
+		clk_set_parent(step_clk, pll2_pfd2_396m_clk);
+		clk_set_parent(pll1_sw_clk, step_clk);
+		if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) {
+			clk_set_rate(pll1_sys_clk, new_freq * 1000);
+			clk_set_parent(pll1_sw_clk, pll1_sys_clk);
+		}
 	}

 	/* Ensure the arm clock divider is what we expect */
@@ -186,6 +210,16 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 		goto put_clk;
 	}

+	if (of_machine_is_compatible("fsl,imx6ul")) {
+		pll2_bus_clk = clk_get(cpu_dev, "pll2_bus");
+		secondary_sel_clk = clk_get(cpu_dev, "secondary_sel");
+		if (IS_ERR(pll2_bus_clk) || IS_ERR(secondary_sel_clk)) {
+			dev_err(cpu_dev, "failed to get clocks specific to imx6ul\n");
+			ret = -ENOENT;
+			goto put_clk;
+		}
+	}
+
 	arm_reg = regulator_get(cpu_dev, "arm");
 	pu_reg = regulator_get_optional(cpu_dev, "pu");
 	soc_reg = regulator_get(cpu_dev, "soc");
@@ -331,6 +365,10 @@ put_clk:
 		clk_put(step_clk);
 	if (!IS_ERR(pll2_pfd2_396m_clk))
 		clk_put(pll2_pfd2_396m_clk);
+	if (!IS_ERR(pll2_bus_clk))
+		clk_put(pll2_bus_clk);
+	if (!IS_ERR(secondary_sel_clk))
+		clk_put(secondary_sel_clk);
 	of_node_put(np);
 	return ret;
 }
@@ -350,6 +388,8 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev)
 	clk_put(pll1_sw_clk);
 	clk_put(step_clk);
 	clk_put(pll2_pfd2_396m_clk);
+	clk_put(pll2_bus_clk);
+	clk_put(secondary_sel_clk);

 	return 0;
 }

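One unit detail worth noting in the i.MX6UL path above: cpufreq frequencies (old_freq, new_freq) are in kHz while the clk API takes Hz, so (old_freq >> 1) * 1000 both halves the rate and converts units in one expression. A toy check of the arithmetic (plain C, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned long old_freq = 528000;	/* kHz, cpufreq's unit */

		/* half the rate, in Hz as clk_set_rate() expects */
		printf("%lu Hz\n", (old_freq >> 1) * 1000UL);	/* 264000000 */
		return 0;
	}
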
+ 2 - 0
drivers/cpufreq/integrator-cpufreq.c

@@ -221,6 +221,8 @@ static const struct of_device_id integrator_cpufreq_match[] = {
 	{ },
 };

+MODULE_DEVICE_TABLE(of, integrator_cpufreq_match);
+
 static struct platform_driver integrator_cpufreq_driver = {
 	.driver = {
 		.name = "integrator-cpufreq",

+ 266 - 10
drivers/cpufreq/intel_pstate.c

@@ -34,6 +34,10 @@
 #include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>

+#if IS_ENABLED(CONFIG_ACPI)
+#include <acpi/processor.h>
+#endif
+
 #define BYT_RATIOS		0x66a
 #define BYT_VIDS		0x66b
 #define BYT_TURBO_RATIOS	0x66c
@@ -43,7 +47,6 @@
 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 #define fp_toint(X) ((X) >> FRAC_BITS)

-
 static inline int32_t mul_fp(int32_t x, int32_t y)
 {
 	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
@@ -78,6 +81,7 @@ struct pstate_data {
 	int	current_pstate;
 	int	min_pstate;
 	int	max_pstate;
+	int	max_pstate_physical;
 	int	scaling;
 	int	turbo_pstate;
 };
@@ -113,6 +117,9 @@ struct cpudata {
 	u64	prev_mperf;
 	u64	prev_tsc;
 	struct sample sample;
+#if IS_ENABLED(CONFIG_ACPI)
+	struct acpi_processor_performance acpi_perf_data;
+#endif
 };

 static struct cpudata **all_cpu_data;
@@ -127,6 +134,7 @@ struct pstate_adjust_policy {

 struct pstate_funcs {
 	int (*get_max)(void);
+	int (*get_max_physical)(void);
 	int (*get_min)(void);
 	int (*get_turbo)(void);
 	int (*get_scaling)(void);
@@ -142,6 +150,7 @@ struct cpu_defaults {
 static struct pstate_adjust_policy pid_params;
 static struct pstate_funcs pstate_funcs;
 static int hwp_active;
+static int no_acpi_perf;

 struct perf_limits {
 	int no_turbo;
@@ -154,6 +163,8 @@ struct perf_limits {
 	int max_sysfs_pct;
 	int min_policy_pct;
 	int min_sysfs_pct;
+	int max_perf_ctl;
+	int min_perf_ctl;
 };

 static struct perf_limits limits = {
@@ -167,8 +178,157 @@ static struct perf_limits limits = {
 	.max_sysfs_pct = 100,
 	.min_policy_pct = 0,
 	.min_sysfs_pct = 0,
+	.max_perf_ctl = 0,
+	.min_perf_ctl = 0,
 };

+#if IS_ENABLED(CONFIG_ACPI)
+/*
+ * The max target pstate ratio is an 8-bit value in both the PLATFORM_INFO
+ * MSR and the TURBO_RATIO_LIMIT MSR, which the pstate driver stores in the
+ * max_pstate and max_turbo_pstate fields. The PERF_CTL MSR contains a 16-bit
+ * value for the P state ratio, of which only the high 8 bits are used; for
+ * example, 0x1700 sets target ratio 0x17. The _PSS control value is stored
+ * in a format that can be written directly to the PERF_CTL MSR, whereas in
+ * the intel_pstate driver this shift happens at write time (e.g. in
+ * core_set_pstate() for Core processors). This function converts the _PSS
+ * control value to the intel_pstate driver format for comparison and
+ * assignment.
+ */
+static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
+{
+	return cpu->acpi_perf_data.states[index].control >> 8;
+}
+
+static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
+{
+	struct cpudata *cpu;
+	int ret;
+	bool turbo_absent = false;
+	int max_pstate_index;
+	int min_pss_ctl, max_pss_ctl, turbo_pss_ctl;
+	int i;
+
+	cpu = all_cpu_data[policy->cpu];
+
+	pr_debug("intel_pstate: default limits 0x%x 0x%x 0x%x\n",
+		 cpu->pstate.min_pstate, cpu->pstate.max_pstate,
+		 cpu->pstate.turbo_pstate);
+
+	if (!cpu->acpi_perf_data.shared_cpu_map &&
+	    zalloc_cpumask_var_node(&cpu->acpi_perf_data.shared_cpu_map,
+				    GFP_KERNEL, cpu_to_node(policy->cpu))) {
+		return -ENOMEM;
+	}
+
+	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
+						  policy->cpu);
+	if (ret)
+		return ret;
+
+	/*
+	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
+	 * guarantee that the states returned by it map to the states in our
+	 * list directly.
+	 */
+	if (cpu->acpi_perf_data.control_register.space_id !=
+						ACPI_ADR_SPACE_FIXED_HARDWARE)
+		return -EIO;
+
+	pr_debug("intel_pstate: CPU%u - ACPI _PSS perf data\n", policy->cpu);
+	for (i = 0; i < cpu->acpi_perf_data.state_count; i++)
+		pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
+			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
+			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
+			 (u32) cpu->acpi_perf_data.states[i].power,
+			 (u32) cpu->acpi_perf_data.states[i].control);
+
+	/*
+	 * If there is only one entry in _PSS, simply ignore _PSS and continue
+	 * as usual without taking _PSS into account.
+	 */
+	if (cpu->acpi_perf_data.state_count < 2)
+		return 0;
+
+	turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
+	min_pss_ctl = convert_to_native_pstate_format(cpu,
+					cpu->acpi_perf_data.state_count - 1);
+	/* Check if there is a turbo freq in _PSS */
+	if (turbo_pss_ctl <= cpu->pstate.max_pstate &&
+	    turbo_pss_ctl > cpu->pstate.min_pstate) {
+		pr_debug("intel_pstate: no turbo range exists in _PSS\n");
+		limits.no_turbo = limits.turbo_disabled = 1;
+		cpu->pstate.turbo_pstate = cpu->pstate.max_pstate;
+		turbo_absent = true;
+	}
+
+	/* Check if the max non turbo p state < Intel P state max */
+	max_pstate_index = turbo_absent ? 0 : 1;
+	max_pss_ctl = convert_to_native_pstate_format(cpu, max_pstate_index);
+	if (max_pss_ctl < cpu->pstate.max_pstate &&
+	    max_pss_ctl > cpu->pstate.min_pstate)
+		cpu->pstate.max_pstate = max_pss_ctl;
+
+	/* Check if min perf > Intel P state min */
+	if (min_pss_ctl > cpu->pstate.min_pstate &&
+	    min_pss_ctl < cpu->pstate.max_pstate) {
+		cpu->pstate.min_pstate = min_pss_ctl;
+		policy->cpuinfo.min_freq = min_pss_ctl * cpu->pstate.scaling;
+	}
+
+	if (turbo_absent)
+		policy->cpuinfo.max_freq = cpu->pstate.max_pstate *
+						cpu->pstate.scaling;
+	else {
+		policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate *
+						cpu->pstate.scaling;
+		/*
+		 * The _PSS table doesn't contain the whole turbo frequency
+		 * range; it just contains +1 MHz above the max non-turbo
+		 * frequency, with a control value corresponding to the max
+		 * turbo ratio. But when cpufreq set_policy is called, it is
+		 * called with this max frequency, which causes reduced
+		 * performance, as this driver uses the real max turbo
+		 * frequency as the max frequency. So correct this frequency
+		 * in the _PSS table to the max turbo frequency based on the
+		 * turbo ratio. Also convert to MHz, as _PSS freq is in MHz.
+		 */
+		cpu->acpi_perf_data.states[0].core_frequency =
+						turbo_pss_ctl * 100;
+	}
+
+	pr_debug("intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x\n",
+		 cpu->pstate.min_pstate, cpu->pstate.max_pstate,
+		 cpu->pstate.turbo_pstate);
+	pr_debug("intel_pstate: policy max_freq = %d KHz min_freq = %d KHz\n",
+		 policy->cpuinfo.max_freq, policy->cpuinfo.min_freq);
+
+	return 0;
+}
+
+static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+	struct cpudata *cpu;
+
+	if (!no_acpi_perf)
+		return 0;
+
+	cpu = all_cpu_data[policy->cpu];
+	acpi_processor_unregister_performance(policy->cpu);
+	return 0;
+}
+
+#else
+static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
+{
+	return 0;
+}
+
+static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+	return 0;
+}
+#endif
+
 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 			     int deadband, int integral) {
 	pid->setpoint = setpoint;
@@ -591,7 +751,7 @@ static int core_get_min_pstate(void)
 	return (value >> 40) & 0xFF;
 }

-static int core_get_max_pstate(void)
+static int core_get_max_pstate_physical(void)
 {
 	u64 value;

@@ -599,6 +759,46 @@ static int core_get_max_pstate(void)
 	return (value >> 8) & 0xFF;
 }

+static int core_get_max_pstate(void)
+{
+	u64 tar;
+	u64 plat_info;
+	int max_pstate;
+	int err;
+
+	rdmsrl(MSR_PLATFORM_INFO, plat_info);
+	max_pstate = (plat_info >> 8) & 0xFF;
+
+	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
+	if (!err) {
+		/* Do some sanity checking for safety */
+		if (plat_info & 0x600000000) {
+			u64 tdp_ctrl;
+			u64 tdp_ratio;
+			int tdp_msr;
+
+			err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+			if (err)
+				goto skip_tar;
+
+			tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
+			err = rdmsrl_safe(tdp_msr, &tdp_ratio);
+			if (err)
+				goto skip_tar;
+
+			if (tdp_ratio - 1 == tar) {
+				max_pstate = tar;
+				pr_debug("max_pstate=TAC %x\n", max_pstate);
+			} else {
+				goto skip_tar;
+			}
+		}
+	}
+
+skip_tar:
+	return max_pstate;
+}
+
 static int core_get_turbo_pstate(void)
 {
 	u64 value;
@@ -652,6 +852,7 @@ static struct cpu_defaults core_params = {
 	},
 	.funcs = {
 		.get_max = core_get_max_pstate,
+		.get_max_physical = core_get_max_pstate_physical,
 		.get_min = core_get_min_pstate,
 		.get_turbo = core_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
@@ -670,6 +871,7 @@ static struct cpu_defaults byt_params = {
 	},
 	.funcs = {
 		.get_max = byt_get_max_pstate,
+		.get_max_physical = byt_get_max_pstate,
 		.get_min = byt_get_min_pstate,
 		.get_turbo = byt_get_turbo_pstate,
 		.set = byt_set_pstate,
@@ -689,6 +891,7 @@ static struct cpu_defaults knl_params = {
 	},
 	.funcs = {
 		.get_max = core_get_max_pstate,
+		.get_max_physical = core_get_max_pstate_physical,
 		.get_min = core_get_min_pstate,
 		.get_turbo = knl_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
@@ -710,12 +913,23 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	 * policy, or by cpu specific default values determined through
 	 * experimentation.
 	 */
-	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
-	*max = clamp_t(int, max_perf_adj,
-			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
+	if (limits.max_perf_ctl && limits.max_sysfs_pct >=
+						limits.max_policy_pct) {
+		*max = limits.max_perf_ctl;
+	} else {
+		max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf),
+					limits.max_perf));
+		*max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate,
+			       cpu->pstate.turbo_pstate);
+	}
-	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
-	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+	if (limits.min_perf_ctl) {
+		*min = limits.min_perf_ctl;
+	} else {
+		min_perf = fp_toint(mul_fp(int_tofp(max_perf),
+				    limits.min_perf));
+		*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+	}
 }

 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
@@ -743,6 +957,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
 	cpu->pstate.min_pstate = pstate_funcs.get_min();
 	cpu->pstate.max_pstate = pstate_funcs.get_max();
+	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
 	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 	cpu->pstate.scaling = pstate_funcs.get_scaling();

@@ -761,7 +976,8 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)

 	sample->freq = fp_toint(
 		mul_fp(int_tofp(
-			cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
+			cpu->pstate.max_pstate_physical *
+			cpu->pstate.scaling / 100),
 			core_pct));

 	sample->core_pct_busy = (int32_t)core_pct;
@@ -834,7 +1050,7 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 	 * specified pstate.
 	 */
 	core_busy = cpu->sample.core_pct_busy;
-	max_pstate = int_tofp(cpu->pstate.max_pstate);
+	max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
 	current_pstate = int_tofp(cpu->pstate.current_pstate);
 	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));

@@ -988,6 +1204,12 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)

 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
+#if IS_ENABLED(CONFIG_ACPI)
+	struct cpudata *cpu;
+	int i;
+#endif
+	pr_debug("intel_pstate: %s max %u policy->max %u\n", __func__,
+		 policy->cpuinfo.max_freq, policy->max);
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;

@@ -1000,6 +1222,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		limits.max_perf_pct = 100;
 		limits.max_perf = int_tofp(1);
 		limits.no_turbo = 0;
+		limits.max_perf_ctl = 0;
+		limits.min_perf_ctl = 0;
 		return 0;
 	}

@@ -1020,6 +1244,23 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
 	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

+#if IS_ENABLED(CONFIG_ACPI)
+	cpu = all_cpu_data[policy->cpu];
+	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
+		int control;
+
+		control = convert_to_native_pstate_format(cpu, i);
+		if (control * cpu->pstate.scaling == policy->max)
+			limits.max_perf_ctl = control;
+		if (control * cpu->pstate.scaling == policy->min)
+			limits.min_perf_ctl = control;
+	}
+
+	pr_debug("intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x]\n",
+		 policy->cpuinfo.max_freq, policy->max, limits.min_perf_ctl,
+		 limits.max_perf_ctl);
+#endif
+
 	if (hwp_active)
 		intel_pstate_hwp_set();

@@ -1074,18 +1315,30 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->cpuinfo.max_freq =
 		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	if (!no_acpi_perf)
+		intel_pstate_init_perf_limits(policy);
+	/*
+	 * If there is no ACPI perf data, or an error occurred, ignore it and
+	 * use the limits calculated by intel_pstate, so this is not a fatal
+	 * error.
+	 */
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 	cpumask_set_cpu(policy->cpu, policy->cpus);

 	return 0;
 }

+{
+	return intel_pstate_exit_perf_limits(policy);
+}
+
 static struct cpufreq_driver intel_pstate_driver = {
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.verify		= intel_pstate_verify_policy,
 	.setpolicy	= intel_pstate_set_policy,
 	.get		= intel_pstate_get,
 	.init		= intel_pstate_cpu_init,
+	.exit		= intel_pstate_cpu_exit,
 	.stop_cpu	= intel_pstate_stop_cpu,
 	.name		= "intel_pstate",
 };
@@ -1118,6 +1371,7 @@ static void copy_pid_params(struct pstate_adjust_policy *policy)
 static void copy_cpu_funcs(struct pstate_funcs *funcs)
 {
 	pstate_funcs.get_max   = funcs->get_max;
+	pstate_funcs.get_max_physical = funcs->get_max_physical;
 	pstate_funcs.get_min   = funcs->get_min;
 	pstate_funcs.get_turbo = funcs->get_turbo;
 	pstate_funcs.get_scaling = funcs->get_scaling;
@@ -1126,7 +1380,6 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
 }

 #if IS_ENABLED(CONFIG_ACPI)
-#include <acpi/processor.h>

 static bool intel_pstate_no_acpi_pss(void)
 {
@@ -1318,6 +1571,9 @@ static int __init intel_pstate_setup(char *str)
 		force_load = 1;
 	if (!strcmp(str, "hwp_only"))
 		hwp_only = 1;
+	if (!strcmp(str, "no_acpi"))
+		no_acpi_perf = 1;
+
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);

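To make the _PSS conversion above concrete, here is a worked example as a standalone program (a sketch: the 100000 kHz-per-ratio scaling is the Core value returned by core_get_scaling(), assumed here):

	#include <stdio.h>
	#include <stdint.h>

	/*
	 * As in convert_to_native_pstate_format(): a _PSS control value uses
	 * the PERF_CTL layout, with the target ratio in bits 15:8.
	 */
	static int pss_control_to_pstate(uint32_t control)
	{
		return control >> 8;	/* 0x1700 -> 0x17 */
	}

	int main(void)
	{
		uint32_t control = 0x1700;
		int pstate = pss_control_to_pstate(control);

		/* 0x17 = 23; 23 * 100000 kHz = 2.3 GHz */
		printf("control 0x%x -> pstate 0x%x -> %d kHz\n",
		       control, pstate, pstate * 100000);
		return 0;
	}
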
+ 8 - 2
drivers/cpufreq/powernv-cpufreq.c

@@ -327,8 +327,14 @@ static void powernv_cpufreq_throttle_check(void *data)
 		if (chips[i].throttled)
 			goto next;
 		chips[i].throttled = true;
-		pr_info("CPU %d on Chip %u has Pmax reduced to %d\n", cpu,
-			chips[i].id, pmsr_pmax);
+		if (pmsr_pmax < powernv_pstate_info.nominal)
+			pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n",
+				cpu, chips[i].id, pmsr_pmax,
+				powernv_pstate_info.nominal);
+		else
+			pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n",
+				cpu, chips[i].id, pmsr_pmax,
+				powernv_pstate_info.max);
 	} else if (chips[i].throttled) {
 		chips[i].throttled = false;
 		pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu,

+ 0 - 2
drivers/cpufreq/tegra20-cpufreq.c

@@ -175,9 +175,7 @@ static struct cpufreq_driver tegra_cpufreq_driver = {
 	.exit			= tegra_cpu_exit,
 	.name			= "tegra",
 	.attr			= cpufreq_generic_attr,
-#ifdef CONFIG_PM
 	.suspend		= cpufreq_generic_suspend,
-#endif
 };

 static int __init tegra_cpufreq_init(void)