Эх сурвалжийг харах

Merge back earlier cpufreq material for v4.5.

Rafael J. Wysocki 9 жил өмнө
parent
commit
9ad55cd9e6

+ 2 - 2
Documentation/cpu-freq/pcc-cpufreq.txt

@@ -159,8 +159,8 @@ to be strictly associated with a P-state.
 
 
 2.2 cpuinfo_transition_latency:
 2.2 cpuinfo_transition_latency:
 -------------------------------
 -------------------------------
-The cpuinfo_transition_latency field is 0. The PCC specification does
-not include a field to expose this value currently.
+The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification
+does not include a field to expose this value currently.
 
 
 2.3 cpuinfo_cur_freq:
 2.3 cpuinfo_cur_freq:
 ---------------------
 ---------------------

+ 17 - 0
Documentation/devicetree/bindings/arm/cpus.txt

@@ -242,6 +242,23 @@ nodes to be present and contain the properties described below.
 		Definition: Specifies the syscon node controlling the cpu core
 		Definition: Specifies the syscon node controlling the cpu core
 			    power domains.
 			    power domains.
 
 
+	- dynamic-power-coefficient
+		Usage: optional
+		Value type: <prop-encoded-array>
+		Definition: A u32 value that represents the running time dynamic
+			    power coefficient in units of mW/MHz/uVolt^2. The
+			    coefficient can either be calculated from power
+			    measurements or derived by analysis.
+
+			    The dynamic power consumption of the CPU  is
+			    proportional to the square of the Voltage (V) and
+			    the clock frequency (f). The coefficient is used to
+			    calculate the dynamic power as below -
+
+			    Pdyn = dynamic-power-coefficient * V^2 * f
+
+			    where voltage is in uV, frequency is in MHz.
+
 Example 1 (dual-cluster big.LITTLE system 32-bit):
 Example 1 (dual-cluster big.LITTLE system 32-bit):
 
 
 	cpus {
 	cpus {

+ 91 - 0
Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt

@@ -0,0 +1,91 @@
+Binding for ST's CPUFreq driver
+===============================
+
+ST's CPUFreq driver attempts to read 'process' and 'version' attributes
+from the SoC, then supplies the OPP framework with 'prop' and 'supported
+hardware' information respectively.  The framework is then able to read
+the DT and operate in the usual way.
+
+For more information about the expected DT format [See: ../opp/opp.txt].
+
+Frequency Scaling only
+----------------------
+
+No vendor specific driver required for this.
+
+Located in CPU's node:
+
+- operating-points		: [See: ../power/opp.txt]
+
+Example [safe]
+--------------
+
+cpus {
+	cpu@0 {
+				 /* kHz     uV   */
+		operating-points = <1500000 0
+				    1200000 0
+				    800000  0
+				    500000  0>;
+	};
+};
+
+Dynamic Voltage and Frequency Scaling (DVFS)
+--------------------------------------------
+
+This requires the ST CPUFreq driver to supply 'process' and 'version' info.
+
+Located in CPU's node:
+
+- operating-points-v2		: [See ../power/opp.txt]
+
+Example [unsafe]
+----------------
+
+cpus {
+	cpu@0 {
+		operating-points-v2	= <&cpu0_opp_table>;
+	};
+};
+
+cpu0_opp_table: opp_table {
+	compatible = "operating-points-v2";
+
+	/* ############################################################### */
+	/* # WARNING: Do not attempt to copy/replicate these nodes,      # */
+	/* #          they are only to be supplied by the bootloader !!! # */
+	/* ############################################################### */
+	opp0 {
+		/*			   Major       Minor       Substrate */
+		/*			   2           all         all       */
+		opp-supported-hw	= <0x00000004  0xffffffff  0xffffffff>;
+		opp-hz			= /bits/ 64 <1500000000>;
+		clock-latency-ns	= <10000000>;
+
+		opp-microvolt-pcode0	= <1200000>;
+		opp-microvolt-pcode1	= <1200000>;
+		opp-microvolt-pcode2	= <1200000>;
+		opp-microvolt-pcode3	= <1200000>;
+		opp-microvolt-pcode4	= <1170000>;
+		opp-microvolt-pcode5	= <1140000>;
+		opp-microvolt-pcode6	= <1100000>;
+		opp-microvolt-pcode7	= <1070000>;
+	};
+
+	opp1 {
+		/*			   Major       Minor       Substrate */
+		/*			   all         all         all       */
+		opp-supported-hw	= <0xffffffff  0xffffffff  0xffffffff>;
+		opp-hz			= /bits/ 64 <1200000000>;
+		clock-latency-ns	= <10000000>;
+
+		opp-microvolt-pcode0	= <1110000>;
+		opp-microvolt-pcode1	= <1150000>;
+		opp-microvolt-pcode2	= <1100000>;
+		opp-microvolt-pcode3	= <1080000>;
+		opp-microvolt-pcode4	= <1040000>;
+		opp-microvolt-pcode5	= <1020000>;
+		opp-microvolt-pcode6	= <980000>;
+		opp-microvolt-pcode7	= <930000>;
+	};
+};

+ 93 - 39
Documentation/devicetree/bindings/opp/opp.txt

@@ -45,21 +45,10 @@ Devices supporting OPPs must set their "operating-points-v2" property with
 phandle to a OPP table in their DT node. The OPP core will use this phandle to
 phandle to a OPP table in their DT node. The OPP core will use this phandle to
 find the operating points for the device.
 find the operating points for the device.
 
 
-Devices may want to choose OPP tables at runtime and so can provide a list of
-phandles here. But only *one* of them should be chosen at runtime. This must be
-accompanied by a corresponding "operating-points-names" property, to uniquely
-identify the OPP tables.
-
 If required, this can be extended for SoC vendor specfic bindings. Such bindings
 If required, this can be extended for SoC vendor specfic bindings. Such bindings
 should be documented as Documentation/devicetree/bindings/power/<vendor>-opp.txt
 should be documented as Documentation/devicetree/bindings/power/<vendor>-opp.txt
 and should have a compatible description like: "operating-points-v2-<vendor>".
 and should have a compatible description like: "operating-points-v2-<vendor>".
 
 
-Optional properties:
-- operating-points-names: Names of OPP tables (required if multiple OPP
-  tables are present), to uniquely identify them. The same list must be present
-  for all the CPUs which are sharing clock/voltage rails and hence the OPP
-  tables.
-
 * OPP Table Node
 * OPP Table Node
 
 
 This describes the OPPs belonging to a device. This node can have following
 This describes the OPPs belonging to a device. This node can have following
@@ -100,6 +89,14 @@ Optional properties:
   Entries for multiple regulators must be present in the same order as
   Entries for multiple regulators must be present in the same order as
   regulators are specified in device's DT node.
   regulators are specified in device's DT node.
 
 
+- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to
+  the above opp-microvolt property, but allows multiple voltage ranges to be
+  provided for the same OPP. At runtime, the platform can pick a <name> and
+  matching opp-microvolt-<name> property will be enabled for all OPPs. If the
+  platform doesn't pick a specific <name> or the <name> doesn't match with any
+  opp-microvolt-<name> properties, then opp-microvolt property shall be used, if
+  present.
+
 - opp-microamp: The maximum current drawn by the device in microamperes
 - opp-microamp: The maximum current drawn by the device in microamperes
   considering system specific parameters (such as transients, process, aging,
   considering system specific parameters (such as transients, process, aging,
   maximum operating temperature range etc.) as necessary. This may be used to
   maximum operating temperature range etc.) as necessary. This may be used to
@@ -112,6 +109,9 @@ Optional properties:
   for few regulators, then this should be marked as zero for them. If it isn't
   for few regulators, then this should be marked as zero for them. If it isn't
   required for any regulator, then this property need not be present.
   required for any regulator, then this property need not be present.
 
 
+- opp-microamp-<name>: Named opp-microamp property. Similar to
+  opp-microvolt-<name> property, but for microamp instead.
+
 - clock-latency-ns: Specifies the maximum possible transition latency (in
 - clock-latency-ns: Specifies the maximum possible transition latency (in
   nanoseconds) for switching to this OPP from any other OPP.
   nanoseconds) for switching to this OPP from any other OPP.
 
 
@@ -123,6 +123,26 @@ Optional properties:
 - opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in
 - opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in
   the table should have this.
   the table should have this.
 
 
+- opp-supported-hw: This enables us to select only a subset of OPPs from the
+  larger OPP table, based on what version of the hardware we are running on. We
+  still can't have multiple nodes with the same opp-hz value in OPP table.
+
+  It's an user defined array containing a hierarchy of hardware version numbers,
+  supported by the OPP. For example: a platform with hierarchy of three levels
+  of versions (A, B and C), this field should be like <X Y Z>, where X
+  corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z
+  corresponds to version hierarchy C.
+
+  Each level of hierarchy is represented by a 32 bit value, and so there can be
+  only 32 different supported version per hierarchy. i.e. 1 bit per version. A
+  value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy
+  level. And a value of 0x00000000 will disable the OPP completely, and so we
+  never want that to happen.
+
+  If 32 values aren't sufficient for a version hierarchy, than that version
+  hierarchy can be contained in multiple 32 bit values. i.e. <X Y Z1 Z2> in the
+  above example, Z1 & Z2 refer to the version hierarchy Z.
+
 - status: Marks the node enabled/disabled.
 - status: Marks the node enabled/disabled.
 
 
 Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
 Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
@@ -157,20 +177,20 @@ Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
 		compatible = "operating-points-v2";
 		compatible = "operating-points-v2";
 		opp-shared;
 		opp-shared;
 
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>;
 			opp-microvolt = <970000 975000 985000>;
 			opp-microamp = <70000>;
 			opp-microamp = <70000>;
 			clock-latency-ns = <300000>;
 			clock-latency-ns = <300000>;
 			opp-suspend;
 			opp-suspend;
 		};
 		};
-		opp01 {
+		opp@1100000000 {
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-microvolt = <980000 1000000 1010000>;
 			opp-microvolt = <980000 1000000 1010000>;
 			opp-microamp = <80000>;
 			opp-microamp = <80000>;
 			clock-latency-ns = <310000>;
 			clock-latency-ns = <310000>;
 		};
 		};
-		opp02 {
+		opp@1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt = <1025000>;
 			opp-microvolt = <1025000>;
 			clock-latency-ns = <290000>;
 			clock-latency-ns = <290000>;
@@ -236,20 +256,20 @@ independently.
 		 * independently.
 		 * independently.
 		 */
 		 */
 
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>;
 			opp-microvolt = <970000 975000 985000>;
 			opp-microamp = <70000>;
 			opp-microamp = <70000>;
 			clock-latency-ns = <300000>;
 			clock-latency-ns = <300000>;
 			opp-suspend;
 			opp-suspend;
 		};
 		};
-		opp01 {
+		opp@1100000000 {
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-microvolt = <980000 1000000 1010000>;
 			opp-microvolt = <980000 1000000 1010000>;
 			opp-microamp = <80000>;
 			opp-microamp = <80000>;
 			clock-latency-ns = <310000>;
 			clock-latency-ns = <310000>;
 		};
 		};
-		opp02 {
+		opp@1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt = <1025000>;
 			opp-microvolt = <1025000>;
 			opp-microamp = <90000;
 			opp-microamp = <90000;
@@ -312,20 +332,20 @@ DVFS state together.
 		compatible = "operating-points-v2";
 		compatible = "operating-points-v2";
 		opp-shared;
 		opp-shared;
 
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>;
 			opp-microvolt = <970000 975000 985000>;
 			opp-microamp = <70000>;
 			opp-microamp = <70000>;
 			clock-latency-ns = <300000>;
 			clock-latency-ns = <300000>;
 			opp-suspend;
 			opp-suspend;
 		};
 		};
-		opp01 {
+		opp@1100000000 {
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-microvolt = <980000 1000000 1010000>;
 			opp-microvolt = <980000 1000000 1010000>;
 			opp-microamp = <80000>;
 			opp-microamp = <80000>;
 			clock-latency-ns = <310000>;
 			clock-latency-ns = <310000>;
 		};
 		};
-		opp02 {
+		opp@1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt = <1025000>;
 			opp-microvolt = <1025000>;
 			opp-microamp = <90000>;
 			opp-microamp = <90000>;
@@ -338,20 +358,20 @@ DVFS state together.
 		compatible = "operating-points-v2";
 		compatible = "operating-points-v2";
 		opp-shared;
 		opp-shared;
 
 
-		opp10 {
+		opp@1300000000 {
 			opp-hz = /bits/ 64 <1300000000>;
 			opp-hz = /bits/ 64 <1300000000>;
 			opp-microvolt = <1045000 1050000 1055000>;
 			opp-microvolt = <1045000 1050000 1055000>;
 			opp-microamp = <95000>;
 			opp-microamp = <95000>;
 			clock-latency-ns = <400000>;
 			clock-latency-ns = <400000>;
 			opp-suspend;
 			opp-suspend;
 		};
 		};
-		opp11 {
+		opp@1400000000 {
 			opp-hz = /bits/ 64 <1400000000>;
 			opp-hz = /bits/ 64 <1400000000>;
 			opp-microvolt = <1075000>;
 			opp-microvolt = <1075000>;
 			opp-microamp = <100000>;
 			opp-microamp = <100000>;
 			clock-latency-ns = <400000>;
 			clock-latency-ns = <400000>;
 		};
 		};
-		opp12 {
+		opp@1500000000 {
 			opp-hz = /bits/ 64 <1500000000>;
 			opp-hz = /bits/ 64 <1500000000>;
 			opp-microvolt = <1010000 1100000 1110000>;
 			opp-microvolt = <1010000 1100000 1110000>;
 			opp-microamp = <95000>;
 			opp-microamp = <95000>;
@@ -378,7 +398,7 @@ Example 4: Handling multiple regulators
 		compatible = "operating-points-v2";
 		compatible = "operating-points-v2";
 		opp-shared;
 		opp-shared;
 
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000>, /* Supply 0 */
 			opp-microvolt = <970000>, /* Supply 0 */
 					<960000>, /* Supply 1 */
 					<960000>, /* Supply 1 */
@@ -391,7 +411,7 @@ Example 4: Handling multiple regulators
 
 
 		/* OR */
 		/* OR */
 
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>, /* Supply 0 */
 			opp-microvolt = <970000 975000 985000>, /* Supply 0 */
 					<960000 965000 975000>, /* Supply 1 */
 					<960000 965000 975000>, /* Supply 1 */
@@ -404,7 +424,7 @@ Example 4: Handling multiple regulators
 
 
 		/* OR */
 		/* OR */
 
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>, /* Supply 0 */
 			opp-microvolt = <970000 975000 985000>, /* Supply 0 */
 					<960000 965000 975000>, /* Supply 1 */
 					<960000 965000 975000>, /* Supply 1 */
@@ -417,7 +437,8 @@ Example 4: Handling multiple regulators
 	};
 	};
 };
 };
 
 
-Example 5: Multiple OPP tables
+Example 5: opp-supported-hw
+(example: three level hierarchy of versions: cuts, substrate and process)
 
 
 / {
 / {
 	cpus {
 	cpus {
@@ -426,40 +447,73 @@ Example 5: Multiple OPP tables
 			...
 			...
 
 
 			cpu-supply = <&cpu_supply>
 			cpu-supply = <&cpu_supply>
-			operating-points-v2 = <&cpu0_opp_table_slow>, <&cpu0_opp_table_fast>;
-			operating-points-names = "slow", "fast";
+			operating-points-v2 = <&cpu0_opp_table_slow>;
 		};
 		};
 	};
 	};
 
 
-	cpu0_opp_table_slow: opp_table_slow {
+	opp_table {
 		compatible = "operating-points-v2";
 		compatible = "operating-points-v2";
 		status = "okay";
 		status = "okay";
 		opp-shared;
 		opp-shared;
 
 
-		opp00 {
+		opp@600000000 {
+			/*
+			 * Supports all substrate and process versions for 0xF
+			 * cuts, i.e. only first four cuts.
+			 */
+			opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF>
 			opp-hz = /bits/ 64 <600000000>;
 			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <900000 915000 925000>;
 			...
 			...
 		};
 		};
 
 
-		opp01 {
+		opp@800000000 {
+			/*
+			 * Supports:
+			 * - cuts: only one, 6th cut (represented by 6th bit).
+			 * - substrate: supports 16 different substrate versions
+			 * - process: supports 9 different process versions
+			 */
+			opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0>
 			opp-hz = /bits/ 64 <800000000>;
 			opp-hz = /bits/ 64 <800000000>;
+			opp-microvolt = <900000 915000 925000>;
 			...
 			...
 		};
 		};
 	};
 	};
+};
+
+Example 6: opp-microvolt-<name>, opp-microamp-<name>:
+(example: device with two possible microvolt ranges: slow and fast)
 
 
-	cpu0_opp_table_fast: opp_table_fast {
+/ {
+	cpus {
+		cpu@0 {
+			compatible = "arm,cortex-a7";
+			...
+
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+	};
+
+	cpu0_opp_table: opp_table0 {
 		compatible = "operating-points-v2";
 		compatible = "operating-points-v2";
-		status = "okay";
 		opp-shared;
 		opp-shared;
 
 
-		opp10 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-hz = /bits/ 64 <1000000000>;
-			...
+			opp-microvolt-slow = <900000 915000 925000>;
+			opp-microvolt-fast = <970000 975000 985000>;
+			opp-microamp-slow =  <70000>;
+			opp-microamp-fast =  <71000>;
 		};
 		};
 
 
-		opp11 {
-			opp-hz = /bits/ 64 <1100000000>;
-			...
+		opp@1200000000 {
+			opp-hz = /bits/ 64 <1200000000>;
+			opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */
+					      <910000 925000 935000>; /* Supply vcc1 */
+			opp-microvolt-fast = <970000 975000 985000>, /* Supply vcc0 */
+					     <960000 965000 975000>; /* Supply vcc1 */
+			opp-microamp =  <70000>; /* Will be used for both slow/fast */
 		};
 		};
 	};
 	};
 };
 };

+ 14 - 14
arch/arm/boot/dts/exynos4412.dtsi

@@ -64,73 +64,73 @@
 		compatible = "operating-points-v2";
 		compatible = "operating-points-v2";
 		opp-shared;
 		opp-shared;
 
 
-		opp00 {
+		opp@200000000 {
 			opp-hz = /bits/ 64 <200000000>;
 			opp-hz = /bits/ 64 <200000000>;
 			opp-microvolt = <900000>;
 			opp-microvolt = <900000>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp01 {
+		opp@300000000 {
 			opp-hz = /bits/ 64 <300000000>;
 			opp-hz = /bits/ 64 <300000000>;
 			opp-microvolt = <900000>;
 			opp-microvolt = <900000>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp02 {
+		opp@400000000 {
 			opp-hz = /bits/ 64 <400000000>;
 			opp-hz = /bits/ 64 <400000000>;
 			opp-microvolt = <925000>;
 			opp-microvolt = <925000>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp03 {
+		opp@500000000 {
 			opp-hz = /bits/ 64 <500000000>;
 			opp-hz = /bits/ 64 <500000000>;
 			opp-microvolt = <950000>;
 			opp-microvolt = <950000>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp04 {
+		opp@600000000 {
 			opp-hz = /bits/ 64 <600000000>;
 			opp-hz = /bits/ 64 <600000000>;
 			opp-microvolt = <975000>;
 			opp-microvolt = <975000>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp05 {
+		opp@700000000 {
 			opp-hz = /bits/ 64 <700000000>;
 			opp-hz = /bits/ 64 <700000000>;
 			opp-microvolt = <987500>;
 			opp-microvolt = <987500>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp06 {
+		opp@800000000 {
 			opp-hz = /bits/ 64 <800000000>;
 			opp-hz = /bits/ 64 <800000000>;
 			opp-microvolt = <1000000>;
 			opp-microvolt = <1000000>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 			opp-suspend;
 			opp-suspend;
 		};
 		};
-		opp07 {
+		opp@900000000 {
 			opp-hz = /bits/ 64 <900000000>;
 			opp-hz = /bits/ 64 <900000000>;
 			opp-microvolt = <1037500>;
 			opp-microvolt = <1037500>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp08 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <1087500>;
 			opp-microvolt = <1087500>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp09 {
+		opp@1100000000 {
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-microvolt = <1137500>;
 			opp-microvolt = <1137500>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp10 {
+		opp@1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt = <1187500>;
 			opp-microvolt = <1187500>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp11 {
+		opp@1300000000 {
 			opp-hz = /bits/ 64 <1300000000>;
 			opp-hz = /bits/ 64 <1300000000>;
 			opp-microvolt = <1250000>;
 			opp-microvolt = <1250000>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp12 {
+		opp@1400000000 {
 			opp-hz = /bits/ 64 <1400000000>;
 			opp-hz = /bits/ 64 <1400000000>;
 			opp-microvolt = <1287500>;
 			opp-microvolt = <1287500>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;
 		};
 		};
-		opp13 {
+		opp@1500000000 {
 			opp-hz = /bits/ 64 <1500000000>;
 			opp-hz = /bits/ 64 <1500000000>;
 			opp-microvolt = <1350000>;
 			opp-microvolt = <1350000>;
 			clock-latency-ns = <200000>;
 			clock-latency-ns = <200000>;

+ 1 - 0
drivers/base/power/opp/Makefile

@@ -1,2 +1,3 @@
 ccflags-$(CONFIG_DEBUG_DRIVER)	:= -DDEBUG
 ccflags-$(CONFIG_DEBUG_DRIVER)	:= -DDEBUG
 obj-y				+= core.o cpu.o
 obj-y				+= core.o cpu.o
+obj-$(CONFIG_DEBUG_FS)		+= debugfs.o

+ 317 - 17
drivers/base/power/opp/core.c

@@ -463,6 +463,7 @@ static void _kfree_list_dev_rcu(struct rcu_head *head)
 static void _remove_list_dev(struct device_list_opp *list_dev,
 static void _remove_list_dev(struct device_list_opp *list_dev,
 			     struct device_opp *dev_opp)
 			     struct device_opp *dev_opp)
 {
 {
+	opp_debug_unregister(list_dev, dev_opp);
 	list_del(&list_dev->node);
 	list_del(&list_dev->node);
 	call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head,
 	call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head,
 		  _kfree_list_dev_rcu);
 		  _kfree_list_dev_rcu);
@@ -472,6 +473,7 @@ struct device_list_opp *_add_list_dev(const struct device *dev,
 				      struct device_opp *dev_opp)
 				      struct device_opp *dev_opp)
 {
 {
 	struct device_list_opp *list_dev;
 	struct device_list_opp *list_dev;
+	int ret;
 
 
 	list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL);
 	list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL);
 	if (!list_dev)
 	if (!list_dev)
@@ -481,6 +483,12 @@ struct device_list_opp *_add_list_dev(const struct device *dev,
 	list_dev->dev = dev;
 	list_dev->dev = dev;
 	list_add_rcu(&list_dev->node, &dev_opp->dev_list);
 	list_add_rcu(&list_dev->node, &dev_opp->dev_list);
 
 
+	/* Create debugfs entries for the dev_opp */
+	ret = opp_debug_register(list_dev, dev_opp);
+	if (ret)
+		dev_err(dev, "%s: Failed to register opp debugfs (%d)\n",
+			__func__, ret);
+
 	return list_dev;
 	return list_dev;
 }
 }
 
 
@@ -551,6 +559,12 @@ static void _remove_device_opp(struct device_opp *dev_opp)
 	if (!list_empty(&dev_opp->opp_list))
 	if (!list_empty(&dev_opp->opp_list))
 		return;
 		return;
 
 
+	if (dev_opp->supported_hw)
+		return;
+
+	if (dev_opp->prop_name)
+		return;
+
 	list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp,
 	list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp,
 				    node);
 				    node);
 
 
@@ -596,6 +610,7 @@ static void _opp_remove(struct device_opp *dev_opp,
 	 */
 	 */
 	if (notify)
 	if (notify)
 		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp);
 		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp);
+	opp_debug_remove_one(opp);
 	list_del_rcu(&opp->node);
 	list_del_rcu(&opp->node);
 	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
 	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
 
 
@@ -673,6 +688,7 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 {
 {
 	struct dev_pm_opp *opp;
 	struct dev_pm_opp *opp;
 	struct list_head *head = &dev_opp->opp_list;
 	struct list_head *head = &dev_opp->opp_list;
+	int ret;
 
 
 	/*
 	/*
 	 * Insert new OPP in order of increasing frequency and discard if
 	 * Insert new OPP in order of increasing frequency and discard if
@@ -703,6 +719,11 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 	new_opp->dev_opp = dev_opp;
 	new_opp->dev_opp = dev_opp;
 	list_add_rcu(&new_opp->node, head);
 	list_add_rcu(&new_opp->node, head);
 
 
+	ret = opp_debug_create_one(new_opp, dev_opp);
+	if (ret)
+		dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n",
+			__func__, ret);
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -776,35 +797,48 @@ unlock:
 }
 }
 
 
 /* TODO: Support multiple regulators */
 /* TODO: Support multiple regulators */
-static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev)
+static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
+			      struct device_opp *dev_opp)
 {
 {
 	u32 microvolt[3] = {0};
 	u32 microvolt[3] = {0};
 	u32 val;
 	u32 val;
 	int count, ret;
 	int count, ret;
+	struct property *prop = NULL;
+	char name[NAME_MAX];
+
+	/* Search for "opp-microvolt-<name>" */
+	if (dev_opp->prop_name) {
+		sprintf(name, "opp-microvolt-%s", dev_opp->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (!prop) {
+		/* Search for "opp-microvolt" */
+		name[13] = '\0';
+		prop = of_find_property(opp->np, name, NULL);
 
 
-	/* Missing property isn't a problem, but an invalid entry is */
-	if (!of_find_property(opp->np, "opp-microvolt", NULL))
-		return 0;
+		/* Missing property isn't a problem, but an invalid entry is */
+		if (!prop)
+			return 0;
+	}
 
 
-	count = of_property_count_u32_elems(opp->np, "opp-microvolt");
+	count = of_property_count_u32_elems(opp->np, name);
 	if (count < 0) {
 	if (count < 0) {
-		dev_err(dev, "%s: Invalid opp-microvolt property (%d)\n",
-			__func__, count);
+		dev_err(dev, "%s: Invalid %s property (%d)\n",
+			__func__, name, count);
 		return count;
 		return count;
 	}
 	}
 
 
 	/* There can be one or three elements here */
 	/* There can be one or three elements here */
 	if (count != 1 && count != 3) {
 	if (count != 1 && count != 3) {
-		dev_err(dev, "%s: Invalid number of elements in opp-microvolt property (%d)\n",
-			__func__, count);
+		dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
+			__func__, name, count);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	ret = of_property_read_u32_array(opp->np, "opp-microvolt", microvolt,
-					 count);
+	ret = of_property_read_u32_array(opp->np, name, microvolt, count);
 	if (ret) {
 	if (ret) {
-		dev_err(dev, "%s: error parsing opp-microvolt: %d\n", __func__,
-			ret);
+		dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
@@ -812,12 +846,270 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev)
 	opp->u_volt_min = microvolt[1];
 	opp->u_volt_min = microvolt[1];
 	opp->u_volt_max = microvolt[2];
 	opp->u_volt_max = microvolt[2];
 
 
-	if (!of_property_read_u32(opp->np, "opp-microamp", &val))
+	/* Search for "opp-microamp-<name>" */
+	prop = NULL;
+	if (dev_opp->prop_name) {
+		sprintf(name, "opp-microamp-%s", dev_opp->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (!prop) {
+		/* Search for "opp-microamp" */
+		name[12] = '\0';
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (prop && !of_property_read_u32(opp->np, name, &val))
 		opp->u_amp = val;
 		opp->u_amp = val;
 
 
 	return 0;
 	return 0;
 }
 }
 
 
+/**
+ * dev_pm_opp_set_supported_hw() - Set supported platforms
+ * @dev: Device for which supported-hw has to be set.
+ * @versions: Array of hierarchy of versions to match.
+ * @count: Number of elements in the array.
+ *
+ * This is required only for the V2 bindings, and it enables a platform to
+ * specify the hierarchy of versions it supports. OPP layer will then enable
+ * OPPs, which are available for those versions, based on its 'opp-supported-hw'
+ * property.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
+				unsigned int count)
+{
+	struct device_opp *dev_opp;
+	int ret = 0;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _add_device_opp(dev);
+	if (!dev_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	/* Do we already have a version hierarchy associated with dev_opp? */
+	if (dev_opp->supported_hw) {
+		dev_err(dev, "%s: Already have supported hardware list\n",
+			__func__);
+		ret = -EBUSY;
+		goto err;
+	}
+
+	dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions),
+					GFP_KERNEL);
+	if (!dev_opp->supported_hw) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	dev_opp->supported_hw_count = count;
+	mutex_unlock(&dev_opp_list_lock);
+	return 0;
+
+err:
+	_remove_device_opp(dev_opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw);
+
+/**
+ * dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw
+ * @dev: Device for which supported-hw has to be set.
+ *
+ * This is required only for the V2 bindings, and is called for a matching
+ * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure
+ * will not be freed.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_put_supported_hw(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' first */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	if (!dev_opp->supported_hw) {
+		dev_err(dev, "%s: Doesn't have supported hardware list\n",
+			__func__);
+		goto unlock;
+	}
+
+	kfree(dev_opp->supported_hw);
+	dev_opp->supported_hw = NULL;
+	dev_opp->supported_hw_count = 0;
+
+	/* Try freeing device_opp if this was the last blocking resource */
+	_remove_device_opp(dev_opp);
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
+
+/**
+ * dev_pm_opp_set_prop_name() - Set prop-extn name
+ * @dev: Device for which the regulator has to be set.
+ * @name: name to postfix to properties.
+ *
+ * This is required only for the V2 bindings, and it enables a platform to
+ * specify the extn to be used for certain property names. The properties to
+ * which the extension will apply are opp-microvolt and opp-microamp. OPP core
+ * should postfix the property name with -<name> while looking for them.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
+{
+	struct device_opp *dev_opp;
+	int ret = 0;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _add_device_opp(dev);
+	if (!dev_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	/* Do we already have a prop-name associated with dev_opp? */
+	if (dev_opp->prop_name) {
+		dev_err(dev, "%s: Already have prop-name %s\n", __func__,
+			dev_opp->prop_name);
+		ret = -EBUSY;
+		goto err;
+	}
+
+	dev_opp->prop_name = kstrdup(name, GFP_KERNEL);
+	if (!dev_opp->prop_name) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	mutex_unlock(&dev_opp_list_lock);
+	return 0;
+
+err:
+	_remove_device_opp(dev_opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name);
+
+/**
+ * dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name
+ * @dev: Device for which the regulator has to be set.
+ *
+ * This is required only for the V2 bindings, and is called for a matching
+ * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure
+ * will not be freed.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_put_prop_name(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' first */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	if (!dev_opp->prop_name) {
+		dev_err(dev, "%s: Doesn't have a prop-name\n", __func__);
+		goto unlock;
+	}
+
+	kfree(dev_opp->prop_name);
+	dev_opp->prop_name = NULL;
+
+	/* Try freeing device_opp if this was the last blocking resource */
+	_remove_device_opp(dev_opp);
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
+
+static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
+			      struct device_node *np)
+{
+	unsigned int count = dev_opp->supported_hw_count;
+	u32 version;
+	int ret;
+
+	if (!dev_opp->supported_hw)
+		return true;
+
+	while (count--) {
+		ret = of_property_read_u32_index(np, "opp-supported-hw", count,
+						 &version);
+		if (ret) {
+			dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
+				 __func__, count, ret);
+			return false;
+		}
+
+		/* Both of these are bitwise masks of the versions */
+		if (!(version & dev_opp->supported_hw[count]))
+			return false;
+	}
+
+	return true;
+}
+
 /**
 /**
  * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
  * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
  * @dev:	device for which we do this operation
  * @dev:	device for which we do this operation
@@ -864,6 +1156,12 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 		goto free_opp;
 		goto free_opp;
 	}
 	}
 
 
+	/* Check if the OPP supports hardware's hierarchy of versions or not */
+	if (!_opp_is_supported(dev, dev_opp, np)) {
+		dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
+		goto free_opp;
+	}
+
 	/*
 	/*
 	 * Rate is defined as an unsigned long in clk API, and so casting
 	 * Rate is defined as an unsigned long in clk API, and so casting
 	 * explicitly to its type. Must be fixed once rate is 64 bit
 	 * explicitly to its type. Must be fixed once rate is 64 bit
@@ -879,7 +1177,7 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 	if (!of_property_read_u32(np, "clock-latency-ns", &val))
 	if (!of_property_read_u32(np, "clock-latency-ns", &val))
 		new_opp->clock_latency_ns = val;
 		new_opp->clock_latency_ns = val;
 
 
-	ret = opp_parse_supplies(new_opp, dev);
+	ret = opp_parse_supplies(new_opp, dev, dev_opp);
 	if (ret)
 	if (ret)
 		goto free_opp;
 		goto free_opp;
 
 
@@ -889,12 +1187,14 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 
 
 	/* OPP to select on device suspend */
 	/* OPP to select on device suspend */
 	if (of_property_read_bool(np, "opp-suspend")) {
 	if (of_property_read_bool(np, "opp-suspend")) {
-		if (dev_opp->suspend_opp)
+		if (dev_opp->suspend_opp) {
 			dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
 			dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
 				 __func__, dev_opp->suspend_opp->rate,
 				 __func__, dev_opp->suspend_opp->rate,
 				 new_opp->rate);
 				 new_opp->rate);
-		else
+		} else {
+			new_opp->suspend = true;
 			dev_opp->suspend_opp = new_opp;
 			dev_opp->suspend_opp = new_opp;
+		}
 	}
 	}
 
 
 	if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max)
 	if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max)

+ 219 - 0
drivers/base/power/opp/debugfs.c

@@ -0,0 +1,219 @@
+/*
+ * Generic OPP debugfs interface
+ *
+ * Copyright (C) 2015-2016 Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/limits.h>
+
+#include "opp.h"
+
+static struct dentry *rootdir;
+
+static void opp_set_dev_name(const struct device *dev, char *name)
+{
+	if (dev->parent)
+		snprintf(name, NAME_MAX, "%s-%s", dev_name(dev->parent),
+			 dev_name(dev));
+	else
+		snprintf(name, NAME_MAX, "%s", dev_name(dev));
+}
+
+void opp_debug_remove_one(struct dev_pm_opp *opp)
+{
+	debugfs_remove_recursive(opp->dentry);
+}
+
+int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp)
+{
+	struct dentry *pdentry = dev_opp->dentry;
+	struct dentry *d;
+	char name[25];	/* 20 chars for 64 bit value + 5 (opp:\0) */
+
+	/* Rate is unique to each OPP, use it to give opp-name */
+	snprintf(name, sizeof(name), "opp:%lu", opp->rate);
+
+	/* Create per-opp directory */
+	d = debugfs_create_dir(name, pdentry);
+	if (!d)
+		return -ENOMEM;
+
+	if (!debugfs_create_bool("available", S_IRUGO, d, &opp->available))
+		return -ENOMEM;
+
+	if (!debugfs_create_bool("dynamic", S_IRUGO, d, &opp->dynamic))
+		return -ENOMEM;
+
+	if (!debugfs_create_bool("turbo", S_IRUGO, d, &opp->turbo))
+		return -ENOMEM;
+
+	if (!debugfs_create_bool("suspend", S_IRUGO, d, &opp->suspend))
+		return -ENOMEM;
+
+	if (!debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate))
+		return -ENOMEM;
+
+	if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d, &opp->u_volt))
+		return -ENOMEM;
+
+	if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d, &opp->u_volt_min))
+		return -ENOMEM;
+
+	if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d, &opp->u_volt_max))
+		return -ENOMEM;
+
+	if (!debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->u_amp))
+		return -ENOMEM;
+
+	if (!debugfs_create_ulong("clock_latency_ns", S_IRUGO, d,
+				  &opp->clock_latency_ns))
+		return -ENOMEM;
+
+	opp->dentry = d;
+	return 0;
+}
+
+static int device_opp_debug_create_dir(struct device_list_opp *list_dev,
+				       struct device_opp *dev_opp)
+{
+	const struct device *dev = list_dev->dev;
+	struct dentry *d;
+
+	opp_set_dev_name(dev, dev_opp->dentry_name);
+
+	/* Create device specific directory */
+	d = debugfs_create_dir(dev_opp->dentry_name, rootdir);
+	if (!d) {
+		dev_err(dev, "%s: Failed to create debugfs dir\n", __func__);
+		return -ENOMEM;
+	}
+
+	list_dev->dentry = d;
+	dev_opp->dentry = d;
+
+	return 0;
+}
+
+static int device_opp_debug_create_link(struct device_list_opp *list_dev,
+					struct device_opp *dev_opp)
+{
+	const struct device *dev = list_dev->dev;
+	char name[NAME_MAX];
+	struct dentry *d;
+
+	opp_set_dev_name(list_dev->dev, name);
+
+	/* Create device specific directory link */
+	d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name);
+	if (!d) {
+		dev_err(dev, "%s: Failed to create link\n", __func__);
+		return -ENOMEM;
+	}
+
+	list_dev->dentry = d;
+
+	return 0;
+}
+
+/**
+ * opp_debug_register - add a device opp node to the debugfs 'opp' directory
+ * @list_dev: list-dev pointer for device
+ * @dev_opp: the device-opp being added
+ *
+ * Dynamically adds device specific directory in debugfs 'opp' directory. If the
+ * device-opp is shared with other devices, then links will be created for all
+ * devices except the first.
+ *
+ * Return: 0 on success, otherwise negative error.
+ */
+int opp_debug_register(struct device_list_opp *list_dev,
+		       struct device_opp *dev_opp)
+{
+	if (!rootdir) {
+		pr_debug("%s: Uninitialized rootdir\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dev_opp->dentry)
+		return device_opp_debug_create_link(list_dev, dev_opp);
+
+	return device_opp_debug_create_dir(list_dev, dev_opp);
+}
+
+static void opp_migrate_dentry(struct device_list_opp *list_dev,
+			       struct device_opp *dev_opp)
+{
+	struct device_list_opp *new_dev;
+	const struct device *dev;
+	struct dentry *dentry;
+
+	/* Look for next list-dev */
+	list_for_each_entry(new_dev, &dev_opp->dev_list, node)
+		if (new_dev != list_dev)
+			break;
+
+	/* new_dev is guaranteed to be valid here */
+	dev = new_dev->dev;
+	debugfs_remove_recursive(new_dev->dentry);
+
+	opp_set_dev_name(dev, dev_opp->dentry_name);
+
+	dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir,
+				dev_opp->dentry_name);
+	if (!dentry) {
+		dev_err(dev, "%s: Failed to rename link from: %s to %s\n",
+			__func__, dev_name(list_dev->dev), dev_name(dev));
+		return;
+	}
+
+	new_dev->dentry = dentry;
+	dev_opp->dentry = dentry;
+}
+
+/**
+ * opp_debug_unregister - remove a device opp node from debugfs opp directory
+ * @list_dev: list-dev pointer for device
+ * @dev_opp: the device-opp being removed
+ *
+ * Dynamically removes device specific directory from debugfs 'opp' directory.
+ */
+void opp_debug_unregister(struct device_list_opp *list_dev,
+			  struct device_opp *dev_opp)
+{
+	if (list_dev->dentry == dev_opp->dentry) {
+		/* Move the real dentry object under another device */
+		if (!list_is_singular(&dev_opp->dev_list)) {
+			opp_migrate_dentry(list_dev, dev_opp);
+			goto out;
+		}
+		dev_opp->dentry = NULL;
+	}
+
+	debugfs_remove_recursive(list_dev->dentry);
+
+out:
+	list_dev->dentry = NULL;
+}
+
+static int __init opp_debug_init(void)
+{
+	/* Create /sys/kernel/debug/opp directory */
+	rootdir = debugfs_create_dir("opp", NULL);
+	if (!rootdir) {
+		pr_err("%s: Failed to create root directory\n", __func__);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+core_initcall(opp_debug_init);

+ 52 - 1
drivers/base/power/opp/opp.h

@@ -17,6 +17,7 @@
 #include <linux/device.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/list.h>
+#include <linux/limits.h>
 #include <linux/pm_opp.h>
 #include <linux/pm_opp.h>
 #include <linux/rculist.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/rcupdate.h>
@@ -50,9 +51,10 @@ extern struct mutex dev_opp_list_lock;
  *		are protected by the dev_opp_list_lock for integrity.
  *		are protected by the dev_opp_list_lock for integrity.
  *		IMPORTANT: the opp nodes should be maintained in increasing
  *		IMPORTANT: the opp nodes should be maintained in increasing
  *		order.
  *		order.
- * @dynamic:	not-created from static DT entries.
  * @available:	true/false - marks if this OPP as available or not
  * @available:	true/false - marks if this OPP as available or not
+ * @dynamic:	not-created from static DT entries.
  * @turbo:	true if turbo (boost) OPP
  * @turbo:	true if turbo (boost) OPP
+ * @suspend:	true if suspend OPP
  * @rate:	Frequency in hertz
  * @rate:	Frequency in hertz
  * @u_volt:	Target voltage in microvolts corresponding to this OPP
  * @u_volt:	Target voltage in microvolts corresponding to this OPP
  * @u_volt_min:	Minimum voltage in microvolts corresponding to this OPP
  * @u_volt_min:	Minimum voltage in microvolts corresponding to this OPP
@@ -63,6 +65,7 @@ extern struct mutex dev_opp_list_lock;
  * @dev_opp:	points back to the device_opp struct this opp belongs to
  * @dev_opp:	points back to the device_opp struct this opp belongs to
  * @rcu_head:	RCU callback head used for deferred freeing
  * @rcu_head:	RCU callback head used for deferred freeing
  * @np:		OPP's device node.
  * @np:		OPP's device node.
+ * @dentry:	debugfs dentry pointer (per opp)
  *
  *
  * This structure stores the OPP information for a given device.
  * This structure stores the OPP information for a given device.
  */
  */
@@ -72,6 +75,7 @@ struct dev_pm_opp {
 	bool available;
 	bool available;
 	bool dynamic;
 	bool dynamic;
 	bool turbo;
 	bool turbo;
+	bool suspend;
 	unsigned long rate;
 	unsigned long rate;
 
 
 	unsigned long u_volt;
 	unsigned long u_volt;
@@ -84,6 +88,10 @@ struct dev_pm_opp {
 	struct rcu_head rcu_head;
 	struct rcu_head rcu_head;
 
 
 	struct device_node *np;
 	struct device_node *np;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dentry;
+#endif
 };
 };
 
 
 /**
 /**
@@ -91,6 +99,7 @@ struct dev_pm_opp {
  * @node:	list node
  * @node:	list node
  * @dev:	device to which the struct object belongs
  * @dev:	device to which the struct object belongs
  * @rcu_head:	RCU callback head used for deferred freeing
  * @rcu_head:	RCU callback head used for deferred freeing
+ * @dentry:	debugfs dentry pointer (per device)
  *
  *
  * This is an internal data structure maintaining the list of devices that are
  * This is an internal data structure maintaining the list of devices that are
  * managed by 'struct device_opp'.
  * managed by 'struct device_opp'.
@@ -99,6 +108,10 @@ struct device_list_opp {
 	struct list_head node;
 	struct list_head node;
 	const struct device *dev;
 	const struct device *dev;
 	struct rcu_head rcu_head;
 	struct rcu_head rcu_head;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dentry;
+#endif
 };
 };
 
 
 /**
 /**
@@ -113,7 +126,14 @@ struct device_list_opp {
  * @dev_list:	list of devices that share these OPPs
  * @dev_list:	list of devices that share these OPPs
  * @opp_list:	list of opps
  * @opp_list:	list of opps
  * @np:		struct device_node pointer for opp's DT node.
  * @np:		struct device_node pointer for opp's DT node.
+ * @clock_latency_ns_max: Max clock latency in nanoseconds.
  * @shared_opp: OPP is shared between multiple devices.
  * @shared_opp: OPP is shared between multiple devices.
+ * @suspend_opp: Pointer to OPP to be used during device suspend.
+ * @supported_hw: Array of version number to support.
+ * @supported_hw_count: Number of elements in supported_hw array.
+ * @prop_name: A name to postfix to many DT properties, while parsing them.
+ * @dentry:	debugfs dentry pointer of the real device directory (not links).
+ * @dentry_name: Name of the real dentry.
  *
  *
  * This is an internal data structure maintaining the link to opps attached to
  * This is an internal data structure maintaining the link to opps attached to
  * a device. This structure is not meant to be shared to users as it is
  * a device. This structure is not meant to be shared to users as it is
@@ -135,6 +155,15 @@ struct device_opp {
 	unsigned long clock_latency_ns_max;
 	unsigned long clock_latency_ns_max;
 	bool shared_opp;
 	bool shared_opp;
 	struct dev_pm_opp *suspend_opp;
 	struct dev_pm_opp *suspend_opp;
+
+	unsigned int *supported_hw;
+	unsigned int supported_hw_count;
+	const char *prop_name;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dentry;
+	char dentry_name[NAME_MAX];
+#endif
 };
 };
 
 
 /* Routines internal to opp core */
 /* Routines internal to opp core */
@@ -143,4 +172,26 @@ struct device_list_opp *_add_list_dev(const struct device *dev,
 				      struct device_opp *dev_opp);
 				      struct device_opp *dev_opp);
 struct device_node *_of_get_opp_desc_node(struct device *dev);
 struct device_node *_of_get_opp_desc_node(struct device *dev);
 
 
+#ifdef CONFIG_DEBUG_FS
+void opp_debug_remove_one(struct dev_pm_opp *opp);
+int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp);
+int opp_debug_register(struct device_list_opp *list_dev,
+		       struct device_opp *dev_opp);
+void opp_debug_unregister(struct device_list_opp *list_dev,
+			  struct device_opp *dev_opp);
+#else
+static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {}
+
+static inline int opp_debug_create_one(struct dev_pm_opp *opp,
+				       struct device_opp *dev_opp)
+{ return 0; }
+static inline int opp_debug_register(struct device_list_opp *list_dev,
+				     struct device_opp *dev_opp)
+{ return 0; }
+
+static inline void opp_debug_unregister(struct device_list_opp *list_dev,
+					struct device_opp *dev_opp)
+{ }
+#endif		/* DEBUG_FS */
+
 #endif		/* __DRIVER_OPP_H__ */
 #endif		/* __DRIVER_OPP_H__ */

+ 12 - 0
drivers/cpufreq/Kconfig.arm

@@ -6,6 +6,8 @@
 config ARM_BIG_LITTLE_CPUFREQ
 config ARM_BIG_LITTLE_CPUFREQ
 	tristate "Generic ARM big LITTLE CPUfreq driver"
 	tristate "Generic ARM big LITTLE CPUfreq driver"
 	depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK
 	depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK
+	# if CPU_THERMAL is on and THERMAL=m, ARM_BIT_LITTLE_CPUFREQ cannot be =y
+	depends on !CPU_THERMAL || THERMAL
 	select PM_OPP
 	select PM_OPP
 	help
 	help
 	  This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
 	  This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
@@ -217,6 +219,16 @@ config ARM_SPEAR_CPUFREQ
 	help
 	help
 	  This adds the CPUFreq driver support for SPEAr SOCs.
 	  This adds the CPUFreq driver support for SPEAr SOCs.
 
 
+config ARM_STI_CPUFREQ
+	tristate "STi CPUFreq support"
+	depends on SOC_STIH407
+	help
+	  This driver uses the generic OPP framework to match the running
+	  platform with a predefined set of suitable values.  If not provided
+	  we will fall-back so safe-values contained in Device Tree.  Enable
+	  this config option if you wish to add CPUFreq support for STi based
+	  SoCs.
+
 config ARM_TEGRA20_CPUFREQ
 config ARM_TEGRA20_CPUFREQ
 	bool "Tegra20 CPUFreq support"
 	bool "Tegra20 CPUFreq support"
 	depends on ARCH_TEGRA
 	depends on ARCH_TEGRA

+ 1 - 0
drivers/cpufreq/Makefile

@@ -73,6 +73,7 @@ obj-$(CONFIG_ARM_SA1100_CPUFREQ)	+= sa1100-cpufreq.o
 obj-$(CONFIG_ARM_SA1110_CPUFREQ)	+= sa1110-cpufreq.o
 obj-$(CONFIG_ARM_SA1110_CPUFREQ)	+= sa1110-cpufreq.o
 obj-$(CONFIG_ARM_SCPI_CPUFREQ)		+= scpi-cpufreq.o
 obj-$(CONFIG_ARM_SCPI_CPUFREQ)		+= scpi-cpufreq.o
 obj-$(CONFIG_ARM_SPEAR_CPUFREQ)		+= spear-cpufreq.o
 obj-$(CONFIG_ARM_SPEAR_CPUFREQ)		+= spear-cpufreq.o
+obj-$(CONFIG_ARM_STI_CPUFREQ)		+= sti-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o

+ 41 - 0
drivers/cpufreq/arm_big_little.c

@@ -23,6 +23,7 @@
 #include <linux/cpu.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/cpumask.h>
+#include <linux/cpu_cooling.h>
 #include <linux/export.h>
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/mutex.h>
@@ -55,6 +56,7 @@ static bool bL_switching_enabled;
 #define ACTUAL_FREQ(cluster, freq)  ((cluster == A7_CLUSTER) ? freq << 1 : freq)
 #define ACTUAL_FREQ(cluster, freq)  ((cluster == A7_CLUSTER) ? freq << 1 : freq)
 #define VIRT_FREQ(cluster, freq)    ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
 #define VIRT_FREQ(cluster, freq)    ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
 
 
+static struct thermal_cooling_device *cdev[MAX_CLUSTERS];
 static struct cpufreq_arm_bL_ops *arm_bL_ops;
 static struct cpufreq_arm_bL_ops *arm_bL_ops;
 static struct clk *clk[MAX_CLUSTERS];
 static struct clk *clk[MAX_CLUSTERS];
 static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
 static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
@@ -493,6 +495,12 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
 static int bL_cpufreq_exit(struct cpufreq_policy *policy)
 static int bL_cpufreq_exit(struct cpufreq_policy *policy)
 {
 {
 	struct device *cpu_dev;
 	struct device *cpu_dev;
+	int cur_cluster = cpu_to_cluster(policy->cpu);
+
+	if (cur_cluster < MAX_CLUSTERS) {
+		cpufreq_cooling_unregister(cdev[cur_cluster]);
+		cdev[cur_cluster] = NULL;
+	}
 
 
 	cpu_dev = get_cpu_device(policy->cpu);
 	cpu_dev = get_cpu_device(policy->cpu);
 	if (!cpu_dev) {
 	if (!cpu_dev) {
@@ -507,6 +515,38 @@ static int bL_cpufreq_exit(struct cpufreq_policy *policy)
 	return 0;
 	return 0;
 }
 }
 
 
+static void bL_cpufreq_ready(struct cpufreq_policy *policy)
+{
+	struct device *cpu_dev = get_cpu_device(policy->cpu);
+	int cur_cluster = cpu_to_cluster(policy->cpu);
+	struct device_node *np;
+
+	/* Do not register a cpu_cooling device if we are in IKS mode */
+	if (cur_cluster >= MAX_CLUSTERS)
+		return;
+
+	np = of_node_get(cpu_dev->of_node);
+	if (WARN_ON(!np))
+		return;
+
+	if (of_find_property(np, "#cooling-cells", NULL)) {
+		u32 power_coefficient = 0;
+
+		of_property_read_u32(np, "dynamic-power-coefficient",
+				     &power_coefficient);
+
+		cdev[cur_cluster] = of_cpufreq_power_cooling_register(np,
+				policy->related_cpus, power_coefficient, NULL);
+		if (IS_ERR(cdev[cur_cluster])) {
+			dev_err(cpu_dev,
+				"running cpufreq without cooling device: %ld\n",
+				PTR_ERR(cdev[cur_cluster]));
+			cdev[cur_cluster] = NULL;
+		}
+	}
+	of_node_put(np);
+}
+
 static struct cpufreq_driver bL_cpufreq_driver = {
 static struct cpufreq_driver bL_cpufreq_driver = {
 	.name			= "arm-big-little",
 	.name			= "arm-big-little",
 	.flags			= CPUFREQ_STICKY |
 	.flags			= CPUFREQ_STICKY |
@@ -517,6 +557,7 @@ static struct cpufreq_driver bL_cpufreq_driver = {
 	.get			= bL_cpufreq_get_rate,
 	.get			= bL_cpufreq_get_rate,
 	.init			= bL_cpufreq_init,
 	.init			= bL_cpufreq_init,
 	.exit			= bL_cpufreq_exit,
 	.exit			= bL_cpufreq_exit,
+	.ready			= bL_cpufreq_ready,
 	.attr			= cpufreq_generic_attr,
 	.attr			= cpufreq_generic_attr,
 };
 };
 
 

+ 7 - 2
drivers/cpufreq/cpufreq-dt.c

@@ -407,8 +407,13 @@ static void cpufreq_ready(struct cpufreq_policy *policy)
 	 * thermal DT code takes care of matching them.
 	 * thermal DT code takes care of matching them.
 	 */
 	 */
 	if (of_find_property(np, "#cooling-cells", NULL)) {
 	if (of_find_property(np, "#cooling-cells", NULL)) {
-		priv->cdev = of_cpufreq_cooling_register(np,
-							 policy->related_cpus);
+		u32 power_coefficient = 0;
+
+		of_property_read_u32(np, "dynamic-power-coefficient",
+				     &power_coefficient);
+
+		priv->cdev = of_cpufreq_power_cooling_register(np,
+				policy->related_cpus, power_coefficient, NULL);
 		if (IS_ERR(priv->cdev)) {
 		if (IS_ERR(priv->cdev)) {
 			dev_err(priv->cpu_dev,
 			dev_err(priv->cpu_dev,
 				"running cpufreq without cooling device: %ld\n",
 				"running cpufreq without cooling device: %ld\n",

+ 3 - 3
drivers/cpufreq/cpufreq_conservative.c

@@ -115,13 +115,13 @@ static void cs_check_cpu(int cpu, unsigned int load)
 	}
 	}
 }
 }
 
 
-static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs,
-				 struct dbs_data *dbs_data, bool modify_all)
+static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all)
 {
 {
+	struct dbs_data *dbs_data = policy->governor_data;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 
 
 	if (modify_all)
 	if (modify_all)
-		dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu);
+		dbs_check_cpu(dbs_data, policy->cpu);
 
 
 	return delay_for_sampling_rate(cs_tuners->sampling_rate);
 	return delay_for_sampling_rate(cs_tuners->sampling_rate);
 }
 }

+ 81 - 62
drivers/cpufreq/cpufreq_governor.c

@@ -158,47 +158,55 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 }
 }
 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 
 
-static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
-		unsigned int delay)
+void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay)
 {
 {
-	struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
-
-	mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay);
-}
-
-void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
-		unsigned int delay, bool all_cpus)
-{
-	int i;
+	struct dbs_data *dbs_data = policy->governor_data;
+	struct cpu_dbs_info *cdbs;
+	int cpu;
 
 
-	if (!all_cpus) {
-		/*
-		 * Use raw_smp_processor_id() to avoid preemptible warnings.
-		 * We know that this is only called with all_cpus == false from
-		 * works that have been queued with *_work_on() functions and
-		 * those works are canceled during CPU_DOWN_PREPARE so they
-		 * can't possibly run on any other CPU.
-		 */
-		__gov_queue_work(raw_smp_processor_id(), dbs_data, delay);
-	} else {
-		for_each_cpu(i, policy->cpus)
-			__gov_queue_work(i, dbs_data, delay);
+	for_each_cpu(cpu, policy->cpus) {
+		cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
+		cdbs->timer.expires = jiffies + delay;
+		add_timer_on(&cdbs->timer, cpu);
 	}
 	}
 }
 }
-EXPORT_SYMBOL_GPL(gov_queue_work);
+EXPORT_SYMBOL_GPL(gov_add_timers);
 
 
-static inline void gov_cancel_work(struct dbs_data *dbs_data,
-		struct cpufreq_policy *policy)
+static inline void gov_cancel_timers(struct cpufreq_policy *policy)
 {
 {
+	struct dbs_data *dbs_data = policy->governor_data;
 	struct cpu_dbs_info *cdbs;
 	struct cpu_dbs_info *cdbs;
 	int i;
 	int i;
 
 
 	for_each_cpu(i, policy->cpus) {
 	for_each_cpu(i, policy->cpus) {
 		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
 		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
-		cancel_delayed_work_sync(&cdbs->dwork);
+		del_timer_sync(&cdbs->timer);
 	}
 	}
 }
 }
 
 
+void gov_cancel_work(struct cpu_common_dbs_info *shared)
+{
+	/* Tell dbs_timer_handler() to skip queuing up work items. */
+	atomic_inc(&shared->skip_work);
+	/*
+	 * If dbs_timer_handler() is already running, it may not notice the
+	 * incremented skip_work, so wait for it to complete to prevent its work
+	 * item from being queued up after the cancel_work_sync() below.
+	 */
+	gov_cancel_timers(shared->policy);
+	/*
+	 * In case dbs_timer_handler() managed to run and spawn a work item
+	 * before the timers have been canceled, wait for that work item to
+	 * complete and then cancel all of the timers set up by it.  If
+	 * dbs_timer_handler() runs again at that point, it will see the
+	 * positive value of skip_work and won't spawn any more work items.
+	 */
+	cancel_work_sync(&shared->work);
+	gov_cancel_timers(shared->policy);
+	atomic_set(&shared->skip_work, 0);
+}
+EXPORT_SYMBOL_GPL(gov_cancel_work);
+
 /* Will return if we need to evaluate cpu load again or not */
 /* Will return if we need to evaluate cpu load again or not */
 static bool need_load_eval(struct cpu_common_dbs_info *shared,
 static bool need_load_eval(struct cpu_common_dbs_info *shared,
 			   unsigned int sampling_rate)
 			   unsigned int sampling_rate)
@@ -217,29 +225,21 @@ static bool need_load_eval(struct cpu_common_dbs_info *shared,
 	return true;
 	return true;
 }
 }
 
 
-static void dbs_timer(struct work_struct *work)
+static void dbs_work_handler(struct work_struct *work)
 {
 {
-	struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info,
-						 dwork.work);
-	struct cpu_common_dbs_info *shared = cdbs->shared;
+	struct cpu_common_dbs_info *shared = container_of(work, struct
+					cpu_common_dbs_info, work);
 	struct cpufreq_policy *policy;
 	struct cpufreq_policy *policy;
 	struct dbs_data *dbs_data;
 	struct dbs_data *dbs_data;
 	unsigned int sampling_rate, delay;
 	unsigned int sampling_rate, delay;
-	bool modify_all = true;
-
-	mutex_lock(&shared->timer_mutex);
+	bool eval_load;
 
 
 	policy = shared->policy;
 	policy = shared->policy;
-
-	/*
-	 * Governor might already be disabled and there is no point continuing
-	 * with the work-handler.
-	 */
-	if (!policy)
-		goto unlock;
-
 	dbs_data = policy->governor_data;
 	dbs_data = policy->governor_data;
 
 
+	/* Kill all timers */
+	gov_cancel_timers(policy);
+
 	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
 	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
 		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 
 
@@ -250,14 +250,37 @@ static void dbs_timer(struct work_struct *work)
 		sampling_rate = od_tuners->sampling_rate;
 		sampling_rate = od_tuners->sampling_rate;
 	}
 	}
 
 
-	if (!need_load_eval(cdbs->shared, sampling_rate))
-		modify_all = false;
+	eval_load = need_load_eval(shared, sampling_rate);
 
 
-	delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all);
-	gov_queue_work(dbs_data, policy, delay, modify_all);
-
-unlock:
+	/*
+	 * Make sure cpufreq_governor_limits() isn't evaluating load in
+	 * parallel.
+	 */
+	mutex_lock(&shared->timer_mutex);
+	delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load);
 	mutex_unlock(&shared->timer_mutex);
 	mutex_unlock(&shared->timer_mutex);
+
+	atomic_dec(&shared->skip_work);
+
+	gov_add_timers(policy, delay);
+}
+
+static void dbs_timer_handler(unsigned long data)
+{
+	struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data;
+	struct cpu_common_dbs_info *shared = cdbs->shared;
+
+	/*
+	 * Timer handler may not be allowed to queue the work at the moment,
+	 * because:
+	 * - Another timer handler has done that
+	 * - We are stopping the governor
+	 * - Or we are updating the sampling rate of the ondemand governor
+	 */
+	if (atomic_inc_return(&shared->skip_work) > 1)
+		atomic_dec(&shared->skip_work);
+	else
+		queue_work(system_wq, &shared->work);
 }
 }
 
 
 static void set_sampling_rate(struct dbs_data *dbs_data,
 static void set_sampling_rate(struct dbs_data *dbs_data,
@@ -287,6 +310,9 @@ static int alloc_common_dbs_info(struct cpufreq_policy *policy,
 	for_each_cpu(j, policy->related_cpus)
 	for_each_cpu(j, policy->related_cpus)
 		cdata->get_cpu_cdbs(j)->shared = shared;
 		cdata->get_cpu_cdbs(j)->shared = shared;
 
 
+	mutex_init(&shared->timer_mutex);
+	atomic_set(&shared->skip_work, 0);
+	INIT_WORK(&shared->work, dbs_work_handler);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -297,6 +323,8 @@ static void free_common_dbs_info(struct cpufreq_policy *policy,
 	struct cpu_common_dbs_info *shared = cdbs->shared;
 	struct cpu_common_dbs_info *shared = cdbs->shared;
 	int j;
 	int j;
 
 
+	mutex_destroy(&shared->timer_mutex);
+
 	for_each_cpu(j, policy->cpus)
 	for_each_cpu(j, policy->cpus)
 		cdata->get_cpu_cdbs(j)->shared = NULL;
 		cdata->get_cpu_cdbs(j)->shared = NULL;
 
 
@@ -433,7 +461,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 
 
 	shared->policy = policy;
 	shared->policy = policy;
 	shared->time_stamp = ktime_get();
 	shared->time_stamp = ktime_get();
-	mutex_init(&shared->timer_mutex);
 
 
 	for_each_cpu(j, policy->cpus) {
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
 		struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
@@ -450,7 +477,9 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 		if (ignore_nice)
 		if (ignore_nice)
 			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 
 
-		INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer);
+		__setup_timer(&j_cdbs->timer, dbs_timer_handler,
+			      (unsigned long)j_cdbs,
+			      TIMER_DEFERRABLE | TIMER_IRQSAFE);
 	}
 	}
 
 
 	if (cdata->governor == GOV_CONSERVATIVE) {
 	if (cdata->governor == GOV_CONSERVATIVE) {
@@ -468,8 +497,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 		od_ops->powersave_bias_init_cpu(cpu);
 		od_ops->powersave_bias_init_cpu(cpu);
 	}
 	}
 
 
-	gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate),
-		       true);
+	gov_add_timers(policy, delay_for_sampling_rate(sampling_rate));
 	return 0;
 	return 0;
 }
 }
 
 
@@ -483,18 +511,9 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy,
 	if (!shared || !shared->policy)
 	if (!shared || !shared->policy)
 		return -EBUSY;
 		return -EBUSY;
 
 
-	/*
-	 * Work-handler must see this updated, as it should not proceed any
-	 * further after governor is disabled. And so timer_mutex is taken while
-	 * updating this value.
-	 */
-	mutex_lock(&shared->timer_mutex);
+	gov_cancel_work(shared);
 	shared->policy = NULL;
 	shared->policy = NULL;
-	mutex_unlock(&shared->timer_mutex);
-
-	gov_cancel_work(dbs_data, policy);
 
 
-	mutex_destroy(&shared->timer_mutex);
 	return 0;
 	return 0;
 }
 }
 
 

+ 10 - 8
drivers/cpufreq/cpufreq_governor.h

@@ -17,6 +17,7 @@
 #ifndef _CPUFREQ_GOVERNOR_H
 #ifndef _CPUFREQ_GOVERNOR_H
 #define _CPUFREQ_GOVERNOR_H
 #define _CPUFREQ_GOVERNOR_H
 
 
+#include <linux/atomic.h>
 #include <linux/cpufreq.h>
 #include <linux/cpufreq.h>
 #include <linux/kernel_stat.h>
 #include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/module.h>
@@ -132,12 +133,14 @@ static void *get_cpu_dbs_info_s(int cpu)				\
 struct cpu_common_dbs_info {
 struct cpu_common_dbs_info {
 	struct cpufreq_policy *policy;
 	struct cpufreq_policy *policy;
 	/*
 	/*
-	 * percpu mutex that serializes governor limit change with dbs_timer
-	 * invocation. We do not want dbs_timer to run when user is changing
-	 * the governor or limits.
+	 * Per policy mutex that serializes load evaluation from limit-change
+	 * and work-handler.
 	 */
 	 */
 	struct mutex timer_mutex;
 	struct mutex timer_mutex;
+
 	ktime_t time_stamp;
 	ktime_t time_stamp;
+	atomic_t skip_work;
+	struct work_struct work;
 };
 };
 
 
 /* Per cpu structures */
 /* Per cpu structures */
@@ -152,7 +155,7 @@ struct cpu_dbs_info {
 	 * wake-up from idle.
 	 * wake-up from idle.
 	 */
 	 */
 	unsigned int prev_load;
 	unsigned int prev_load;
-	struct delayed_work dwork;
+	struct timer_list timer;
 	struct cpu_common_dbs_info *shared;
 	struct cpu_common_dbs_info *shared;
 };
 };
 
 
@@ -209,8 +212,7 @@ struct common_dbs_data {
 
 
 	struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu);
 	struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu);
 	void *(*get_cpu_dbs_info_s)(int cpu);
 	void *(*get_cpu_dbs_info_s)(int cpu);
-	unsigned int (*gov_dbs_timer)(struct cpu_dbs_info *cdbs,
-				      struct dbs_data *dbs_data,
+	unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy,
 				      bool modify_all);
 				      bool modify_all);
 	void (*gov_check_cpu)(int cpu, unsigned int load);
 	void (*gov_check_cpu)(int cpu, unsigned int load);
 	int (*init)(struct dbs_data *dbs_data, bool notify);
 	int (*init)(struct dbs_data *dbs_data, bool notify);
@@ -269,11 +271,11 @@ static ssize_t show_sampling_rate_min_gov_pol				\
 
 
 extern struct mutex cpufreq_governor_lock;
 extern struct mutex cpufreq_governor_lock;
 
 
+void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay);
+void gov_cancel_work(struct cpu_common_dbs_info *shared);
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		struct common_dbs_data *cdata, unsigned int event);
 		struct common_dbs_data *cdata, unsigned int event);
-void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
-		unsigned int delay, bool all_cpus);
 void od_register_powersave_bias_handler(unsigned int (*f)
 void od_register_powersave_bias_handler(unsigned int (*f)
 		(struct cpufreq_policy *, unsigned int, unsigned int),
 		(struct cpufreq_policy *, unsigned int, unsigned int),
 		unsigned int powersave_bias);
 		unsigned int powersave_bias);

+ 43 - 18
drivers/cpufreq/cpufreq_ondemand.c

@@ -191,10 +191,9 @@ static void od_check_cpu(int cpu, unsigned int load)
 	}
 	}
 }
 }
 
 
-static unsigned int od_dbs_timer(struct cpu_dbs_info *cdbs,
-				 struct dbs_data *dbs_data, bool modify_all)
+static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all)
 {
 {
-	struct cpufreq_policy *policy = cdbs->shared->policy;
+	struct dbs_data *dbs_data = policy->governor_data;
 	unsigned int cpu = policy->cpu;
 	unsigned int cpu = policy->cpu;
 	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
 	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
 			cpu);
 			cpu);
@@ -247,40 +246,66 @@ static void update_sampling_rate(struct dbs_data *dbs_data,
 		unsigned int new_rate)
 		unsigned int new_rate)
 {
 {
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
+	struct cpumask cpumask;
 	int cpu;
 	int cpu;
 
 
 	od_tuners->sampling_rate = new_rate = max(new_rate,
 	od_tuners->sampling_rate = new_rate = max(new_rate,
 			dbs_data->min_sampling_rate);
 			dbs_data->min_sampling_rate);
 
 
-	for_each_online_cpu(cpu) {
+	/*
+	 * Lock governor so that governor start/stop can't execute in parallel.
+	 */
+	mutex_lock(&od_dbs_cdata.mutex);
+
+	cpumask_copy(&cpumask, cpu_online_mask);
+
+	for_each_cpu(cpu, &cpumask) {
 		struct cpufreq_policy *policy;
 		struct cpufreq_policy *policy;
 		struct od_cpu_dbs_info_s *dbs_info;
 		struct od_cpu_dbs_info_s *dbs_info;
+		struct cpu_dbs_info *cdbs;
+		struct cpu_common_dbs_info *shared;
 		unsigned long next_sampling, appointed_at;
 		unsigned long next_sampling, appointed_at;
 
 
-		policy = cpufreq_cpu_get(cpu);
-		if (!policy)
-			continue;
-		if (policy->governor != &cpufreq_gov_ondemand) {
-			cpufreq_cpu_put(policy);
-			continue;
-		}
 		dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
 		dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
-		cpufreq_cpu_put(policy);
+		cdbs = &dbs_info->cdbs;
+		shared = cdbs->shared;
 
 
-		if (!delayed_work_pending(&dbs_info->cdbs.dwork))
+		/*
+		 * A valid shared and shared->policy means governor hasn't
+		 * stopped or exited yet.
+		 */
+		if (!shared || !shared->policy)
+			continue;
+
+		policy = shared->policy;
+
+		/* clear all CPUs of this policy */
+		cpumask_andnot(&cpumask, &cpumask, policy->cpus);
+
+		/*
+		 * Update sampling rate for CPUs whose policy is governed by
+		 * dbs_data. In case of governor_per_policy, only a single
+		 * policy will be governed by dbs_data, otherwise there can be
+		 * multiple policies that are governed by the same dbs_data.
+		 */
+		if (dbs_data != policy->governor_data)
 			continue;
 			continue;
 
 
+		/*
+		 * Checking this for any CPU should be fine, timers for all of
+		 * them are scheduled together.
+		 */
 		next_sampling = jiffies + usecs_to_jiffies(new_rate);
 		next_sampling = jiffies + usecs_to_jiffies(new_rate);
-		appointed_at = dbs_info->cdbs.dwork.timer.expires;
+		appointed_at = dbs_info->cdbs.timer.expires;
 
 
 		if (time_before(next_sampling, appointed_at)) {
 		if (time_before(next_sampling, appointed_at)) {
-			cancel_delayed_work_sync(&dbs_info->cdbs.dwork);
-
-			gov_queue_work(dbs_data, policy,
-				       usecs_to_jiffies(new_rate), true);
+			gov_cancel_work(shared);
+			gov_add_timers(policy, usecs_to_jiffies(new_rate));
 
 
 		}
 		}
 	}
 	}
+
+	mutex_unlock(&od_dbs_cdata.mutex);
 }
 }
 
 
 static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
 static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,

+ 57 - 16
drivers/cpufreq/intel_pstate.c

@@ -66,6 +66,7 @@ static inline int ceiling_fp(int32_t x)
 
 
 struct sample {
 struct sample {
 	int32_t core_pct_busy;
 	int32_t core_pct_busy;
+	int32_t busy_scaled;
 	u64 aperf;
 	u64 aperf;
 	u64 mperf;
 	u64 mperf;
 	u64 tsc;
 	u64 tsc;
@@ -112,6 +113,7 @@ struct cpudata {
 	u64	prev_aperf;
 	u64	prev_aperf;
 	u64	prev_mperf;
 	u64	prev_mperf;
 	u64	prev_tsc;
 	u64	prev_tsc;
+	u64	prev_cummulative_iowait;
 	struct sample sample;
 	struct sample sample;
 };
 };
 
 
@@ -133,6 +135,7 @@ struct pstate_funcs {
 	int (*get_scaling)(void);
 	int (*get_scaling)(void);
 	void (*set)(struct cpudata*, int pstate);
 	void (*set)(struct cpudata*, int pstate);
 	void (*get_vid)(struct cpudata *);
 	void (*get_vid)(struct cpudata *);
+	int32_t (*get_target_pstate)(struct cpudata *);
 };
 };
 
 
 struct cpu_defaults {
 struct cpu_defaults {
@@ -140,6 +143,9 @@ struct cpu_defaults {
 	struct pstate_funcs funcs;
 	struct pstate_funcs funcs;
 };
 };
 
 
+static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
+static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
+
 static struct pstate_adjust_policy pid_params;
 static struct pstate_adjust_policy pid_params;
 static struct pstate_funcs pstate_funcs;
 static struct pstate_funcs pstate_funcs;
 static int hwp_active;
 static int hwp_active;
@@ -738,6 +744,7 @@ static struct cpu_defaults core_params = {
 		.get_turbo = core_get_turbo_pstate,
 		.get_turbo = core_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
 		.get_scaling = core_get_scaling,
 		.set = core_set_pstate,
 		.set = core_set_pstate,
+		.get_target_pstate = get_target_pstate_use_performance,
 	},
 	},
 };
 };
 
 
@@ -758,6 +765,7 @@ static struct cpu_defaults silvermont_params = {
 		.set = atom_set_pstate,
 		.set = atom_set_pstate,
 		.get_scaling = silvermont_get_scaling,
 		.get_scaling = silvermont_get_scaling,
 		.get_vid = atom_get_vid,
 		.get_vid = atom_get_vid,
+		.get_target_pstate = get_target_pstate_use_cpu_load,
 	},
 	},
 };
 };
 
 
@@ -778,6 +786,7 @@ static struct cpu_defaults airmont_params = {
 		.set = atom_set_pstate,
 		.set = atom_set_pstate,
 		.get_scaling = airmont_get_scaling,
 		.get_scaling = airmont_get_scaling,
 		.get_vid = atom_get_vid,
 		.get_vid = atom_get_vid,
+		.get_target_pstate = get_target_pstate_use_cpu_load,
 	},
 	},
 };
 };
 
 
@@ -797,6 +806,7 @@ static struct cpu_defaults knl_params = {
 		.get_turbo = knl_get_turbo_pstate,
 		.get_turbo = knl_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
 		.get_scaling = core_get_scaling,
 		.set = core_set_pstate,
 		.set = core_set_pstate,
+		.get_target_pstate = get_target_pstate_use_performance,
 	},
 	},
 };
 };
 
 
@@ -882,12 +892,11 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
 	local_irq_save(flags);
 	local_irq_save(flags);
 	rdmsrl(MSR_IA32_APERF, aperf);
 	rdmsrl(MSR_IA32_APERF, aperf);
 	rdmsrl(MSR_IA32_MPERF, mperf);
 	rdmsrl(MSR_IA32_MPERF, mperf);
-	if (cpu->prev_mperf == mperf) {
+	tsc = rdtsc();
+	if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) {
 		local_irq_restore(flags);
 		local_irq_restore(flags);
 		return;
 		return;
 	}
 	}
-
-	tsc = rdtsc();
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 
 
 	cpu->last_sample_time = cpu->sample.time;
 	cpu->last_sample_time = cpu->sample.time;
@@ -922,7 +931,43 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
 	mod_timer_pinned(&cpu->timer, jiffies + delay);
 	mod_timer_pinned(&cpu->timer, jiffies + delay);
 }
 }
 
 
-static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
+static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
+{
+	struct sample *sample = &cpu->sample;
+	u64 cummulative_iowait, delta_iowait_us;
+	u64 delta_iowait_mperf;
+	u64 mperf, now;
+	int32_t cpu_load;
+
+	cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
+
+	/*
+	 * Convert iowait time into number of IO cycles spent at max_freq.
+	 * IO is considered as busy only for the cpu_load algorithm. For
+	 * performance this is not needed since we always try to reach the
+	 * maximum P-State, so we are already boosting the IOs.
+	 */
+	delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
+	delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
+		cpu->pstate.max_pstate, MSEC_PER_SEC);
+
+	mperf = cpu->sample.mperf + delta_iowait_mperf;
+	cpu->prev_cummulative_iowait = cummulative_iowait;
+
+
+	/*
+	 * The load can be estimated as the ratio of the mperf counter
+	 * running at a constant frequency during active periods
+	 * (C0) and the time stamp counter running at the same frequency
+	 * also during C-states.
+	 */
+	cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
+	cpu->sample.busy_scaled = cpu_load;
+
+	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
+}
+
+static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 {
 {
 	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
 	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
 	s64 duration_us;
 	s64 duration_us;
@@ -960,30 +1005,24 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 		core_busy = mul_fp(core_busy, sample_ratio);
 		core_busy = mul_fp(core_busy, sample_ratio);
 	}
 	}
 
 
-	return core_busy;
+	cpu->sample.busy_scaled = core_busy;
+	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
 }
 }
 
 
 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 {
 {
-	int32_t busy_scaled;
-	struct _pid *pid;
-	signed int ctl;
-	int from;
+	int from, target_pstate;
 	struct sample *sample;
 	struct sample *sample;
 
 
 	from = cpu->pstate.current_pstate;
 	from = cpu->pstate.current_pstate;
 
 
-	pid = &cpu->pid;
-	busy_scaled = intel_pstate_get_scaled_busy(cpu);
+	target_pstate = pstate_funcs.get_target_pstate(cpu);
 
 
-	ctl = pid_calc(pid, busy_scaled);
-
-	/* Negative values of ctl increase the pstate and vice versa */
-	intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true);
+	intel_pstate_set_pstate(cpu, target_pstate, true);
 
 
 	sample = &cpu->sample;
 	sample = &cpu->sample;
 	trace_pstate_sample(fp_toint(sample->core_pct_busy),
 	trace_pstate_sample(fp_toint(sample->core_pct_busy),
-		fp_toint(busy_scaled),
+		fp_toint(sample->busy_scaled),
 		from,
 		from,
 		cpu->pstate.current_pstate,
 		cpu->pstate.current_pstate,
 		sample->mperf,
 		sample->mperf,
@@ -1237,6 +1276,8 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
 	pstate_funcs.get_scaling = funcs->get_scaling;
 	pstate_funcs.get_scaling = funcs->get_scaling;
 	pstate_funcs.set       = funcs->set;
 	pstate_funcs.set       = funcs->set;
 	pstate_funcs.get_vid   = funcs->get_vid;
 	pstate_funcs.get_vid   = funcs->get_vid;
+	pstate_funcs.get_target_pstate = funcs->get_target_pstate;
+
 }
 }
 
 
 #if IS_ENABLED(CONFIG_ACPI)
 #if IS_ENABLED(CONFIG_ACPI)

+ 98 - 26
drivers/cpufreq/mt8173-cpufreq.c

@@ -41,16 +41,35 @@
  * the original PLL becomes stable at target frequency.
  * the original PLL becomes stable at target frequency.
  */
  */
 struct mtk_cpu_dvfs_info {
 struct mtk_cpu_dvfs_info {
+	struct cpumask cpus;
 	struct device *cpu_dev;
 	struct device *cpu_dev;
 	struct regulator *proc_reg;
 	struct regulator *proc_reg;
 	struct regulator *sram_reg;
 	struct regulator *sram_reg;
 	struct clk *cpu_clk;
 	struct clk *cpu_clk;
 	struct clk *inter_clk;
 	struct clk *inter_clk;
 	struct thermal_cooling_device *cdev;
 	struct thermal_cooling_device *cdev;
+	struct list_head list_head;
 	int intermediate_voltage;
 	int intermediate_voltage;
 	bool need_voltage_tracking;
 	bool need_voltage_tracking;
 };
 };
 
 
+static LIST_HEAD(dvfs_info_list);
+
+static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu)
+{
+	struct mtk_cpu_dvfs_info *info;
+	struct list_head *list;
+
+	list_for_each(list, &dvfs_info_list) {
+		info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
+
+		if (cpumask_test_cpu(cpu, &info->cpus))
+			return info;
+	}
+
+	return NULL;
+}
+
 static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
 static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
 					int new_vproc)
 					int new_vproc)
 {
 {
@@ -59,7 +78,10 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
 	int old_vproc, old_vsram, new_vsram, vsram, vproc, ret;
 	int old_vproc, old_vsram, new_vsram, vsram, vproc, ret;
 
 
 	old_vproc = regulator_get_voltage(proc_reg);
 	old_vproc = regulator_get_voltage(proc_reg);
-	old_vsram = regulator_get_voltage(sram_reg);
+	if (old_vproc < 0) {
+		pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc);
+		return old_vproc;
+	}
 	/* Vsram should not exceed the maximum allowed voltage of SoC. */
 	/* Vsram should not exceed the maximum allowed voltage of SoC. */
 	new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT);
 	new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT);
 
 
@@ -72,7 +94,17 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
 		 */
 		 */
 		do {
 		do {
 			old_vsram = regulator_get_voltage(sram_reg);
 			old_vsram = regulator_get_voltage(sram_reg);
+			if (old_vsram < 0) {
+				pr_err("%s: invalid Vsram value: %d\n",
+				       __func__, old_vsram);
+				return old_vsram;
+			}
 			old_vproc = regulator_get_voltage(proc_reg);
 			old_vproc = regulator_get_voltage(proc_reg);
+			if (old_vproc < 0) {
+				pr_err("%s: invalid Vproc value: %d\n",
+				       __func__, old_vproc);
+				return old_vproc;
+			}
 
 
 			vsram = min(new_vsram, old_vproc + MAX_VOLT_SHIFT);
 			vsram = min(new_vsram, old_vproc + MAX_VOLT_SHIFT);
 
 
@@ -117,7 +149,17 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
 		 */
 		 */
 		do {
 		do {
 			old_vproc = regulator_get_voltage(proc_reg);
 			old_vproc = regulator_get_voltage(proc_reg);
+			if (old_vproc < 0) {
+				pr_err("%s: invalid Vproc value: %d\n",
+				       __func__, old_vproc);
+				return old_vproc;
+			}
 			old_vsram = regulator_get_voltage(sram_reg);
 			old_vsram = regulator_get_voltage(sram_reg);
+			if (old_vsram < 0) {
+				pr_err("%s: invalid Vsram value: %d\n",
+				       __func__, old_vsram);
+				return old_vsram;
+			}
 
 
 			vproc = max(new_vproc, old_vsram - MAX_VOLT_SHIFT);
 			vproc = max(new_vproc, old_vsram - MAX_VOLT_SHIFT);
 			ret = regulator_set_voltage(proc_reg, vproc,
 			ret = regulator_set_voltage(proc_reg, vproc,
@@ -185,6 +227,10 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 
 
 	old_freq_hz = clk_get_rate(cpu_clk);
 	old_freq_hz = clk_get_rate(cpu_clk);
 	old_vproc = regulator_get_voltage(info->proc_reg);
 	old_vproc = regulator_get_voltage(info->proc_reg);
+	if (old_vproc < 0) {
+		pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc);
+		return old_vproc;
+	}
 
 
 	freq_hz = freq_table[index].frequency * 1000;
 	freq_hz = freq_table[index].frequency * 1000;
 
 
@@ -375,6 +421,9 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
 	 */
 	 */
 	info->need_voltage_tracking = !IS_ERR(sram_reg);
 	info->need_voltage_tracking = !IS_ERR(sram_reg);
 
 
+	/* CPUs in the same cluster share a clock and power domain. */
+	cpumask_copy(&info->cpus, &cpu_topology[cpu].core_sibling);
+
 	return 0;
 	return 0;
 
 
 out_free_opp_table:
 out_free_opp_table:
@@ -413,22 +462,18 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy)
 	struct cpufreq_frequency_table *freq_table;
 	struct cpufreq_frequency_table *freq_table;
 	int ret;
 	int ret;
 
 
-	info = kzalloc(sizeof(*info), GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	ret = mtk_cpu_dvfs_info_init(info, policy->cpu);
-	if (ret) {
-		pr_err("%s failed to initialize dvfs info for cpu%d\n",
-		       __func__, policy->cpu);
-		goto out_free_dvfs_info;
+	info = mtk_cpu_dvfs_info_lookup(policy->cpu);
+	if (!info) {
+		pr_err("dvfs info for cpu%d is not initialized.\n",
+		       policy->cpu);
+		return -EINVAL;
 	}
 	}
 
 
 	ret = dev_pm_opp_init_cpufreq_table(info->cpu_dev, &freq_table);
 	ret = dev_pm_opp_init_cpufreq_table(info->cpu_dev, &freq_table);
 	if (ret) {
 	if (ret) {
 		pr_err("failed to init cpufreq table for cpu%d: %d\n",
 		pr_err("failed to init cpufreq table for cpu%d: %d\n",
 		       policy->cpu, ret);
 		       policy->cpu, ret);
-		goto out_release_dvfs_info;
+		return ret;
 	}
 	}
 
 
 	ret = cpufreq_table_validate_and_show(policy, freq_table);
 	ret = cpufreq_table_validate_and_show(policy, freq_table);
@@ -437,8 +482,7 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy)
 		goto out_free_cpufreq_table;
 		goto out_free_cpufreq_table;
 	}
 	}
 
 
-	/* CPUs in the same cluster share a clock and power domain. */
-	cpumask_copy(policy->cpus, &cpu_topology[policy->cpu].core_sibling);
+	cpumask_copy(policy->cpus, &info->cpus);
 	policy->driver_data = info;
 	policy->driver_data = info;
 	policy->clk = info->cpu_clk;
 	policy->clk = info->cpu_clk;
 
 
@@ -446,13 +490,6 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy)
 
 
 out_free_cpufreq_table:
 out_free_cpufreq_table:
 	dev_pm_opp_free_cpufreq_table(info->cpu_dev, &freq_table);
 	dev_pm_opp_free_cpufreq_table(info->cpu_dev, &freq_table);
-
-out_release_dvfs_info:
-	mtk_cpu_dvfs_info_release(info);
-
-out_free_dvfs_info:
-	kfree(info);
-
 	return ret;
 	return ret;
 }
 }
 
 
@@ -462,14 +499,13 @@ static int mtk_cpufreq_exit(struct cpufreq_policy *policy)
 
 
 	cpufreq_cooling_unregister(info->cdev);
 	cpufreq_cooling_unregister(info->cdev);
 	dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table);
 	dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table);
-	mtk_cpu_dvfs_info_release(info);
-	kfree(info);
 
 
 	return 0;
 	return 0;
 }
 }
 
 
 static struct cpufreq_driver mt8173_cpufreq_driver = {
 static struct cpufreq_driver mt8173_cpufreq_driver = {
-	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK |
+		 CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
 	.verify = cpufreq_generic_frequency_table_verify,
 	.verify = cpufreq_generic_frequency_table_verify,
 	.target_index = mtk_cpufreq_set_target,
 	.target_index = mtk_cpufreq_set_target,
 	.get = cpufreq_generic_get,
 	.get = cpufreq_generic_get,
@@ -482,11 +518,47 @@ static struct cpufreq_driver mt8173_cpufreq_driver = {
 
 
 static int mt8173_cpufreq_probe(struct platform_device *pdev)
 static int mt8173_cpufreq_probe(struct platform_device *pdev)
 {
 {
-	int ret;
+	struct mtk_cpu_dvfs_info *info;
+	struct list_head *list, *tmp;
+	int cpu, ret;
+
+	for_each_possible_cpu(cpu) {
+		info = mtk_cpu_dvfs_info_lookup(cpu);
+		if (info)
+			continue;
+
+		info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+		if (!info) {
+			ret = -ENOMEM;
+			goto release_dvfs_info_list;
+		}
+
+		ret = mtk_cpu_dvfs_info_init(info, cpu);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"failed to initialize dvfs info for cpu%d\n",
+				cpu);
+			goto release_dvfs_info_list;
+		}
+
+		list_add(&info->list_head, &dvfs_info_list);
+	}
 
 
 	ret = cpufreq_register_driver(&mt8173_cpufreq_driver);
 	ret = cpufreq_register_driver(&mt8173_cpufreq_driver);
-	if (ret)
-		pr_err("failed to register mtk cpufreq driver\n");
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register mtk cpufreq driver\n");
+		goto release_dvfs_info_list;
+	}
+
+	return 0;
+
+release_dvfs_info_list:
+	list_for_each_safe(list, tmp, &dvfs_info_list) {
+		info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
+
+		mtk_cpu_dvfs_info_release(info);
+		list_del(list);
+	}
 
 
 	return ret;
 	return ret;
 }
 }

+ 2 - 0
drivers/cpufreq/pcc-cpufreq.c

@@ -555,6 +555,8 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	policy->min = policy->cpuinfo.min_freq =
 	policy->min = policy->cpuinfo.min_freq =
 		ioread32(&pcch_hdr->minimum_frequency) * 1000;
 		ioread32(&pcch_hdr->minimum_frequency) * 1000;
 
 
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+
 	pr_debug("init: policy->max is %d, policy->min is %d\n",
 	pr_debug("init: policy->max is %d, policy->min is %d\n",
 		policy->max, policy->min);
 		policy->max, policy->min);
 out:
 out:

+ 24 - 0
drivers/cpufreq/qoriq-cpufreq.c

@@ -12,6 +12,7 @@
 
 
 #include <linux/clk.h>
 #include <linux/clk.h>
 #include <linux/cpufreq.h>
 #include <linux/cpufreq.h>
+#include <linux/cpu_cooling.h>
 #include <linux/errno.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
@@ -33,6 +34,7 @@
 struct cpu_data {
 struct cpu_data {
 	struct clk **pclk;
 	struct clk **pclk;
 	struct cpufreq_frequency_table *table;
 	struct cpufreq_frequency_table *table;
+	struct thermal_cooling_device *cdev;
 };
 };
 
 
 /**
 /**
@@ -321,6 +323,27 @@ static int qoriq_cpufreq_target(struct cpufreq_policy *policy,
 	return clk_set_parent(policy->clk, parent);
 	return clk_set_parent(policy->clk, parent);
 }
 }
 
 
+
+static void qoriq_cpufreq_ready(struct cpufreq_policy *policy)
+{
+	struct cpu_data *cpud = policy->driver_data;
+	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
+
+	if (of_find_property(np, "#cooling-cells", NULL)) {
+		cpud->cdev = of_cpufreq_cooling_register(np,
+							 policy->related_cpus);
+
+		if (IS_ERR(cpud->cdev)) {
+			pr_err("Failed to register cooling device cpu%d: %ld\n",
+					policy->cpu, PTR_ERR(cpud->cdev));
+
+			cpud->cdev = NULL;
+		}
+	}
+
+	of_node_put(np);
+}
+
 static struct cpufreq_driver qoriq_cpufreq_driver = {
 static struct cpufreq_driver qoriq_cpufreq_driver = {
 	.name		= "qoriq_cpufreq",
 	.name		= "qoriq_cpufreq",
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.flags		= CPUFREQ_CONST_LOOPS,
@@ -329,6 +352,7 @@ static struct cpufreq_driver qoriq_cpufreq_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= qoriq_cpufreq_target,
 	.target_index	= qoriq_cpufreq_target,
 	.get		= cpufreq_generic_get,
 	.get		= cpufreq_generic_get,
+	.ready		= qoriq_cpufreq_ready,
 	.attr		= cpufreq_generic_attr,
 	.attr		= cpufreq_generic_attr,
 };
 };
 
 

+ 294 - 0
drivers/cpufreq/sti-cpufreq.c

@@ -0,0 +1,294 @@
+/*
+ * Match running platform with pre-defined OPP values for CPUFreq
+ *
+ * Author: Ajit Pal Singh <ajitpal.singh@st.com>
+ *         Lee Jones <lee.jones@linaro.org>
+ *
+ * Copyright (C) 2015 STMicroelectronics (R&D) Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License as
+ * published by the Free Software Foundation
+ */
+
+#include <linux/cpu.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pm_opp.h>
+#include <linux/regmap.h>
+
+#define VERSION_ELEMENTS	3
+#define MAX_PCODE_NAME_LEN	7
+
+#define VERSION_SHIFT		28
+#define HW_INFO_INDEX		1
+#define MAJOR_ID_INDEX		1
+#define MINOR_ID_INDEX		2
+
+/*
+ * Only match on "suitable for ALL versions" entries
+ *
+ * This will be used with the BIT() macro.  It sets the
+ * top bit of a 32bit value and is equal to 0x80000000.
+ */
+#define DEFAULT_VERSION		31
+
+enum {
+	PCODE = 0,
+	SUBSTRATE,
+	DVFS_MAX_REGFIELDS,
+};
+
+/**
+ * ST CPUFreq Driver Data
+ *
+ * @cpu_node		CPU's OF node
+ * @syscfg_eng		Engineering Syscon register map
+ * @regmap		Syscon register map
+ */
+static struct sti_cpufreq_ddata {
+	struct device *cpu;
+	struct regmap *syscfg_eng;
+	struct regmap *syscfg;
+} ddata;
+
+static int sti_cpufreq_fetch_major(void) {
+	struct device_node *np = ddata.cpu->of_node;
+	struct device *dev = ddata.cpu;
+	unsigned int major_offset;
+	unsigned int socid;
+	int ret;
+
+	ret = of_property_read_u32_index(np, "st,syscfg",
+					 MAJOR_ID_INDEX, &major_offset);
+	if (ret) {
+		dev_err(dev, "No major number offset provided in %s [%d]\n",
+			np->full_name, ret);
+		return ret;
+	}
+
+	ret = regmap_read(ddata.syscfg, major_offset, &socid);
+	if (ret) {
+		dev_err(dev, "Failed to read major number from syscon [%d]\n",
+			ret);
+		return ret;
+	}
+
+	return ((socid >> VERSION_SHIFT) & 0xf) + 1;
+}
+
+static int sti_cpufreq_fetch_minor(void)
+{
+	struct device *dev = ddata.cpu;
+	struct device_node *np = dev->of_node;
+	unsigned int minor_offset;
+	unsigned int minid;
+	int ret;
+
+	ret = of_property_read_u32_index(np, "st,syscfg-eng",
+					 MINOR_ID_INDEX, &minor_offset);
+	if (ret) {
+		dev_err(dev,
+			"No minor number offset provided %s [%d]\n",
+			np->full_name, ret);
+		return ret;
+	}
+
+	ret = regmap_read(ddata.syscfg_eng, minor_offset, &minid);
+	if (ret) {
+		dev_err(dev,
+			"Failed to read the minor number from syscon [%d]\n",
+			ret);
+		return ret;
+	}
+
+	return minid & 0xf;
+}
+
+static int sti_cpufreq_fetch_regmap_field(const struct reg_field *reg_fields,
+					  int hw_info_offset, int field)
+{
+	struct regmap_field *regmap_field;
+	struct reg_field reg_field = reg_fields[field];
+	struct device *dev = ddata.cpu;
+	unsigned int value;
+	int ret;
+
+	reg_field.reg = hw_info_offset;
+	regmap_field = devm_regmap_field_alloc(dev,
+					       ddata.syscfg_eng,
+					       reg_field);
+	if (IS_ERR(regmap_field)) {
+		dev_err(dev, "Failed to allocate reg field\n");
+		return PTR_ERR(regmap_field);
+	}
+
+	ret = regmap_field_read(regmap_field, &value);
+	if (ret) {
+		dev_err(dev, "Failed to read %s code\n",
+			field ? "SUBSTRATE" : "PCODE");
+		return ret;
+	}
+
+	return value;
+}
+
+static const struct reg_field sti_stih407_dvfs_regfields[DVFS_MAX_REGFIELDS] = {
+	[PCODE]		= REG_FIELD(0, 16, 19),
+	[SUBSTRATE]	= REG_FIELD(0, 0, 2),
+};
+
+static const struct reg_field *sti_cpufreq_match(void)
+{
+	if (of_machine_is_compatible("st,stih407") ||
+	    of_machine_is_compatible("st,stih410"))
+		return sti_stih407_dvfs_regfields;
+
+	return NULL;
+}
+
+static int sti_cpufreq_set_opp_info(void)
+{
+	struct device *dev = ddata.cpu;
+	struct device_node *np = dev->of_node;
+	const struct reg_field *reg_fields;
+	unsigned int hw_info_offset;
+	unsigned int version[VERSION_ELEMENTS];
+	int pcode, substrate, major, minor;
+	int ret;
+	char name[MAX_PCODE_NAME_LEN];
+
+	reg_fields = sti_cpufreq_match();
+	if (!reg_fields) {
+		dev_err(dev, "This SoC doesn't support voltage scaling");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32_index(np, "st,syscfg-eng",
+					 HW_INFO_INDEX, &hw_info_offset);
+	if (ret) {
+		dev_warn(dev, "Failed to read HW info offset from DT\n");
+		substrate = DEFAULT_VERSION;
+		pcode = 0;
+		goto use_defaults;
+	}
+
+	pcode = sti_cpufreq_fetch_regmap_field(reg_fields,
+					       hw_info_offset,
+					       PCODE);
+	if (pcode < 0) {
+		dev_warn(dev, "Failed to obtain process code\n");
+		/* Use default pcode */
+		pcode = 0;
+	}
+
+	substrate = sti_cpufreq_fetch_regmap_field(reg_fields,
+						   hw_info_offset,
+						   SUBSTRATE);
+	if (substrate) {
+		dev_warn(dev, "Failed to obtain substrate code\n");
+		/* Use default substrate */
+		substrate = DEFAULT_VERSION;
+	}
+
+use_defaults:
+	major = sti_cpufreq_fetch_major();
+	if (major < 0) {
+		dev_err(dev, "Failed to obtain major version\n");
+		/* Use default major number */
+		major = DEFAULT_VERSION;
+	}
+
+	minor = sti_cpufreq_fetch_minor();
+	if (minor < 0) {
+		dev_err(dev, "Failed to obtain minor version\n");
+		/* Use default minor number */
+		minor = DEFAULT_VERSION;
+	}
+
+	snprintf(name, MAX_PCODE_NAME_LEN, "pcode%d", pcode);
+
+	ret = dev_pm_opp_set_prop_name(dev, name);
+	if (ret) {
+		dev_err(dev, "Failed to set prop name\n");
+		return ret;
+	}
+
+	version[0] = BIT(major);
+	version[1] = BIT(minor);
+	version[2] = BIT(substrate);
+
+	ret = dev_pm_opp_set_supported_hw(dev, version, VERSION_ELEMENTS);
+	if (ret) {
+		dev_err(dev, "Failed to set supported hardware\n");
+		return ret;
+	}
+
+	dev_dbg(dev, "pcode: %d major: %d minor: %d substrate: %d\n",
+		pcode, major, minor, substrate);
+	dev_dbg(dev, "version[0]: %x version[1]: %x version[2]: %x\n",
+		version[0], version[1], version[2]);
+
+	return 0;
+}
+
+static int sti_cpufreq_fetch_syscon_regsiters(void)
+{
+	struct device *dev = ddata.cpu;
+	struct device_node *np = dev->of_node;
+
+	ddata.syscfg = syscon_regmap_lookup_by_phandle(np, "st,syscfg");
+	if (IS_ERR(ddata.syscfg)) {
+		dev_err(dev,  "\"st,syscfg\" not supplied\n");
+		return PTR_ERR(ddata.syscfg);
+	}
+
+	ddata.syscfg_eng = syscon_regmap_lookup_by_phandle(np, "st,syscfg-eng");
+	if (IS_ERR(ddata.syscfg_eng)) {
+		dev_err(dev, "\"st,syscfg-eng\" not supplied\n");
+		return PTR_ERR(ddata.syscfg_eng);
+	}
+
+	return 0;
+}
+
+static int sti_cpufreq_init(void)
+{
+	int ret;
+
+	ddata.cpu = get_cpu_device(0);
+	if (!ddata.cpu) {
+		dev_err(ddata.cpu, "Failed to get device for CPU0\n");
+		goto skip_voltage_scaling;
+	}
+
+	if (!of_get_property(ddata.cpu->of_node, "operating-points-v2", NULL)) {
+		dev_err(ddata.cpu, "OPP-v2 not supported\n");
+		goto skip_voltage_scaling;
+	}
+
+	ret = sti_cpufreq_fetch_syscon_regsiters();
+	if (ret)
+		goto skip_voltage_scaling;
+
+	ret = sti_cpufreq_set_opp_info();
+	if (!ret)
+		goto register_cpufreq_dt;
+
+skip_voltage_scaling:
+	dev_err(ddata.cpu, "Not doing voltage scaling\n");
+
+register_cpufreq_dt:
+	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
+
+	return 0;
+}
+module_init(sti_cpufreq_init);
+
+MODULE_DESCRIPTION("STMicroelectronics CPUFreq/OPP driver");
+MODULE_AUTHOR("Ajitpal Singh <ajitpal.singh@st.com>");
+MODULE_AUTHOR("Lee Jones <lee.jones@linaro.org>");
+MODULE_LICENSE("GPL v2");

+ 22 - 0
include/linux/pm_opp.h

@@ -55,6 +55,11 @@ int dev_pm_opp_enable(struct device *dev, unsigned long freq);
 int dev_pm_opp_disable(struct device *dev, unsigned long freq);
 int dev_pm_opp_disable(struct device *dev, unsigned long freq);
 
 
 struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
 struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
+int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
+				unsigned int count);
+void dev_pm_opp_put_supported_hw(struct device *dev);
+int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
+void dev_pm_opp_put_prop_name(struct device *dev);
 #else
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
 {
@@ -129,6 +134,23 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
 {
 {
 	return ERR_PTR(-EINVAL);
 	return ERR_PTR(-EINVAL);
 }
 }
+
+static inline int dev_pm_opp_set_supported_hw(struct device *dev,
+					      const u32 *versions,
+					      unsigned int count)
+{
+	return -EINVAL;
+}
+
+static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
+
+static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
+{
+	return -EINVAL;
+}
+
+static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
+
 #endif		/* CONFIG_PM_OPP */
 #endif		/* CONFIG_PM_OPP */
 
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)