Pārlūkot izejas kodu

Merge branch 'pm-cpufreq'

* pm-cpufreq: (28 commits)
  MAINTAINERS: cpufreq: add bmips-cpufreq.c
  cpufreq: CPPC: add ACPI_PROCESSOR dependency
  cpufreq: make ti-cpufreq explicitly non-modular
  cpufreq: Do not clear real_cpus mask on policy init
  cpufreq: dt: Don't use generic platdev driver for ti-cpufreq platforms
  cpufreq: ti: Add cpufreq driver to determine available OPPs at runtime
  Documentation: dt: add bindings for ti-cpufreq
  cpufreq: qoriq: Don't look at clock implementation details
  cpufreq: qoriq: add ARM64 SoCs support
  cpufreq: brcmstb-avs-cpufreq: remove unnecessary platform_set_drvdata()
  cpufreq: s3c2416: double free on driver init error path
  MIPS: BMIPS: enable CPUfreq
  cpufreq: bmips-cpufreq: CPUfreq driver for Broadcom's BMIPS SoCs
  BMIPS: Enable prerequisites for CPUfreq in MIPS Kconfig.
  MIPS: BMIPS: Update defconfig
  cpufreq: Fix typos in comments
  cpufreq: intel_pstate: Calculate guaranteed performance for HWP
  cpufreq: intel_pstate: Make HWP limits compatible with legacy
  cpufreq: intel_pstate: Lower frequency than expected under no_turbo
  cpufreq: intel_pstate: Operation mode control from sysfs
  ...
Rafael J. Wysocki 8 gadi atpakaļ
vecāks
revīzija
f6cbe34f52
36 mainītis faili ar 1408 papildinājumiem un 534 dzēšanām
  1. 13 11
      Documentation/cpu-freq/core.txt
  2. 99 78
      Documentation/cpu-freq/cpu-drivers.txt
  3. 6 18
      Documentation/cpu-freq/cpufreq-stats.txt
  4. 177 145
      Documentation/cpu-freq/governors.txt
  5. 17 6
      Documentation/cpu-freq/index.txt
  6. 15 0
      Documentation/cpu-freq/intel-pstate.txt
  7. 32 28
      Documentation/cpu-freq/user-guide.txt
  8. 128 0
      Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
  9. 7 0
      MAINTAINERS
  10. 1 1
      arch/arm/configs/exynos_defconfig
  11. 1 1
      arch/arm/configs/multi_v5_defconfig
  12. 1 1
      arch/arm/configs/multi_v7_defconfig
  13. 1 1
      arch/arm/configs/mvebu_v5_defconfig
  14. 1 1
      arch/arm/configs/pxa_defconfig
  15. 1 1
      arch/arm/configs/shmobile_defconfig
  16. 2 0
      arch/mips/Kconfig
  17. 10 6
      arch/mips/configs/bmips_stb_defconfig
  18. 0 1
      arch/mips/configs/lemote2f_defconfig
  19. 0 1
      arch/powerpc/configs/ppc6xx_defconfig
  20. 1 1
      arch/sh/configs/sh7785lcr_32bit_defconfig
  21. 1 3
      drivers/acpi/processor_perflib.c
  22. 11 9
      drivers/cpufreq/Kconfig
  23. 12 1
      drivers/cpufreq/Kconfig.arm
  24. 2 0
      drivers/cpufreq/Makefile
  25. 188 0
      drivers/cpufreq/bmips-cpufreq.c
  26. 0 2
      drivers/cpufreq/brcmstb-avs-cpufreq.c
  27. 0 2
      drivers/cpufreq/cpufreq-dt-platdev.c
  28. 5 16
      drivers/cpufreq/cpufreq.c
  29. 0 14
      drivers/cpufreq/cpufreq_stats.c
  30. 310 75
      drivers/cpufreq/intel_pstate.c
  31. 47 3
      drivers/cpufreq/powernv-cpufreq.c
  32. 0 3
      drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
  33. 49 99
      drivers/cpufreq/qoriq-cpufreq.c
  34. 0 1
      drivers/cpufreq/s3c2416-cpufreq.c
  35. 268 0
      drivers/cpufreq/ti-cpufreq.c
  36. 2 5
      include/linux/cpufreq.h

+ 13 - 11
Documentation/cpu-freq/core.txt

@@ -8,6 +8,8 @@
 
 		    Dominik Brodowski  <linux@brodo.de>
 		     David Kimdon <dwhedon@debian.org>
+		Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+		   Viresh Kumar <viresh.kumar@linaro.org>
 
 
 
@@ -36,10 +38,11 @@ speed limits (like LCD drivers on ARM architecture). Additionally, the
 kernel "constant" loops_per_jiffy is updated on frequency changes
 here.
 
-Reference counting is done by cpufreq_get_cpu and cpufreq_put_cpu,
-which make sure that the cpufreq processor driver is correctly
-registered with the core, and will not be unloaded until
-cpufreq_put_cpu is called.
+Reference counting of the cpufreq policies is done by cpufreq_cpu_get
+and cpufreq_cpu_put, which make sure that the cpufreq driver is
+correctly registered with the core, and will not be unloaded until
+cpufreq_put_cpu is called. That also ensures that the respective cpufreq
+policy doesn't get freed while being used.
 
 2. CPUFreq notifiers
 ====================
@@ -69,18 +72,16 @@ CPUFreq policy notifier is called twice for a policy transition:
 The phase is specified in the second argument to the notifier.
 
 The third argument, a void *pointer, points to a struct cpufreq_policy
-consisting of five values: cpu, min, max, policy and max_cpu_freq. min 
-and max are the lower and upper frequencies (in kHz) of the new
-policy, policy the new policy, cpu the number of the affected CPU; and 
-max_cpu_freq the maximum supported CPU frequency. This value is given 
-for informational purposes only.
+consisting of several values, including min, max (the lower and upper
+frequencies (in kHz) of the new policy).
 
 
 2.2 CPUFreq transition notifiers
 --------------------------------
 
-These are notified twice when the CPUfreq driver switches the CPU core
-frequency and this change has any external implications.
+These are notified twice for each online CPU in the policy, when the
+CPUfreq driver switches the CPU core frequency and this change has no
+any external implications.
 
 The second argument specifies the phase - CPUFREQ_PRECHANGE or
 CPUFREQ_POSTCHANGE.
@@ -90,6 +91,7 @@ values:
 cpu	- number of the affected CPU
 old	- old frequency
 new	- new frequency
+flags	- flags of the cpufreq driver
 
 3. CPUFreq Table Generation with Operating Performance Point (OPP)
 ==================================================================

+ 99 - 78
Documentation/cpu-freq/cpu-drivers.txt

@@ -9,6 +9,8 @@
 
 
 		    Dominik Brodowski  <linux@brodo.de>
+		Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+		   Viresh Kumar <viresh.kumar@linaro.org>
 
 
 
@@ -49,49 +51,65 @@ using cpufreq_register_driver()
 
 What shall this struct cpufreq_driver contain? 
 
-cpufreq_driver.name -		The name of this driver.
+ .name - The name of this driver.
 
-cpufreq_driver.init -		A pointer to the per-CPU initialization 
-				function.
+ .init - A pointer to the per-policy initialization function.
 
-cpufreq_driver.verify -		A pointer to a "verification" function.
+ .verify - A pointer to a "verification" function.
 
-cpufreq_driver.setpolicy _or_ 
-cpufreq_driver.target/
-target_index		-	See below on the differences.
+ .setpolicy _or_ .fast_switch _or_ .target _or_ .target_index - See
+ below on the differences.
 
 And optionally
 
-cpufreq_driver.exit -		A pointer to a per-CPU cleanup
-				function called during CPU_POST_DEAD
-				phase of cpu hotplug process.
+ .flags - Hints for the cpufreq core.
 
-cpufreq_driver.stop_cpu -	A pointer to a per-CPU stop function
-				called during CPU_DOWN_PREPARE phase of
-				cpu hotplug process.
+ .driver_data - cpufreq driver specific data.
 
-cpufreq_driver.resume -		A pointer to a per-CPU resume function
-				which is called with interrupts disabled
-				and _before_ the pre-suspend frequency
-				and/or policy is restored by a call to
-				->target/target_index or ->setpolicy.
+ .resolve_freq - Returns the most appropriate frequency for a target
+ frequency. Doesn't change the frequency though.
 
-cpufreq_driver.attr -		A pointer to a NULL-terminated list of
-				"struct freq_attr" which allow to
-				export values to sysfs.
+ .get_intermediate and target_intermediate - Used to switch to stable
+ frequency while changing CPU frequency.
 
-cpufreq_driver.get_intermediate
-and target_intermediate		Used to switch to stable frequency while
-				changing CPU frequency.
+ .get - Returns current frequency of the CPU.
+
+ .bios_limit - Returns HW/BIOS max frequency limitations for the CPU.
+
+ .exit - A pointer to a per-policy cleanup function called during
+ CPU_POST_DEAD phase of cpu hotplug process.
+
+ .stop_cpu - A pointer to a per-policy stop function called during
+ CPU_DOWN_PREPARE phase of cpu hotplug process.
+
+ .suspend - A pointer to a per-policy suspend function which is called
+ with interrupts disabled and _after_ the governor is stopped for the
+ policy.
+
+ .resume - A pointer to a per-policy resume function which is called
+ with interrupts disabled and _before_ the governor is started again.
+
+ .ready - A pointer to a per-policy ready function which is called after
+ the policy is fully initialized.
+
+ .attr - A pointer to a NULL-terminated list of "struct freq_attr" which
+ allow to export values to sysfs.
+
+ .boost_enabled - If set, boost frequencies are enabled.
+
+ .set_boost - A pointer to a per-policy function to enable/disable boost
+ frequencies.
 
 
 1.2 Per-CPU Initialization
 --------------------------
 
 Whenever a new CPU is registered with the device model, or after the
-cpufreq driver registers itself, the per-CPU initialization function 
-cpufreq_driver.init is called. It takes a struct cpufreq_policy
-*policy as argument. What to do now?
+cpufreq driver registers itself, the per-policy initialization function
+cpufreq_driver.init is called if no cpufreq policy existed for the CPU.
+Note that the .init() and .exit() routines are called only once for the
+policy and not for each CPU managed by the policy. It takes a struct
+cpufreq_policy *policy as argument. What to do now?
 
 If necessary, activate the CPUfreq support on your CPU.
 
@@ -117,47 +135,45 @@ policy->governor		must contain the "default policy" for
 				cpufreq_driver.setpolicy or
 				cpufreq_driver.target/target_index is called
 				with these values.
+policy->cpus			Update this with the masks of the
+				(online + offline) CPUs that do DVFS
+				along with this CPU (i.e.  that share
+				clock/voltage rails with it).
 
 For setting some of these values (cpuinfo.min[max]_freq, policy->min[max]), the
 frequency table helpers might be helpful. See the section 2 for more information
 on them.
 
-SMP systems normally have same clock source for a group of cpus. For these the
-.init() would be called only once for the first online cpu. Here the .init()
-routine must initialize policy->cpus with mask of all possible cpus (Online +
-Offline) that share the clock. Then the core would copy this mask onto
-policy->related_cpus and will reset policy->cpus to carry only online cpus.
-
 
 1.3 verify
-------------
+----------
 
 When the user decides a new policy (consisting of
 "policy,governor,min,max") shall be set, this policy must be validated
 so that incompatible values can be corrected. For verifying these
-values, a frequency table helper and/or the
-cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned
-int min_freq, unsigned int max_freq) function might be helpful. See
-section 2 for details on frequency table helpers.
+values cpufreq_verify_within_limits(struct cpufreq_policy *policy,
+unsigned int min_freq, unsigned int max_freq) function might be helpful.
+See section 2 for details on frequency table helpers.
 
 You need to make sure that at least one valid frequency (or operating
 range) is within policy->min and policy->max. If necessary, increase
 policy->max first, and only if this is no solution, decrease policy->min.
 
 
-1.4 target/target_index or setpolicy?
-----------------------------
+1.4 target or target_index or setpolicy or fast_switch?
+-------------------------------------------------------
 
 Most cpufreq drivers or even most cpu frequency scaling algorithms 
-only allow the CPU to be set to one frequency. For these, you use the
-->target/target_index call.
+only allow the CPU frequency to be set to predefined fixed values. For
+these, you use the ->target(), ->target_index() or ->fast_switch()
+callbacks.
 
-Some cpufreq-capable processors switch the frequency between certain
-limits on their own. These shall use the ->setpolicy call
+Some cpufreq capable processors switch the frequency between certain
+limits on their own. These shall use the ->setpolicy() callback.
 
 
 1.5. target/target_index
--------------
+------------------------
 
 The target_index call has two arguments: struct cpufreq_policy *policy,
 and unsigned int index (into the exposed frequency table).
@@ -186,9 +202,20 @@ actual frequency must be determined using the following rules:
 Here again the frequency table helper might assist you - see section 2
 for details.
 
+1.6. fast_switch
+----------------
 
-1.6 setpolicy
----------------
+This function is used for frequency switching from scheduler's context.
+Not all drivers are expected to implement it, as sleeping from within
+this callback isn't allowed. This callback must be highly optimized to
+do switching as fast as possible.
+
+This function has two arguments: struct cpufreq_policy *policy and
+unsigned int target_frequency.
+
+
+1.7 setpolicy
+-------------
 
 The setpolicy call only takes a struct cpufreq_policy *policy as
 argument. You need to set the lower limit of the in-processor or
@@ -198,7 +225,7 @@ setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
 powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
 the reference implementation in drivers/cpufreq/longrun.c
 
-1.7 get_intermediate and target_intermediate
+1.8 get_intermediate and target_intermediate
 --------------------------------------------
 
 Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.
@@ -222,42 +249,36 @@ failures as core would send notifications for that.
 
 As most cpufreq processors only allow for being set to a few specific
 frequencies, a "frequency table" with some functions might assist in
-some work of the processor driver. Such a "frequency table" consists
-of an array of struct cpufreq_frequency_table entries, with any value in
-"driver_data" you want to use, and the corresponding frequency in
-"frequency". At the end of the table, you need to add a
-cpufreq_frequency_table entry with frequency set to CPUFREQ_TABLE_END. And
-if you want to skip one entry in the table, set the frequency to 
-CPUFREQ_ENTRY_INVALID. The entries don't need to be in ascending
-order.
-
-By calling cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
-					struct cpufreq_frequency_table *table);
-the cpuinfo.min_freq and cpuinfo.max_freq values are detected, and
-policy->min and policy->max are set to the same values. This is
-helpful for the per-CPU initialization stage.
-
-int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
-                                   struct cpufreq_frequency_table *table);
-assures that at least one valid frequency is within policy->min and
-policy->max, and all other criteria are met. This is helpful for the
-->verify call.
-
-int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
-                                   unsigned int target_freq,
-                                   unsigned int relation);
-
-is the corresponding frequency table helper for the ->target
-stage. Just pass the values to this function, and this function
-returns the number of the frequency table entry which contains
-the frequency the CPU shall be set to.
+some work of the processor driver. Such a "frequency table" consists of
+an array of struct cpufreq_frequency_table entries, with driver specific
+values in "driver_data", the corresponding frequency in "frequency" and
+flags set. At the end of the table, you need to add a
+cpufreq_frequency_table entry with frequency set to CPUFREQ_TABLE_END.
+And if you want to skip one entry in the table, set the frequency to
+CPUFREQ_ENTRY_INVALID. The entries don't need to be in sorted in any
+particular order, but if they are cpufreq core will do DVFS a bit
+quickly for them as search for best match is faster.
+
+By calling cpufreq_table_validate_and_show(), the cpuinfo.min_freq and
+cpuinfo.max_freq values are detected, and policy->min and policy->max
+are set to the same values. This is helpful for the per-CPU
+initialization stage.
+
+cpufreq_frequency_table_verify() assures that at least one valid
+frequency is within policy->min and policy->max, and all other criteria
+are met. This is helpful for the ->verify call.
+
+cpufreq_frequency_table_target() is the corresponding frequency table
+helper for the ->target stage. Just pass the values to this function,
+and this function returns the of the frequency table entry which
+contains the frequency the CPU shall be set to.
 
 The following macros can be used as iterators over cpufreq_frequency_table:
 
 cpufreq_for_each_entry(pos, table) - iterates over all entries of frequency
 table.
 
-cpufreq-for_each_valid_entry(pos, table) - iterates over all entries,
+cpufreq_for_each_valid_entry(pos, table) - iterates over all entries,
 excluding CPUFREQ_ENTRY_INVALID frequencies.
 Use arguments "pos" - a cpufreq_frequency_table * as a loop cursor and
 "table" - the cpufreq_frequency_table * you want to iterate over.

+ 6 - 18
Documentation/cpu-freq/cpufreq-stats.txt

@@ -34,10 +34,10 @@ cpufreq stats provides following statistics (explained in detail below).
 -  total_trans
 -  trans_table
 
-All the statistics will be from the time the stats driver has been inserted 
-to the time when a read of a particular statistic is done. Obviously, stats 
-driver will not have any information about the frequency transitions before
-the stats driver insertion.
+All the statistics will be from the time the stats driver has been inserted
+(or the time the stats were reset) to the time when a read of a particular
+statistic is done. Obviously, stats driver will not have any information
+about the frequency transitions before the stats driver insertion.
 
 --------------------------------------------------------------------------------
 <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l
@@ -110,25 +110,13 @@ Config Main Menu
 		CPU Frequency scaling  --->
 			[*] CPU Frequency scaling
 			[*]   CPU frequency translation statistics
-			[*]     CPU frequency translation statistics details
 
 
 "CPU Frequency scaling" (CONFIG_CPU_FREQ) should be enabled to configure
 cpufreq-stats.
 
 "CPU frequency translation statistics" (CONFIG_CPU_FREQ_STAT) provides the
-basic statistics which includes time_in_state and total_trans.
+statistics which includes time_in_state, total_trans and trans_table.
 
-"CPU frequency translation statistics details" (CONFIG_CPU_FREQ_STAT_DETAILS)
-provides fine grained cpufreq stats by trans_table. The reason for having a
-separate config option for trans_table is:
-- trans_table goes against the traditional /sysfs rule of one value per
-  interface. It provides a whole bunch of value in a 2 dimensional matrix
-  form.
-
-Once these two options are enabled and your CPU supports cpufrequency, you
+Once this option is enabled and your CPU supports cpufrequency, you
 will be able to see the CPU frequency statistics in /sysfs.
-
-
-
-

+ 177 - 145
Documentation/cpu-freq/governors.txt

@@ -10,6 +10,8 @@
 
 		    Dominik Brodowski  <linux@brodo.de>
             some additions and corrections by Nico Golde <nico@ngolde.de>
+		Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+		   Viresh Kumar <viresh.kumar@linaro.org>
 
 
 
@@ -28,32 +30,27 @@ Contents:
 2.3  Userspace
 2.4  Ondemand
 2.5  Conservative
+2.6  Schedutil
 
 3.   The Governor Interface in the CPUfreq Core
 
+4.   References
 
 
 1. What Is A CPUFreq Governor?
 ==============================
 
 Most cpufreq drivers (except the intel_pstate and longrun) or even most
-cpu frequency scaling algorithms only offer the CPU to be set to one
-frequency. In order to offer dynamic frequency scaling, the cpufreq
-core must be able to tell these drivers of a "target frequency". So
-these specific drivers will be transformed to offer a "->target/target_index"
-call instead of the existing "->setpolicy" call. For "longrun", all
-stays the same, though.
+cpu frequency scaling algorithms only allow the CPU frequency to be set
+to predefined fixed values.  In order to offer dynamic frequency
+scaling, the cpufreq core must be able to tell these drivers of a
+"target frequency". So these specific drivers will be transformed to
+offer a "->target/target_index/fast_switch()" call instead of the
+"->setpolicy()" call. For set_policy drivers, all stays the same,
+though.
 
 How to decide what frequency within the CPUfreq policy should be used?
-That's done using "cpufreq governors". Two are already in this patch
--- they're the already existing "powersave" and "performance" which
-set the frequency statically to the lowest or highest frequency,
-respectively. At least two more such governors will be ready for
-addition in the near future, but likely many more as there are various
-different theories and models about dynamic frequency scaling
-around. Using such a generic interface as cpufreq offers to scaling
-governors, these can be tested extensively, and the best one can be
-selected for each specific use.
+That's done using "cpufreq governors".
 
 Basically, it's the following flow graph:
 
@@ -71,7 +68,7 @@ CPU can be set to switch independently	 |	   CPU can only be set
 		    /			       the limits of policy->{min,max}
 		   /			            \
 		  /				     \
-	Using the ->setpolicy call,		 Using the ->target/target_index call,
+	Using the ->setpolicy call,		 Using the ->target/target_index/fast_switch call,
 	    the limits and the			  the frequency closest
 	     "policy" is set.			  to target_freq is set.
 						  It is assured that it
@@ -109,114 +106,159 @@ directory.
 2.4 Ondemand
 ------------
 
-The CPUfreq governor "ondemand" sets the CPU depending on the
-current usage. To do this the CPU must have the capability to
-switch the frequency very quickly.  There are a number of sysfs file
-accessible parameters:
-
-sampling_rate: measured in uS (10^-6 seconds), this is how often you
-want the kernel to look at the CPU usage and to make decisions on
-what to do about the frequency.  Typically this is set to values of
-around '10000' or more. It's default value is (cmp. with users-guide.txt):
-transition_latency * 1000
-Be aware that transition latency is in ns and sampling_rate is in us, so you
-get the same sysfs value by default.
-Sampling rate should always get adjusted considering the transition latency
-To set the sampling rate 750 times as high as the transition latency
-in the bash (as said, 1000 is default), do:
-echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \
-    >ondemand/sampling_rate
-
-sampling_rate_min:
-The sampling rate is limited by the HW transition latency:
-transition_latency * 100
-Or by kernel restrictions:
-If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed.
-If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is used, the
-limits depend on the CONFIG_HZ option:
-HZ=1000: min=20000us  (20ms)
-HZ=250:  min=80000us  (80ms)
-HZ=100:  min=200000us (200ms)
-The highest value of kernel and HW latency restrictions is shown and
-used as the minimum sampling rate.
-
-up_threshold: defines what the average CPU usage between the samplings
-of 'sampling_rate' needs to be for the kernel to make a decision on
-whether it should increase the frequency.  For example when it is set
-to its default value of '95' it means that between the checking
-intervals the CPU needs to be on average more than 95% in use to then
-decide that the CPU frequency needs to be increased.  
-
-ignore_nice_load: this parameter takes a value of '0' or '1'. When
-set to '0' (its default), all processes are counted towards the
-'cpu utilisation' value.  When set to '1', the processes that are
-run with a 'nice' value will not count (and thus be ignored) in the
-overall usage calculation.  This is useful if you are running a CPU
-intensive calculation on your laptop that you do not care how long it
-takes to complete as you can 'nice' it and prevent it from taking part
-in the deciding process of whether to increase your CPU frequency.
-
-sampling_down_factor: this parameter controls the rate at which the
-kernel makes a decision on when to decrease the frequency while running
-at top speed. When set to 1 (the default) decisions to reevaluate load
-are made at the same interval regardless of current clock speed. But
-when set to greater than 1 (e.g. 100) it acts as a multiplier for the
-scheduling interval for reevaluating load when the CPU is at its top
-speed due to high load. This improves performance by reducing the overhead
-of load evaluation and helping the CPU stay at its top speed when truly
-busy, rather than shifting back and forth in speed. This tunable has no
-effect on behavior at lower speeds/lower CPU loads.
-
-powersave_bias: this parameter takes a value between 0 to 1000. It
-defines the percentage (times 10) value of the target frequency that
-will be shaved off of the target. For example, when set to 100 -- 10%,
-when ondemand governor would have targeted 1000 MHz, it will target
-1000 MHz - (10% of 1000 MHz) = 900 MHz instead. This is set to 0
-(disabled) by default.
-When AMD frequency sensitivity powersave bias driver --
-drivers/cpufreq/amd_freq_sensitivity.c is loaded, this parameter
-defines the workload frequency sensitivity threshold in which a lower
-frequency is chosen instead of ondemand governor's original target.
-The frequency sensitivity is a hardware reported (on AMD Family 16h
-Processors and above) value between 0 to 100% that tells software how
-the performance of the workload running on a CPU will change when
-frequency changes. A workload with sensitivity of 0% (memory/IO-bound)
-will not perform any better on higher core frequency, whereas a
-workload with sensitivity of 100% (CPU-bound) will perform better
-higher the frequency. When the driver is loaded, this is set to 400
-by default -- for CPUs running workloads with sensitivity value below
-40%, a lower frequency is chosen. Unloading the driver or writing 0
-will disable this feature.
+The CPUfreq governor "ondemand" sets the CPU frequency depending on the
+current system load. Load estimation is triggered by the scheduler
+through the update_util_data->func hook; when triggered, cpufreq checks
+the CPU-usage statistics over the last period and the governor sets the
+CPU accordingly.  The CPU must have the capability to switch the
+frequency very quickly.
+
+Sysfs files:
+
+* sampling_rate:
+
+  Measured in uS (10^-6 seconds), this is how often you want the kernel
+  to look at the CPU usage and to make decisions on what to do about the
+  frequency.  Typically this is set to values of around '10000' or more.
+  It's default value is (cmp. with users-guide.txt): transition_latency
+  * 1000.  Be aware that transition latency is in ns and sampling_rate
+  is in us, so you get the same sysfs value by default.  Sampling rate
+  should always get adjusted considering the transition latency to set
+  the sampling rate 750 times as high as the transition latency in the
+  bash (as said, 1000 is default), do:
+
+  $ echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) > ondemand/sampling_rate
+
+* sampling_rate_min:
+
+  The sampling rate is limited by the HW transition latency:
+  transition_latency * 100
+
+  Or by kernel restrictions:
+  - If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed.
+  - If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is
+    used, the limits depend on the CONFIG_HZ option:
+    HZ=1000: min=20000us  (20ms)
+    HZ=250:  min=80000us  (80ms)
+    HZ=100:  min=200000us (200ms)
+
+  The highest value of kernel and HW latency restrictions is shown and
+  used as the minimum sampling rate.
+
+* up_threshold:
+
+  This defines what the average CPU usage between the samplings of
+  'sampling_rate' needs to be for the kernel to make a decision on
+  whether it should increase the frequency.  For example when it is set
+  to its default value of '95' it means that between the checking
+  intervals the CPU needs to be on average more than 95% in use to then
+  decide that the CPU frequency needs to be increased.
+
+* ignore_nice_load:
+
+  This parameter takes a value of '0' or '1'. When set to '0' (its
+  default), all processes are counted towards the 'cpu utilisation'
+  value.  When set to '1', the processes that are run with a 'nice'
+  value will not count (and thus be ignored) in the overall usage
+  calculation.  This is useful if you are running a CPU intensive
+  calculation on your laptop that you do not care how long it takes to
+  complete as you can 'nice' it and prevent it from taking part in the
+  deciding process of whether to increase your CPU frequency.
+
+* sampling_down_factor:
+
+  This parameter controls the rate at which the kernel makes a decision
+  on when to decrease the frequency while running at top speed. When set
+  to 1 (the default) decisions to reevaluate load are made at the same
+  interval regardless of current clock speed. But when set to greater
+  than 1 (e.g. 100) it acts as a multiplier for the scheduling interval
+  for reevaluating load when the CPU is at its top speed due to high
+  load. This improves performance by reducing the overhead of load
+  evaluation and helping the CPU stay at its top speed when truly busy,
+  rather than shifting back and forth in speed. This tunable has no
+  effect on behavior at lower speeds/lower CPU loads.
+
+* powersave_bias:
+
+  This parameter takes a value between 0 to 1000. It defines the
+  percentage (times 10) value of the target frequency that will be
+  shaved off of the target. For example, when set to 100 -- 10%, when
+  ondemand governor would have targeted 1000 MHz, it will target
+  1000 MHz - (10% of 1000 MHz) = 900 MHz instead. This is set to 0
+  (disabled) by default.
+
+  When AMD frequency sensitivity powersave bias driver --
+  drivers/cpufreq/amd_freq_sensitivity.c is loaded, this parameter
+  defines the workload frequency sensitivity threshold in which a lower
+  frequency is chosen instead of ondemand governor's original target.
+  The frequency sensitivity is a hardware reported (on AMD Family 16h
+  Processors and above) value between 0 to 100% that tells software how
+  the performance of the workload running on a CPU will change when
+  frequency changes. A workload with sensitivity of 0% (memory/IO-bound)
+  will not perform any better on higher core frequency, whereas a
+  workload with sensitivity of 100% (CPU-bound) will perform better
+  higher the frequency. When the driver is loaded, this is set to 400 by
+  default -- for CPUs running workloads with sensitivity value below
+  40%, a lower frequency is chosen. Unloading the driver or writing 0
+  will disable this feature.
 
 
 2.5 Conservative
 ----------------
 
 The CPUfreq governor "conservative", much like the "ondemand"
-governor, sets the CPU depending on the current usage.  It differs in
-behaviour in that it gracefully increases and decreases the CPU speed
-rather than jumping to max speed the moment there is any load on the
-CPU.  This behaviour more suitable in a battery powered environment.
-The governor is tweaked in the same manner as the "ondemand" governor
-through sysfs with the addition of:
-
-freq_step: this describes what percentage steps the cpu freq should be
-increased and decreased smoothly by.  By default the cpu frequency will
-increase in 5% chunks of your maximum cpu frequency.  You can change this
-value to anywhere between 0 and 100 where '0' will effectively lock your
-CPU at a speed regardless of its load whilst '100' will, in theory, make
-it behave identically to the "ondemand" governor.
-
-down_threshold: same as the 'up_threshold' found for the "ondemand"
-governor but for the opposite direction.  For example when set to its
-default value of '20' it means that if the CPU usage needs to be below
-20% between samples to have the frequency decreased.
-
-sampling_down_factor: similar functionality as in "ondemand" governor.
-But in "conservative", it controls the rate at which the kernel makes
-a decision on when to decrease the frequency while running in any
-speed. Load for frequency increase is still evaluated every
-sampling rate.
+governor, sets the CPU frequency depending on the current usage.  It
+differs in behaviour in that it gracefully increases and decreases the
+CPU speed rather than jumping to max speed the moment there is any load
+on the CPU. This behaviour is more suitable in a battery powered
+environment.  The governor is tweaked in the same manner as the
+"ondemand" governor through sysfs with the addition of:
+
+* freq_step:
+
+  This describes what percentage steps the cpu freq should be increased
+  and decreased smoothly by.  By default the cpu frequency will increase
+  in 5% chunks of your maximum cpu frequency.  You can change this value
+  to anywhere between 0 and 100 where '0' will effectively lock your CPU
+  at a speed regardless of its load whilst '100' will, in theory, make
+  it behave identically to the "ondemand" governor.
+
+* down_threshold:
+
+  Same as the 'up_threshold' found for the "ondemand" governor but for
+  the opposite direction.  For example when set to its default value of
+  '20' it means that if the CPU usage needs to be below 20% between
+  samples to have the frequency decreased.
+
+* sampling_down_factor:
+
+  Similar functionality as in "ondemand" governor.  But in
+  "conservative", it controls the rate at which the kernel makes a
+  decision on when to decrease the frequency while running in any speed.
+  Load for frequency increase is still evaluated every sampling rate.
+
+
+2.6 Schedutil
+-------------
+
+The "schedutil" governor aims at better integration with the Linux
+kernel scheduler.  Load estimation is achieved through the scheduler's
+Per-Entity Load Tracking (PELT) mechanism, which also provides
+information about the recent load [1].  This governor currently does
+load based DVFS only for tasks managed by CFS. RT and DL scheduler tasks
+are always run at the highest frequency.  Unlike all the other
+governors, the code is located under the kernel/sched/ directory.
+
+Sysfs files:
+
+* rate_limit_us:
+
+  This contains a value in microseconds. The governor waits for
+  rate_limit_us time before reevaluating the load again, after it has
+  evaluated the load once.
+
+For an in-depth comparison with the other governors refer to [2].
+
 
 3. The Governor Interface in the CPUfreq Core
 =============================================
@@ -225,26 +267,10 @@ A new governor must register itself with the CPUfreq core using
 "cpufreq_register_governor". The struct cpufreq_governor, which has to
 be passed to that function, must contain the following values:
 
-governor->name -	    A unique name for this governor
-governor->governor -	    The governor callback function
-governor->owner	-	    .THIS_MODULE for the governor module (if 
-			    appropriate)
-
-The governor->governor callback is called with the current (or to-be-set)
-cpufreq_policy struct for that CPU, and an unsigned int event. The
-following events are currently defined:
-
-CPUFREQ_GOV_START:   This governor shall start its duty for the CPU
-		     policy->cpu
-CPUFREQ_GOV_STOP:    This governor shall end its duty for the CPU
-		     policy->cpu
-CPUFREQ_GOV_LIMITS:  The limits for CPU policy->cpu have changed to
-		     policy->min and policy->max.
-
-If you need other "events" externally of your driver, _only_ use the
-cpufreq_governor_l(unsigned int cpu, unsigned int event) call to the
-CPUfreq core to ensure proper locking.
+governor->name - A unique name for this governor.
+governor->owner - .THIS_MODULE for the governor module (if appropriate).
 
+plus a set of hooks to the functions implementing the governor's logic.
 
 The CPUfreq governor may call the CPU processor driver using one of
 these two functions:
@@ -258,12 +284,18 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
                                    unsigned int relation);
 
 target_freq must be within policy->min and policy->max, of course.
-What's the difference between these two functions? When your governor
-still is in a direct code path of a call to governor->governor, the
-per-CPU cpufreq lock is still held in the cpufreq core, and there's
-no need to lock it again (in fact, this would cause a deadlock). So
-use __cpufreq_driver_target only in these cases. In all other cases 
-(for example, when there's a "daemonized" function that wakes up 
-every second), use cpufreq_driver_target to lock the cpufreq per-CPU
-lock before the command is passed to the cpufreq processor driver.
+What's the difference between these two functions? When your governor is
+in a direct code path of a call to governor callbacks, like
+governor->start(), the policy->rwsem is still held in the cpufreq core,
+and there's no need to lock it again (in fact, this would cause a
+deadlock). So use __cpufreq_driver_target only in these cases. In all
+other cases (for example, when there's a "daemonized" function that
+wakes up every second), use cpufreq_driver_target to take policy->rwsem
+before the command is passed to the cpufreq driver.
+
+4. References
+=============
+
+[1] Per-entity load tracking: https://lwn.net/Articles/531853/
+[2] Improvements in CPU frequency management: https://lwn.net/Articles/682391/
 

+ 17 - 6
Documentation/cpu-freq/index.txt

@@ -18,16 +18,29 @@
 
 Documents in this directory:
 ----------------------------
+
+amd-powernow.txt -	AMD powernow driver specific file.
+
+boost.txt -		Frequency boosting support.
+
 core.txt	-	General description of the CPUFreq core and
-			of CPUFreq notifiers
+			of CPUFreq notifiers.
+
+cpu-drivers.txt -	How to implement a new cpufreq processor driver.
 
-cpu-drivers.txt -	How to implement a new cpufreq processor driver
+cpufreq-nforce2.txt -	nVidia nForce2 platform specific file.
+
+cpufreq-stats.txt -	General description of sysfs cpufreq stats.
 
 governors.txt	-	What are cpufreq governors and how to
 			implement them?
 
 index.txt	-	File index, Mailing list and Links (this document)
 
+intel-pstate.txt -	Intel pstate cpufreq driver specific file.
+
+pcc-cpufreq.txt -	PCC cpufreq driver specific file.
+
 user-guide.txt	-	User Guide to CPUFreq
 
 
@@ -35,9 +48,7 @@ Mailing List
 ------------
 There is a CPU frequency changing CVS commit and general list where
 you can report bugs, problems or submit patches. To post a message,
-send an email to linux-pm@vger.kernel.org, to subscribe go to
-http://vger.kernel.org/vger-lists.html#linux-pm and follow the
-instructions there.
+send an email to linux-pm@vger.kernel.org.
 
 Links
 -----
@@ -48,7 +59,7 @@ how to access the CVS repository:
 * http://cvs.arm.linux.org.uk/
 
 the CPUFreq Mailing list:
-* http://vger.kernel.org/vger-lists.html#cpufreq
+* http://vger.kernel.org/vger-lists.html#linux-pm
 
 Clock and voltage scaling for the SA-1100:
 * http://www.lartmaker.nl/projects/scaling

+ 15 - 0
Documentation/cpu-freq/intel-pstate.txt

@@ -85,6 +85,21 @@ Sysfs will show :
 Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual
 Volume 3: System Programming Guide" to understand ratios.
 
+There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/
+that can be used for controlling the operation mode of the driver:
+
+      status: Three settings are possible:
+      "off"     - The driver is not in use at this time.
+      "active"  - The driver works as a P-state governor (default).
+      "passive" - The driver works as a regular cpufreq one and collaborates
+                  with the generic cpufreq governors (it sets P-states as
+                  requested by those governors).
+      The current setting is returned by reads from this attribute.  Writing one
+      of the above strings to it changes the operation mode as indicated by that
+      string, if possible.  If HW-managed P-states (HWP) are enabled, it is not
+      possible to change the driver's operation mode and attempts to write to
+      this attribute will fail.
+
 cpufreq sysfs for Intel P-State
 
 Since this driver registers with cpufreq, cpufreq sysfs is also presented.

+ 32 - 28
Documentation/cpu-freq/user-guide.txt

@@ -18,7 +18,7 @@
 Contents:
 ---------
 1. Supported Architectures and Processors
-1.1 ARM
+1.1 ARM and ARM64
 1.2 x86
 1.3 sparc64
 1.4 ppc
@@ -37,16 +37,10 @@ Contents:
 1. Supported Architectures and Processors
 =========================================
 
-1.1 ARM
--------
-
-The following ARM processors are supported by cpufreq:
-
-ARM Integrator
-ARM-SA1100
-ARM-SA1110
-Intel PXA
+1.1 ARM and ARM64
+-----------------
 
+Almost all ARM and ARM64 platforms support CPU frequency scaling.
 
 1.2 x86
 -------
@@ -69,6 +63,7 @@ Transmeta Crusoe
 Transmeta Efficeon
 VIA Cyrix 3 / C3
 various processors on some ACPI 2.0-compatible systems [*]
+And many more
 
 [*] Only if "ACPI Processor Performance States" are available
 to the ACPI<->BIOS interface.
@@ -147,10 +142,19 @@ mounted it at /sys, the cpufreq interface is located in a subdirectory
 "cpufreq" within the cpu-device directory
 (e.g. /sys/devices/system/cpu/cpu0/cpufreq/ for the first CPU).
 
+affected_cpus :			List of Online CPUs that require software
+				coordination of frequency.
+
+cpuinfo_cur_freq :		Current frequency of the CPU as obtained from
+				the hardware, in KHz. This is the frequency
+				the CPU actually runs at.
+
 cpuinfo_min_freq :		this file shows the minimum operating
 				frequency the processor can run at(in kHz) 
+
 cpuinfo_max_freq :		this file shows the maximum operating
 				frequency the processor can run at(in kHz) 
+
 cpuinfo_transition_latency	The time it takes on this CPU to
 				switch between two frequencies in nano
 				seconds. If unknown or known to be
@@ -163,25 +167,30 @@ cpuinfo_transition_latency	The time it takes on this CPU to
 				userspace daemon. Make sure to not
 				switch the frequency too often
 				resulting in performance loss.
-scaling_driver :		this file shows what cpufreq driver is
-				used to set the frequency on this CPU
+
+related_cpus :			List of Online + Offline CPUs that need software
+				coordination of frequency.
+
+scaling_available_frequencies : List of available frequencies, in KHz.
 
 scaling_available_governors :	this file shows the CPUfreq governors
 				available in this kernel. You can see the
 				currently activated governor in
 
+scaling_cur_freq :		Current frequency of the CPU as determined by
+				the governor and cpufreq core, in KHz. This is
+				the frequency the kernel thinks the CPU runs
+				at.
+
+scaling_driver :		this file shows what cpufreq driver is
+				used to set the frequency on this CPU
+
 scaling_governor,		and by "echoing" the name of another
 				governor you can change it. Please note
 				that some governors won't load - they only
 				work on some specific architectures or
 				processors.
 
-cpuinfo_cur_freq :		Current frequency of the CPU as obtained from
-				the hardware, in KHz. This is the frequency
-				the CPU actually runs at.
-
-scaling_available_frequencies : List of available frequencies, in KHz.
-
 scaling_min_freq and
 scaling_max_freq		show the current "policy limits" (in
 				kHz). By echoing new values into these
@@ -190,16 +199,11 @@ scaling_max_freq		show the current "policy limits" (in
 				first set scaling_max_freq, then
 				scaling_min_freq.
 
-affected_cpus :			List of Online CPUs that require software
-				coordination of frequency.
-
-related_cpus :			List of Online + Offline CPUs that need software
-				coordination of frequency.
-
-scaling_cur_freq :		Current frequency of the CPU as determined by
-				the governor and cpufreq core, in KHz. This is
-				the frequency the kernel thinks the CPU runs
-				at.
+scaling_setspeed		This can be read to get the currently programmed
+				value by the governor. This can be written to
+				change the current frequency for a group of
+				CPUs, represented by a policy. This is supported
+				currently only by the userspace governor.
 
 bios_limit :			If the BIOS tells the OS to limit a CPU to
 				lower frequencies, the user can read out the

+ 128 - 0
Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt

@@ -0,0 +1,128 @@
+TI CPUFreq and OPP bindings
+================================
+
+Certain TI SoCs, like those in the am335x, am437x, am57xx, and dra7xx
+families support different OPPs depending on the silicon variant in use.
+The ti-cpufreq driver can use revision and an efuse value from the SoC to
+provide the OPP framework with supported hardware information. This is
+used to determine which OPPs from the operating-points-v2 table get enabled
+when it is parsed by the OPP framework.
+
+Required properties:
+--------------------
+In 'cpus' nodes:
+- operating-points-v2: Phandle to the operating-points-v2 table to use.
+
+In 'operating-points-v2' table:
+- compatible: Should be
+	- 'operating-points-v2-ti-cpu' for am335x, am43xx, and dra7xx/am57xx SoCs
+- syscon: A phandle pointing to a syscon node representing the control module
+	  register space of the SoC.
+
+Optional properties:
+--------------------
+For each opp entry in 'operating-points-v2' table:
+- opp-supported-hw: Two bitfields indicating:
+	1. Which revision of the SoC the OPP is supported by
+	2. Which eFuse bits indicate this OPP is available
+
+	A bitwise AND is performed against these values and if any bit
+	matches, the OPP gets enabled.
+
+Example:
+--------
+
+/* From arch/arm/boot/dts/am33xx.dtsi */
+cpus {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	cpu@0 {
+		compatible = "arm,cortex-a8";
+		device_type = "cpu";
+		reg = <0>;
+
+		operating-points-v2 = <&cpu0_opp_table>;
+
+		clocks = <&dpll_mpu_ck>;
+		clock-names = "cpu";
+
+		clock-latency = <300000>; /* From omap-cpufreq driver */
+	};
+};
+
+/*
+ * cpu0 has different OPPs depending on SoC revision and some on revisions
+ * 0x2 and 0x4 have eFuse bits that indicate if they are available or not
+ */
+cpu0_opp_table: opp-table {
+	compatible = "operating-points-v2-ti-cpu";
+	syscon = <&scm_conf>;
+
+	/*
+	 * The three following nodes are marked with opp-suspend
+	 * because they can not be enabled simultaneously on a
+	 * single SoC.
+	 */
+	opp50@300000000 {
+		opp-hz = /bits/ 64 <300000000>;
+		opp-microvolt = <950000 931000 969000>;
+		opp-supported-hw = <0x06 0x0010>;
+		opp-suspend;
+	};
+
+	opp100@275000000 {
+		opp-hz = /bits/ 64 <275000000>;
+		opp-microvolt = <1100000 1078000 1122000>;
+		opp-supported-hw = <0x01 0x00FF>;
+		opp-suspend;
+	};
+
+	opp100@300000000 {
+		opp-hz = /bits/ 64 <300000000>;
+		opp-microvolt = <1100000 1078000 1122000>;
+		opp-supported-hw = <0x06 0x0020>;
+		opp-suspend;
+	};
+
+	opp100@500000000 {
+		opp-hz = /bits/ 64 <500000000>;
+		opp-microvolt = <1100000 1078000 1122000>;
+		opp-supported-hw = <0x01 0xFFFF>;
+	};
+
+	opp100@600000000 {
+		opp-hz = /bits/ 64 <600000000>;
+		opp-microvolt = <1100000 1078000 1122000>;
+		opp-supported-hw = <0x06 0x0040>;
+	};
+
+	opp120@600000000 {
+		opp-hz = /bits/ 64 <600000000>;
+		opp-microvolt = <1200000 1176000 1224000>;
+		opp-supported-hw = <0x01 0xFFFF>;
+	};
+
+	opp120@720000000 {
+		opp-hz = /bits/ 64 <720000000>;
+		opp-microvolt = <1200000 1176000 1224000>;
+		opp-supported-hw = <0x06 0x0080>;
+	};
+
+	oppturbo@720000000 {
+		opp-hz = /bits/ 64 <720000000>;
+		opp-microvolt = <1260000 1234800 1285200>;
+		opp-supported-hw = <0x01 0xFFFF>;
+	};
+
+	oppturbo@800000000 {
+		opp-hz = /bits/ 64 <800000000>;
+		opp-microvolt = <1260000 1234800 1285200>;
+		opp-supported-hw = <0x06 0x0100>;
+	};
+
+	oppnitro@1000000000 {
+		opp-hz = /bits/ 64 <1000000000>;
+		opp-microvolt = <1325000 1298500 1351500>;
+		opp-supported-hw = <0x04 0x0200>;
+	};
+};

+ 7 - 0
MAINTAINERS

@@ -2692,6 +2692,13 @@ F:	drivers/irqchip/irq-brcmstb*
 F:	include/linux/bcm963xx_nvram.h
 F:	include/linux/bcm963xx_tag.h
 
+BROADCOM BMIPS CPUFREQ DRIVER
+M:	Markus Mayer <mmayer@broadcom.com>
+M:	bcm-kernel-feedback-list@broadcom.com
+L:	linux-pm@vger.kernel.org
+S:	Maintained
+F:	drivers/cpufreq/bmips-cpufreq.c
+
 BROADCOM TG3 GIGABIT ETHERNET DRIVER
 M:	Siva Reddy Kallam <siva.kallam@broadcom.com>
 M:	Prashant Sreedharan <prashant@broadcom.com>

+ 1 - 1
arch/arm/configs/exynos_defconfig

@@ -24,7 +24,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x41000000,8M console=ttySAC1,115200 init=/linuxrc mem=256M"
 CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_USERSPACE=m

+ 1 - 1
arch/arm/configs/multi_v5_defconfig

@@ -58,7 +58,7 @@ CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_KIRKWOOD_CPUIDLE=y

+ 1 - 1
arch/arm/configs/multi_v7_defconfig

@@ -132,7 +132,7 @@ CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_KEXEC=y
 CONFIG_EFI=y
 CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_USERSPACE=m

+ 1 - 1
arch/arm/configs/mvebu_v5_defconfig

@@ -44,7 +44,7 @@ CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_KIRKWOOD_CPUIDLE=y

+ 1 - 1
arch/arm/configs/pxa_defconfig

@@ -97,7 +97,7 @@ CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_CMDLINE="root=/dev/ram0 ro"
 CONFIG_KEXEC=y
 CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_USERSPACE=m

+ 1 - 1
arch/arm/configs/shmobile_defconfig

@@ -38,7 +38,7 @@ CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_KEXEC=y
 CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y

+ 2 - 0
arch/mips/Kconfig

@@ -1703,6 +1703,8 @@ config CPU_BMIPS
 	select WEAK_ORDERING
 	select CPU_SUPPORTS_HIGHMEM
 	select CPU_HAS_PREFETCH
+	select CPU_SUPPORTS_CPUFREQ
+	select MIPS_EXTERNAL_TIMER
 	help
 	  Support for BMIPS32/3300/4350/4380 and BMIPS5000 processors.
 

+ 10 - 6
arch/mips/configs/bmips_stb_defconfig

@@ -9,13 +9,20 @@ CONFIG_MIPS_O32_FP64_SUPPORT=y
 # CONFIG_SWAP is not set
 CONFIG_NO_HZ=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_RD_GZIP=y
 CONFIG_EXPERT=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
+CONFIG_BMIPS_CPUFREQ=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y
@@ -24,7 +31,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_CFG80211=y
 CONFIG_NL80211_TESTMODE=y
@@ -34,8 +40,6 @@ CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
-CONFIG_PRINTK_TIME=y
-CONFIG_BRCMSTB_GISB_ARB=y
 CONFIG_MTD=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -51,16 +55,15 @@ CONFIG_USB_USBNET=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_POWER_SUPPLY=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_BRCMSTB=y
 CONFIG_POWER_RESET_SYSCON=y
+CONFIG_POWER_SUPPLY=y
 # CONFIG_HWMON is not set
 CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
@@ -82,6 +85,7 @@ CONFIG_CIFS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=y
+CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_CMDLINE_BOOL=y

+ 0 - 1
arch/mips/configs/lemote2f_defconfig

@@ -40,7 +40,6 @@ CONFIG_PM_STD_PARTITION="/dev/hda3"
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_DEBUG=y
 CONFIG_CPU_FREQ_STAT=m
-CONFIG_CPU_FREQ_STAT_DETAILS=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_USERSPACE=m

+ 0 - 1
arch/powerpc/configs/ppc6xx_defconfig

@@ -62,7 +62,6 @@ CONFIG_MPC8610_HPCD=y
 CONFIG_GEF_SBC610=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_STAT=m
-CONFIG_CPU_FREQ_STAT_DETAILS=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m

+ 1 - 1
arch/sh/configs/sh7785lcr_32bit_defconfig

@@ -25,7 +25,7 @@ CONFIG_SH_SH7785LCR=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_SH_CPU_FREQ=y
 CONFIG_HEARTBEAT=y

+ 1 - 3
drivers/acpi/processor_perflib.c

@@ -75,10 +75,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
 	struct acpi_processor *pr;
 	unsigned int ppc = 0;
 
-	if (event == CPUFREQ_START && ignore_ppc <= 0) {
+	if (ignore_ppc < 0)
 		ignore_ppc = 0;
-		return 0;
-	}
 
 	if (ignore_ppc)
 		return 0;

+ 11 - 9
drivers/cpufreq/Kconfig

@@ -37,14 +37,6 @@ config CPU_FREQ_STAT
 
 	  If in doubt, say N.
 
-config CPU_FREQ_STAT_DETAILS
-	bool "CPU frequency transition statistics details"
-	depends on CPU_FREQ_STAT
-	help
-	  Show detailed CPU frequency transition table in sysfs.
-
-	  If in doubt, say N.
-
 choice
 	prompt "Default CPUFreq governor"
 	default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ
@@ -271,6 +263,16 @@ config IA64_ACPI_CPUFREQ
 endif
 
 if MIPS
+config BMIPS_CPUFREQ
+	tristate "BMIPS CPUfreq Driver"
+	help
+	  This option adds a CPUfreq driver for BMIPS processors with
+	  support for configurable CPU frequency.
+
+	  For now, BMIPS5 chips are supported (such as the Broadcom 7425).
+
+	  If in doubt, say N.
+
 config LOONGSON2_CPUFREQ
 	tristate "Loongson2 CPUFreq Driver"
 	help
@@ -332,7 +334,7 @@ endif
 
 config QORIQ_CPUFREQ
 	tristate "CPU frequency scaling driver for Freescale QorIQ SoCs"
-	depends on OF && COMMON_CLK && (PPC_E500MC || ARM)
+	depends on OF && COMMON_CLK && (PPC_E500MC || ARM || ARM64)
 	depends on !CPU_THERMAL || THERMAL
 	select CLK_QORIQ
 	help

+ 12 - 1
drivers/cpufreq/Kconfig.arm

@@ -247,6 +247,17 @@ config ARM_TEGRA124_CPUFREQ
 	help
 	  This adds the CPUFreq driver support for Tegra124 SOCs.
 
+config ARM_TI_CPUFREQ
+	bool "Texas Instruments CPUFreq support"
+	depends on ARCH_OMAP2PLUS
+	help
+	  This driver enables valid OPPs on the running platform based on
+	  values contained within the SoC in use. Enable this in order to
+	  use the cpufreq-dt driver on all Texas Instruments platforms that
+	  provide dt based operating-points-v2 tables with opp-supported-hw
+	  data provided. Required for cpufreq support on AM335x, AM437x,
+	  DRA7x, and AM57x platforms.
+
 config ARM_PXA2xx_CPUFREQ
 	tristate "Intel PXA2xx CPUfreq driver"
 	depends on PXA27x || PXA25x
@@ -257,7 +268,7 @@ config ARM_PXA2xx_CPUFREQ
 
 config ACPI_CPPC_CPUFREQ
 	tristate "CPUFreq driver based on the ACPI CPPC spec"
-	depends on ACPI
+	depends on ACPI_PROCESSOR
 	select ACPI_CPPC_LIB
 	default n
 	help

+ 2 - 0
drivers/cpufreq/Makefile

@@ -77,6 +77,7 @@ obj-$(CONFIG_ARM_SPEAR_CPUFREQ)		+= spear-cpufreq.o
 obj-$(CONFIG_ARM_STI_CPUFREQ)		+= sti-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
+obj-$(CONFIG_ARM_TI_CPUFREQ)		+= ti-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
 obj-$(CONFIG_MACH_MVEBU_V7)		+= mvebu-cpufreq.o
@@ -98,6 +99,7 @@ obj-$(CONFIG_POWERNV_CPUFREQ)		+= powernv-cpufreq.o
 # Other platform drivers
 obj-$(CONFIG_AVR32_AT32AP_CPUFREQ)	+= at32ap-cpufreq.o
 obj-$(CONFIG_BFIN_CPU_FREQ)		+= blackfin-cpufreq.o
+obj-$(CONFIG_BMIPS_CPUFREQ)		+= bmips-cpufreq.o
 obj-$(CONFIG_CRIS_MACH_ARTPEC3)		+= cris-artpec3-cpufreq.o
 obj-$(CONFIG_ETRAXFS)			+= cris-etraxfs-cpufreq.o
 obj-$(CONFIG_IA64_ACPI_CPUFREQ)		+= ia64-acpi-cpufreq.o

+ 188 - 0
drivers/cpufreq/bmips-cpufreq.c

@@ -0,0 +1,188 @@
+/*
+ * CPU frequency scaling for Broadcom BMIPS SoCs
+ *
+ * Copyright (c) 2017 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+
+/* for mips_hpt_frequency */
+#include <asm/time.h>
+
+#define BMIPS_CPUFREQ_PREFIX	"bmips"
+#define BMIPS_CPUFREQ_NAME	BMIPS_CPUFREQ_PREFIX "-cpufreq"
+
+#define TRANSITION_LATENCY	(25 * 1000)	/* 25 us */
+
+#define BMIPS5_CLK_DIV_SET_SHIFT	0x7
+#define BMIPS5_CLK_DIV_SHIFT		0x4
+#define BMIPS5_CLK_DIV_MASK		0xf
+
+enum bmips_type {
+	BMIPS5000,
+	BMIPS5200,
+};
+
+struct cpufreq_compat {
+	const char *compatible;
+	unsigned int bmips_type;
+	unsigned int clk_mult;
+	unsigned int max_freqs;
+};
+
+#define BMIPS(c, t, m, f) { \
+	.compatible = c, \
+	.bmips_type = (t), \
+	.clk_mult = (m), \
+	.max_freqs = (f), \
+}
+
+static struct cpufreq_compat bmips_cpufreq_compat[] = {
+	BMIPS("brcm,bmips5000", BMIPS5000, 8, 4),
+	BMIPS("brcm,bmips5200", BMIPS5200, 8, 4),
+	{ }
+};
+
+static struct cpufreq_compat *priv;
+
+static int htp_freq_to_cpu_freq(unsigned int clk_mult)
+{
+	return mips_hpt_frequency * clk_mult / 1000;
+}
+
+static struct cpufreq_frequency_table *
+bmips_cpufreq_get_freq_table(const struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *table;
+	unsigned long cpu_freq;
+	int i;
+
+	cpu_freq = htp_freq_to_cpu_freq(priv->clk_mult);
+
+	table = kmalloc((priv->max_freqs + 1) * sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < priv->max_freqs; i++) {
+		table[i].frequency = cpu_freq / (1 << i);
+		table[i].driver_data = i;
+	}
+	table[i].frequency = CPUFREQ_TABLE_END;
+
+	return table;
+}
+
+static unsigned int bmips_cpufreq_get(unsigned int cpu)
+{
+	unsigned int div;
+	uint32_t mode;
+
+	switch (priv->bmips_type) {
+	case BMIPS5200:
+	case BMIPS5000:
+		mode = read_c0_brcm_mode();
+		div = ((mode >> BMIPS5_CLK_DIV_SHIFT) & BMIPS5_CLK_DIV_MASK);
+		break;
+	default:
+		div = 0;
+	}
+
+	return htp_freq_to_cpu_freq(priv->clk_mult) / (1 << div);
+}
+
+static int bmips_cpufreq_target_index(struct cpufreq_policy *policy,
+				      unsigned int index)
+{
+	unsigned int div = policy->freq_table[index].driver_data;
+
+	switch (priv->bmips_type) {
+	case BMIPS5200:
+	case BMIPS5000:
+		change_c0_brcm_mode(BMIPS5_CLK_DIV_MASK << BMIPS5_CLK_DIV_SHIFT,
+				    (1 << BMIPS5_CLK_DIV_SET_SHIFT) |
+				    (div << BMIPS5_CLK_DIV_SHIFT));
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
+static int bmips_cpufreq_exit(struct cpufreq_policy *policy)
+{
+	kfree(policy->freq_table);
+
+	return 0;
+}
+
+static int bmips_cpufreq_init(struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *freq_table;
+	int ret;
+
+	freq_table = bmips_cpufreq_get_freq_table(policy);
+	if (IS_ERR(freq_table)) {
+		ret = PTR_ERR(freq_table);
+		pr_err("%s: couldn't determine frequency table (%d).\n",
+			BMIPS_CPUFREQ_NAME, ret);
+		return ret;
+	}
+
+	ret = cpufreq_generic_init(policy, freq_table, TRANSITION_LATENCY);
+	if (ret)
+		bmips_cpufreq_exit(policy);
+	else
+		pr_info("%s: registered\n", BMIPS_CPUFREQ_NAME);
+
+	return ret;
+}
+
+static struct cpufreq_driver bmips_cpufreq_driver = {
+	.flags		= CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+	.verify		= cpufreq_generic_frequency_table_verify,
+	.target_index	= bmips_cpufreq_target_index,
+	.get		= bmips_cpufreq_get,
+	.init		= bmips_cpufreq_init,
+	.exit		= bmips_cpufreq_exit,
+	.attr		= cpufreq_generic_attr,
+	.name		= BMIPS_CPUFREQ_PREFIX,
+};
+
+static int __init bmips_cpufreq_probe(void)
+{
+	struct cpufreq_compat *cc;
+	struct device_node *np;
+
+	for (cc = bmips_cpufreq_compat; cc->compatible; cc++) {
+		np = of_find_compatible_node(NULL, "cpu", cc->compatible);
+		if (np) {
+			of_node_put(np);
+			priv = cc;
+			break;
+		}
+	}
+
+	/* We hit the guard element of the array. No compatible CPU found. */
+	if (!cc->compatible)
+		return -ENODEV;
+
+	return cpufreq_register_driver(&bmips_cpufreq_driver);
+}
+device_initcall(bmips_cpufreq_probe);
+
+MODULE_AUTHOR("Markus Mayer <mmayer@broadcom.com>");
+MODULE_DESCRIPTION("CPUfreq driver for Broadcom BMIPS SoCs");
+MODULE_LICENSE("GPL");

+ 0 - 2
drivers/cpufreq/brcmstb-avs-cpufreq.c

@@ -878,7 +878,6 @@ unmap_intr_base:
 	iounmap(priv->avs_intr_base);
 unmap_base:
 	iounmap(priv->base);
-	platform_set_drvdata(pdev, NULL);
 
 	return ret;
 }
@@ -1042,7 +1041,6 @@ static int brcm_avs_cpufreq_remove(struct platform_device *pdev)
 	priv = platform_get_drvdata(pdev);
 	iounmap(priv->base);
 	iounmap(priv->avs_intr_base);
-	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }

+ 0 - 2
drivers/cpufreq/cpufreq-dt-platdev.c

@@ -87,8 +87,6 @@ static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "socionext,uniphier-ld11", },
 	{ .compatible = "socionext,uniphier-ld20", },
 
-	{ .compatible = "ti,am33xx", },
-	{ .compatible = "ti,dra7", },
 	{ .compatible = "ti,omap2", },
 	{ .compatible = "ti,omap3", },
 	{ .compatible = "ti,omap4", },

+ 5 - 16
drivers/cpufreq/cpufreq.c

@@ -1078,15 +1078,11 @@ err_free_policy:
 	return NULL;
 }
 
-static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify)
+static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
 {
 	struct kobject *kobj;
 	struct completion *cmp;
 
-	if (notify)
-		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-					     CPUFREQ_REMOVE_POLICY, policy);
-
 	down_write(&policy->rwsem);
 	cpufreq_stats_free_table(policy);
 	kobj = &policy->kobj;
@@ -1104,7 +1100,7 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify)
 	pr_debug("wait complete\n");
 }
 
-static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify)
+static void cpufreq_policy_free(struct cpufreq_policy *policy)
 {
 	unsigned long flags;
 	int cpu;
@@ -1117,7 +1113,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify)
 		per_cpu(cpufreq_cpu_data, cpu) = NULL;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	cpufreq_policy_put_kobj(policy, notify);
+	cpufreq_policy_put_kobj(policy);
 	free_cpumask_var(policy->real_cpus);
 	free_cpumask_var(policy->related_cpus);
 	free_cpumask_var(policy->cpus);
@@ -1170,8 +1166,6 @@ static int cpufreq_online(unsigned int cpu)
 	if (new_policy) {
 		/* related_cpus should at least include policy->cpus. */
 		cpumask_copy(policy->related_cpus, policy->cpus);
-		/* Clear mask of registered CPUs */
-		cpumask_clear(policy->real_cpus);
 	}
 
 	/*
@@ -1244,17 +1238,12 @@ static int cpufreq_online(unsigned int cpu)
 			goto out_exit_policy;
 
 		cpufreq_stats_create_table(policy);
-		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-				CPUFREQ_CREATE_POLICY, policy);
 
 		write_lock_irqsave(&cpufreq_driver_lock, flags);
 		list_add(&policy->policy_list, &cpufreq_policy_list);
 		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 	}
 
-	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-				     CPUFREQ_START, policy);
-
 	ret = cpufreq_init_policy(policy);
 	if (ret) {
 		pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n",
@@ -1282,7 +1271,7 @@ out_exit_policy:
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
 out_free_policy:
-	cpufreq_policy_free(policy, !new_policy);
+	cpufreq_policy_free(policy);
 	return ret;
 }
 
@@ -1403,7 +1392,7 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 	remove_cpu_dev_symlink(policy, dev);
 
 	if (cpumask_empty(policy->real_cpus))
-		cpufreq_policy_free(policy, true);
+		cpufreq_policy_free(policy);
 }
 
 /**

+ 0 - 14
drivers/cpufreq/cpufreq_stats.c

@@ -25,9 +25,7 @@ struct cpufreq_stats {
 	unsigned int last_index;
 	u64 *time_in_state;
 	unsigned int *freq_table;
-#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 	unsigned int *trans_table;
-#endif
 };
 
 static int cpufreq_stats_update(struct cpufreq_stats *stats)
@@ -46,9 +44,7 @@ static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
 	unsigned int count = stats->max_state;
 
 	memset(stats->time_in_state, 0, count * sizeof(u64));
-#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 	memset(stats->trans_table, 0, count * count * sizeof(int));
-#endif
 	stats->last_time = get_jiffies_64();
 	stats->total_trans = 0;
 }
@@ -84,7 +80,6 @@ static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf,
 	return count;
 }
 
-#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 {
 	struct cpufreq_stats *stats = policy->stats;
@@ -129,7 +124,6 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 	return len;
 }
 cpufreq_freq_attr_ro(trans_table);
-#endif
 
 cpufreq_freq_attr_ro(total_trans);
 cpufreq_freq_attr_ro(time_in_state);
@@ -139,9 +133,7 @@ static struct attribute *default_attrs[] = {
 	&total_trans.attr,
 	&time_in_state.attr,
 	&reset.attr,
-#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 	&trans_table.attr,
-#endif
 	NULL
 };
 static struct attribute_group stats_attr_group = {
@@ -200,9 +192,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 
 	alloc_size = count * sizeof(int) + count * sizeof(u64);
 
-#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 	alloc_size += count * count * sizeof(int);
-#endif
 
 	/* Allocate memory for time_in_state/freq_table/trans_table in one go */
 	stats->time_in_state = kzalloc(alloc_size, GFP_KERNEL);
@@ -211,9 +201,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 
 	stats->freq_table = (unsigned int *)(stats->time_in_state + count);
 
-#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 	stats->trans_table = stats->freq_table + count;
-#endif
 
 	stats->max_state = count;
 
@@ -259,8 +247,6 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
 	cpufreq_stats_update(stats);
 
 	stats->last_index = new_index;
-#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 	stats->trans_table[old_index * stats->max_state + new_index]++;
-#endif
 	stats->total_trans++;
 }

+ 310 - 75
drivers/cpufreq/intel_pstate.c

@@ -358,6 +358,8 @@ static struct pstate_funcs pstate_funcs __read_mostly;
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
 
+static bool driver_registered __read_mostly;
+
 #ifdef CONFIG_ACPI
 static bool acpi_ppc;
 #endif
@@ -394,6 +396,7 @@ static struct perf_limits *limits = &performance_limits;
 static struct perf_limits *limits = &powersave_limits;
 #endif
 
+static DEFINE_MUTEX(intel_pstate_driver_lock);
 static DEFINE_MUTEX(intel_pstate_limits_lock);
 
 #ifdef CONFIG_ACPI
@@ -538,7 +541,6 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 
 	acpi_processor_unregister_performance(policy->cpu);
 }
-
 #else
 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
@@ -873,7 +875,10 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 
 		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
 		hw_min = HWP_LOWEST_PERF(cap);
-		hw_max = HWP_HIGHEST_PERF(cap);
+		if (limits->no_turbo)
+			hw_max = HWP_GUARANTEED_PERF(cap);
+		else
+			hw_max = HWP_HIGHEST_PERF(cap);
 		range = hw_max - hw_min;
 
 		max_perf_pct = perf_limits->max_perf_pct;
@@ -887,11 +892,6 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 
 		adj_range = max_perf_pct * range / 100;
 		max = hw_min + adj_range;
-		if (limits->no_turbo) {
-			hw_max = HWP_GUARANTEED_PERF(cap);
-			if (hw_max < max)
-				max = hw_max;
-		}
 
 		value &= ~HWP_MAX_PERF(~0L);
 		value |= HWP_MAX_PERF(max);
@@ -1007,35 +1007,57 @@ static int pid_param_get(void *data, u64 *val)
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
 
+static struct dentry *debugfs_parent;
+
 struct pid_param {
 	char *name;
 	void *value;
+	struct dentry *dentry;
 };
 
 static struct pid_param pid_files[] = {
-	{"sample_rate_ms", &pid_params.sample_rate_ms},
-	{"d_gain_pct", &pid_params.d_gain_pct},
-	{"i_gain_pct", &pid_params.i_gain_pct},
-	{"deadband", &pid_params.deadband},
-	{"setpoint", &pid_params.setpoint},
-	{"p_gain_pct", &pid_params.p_gain_pct},
-	{NULL, NULL}
+	{"sample_rate_ms", &pid_params.sample_rate_ms, },
+	{"d_gain_pct", &pid_params.d_gain_pct, },
+	{"i_gain_pct", &pid_params.i_gain_pct, },
+	{"deadband", &pid_params.deadband, },
+	{"setpoint", &pid_params.setpoint, },
+	{"p_gain_pct", &pid_params.p_gain_pct, },
+	{NULL, NULL, }
 };
 
-static void __init intel_pstate_debug_expose_params(void)
+static void intel_pstate_debug_expose_params(void)
 {
-	struct dentry *debugfs_parent;
-	int i = 0;
+	int i;
 
 	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
 	if (IS_ERR_OR_NULL(debugfs_parent))
 		return;
-	while (pid_files[i].name) {
-		debugfs_create_file(pid_files[i].name, 0660,
-				    debugfs_parent, pid_files[i].value,
-				    &fops_pid_param);
-		i++;
+
+	for (i = 0; pid_files[i].name; i++) {
+		struct dentry *dentry;
+
+		dentry = debugfs_create_file(pid_files[i].name, 0660,
+					     debugfs_parent, pid_files[i].value,
+					     &fops_pid_param);
+		if (!IS_ERR(dentry))
+			pid_files[i].dentry = dentry;
+	}
+}
+
+static void intel_pstate_debug_hide_params(void)
+{
+	int i;
+
+	if (IS_ERR_OR_NULL(debugfs_parent))
+		return;
+
+	for (i = 0; pid_files[i].name; i++) {
+		debugfs_remove(pid_files[i].dentry);
+		pid_files[i].dentry = NULL;
 	}
+
+	debugfs_remove(debugfs_parent);
+	debugfs_parent = NULL;
 }
 
 /************************** debugfs end ************************/
@@ -1048,6 +1070,34 @@ static void __init intel_pstate_debug_expose_params(void)
 		return sprintf(buf, "%u\n", limits->object);		\
 	}
 
+static ssize_t intel_pstate_show_status(char *buf);
+static int intel_pstate_update_status(const char *buf, size_t size);
+
+static ssize_t show_status(struct kobject *kobj,
+			   struct attribute *attr, char *buf)
+{
+	ssize_t ret;
+
+	mutex_lock(&intel_pstate_driver_lock);
+	ret = intel_pstate_show_status(buf);
+	mutex_unlock(&intel_pstate_driver_lock);
+
+	return ret;
+}
+
+static ssize_t store_status(struct kobject *a, struct attribute *b,
+			    const char *buf, size_t count)
+{
+	char *p = memchr(buf, '\n', count);
+	int ret;
+
+	mutex_lock(&intel_pstate_driver_lock);
+	ret = intel_pstate_update_status(buf, p ? p - buf : count);
+	mutex_unlock(&intel_pstate_driver_lock);
+
+	return ret < 0 ? ret : count;
+}
+
 static ssize_t show_turbo_pct(struct kobject *kobj,
 				struct attribute *attr, char *buf)
 {
@@ -1055,12 +1105,22 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
 	int total, no_turbo, turbo_pct;
 	uint32_t turbo_fp;
 
+	mutex_lock(&intel_pstate_driver_lock);
+
+	if (!driver_registered) {
+		mutex_unlock(&intel_pstate_driver_lock);
+		return -EAGAIN;
+	}
+
 	cpu = all_cpu_data[0];
 
 	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
 	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
 	turbo_fp = div_fp(no_turbo, total);
 	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
+
+	mutex_unlock(&intel_pstate_driver_lock);
+
 	return sprintf(buf, "%u\n", turbo_pct);
 }
 
@@ -1070,8 +1130,18 @@ static ssize_t show_num_pstates(struct kobject *kobj,
 	struct cpudata *cpu;
 	int total;
 
+	mutex_lock(&intel_pstate_driver_lock);
+
+	if (!driver_registered) {
+		mutex_unlock(&intel_pstate_driver_lock);
+		return -EAGAIN;
+	}
+
 	cpu = all_cpu_data[0];
 	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
+
+	mutex_unlock(&intel_pstate_driver_lock);
+
 	return sprintf(buf, "%u\n", total);
 }
 
@@ -1080,12 +1150,21 @@ static ssize_t show_no_turbo(struct kobject *kobj,
 {
 	ssize_t ret;
 
+	mutex_lock(&intel_pstate_driver_lock);
+
+	if (!driver_registered) {
+		mutex_unlock(&intel_pstate_driver_lock);
+		return -EAGAIN;
+	}
+
 	update_turbo_state();
 	if (limits->turbo_disabled)
 		ret = sprintf(buf, "%u\n", limits->turbo_disabled);
 	else
 		ret = sprintf(buf, "%u\n", limits->no_turbo);
 
+	mutex_unlock(&intel_pstate_driver_lock);
+
 	return ret;
 }
 
@@ -1099,12 +1178,20 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 	if (ret != 1)
 		return -EINVAL;
 
+	mutex_lock(&intel_pstate_driver_lock);
+
+	if (!driver_registered) {
+		mutex_unlock(&intel_pstate_driver_lock);
+		return -EAGAIN;
+	}
+
 	mutex_lock(&intel_pstate_limits_lock);
 
 	update_turbo_state();
 	if (limits->turbo_disabled) {
 		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
 		mutex_unlock(&intel_pstate_limits_lock);
+		mutex_unlock(&intel_pstate_driver_lock);
 		return -EPERM;
 	}
 
@@ -1114,6 +1201,8 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
 	intel_pstate_update_policies();
 
+	mutex_unlock(&intel_pstate_driver_lock);
+
 	return count;
 }
 
@@ -1127,6 +1216,13 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 	if (ret != 1)
 		return -EINVAL;
 
+	mutex_lock(&intel_pstate_driver_lock);
+
+	if (!driver_registered) {
+		mutex_unlock(&intel_pstate_driver_lock);
+		return -EAGAIN;
+	}
+
 	mutex_lock(&intel_pstate_limits_lock);
 
 	limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
@@ -1142,6 +1238,8 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 
 	intel_pstate_update_policies();
 
+	mutex_unlock(&intel_pstate_driver_lock);
+
 	return count;
 }
 
@@ -1155,6 +1253,13 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 	if (ret != 1)
 		return -EINVAL;
 
+	mutex_lock(&intel_pstate_driver_lock);
+
+	if (!driver_registered) {
+		mutex_unlock(&intel_pstate_driver_lock);
+		return -EAGAIN;
+	}
+
 	mutex_lock(&intel_pstate_limits_lock);
 
 	limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
@@ -1170,12 +1275,15 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 
 	intel_pstate_update_policies();
 
+	mutex_unlock(&intel_pstate_driver_lock);
+
 	return count;
 }
 
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
+define_one_global_rw(status);
 define_one_global_rw(no_turbo);
 define_one_global_rw(max_perf_pct);
 define_one_global_rw(min_perf_pct);
@@ -1183,6 +1291,7 @@ define_one_global_ro(turbo_pct);
 define_one_global_ro(num_pstates);
 
 static struct attribute *intel_pstate_attributes[] = {
+	&status.attr,
 	&no_turbo.attr,
 	&turbo_pct.attr,
 	&num_pstates.attr,
@@ -1364,48 +1473,71 @@ static int core_get_max_pstate_physical(void)
 	return (value >> 8) & 0xFF;
 }
 
+static int core_get_tdp_ratio(u64 plat_info)
+{
+	/* Check how many TDP levels present */
+	if (plat_info & 0x600000000) {
+		u64 tdp_ctrl;
+		u64 tdp_ratio;
+		int tdp_msr;
+		int err;
+
+		/* Get the TDP level (0, 1, 2) to get ratios */
+		err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+		if (err)
+			return err;
+
+		/* TDP MSR are continuous starting at 0x648 */
+		tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
+		err = rdmsrl_safe(tdp_msr, &tdp_ratio);
+		if (err)
+			return err;
+
+		/* For level 1 and 2, bits[23:16] contain the ratio */
+		if (tdp_ctrl & 0x03)
+			tdp_ratio >>= 16;
+
+		tdp_ratio &= 0xff; /* ratios are only 8 bits long */
+		pr_debug("tdp_ratio %x\n", (int)tdp_ratio);
+
+		return (int)tdp_ratio;
+	}
+
+	return -ENXIO;
+}
+
 static int core_get_max_pstate(void)
 {
 	u64 tar;
 	u64 plat_info;
 	int max_pstate;
+	int tdp_ratio;
 	int err;
 
 	rdmsrl(MSR_PLATFORM_INFO, plat_info);
 	max_pstate = (plat_info >> 8) & 0xFF;
 
+	tdp_ratio = core_get_tdp_ratio(plat_info);
+	if (tdp_ratio <= 0)
+		return max_pstate;
+
+	if (hwp_active) {
+		/* Turbo activation ratio is not used on HWP platforms */
+		return tdp_ratio;
+	}
+
 	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
 	if (!err) {
+		int tar_levels;
+
 		/* Do some sanity checking for safety */
-		if (plat_info & 0x600000000) {
-			u64 tdp_ctrl;
-			u64 tdp_ratio;
-			int tdp_msr;
-
-			err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
-			if (err)
-				goto skip_tar;
-
-			tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3);
-			err = rdmsrl_safe(tdp_msr, &tdp_ratio);
-			if (err)
-				goto skip_tar;
-
-			/* For level 1 and 2, bits[23:16] contain the ratio */
-			if (tdp_ctrl)
-				tdp_ratio >>= 16;
-
-			tdp_ratio &= 0xff; /* ratios are only 8 bits long */
-			if (tdp_ratio - 1 == tar) {
-				max_pstate = tar;
-				pr_debug("max_pstate=TAC %x\n", max_pstate);
-			} else {
-				goto skip_tar;
-			}
+		tar_levels = tar & 0xff;
+		if (tdp_ratio - 1 == tar_levels) {
+			max_pstate = tar_levels;
+			pr_debug("max_pstate=TAC %x\n", max_pstate);
 		}
 	}
 
-skip_tar:
 	return max_pstate;
 }
 
@@ -2072,6 +2204,20 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 {
+	struct cpudata *cpu = all_cpu_data[policy->cpu];
+	struct perf_limits *perf_limits;
+
+	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
+		perf_limits = &performance_limits;
+	else
+		perf_limits = &powersave_limits;
+
+	update_turbo_state();
+	policy->cpuinfo.max_freq = perf_limits->turbo_disabled ||
+					perf_limits->no_turbo ?
+					cpu->pstate.max_freq :
+					cpu->pstate.turbo_freq;
+
 	cpufreq_verify_within_cpu_limits(policy);
 
 	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
@@ -2299,6 +2445,111 @@ static struct cpufreq_driver intel_cpufreq = {
 
 static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
 
+static void intel_pstate_driver_cleanup(void)
+{
+	unsigned int cpu;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		if (all_cpu_data[cpu]) {
+			if (intel_pstate_driver == &intel_pstate)
+				intel_pstate_clear_update_util_hook(cpu);
+
+			kfree(all_cpu_data[cpu]);
+			all_cpu_data[cpu] = NULL;
+		}
+	}
+	put_online_cpus();
+}
+
+static int intel_pstate_register_driver(void)
+{
+	int ret;
+
+	ret = cpufreq_register_driver(intel_pstate_driver);
+	if (ret) {
+		intel_pstate_driver_cleanup();
+		return ret;
+	}
+
+	mutex_lock(&intel_pstate_limits_lock);
+	driver_registered = true;
+	mutex_unlock(&intel_pstate_limits_lock);
+
+	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
+	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+		intel_pstate_debug_expose_params();
+
+	return 0;
+}
+
+static int intel_pstate_unregister_driver(void)
+{
+	if (hwp_active)
+		return -EBUSY;
+
+	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
+	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+		intel_pstate_debug_hide_params();
+
+	mutex_lock(&intel_pstate_limits_lock);
+	driver_registered = false;
+	mutex_unlock(&intel_pstate_limits_lock);
+
+	cpufreq_unregister_driver(intel_pstate_driver);
+	intel_pstate_driver_cleanup();
+
+	return 0;
+}
+
+static ssize_t intel_pstate_show_status(char *buf)
+{
+	if (!driver_registered)
+		return sprintf(buf, "off\n");
+
+	return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
+					"active" : "passive");
+}
+
+static int intel_pstate_update_status(const char *buf, size_t size)
+{
+	int ret;
+
+	if (size == 3 && !strncmp(buf, "off", size))
+		return driver_registered ?
+			intel_pstate_unregister_driver() : -EINVAL;
+
+	if (size == 6 && !strncmp(buf, "active", size)) {
+		if (driver_registered) {
+			if (intel_pstate_driver == &intel_pstate)
+				return 0;
+
+			ret = intel_pstate_unregister_driver();
+			if (ret)
+				return ret;
+		}
+
+		intel_pstate_driver = &intel_pstate;
+		return intel_pstate_register_driver();
+	}
+
+	if (size == 7 && !strncmp(buf, "passive", size)) {
+		if (driver_registered) {
+			if (intel_pstate_driver != &intel_pstate)
+				return 0;
+
+			ret = intel_pstate_unregister_driver();
+			if (ret)
+				return ret;
+		}
+
+		intel_pstate_driver = &intel_cpufreq;
+		return intel_pstate_register_driver();
+	}
+
+	return -EINVAL;
+}
+
 static int no_load __initdata;
 static int no_hwp __initdata;
 static int hwp_only __initdata;
@@ -2486,9 +2737,9 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = {
 
 static int __init intel_pstate_init(void)
 {
-	int cpu, rc = 0;
 	const struct x86_cpu_id *id;
 	struct cpu_defaults *cpu_def;
+	int rc = 0;
 
 	if (no_load)
 		return -ENODEV;
@@ -2520,45 +2771,29 @@ hwp_cpu_matched:
 	if (intel_pstate_platform_pwr_mgmt_exists())
 		return -ENODEV;
 
+	if (!hwp_active && hwp_only)
+		return -ENOTSUPP;
+
 	pr_info("Intel P-state driver initializing\n");
 
 	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
 	if (!all_cpu_data)
 		return -ENOMEM;
 
-	if (!hwp_active && hwp_only)
-		goto out;
-
 	intel_pstate_request_control_from_smm();
 
-	rc = cpufreq_register_driver(intel_pstate_driver);
-	if (rc)
-		goto out;
-
-	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
-	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
-		intel_pstate_debug_expose_params();
-
 	intel_pstate_sysfs_expose_params();
 
+	mutex_lock(&intel_pstate_driver_lock);
+	rc = intel_pstate_register_driver();
+	mutex_unlock(&intel_pstate_driver_lock);
+	if (rc)
+		return rc;
+
 	if (hwp_active)
 		pr_info("HWP enabled\n");
 
-	return rc;
-out:
-	get_online_cpus();
-	for_each_online_cpu(cpu) {
-		if (all_cpu_data[cpu]) {
-			if (intel_pstate_driver == &intel_pstate)
-				intel_pstate_clear_update_util_hook(cpu);
-
-			kfree(all_cpu_data[cpu]);
-		}
-	}
-
-	put_online_cpus();
-	vfree(all_cpu_data);
-	return -ENODEV;
+	return 0;
 }
 device_initcall(intel_pstate_init);
 

+ 47 - 3
drivers/cpufreq/powernv-cpufreq.c

@@ -144,6 +144,7 @@ static struct powernv_pstate_info {
 	unsigned int max;
 	unsigned int nominal;
 	unsigned int nr_pstates;
+	bool wof_enabled;
 } powernv_pstate_info;
 
 /* Use following macros for conversions between pstate_id and index */
@@ -203,6 +204,7 @@ static int init_powernv_pstates(void)
 	const __be32 *pstate_ids, *pstate_freqs;
 	u32 len_ids, len_freqs;
 	u32 pstate_min, pstate_max, pstate_nominal;
+	u32 pstate_turbo, pstate_ultra_turbo;
 
 	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
 	if (!power_mgt) {
@@ -225,8 +227,29 @@ static int init_powernv_pstates(void)
 		pr_warn("ibm,pstate-nominal not found\n");
 		return -ENODEV;
 	}
+
+	if (of_property_read_u32(power_mgt, "ibm,pstate-ultra-turbo",
+				 &pstate_ultra_turbo)) {
+		powernv_pstate_info.wof_enabled = false;
+		goto next;
+	}
+
+	if (of_property_read_u32(power_mgt, "ibm,pstate-turbo",
+				 &pstate_turbo)) {
+		powernv_pstate_info.wof_enabled = false;
+		goto next;
+	}
+
+	if (pstate_turbo == pstate_ultra_turbo)
+		powernv_pstate_info.wof_enabled = false;
+	else
+		powernv_pstate_info.wof_enabled = true;
+
+next:
 	pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min,
 		pstate_nominal, pstate_max);
+	pr_info("Workload Optimized Frequency is %s in the platform\n",
+		(powernv_pstate_info.wof_enabled) ? "enabled" : "disabled");
 
 	pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
 	if (!pstate_ids) {
@@ -268,6 +291,13 @@ static int init_powernv_pstates(void)
 			powernv_pstate_info.nominal = i;
 		else if (id == pstate_min)
 			powernv_pstate_info.min = i;
+
+		if (powernv_pstate_info.wof_enabled && id == pstate_turbo) {
+			int j;
+
+			for (j = i - 1; j >= (int)powernv_pstate_info.max; j--)
+				powernv_freqs[j].flags = CPUFREQ_BOOST_FREQ;
+		}
 	}
 
 	/* End of list marker entry */
@@ -305,9 +335,12 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy,
 struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq =
 	__ATTR_RO(cpuinfo_nominal_freq);
 
+#define SCALING_BOOST_FREQS_ATTR_INDEX		2
+
 static struct freq_attr *powernv_cpu_freq_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	&cpufreq_freq_attr_cpuinfo_nominal_freq,
+	&cpufreq_freq_attr_scaling_boost_freqs,
 	NULL,
 };
 
@@ -1013,11 +1046,22 @@ static int __init powernv_cpufreq_init(void)
 	register_reboot_notifier(&powernv_cpufreq_reboot_nb);
 	opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb);
 
+	if (powernv_pstate_info.wof_enabled)
+		powernv_cpufreq_driver.boost_enabled = true;
+	else
+		powernv_cpu_freq_attr[SCALING_BOOST_FREQS_ATTR_INDEX] = NULL;
+
 	rc = cpufreq_register_driver(&powernv_cpufreq_driver);
-	if (!rc)
-		return 0;
+	if (rc) {
+		pr_info("Failed to register the cpufreq driver (%d)\n", rc);
+		goto cleanup_notifiers;
+	}
 
-	pr_info("Failed to register the cpufreq driver (%d)\n", rc);
+	if (powernv_pstate_info.wof_enabled)
+		cpufreq_enable_boost_support();
+
+	return 0;
+cleanup_notifiers:
 	unregister_all_notifiers();
 	clean_chip_info();
 out:

+ 0 - 3
drivers/cpufreq/ppc_cbe_cpufreq_pmi.c

@@ -100,9 +100,6 @@ static int pmi_notifier(struct notifier_block *nb,
 	/* Should this really be called for CPUFREQ_ADJUST and CPUFREQ_NOTIFY
 	 * policy events?)
 	 */
-	if (event == CPUFREQ_START)
-		return 0;
-
 	node = cbe_cpu_to_node(policy->cpu);
 
 	pr_debug("got notified, event=%lu, node=%u\n", event, node);

+ 49 - 99
drivers/cpufreq/qoriq-cpufreq.c

@@ -11,6 +11,7 @@
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/cpufreq.h>
 #include <linux/cpu_cooling.h>
 #include <linux/errno.h>
@@ -37,53 +38,20 @@ struct cpu_data {
 	struct thermal_cooling_device *cdev;
 };
 
+/*
+ * Don't use cpufreq on this SoC -- used when the SoC would have otherwise
+ * matched a more generic compatible.
+ */
+#define SOC_BLACKLIST		1
+
 /**
  * struct soc_data - SoC specific data
- * @freq_mask: mask the disallowed frequencies
- * @flag: unique flags
+ * @flags: SOC_xxx
  */
 struct soc_data {
-	u32 freq_mask[4];
-	u32 flag;
-};
-
-#define FREQ_MASK	1
-/* see hardware specification for the allowed frqeuencies */
-static const struct soc_data sdata[] = {
-	{ /* used by p2041 and p3041 */
-		.freq_mask = {0x8, 0x8, 0x2, 0x2},
-		.flag = FREQ_MASK,
-	},
-	{ /* used by p5020 */
-		.freq_mask = {0x8, 0x2},
-		.flag = FREQ_MASK,
-	},
-	{ /* used by p4080, p5040 */
-		.freq_mask = {0},
-		.flag = 0,
-	},
+	u32 flags;
 };
 
-/*
- * the minimum allowed core frequency, in Hz
- * for chassis v1.0, >= platform frequency
- * for chassis v2.0, >= platform frequency / 2
- */
-static u32 min_cpufreq;
-static const u32 *fmask;
-
-#if defined(CONFIG_ARM)
-static int get_cpu_physical_id(int cpu)
-{
-	return topology_core_id(cpu);
-}
-#else
-static int get_cpu_physical_id(int cpu)
-{
-	return get_hard_smp_processor_id(cpu);
-}
-#endif
-
 static u32 get_bus_freq(void)
 {
 	struct device_node *soc;
@@ -101,9 +69,10 @@ static u32 get_bus_freq(void)
 	return sysfreq;
 }
 
-static struct device_node *cpu_to_clk_node(int cpu)
+static struct clk *cpu_to_clk(int cpu)
 {
-	struct device_node *np, *clk_np;
+	struct device_node *np;
+	struct clk *clk;
 
 	if (!cpu_present(cpu))
 		return NULL;
@@ -112,37 +81,28 @@ static struct device_node *cpu_to_clk_node(int cpu)
 	if (!np)
 		return NULL;
 
-	clk_np = of_parse_phandle(np, "clocks", 0);
-	if (!clk_np)
-		return NULL;
-
+	clk = of_clk_get(np, 0);
 	of_node_put(np);
-
-	return clk_np;
+	return clk;
 }
 
 /* traverse cpu nodes to get cpu mask of sharing clock wire */
 static void set_affected_cpus(struct cpufreq_policy *policy)
 {
-	struct device_node *np, *clk_np;
 	struct cpumask *dstp = policy->cpus;
+	struct clk *clk;
 	int i;
 
-	np = cpu_to_clk_node(policy->cpu);
-	if (!np)
-		return;
-
 	for_each_present_cpu(i) {
-		clk_np = cpu_to_clk_node(i);
-		if (!clk_np)
+		clk = cpu_to_clk(i);
+		if (IS_ERR(clk)) {
+			pr_err("%s: no clock for cpu %d\n", __func__, i);
 			continue;
+		}
 
-		if (clk_np == np)
+		if (clk_is_match(policy->clk, clk))
 			cpumask_set_cpu(i, dstp);
-
-		of_node_put(clk_np);
 	}
-	of_node_put(np);
 }
 
 /* reduce the duplicated frequencies in frequency table */
@@ -198,10 +158,11 @@ static void freq_table_sort(struct cpufreq_frequency_table *freq_table,
 
 static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
-	struct device_node *np, *pnode;
+	struct device_node *np;
 	int i, count, ret;
-	u32 freq, mask;
+	u32 freq;
 	struct clk *clk;
+	const struct clk_hw *hwclk;
 	struct cpufreq_frequency_table *table;
 	struct cpu_data *data;
 	unsigned int cpu = policy->cpu;
@@ -221,17 +182,13 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		goto err_nomem2;
 	}
 
-	pnode = of_parse_phandle(np, "clocks", 0);
-	if (!pnode) {
-		pr_err("%s: could not get clock information\n", __func__);
-		goto err_nomem2;
-	}
+	hwclk = __clk_get_hw(policy->clk);
+	count = clk_hw_get_num_parents(hwclk);
 
-	count = of_property_count_strings(pnode, "clock-names");
 	data->pclk = kcalloc(count, sizeof(struct clk *), GFP_KERNEL);
 	if (!data->pclk) {
 		pr_err("%s: no memory\n", __func__);
-		goto err_node;
+		goto err_nomem2;
 	}
 
 	table = kcalloc(count + 1, sizeof(*table), GFP_KERNEL);
@@ -240,23 +197,11 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		goto err_pclk;
 	}
 
-	if (fmask)
-		mask = fmask[get_cpu_physical_id(cpu)];
-	else
-		mask = 0x0;
-
 	for (i = 0; i < count; i++) {
-		clk = of_clk_get(pnode, i);
+		clk = clk_hw_get_parent_by_index(hwclk, i)->clk;
 		data->pclk[i] = clk;
 		freq = clk_get_rate(clk);
-		/*
-		 * the clock is valid if its frequency is not masked
-		 * and large than minimum allowed frequency.
-		 */
-		if (freq < min_cpufreq || (mask & (1 << i)))
-			table[i].frequency = CPUFREQ_ENTRY_INVALID;
-		else
-			table[i].frequency = freq / 1000;
+		table[i].frequency = freq / 1000;
 		table[i].driver_data = i;
 	}
 	freq_table_redup(table, count);
@@ -282,7 +227,6 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	policy->cpuinfo.transition_latency = u64temp + 1;
 
 	of_node_put(np);
-	of_node_put(pnode);
 
 	return 0;
 
@@ -290,10 +234,7 @@ err_nomem1:
 	kfree(table);
 err_pclk:
 	kfree(data->pclk);
-err_node:
-	of_node_put(pnode);
 err_nomem2:
-	policy->driver_data = NULL;
 	kfree(data);
 err_np:
 	of_node_put(np);
@@ -357,12 +298,25 @@ static struct cpufreq_driver qoriq_cpufreq_driver = {
 	.attr		= cpufreq_generic_attr,
 };
 
+static const struct soc_data blacklist = {
+	.flags = SOC_BLACKLIST,
+};
+
 static const struct of_device_id node_matches[] __initconst = {
-	{ .compatible = "fsl,p2041-clockgen", .data = &sdata[0], },
-	{ .compatible = "fsl,p3041-clockgen", .data = &sdata[0], },
-	{ .compatible = "fsl,p5020-clockgen", .data = &sdata[1], },
-	{ .compatible = "fsl,p4080-clockgen", .data = &sdata[2], },
-	{ .compatible = "fsl,p5040-clockgen", .data = &sdata[2], },
+	/* e6500 cannot use cpufreq due to erratum A-008083 */
+	{ .compatible = "fsl,b4420-clockgen", &blacklist },
+	{ .compatible = "fsl,b4860-clockgen", &blacklist },
+	{ .compatible = "fsl,t2080-clockgen", &blacklist },
+	{ .compatible = "fsl,t4240-clockgen", &blacklist },
+
+	{ .compatible = "fsl,ls1012a-clockgen", },
+	{ .compatible = "fsl,ls1021a-clockgen", },
+	{ .compatible = "fsl,ls1043a-clockgen", },
+	{ .compatible = "fsl,ls1046a-clockgen", },
+	{ .compatible = "fsl,ls1088a-clockgen", },
+	{ .compatible = "fsl,ls2080a-clockgen", },
+	{ .compatible = "fsl,p4080-clockgen", },
+	{ .compatible = "fsl,qoriq-clockgen-1.0", },
 	{ .compatible = "fsl,qoriq-clockgen-2.0", },
 	{}
 };
@@ -380,16 +334,12 @@ static int __init qoriq_cpufreq_init(void)
 
 	match = of_match_node(node_matches, np);
 	data = match->data;
-	if (data) {
-		if (data->flag)
-			fmask = data->freq_mask;
-		min_cpufreq = get_bus_freq();
-	} else {
-		min_cpufreq = get_bus_freq() / 2;
-	}
 
 	of_node_put(np);
 
+	if (data && data->flags & SOC_BLACKLIST)
+		return -ENODEV;
+
 	ret = cpufreq_register_driver(&qoriq_cpufreq_driver);
 	if (!ret)
 		pr_info("Freescale QorIQ CPU frequency scaling driver\n");

+ 0 - 1
drivers/cpufreq/s3c2416-cpufreq.c

@@ -400,7 +400,6 @@ static int s3c2416_cpufreq_driver_init(struct cpufreq_policy *policy)
 	rate = clk_get_rate(s3c_freq->hclk);
 	if (rate < 133 * 1000 * 1000) {
 		pr_err("cpufreq: HCLK not at 133MHz\n");
-		clk_put(s3c_freq->hclk);
 		ret = -EINVAL;
 		goto err_armclk;
 	}

+ 268 - 0
drivers/cpufreq/ti-cpufreq.c

@@ -0,0 +1,268 @@
+/*
+ * TI CPUFreq/OPP hw-supported driver
+ *
+ * Copyright (C) 2016-2017 Texas Instruments, Inc.
+ *	 Dave Gerlach <d-gerlach@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pm_opp.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define REVISION_MASK				0xF
+#define REVISION_SHIFT				28
+
+#define AM33XX_800M_ARM_MPU_MAX_FREQ		0x1E2F
+#define AM43XX_600M_ARM_MPU_MAX_FREQ		0xFFA
+
+#define DRA7_EFUSE_HAS_OD_MPU_OPP		11
+#define DRA7_EFUSE_HAS_HIGH_MPU_OPP		15
+#define DRA7_EFUSE_HAS_ALL_MPU_OPP		23
+
+#define DRA7_EFUSE_NOM_MPU_OPP			BIT(0)
+#define DRA7_EFUSE_OD_MPU_OPP			BIT(1)
+#define DRA7_EFUSE_HIGH_MPU_OPP			BIT(2)
+
+#define VERSION_COUNT				2
+
+struct ti_cpufreq_data;
+
+struct ti_cpufreq_soc_data {
+	unsigned long (*efuse_xlate)(struct ti_cpufreq_data *opp_data,
+				     unsigned long efuse);
+	unsigned long efuse_fallback;
+	unsigned long efuse_offset;
+	unsigned long efuse_mask;
+	unsigned long efuse_shift;
+	unsigned long rev_offset;
+};
+
+struct ti_cpufreq_data {
+	struct device *cpu_dev;
+	struct device_node *opp_node;
+	struct regmap *syscon;
+	const struct ti_cpufreq_soc_data *soc_data;
+};
+
+static unsigned long amx3_efuse_xlate(struct ti_cpufreq_data *opp_data,
+				      unsigned long efuse)
+{
+	if (!efuse)
+		efuse = opp_data->soc_data->efuse_fallback;
+	/* AM335x and AM437x use "OPP disable" bits, so invert */
+	return ~efuse;
+}
+
+static unsigned long dra7_efuse_xlate(struct ti_cpufreq_data *opp_data,
+				      unsigned long efuse)
+{
+	unsigned long calculated_efuse = DRA7_EFUSE_NOM_MPU_OPP;
+
+	/*
+	 * The efuse on dra7 and am57 parts contains a specific
+	 * value indicating the highest available OPP.
+	 */
+
+	switch (efuse) {
+	case DRA7_EFUSE_HAS_ALL_MPU_OPP:
+	case DRA7_EFUSE_HAS_HIGH_MPU_OPP:
+		calculated_efuse |= DRA7_EFUSE_HIGH_MPU_OPP;
+	case DRA7_EFUSE_HAS_OD_MPU_OPP:
+		calculated_efuse |= DRA7_EFUSE_OD_MPU_OPP;
+	}
+
+	return calculated_efuse;
+}
+
+static struct ti_cpufreq_soc_data am3x_soc_data = {
+	.efuse_xlate = amx3_efuse_xlate,
+	.efuse_fallback = AM33XX_800M_ARM_MPU_MAX_FREQ,
+	.efuse_offset = 0x07fc,
+	.efuse_mask = 0x1fff,
+	.rev_offset = 0x600,
+};
+
+static struct ti_cpufreq_soc_data am4x_soc_data = {
+	.efuse_xlate = amx3_efuse_xlate,
+	.efuse_fallback = AM43XX_600M_ARM_MPU_MAX_FREQ,
+	.efuse_offset = 0x0610,
+	.efuse_mask = 0x3f,
+	.rev_offset = 0x600,
+};
+
+static struct ti_cpufreq_soc_data dra7_soc_data = {
+	.efuse_xlate = dra7_efuse_xlate,
+	.efuse_offset = 0x020c,
+	.efuse_mask = 0xf80000,
+	.efuse_shift = 19,
+	.rev_offset = 0x204,
+};
+
+/**
+ * ti_cpufreq_get_efuse() - Parse and return efuse value present on SoC
+ * @opp_data: pointer to ti_cpufreq_data context
+ * @efuse_value: Set to the value parsed from efuse
+ *
+ * Returns error code if efuse not read properly.
+ */
+static int ti_cpufreq_get_efuse(struct ti_cpufreq_data *opp_data,
+				u32 *efuse_value)
+{
+	struct device *dev = opp_data->cpu_dev;
+	u32 efuse;
+	int ret;
+
+	ret = regmap_read(opp_data->syscon, opp_data->soc_data->efuse_offset,
+			  &efuse);
+	if (ret) {
+		dev_err(dev,
+			"Failed to read the efuse value from syscon: %d\n",
+			ret);
+		return ret;
+	}
+
+	efuse = (efuse & opp_data->soc_data->efuse_mask);
+	efuse >>= opp_data->soc_data->efuse_shift;
+
+	*efuse_value = opp_data->soc_data->efuse_xlate(opp_data, efuse);
+
+	return 0;
+}
+
+/**
+ * ti_cpufreq_get_rev() - Parse and return rev value present on SoC
+ * @opp_data: pointer to ti_cpufreq_data context
+ * @revision_value: Set to the value parsed from revision register
+ *
+ * Returns error code if revision not read properly.
+ */
+static int ti_cpufreq_get_rev(struct ti_cpufreq_data *opp_data,
+			      u32 *revision_value)
+{
+	struct device *dev = opp_data->cpu_dev;
+	u32 revision;
+	int ret;
+
+	ret = regmap_read(opp_data->syscon, opp_data->soc_data->rev_offset,
+			  &revision);
+	if (ret) {
+		dev_err(dev,
+			"Failed to read the revision number from syscon: %d\n",
+			ret);
+		return ret;
+	}
+
+	*revision_value = BIT((revision >> REVISION_SHIFT) & REVISION_MASK);
+
+	return 0;
+}
+
+static int ti_cpufreq_setup_syscon_register(struct ti_cpufreq_data *opp_data)
+{
+	struct device *dev = opp_data->cpu_dev;
+	struct device_node *np = opp_data->opp_node;
+
+	opp_data->syscon = syscon_regmap_lookup_by_phandle(np,
+							"syscon");
+	if (IS_ERR(opp_data->syscon)) {
+		dev_err(dev,
+			"\"syscon\" is missing, cannot use OPPv2 table.\n");
+		return PTR_ERR(opp_data->syscon);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id ti_cpufreq_of_match[] = {
+	{ .compatible = "ti,am33xx", .data = &am3x_soc_data, },
+	{ .compatible = "ti,am4372", .data = &am4x_soc_data, },
+	{ .compatible = "ti,dra7", .data = &dra7_soc_data },
+	{},
+};
+
+static int ti_cpufreq_init(void)
+{
+	u32 version[VERSION_COUNT];
+	struct device_node *np;
+	const struct of_device_id *match;
+	struct ti_cpufreq_data *opp_data;
+	int ret;
+
+	np = of_find_node_by_path("/");
+	match = of_match_node(ti_cpufreq_of_match, np);
+	if (!match)
+		return -ENODEV;
+
+	opp_data = kzalloc(sizeof(*opp_data), GFP_KERNEL);
+	if (!opp_data)
+		return -ENOMEM;
+
+	opp_data->soc_data = match->data;
+
+	opp_data->cpu_dev = get_cpu_device(0);
+	if (!opp_data->cpu_dev) {
+		pr_err("%s: Failed to get device for CPU0\n", __func__);
+		return -ENODEV;
+	}
+
+	opp_data->opp_node = dev_pm_opp_of_get_opp_desc_node(opp_data->cpu_dev);
+	if (!opp_data->opp_node) {
+		dev_info(opp_data->cpu_dev,
+			 "OPP-v2 not supported, cpufreq-dt will attempt to use legacy tables.\n");
+		goto register_cpufreq_dt;
+	}
+
+	ret = ti_cpufreq_setup_syscon_register(opp_data);
+	if (ret)
+		goto fail_put_node;
+
+	/*
+	 * OPPs determine whether or not they are supported based on
+	 * two metrics:
+	 *	0 - SoC Revision
+	 *	1 - eFuse value
+	 */
+	ret = ti_cpufreq_get_rev(opp_data, &version[0]);
+	if (ret)
+		goto fail_put_node;
+
+	ret = ti_cpufreq_get_efuse(opp_data, &version[1]);
+	if (ret)
+		goto fail_put_node;
+
+	of_node_put(opp_data->opp_node);
+
+	ret = PTR_ERR_OR_ZERO(dev_pm_opp_set_supported_hw(opp_data->cpu_dev,
+							  version, VERSION_COUNT));
+	if (ret) {
+		dev_err(opp_data->cpu_dev,
+			"Failed to set supported hardware\n");
+		goto fail_put_node;
+	}
+
+register_cpufreq_dt:
+	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
+
+	return 0;
+
+fail_put_node:
+	of_node_put(opp_data->opp_node);
+
+	return ret;
+}
+device_initcall(ti_cpufreq_init);

+ 2 - 5
include/linux/cpufreq.h

@@ -31,7 +31,7 @@
 
 #define CPUFREQ_ETERNAL			(-1)
 #define CPUFREQ_NAME_LEN		16
-/* Print length for names. Extra 1 space for accomodating '\n' in prints */
+/* Print length for names. Extra 1 space for accommodating '\n' in prints */
 #define CPUFREQ_NAME_PLEN		(CPUFREQ_NAME_LEN + 1)
 
 struct cpufreq_governor;
@@ -115,7 +115,7 @@ struct cpufreq_policy {
 	 *   guarantee that frequency can be changed on any CPU sharing the
 	 *   policy and that the change will affect all of the policy CPUs then.
 	 * - fast_switch_enabled is to be set by governors that support fast
-	 *   freqnency switching with the help of cpufreq_enable_fast_switch().
+	 *   frequency switching with the help of cpufreq_enable_fast_switch().
 	 */
 	bool			fast_switch_possible;
 	bool			fast_switch_enabled;
@@ -415,9 +415,6 @@ static inline void cpufreq_resume(void) {}
 /* Policy Notifiers  */
 #define CPUFREQ_ADJUST			(0)
 #define CPUFREQ_NOTIFY			(1)
-#define CPUFREQ_START			(2)
-#define CPUFREQ_CREATE_POLICY		(3)
-#define CPUFREQ_REMOVE_POLICY		(4)
 
 #ifdef CONFIG_CPU_FREQ
 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list);