浏览代码

Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux

Pull thermal management updates from Zhang Rui:

 - Thermal core code reorganization and cleanup. Two new files are
   created for thermal sysfs I/F code and thermal helper functions
   (Eduardo Valentin).

 - Sanitize hotplug and locking for x86_pkg_temp driver (Thomas
   Gleixner)

 - Update MAINTAINER file for pwm-fan driver and Samsung thermal driver
   (Lukasz Majewski)

 - Fix module auto-load for max77620, tango and db8500 thermal driver
   (Javier Martinez Canillas)

 - Fix a bug that thermal hwmon sysfs I/F returns wrong critical trip
   point temperature value (Krzysztof Kozlowski)

 - Add Skylake PCH 100 series support for intel_pch_thermal driver
   (OGAWA Hirofumi)

 - Small fixes and cleanups for platform thermal drivers (Julia Lawall,
   Luis Henriques, Leo Yan, Stephen Boyd, Shawn Lin, Javi Merino and
   Lukasz Luba)

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux: (76 commits)
  MAINTAINERS: Samsung: Update maintainer for PWM FAN and SAMSUNG THERMAL
  thermal/x86 pkg temp: Convert to hotplug state machine
  thermal/x86_pkg_temp: Sanitize package management
  thermal/x86_pkg_temp: Move work into package struct
  thermal/x86_pkg_temp: Move work scheduled flag into package struct
  thermal/x86_pkg_temp: Sanitize locking
  thermal/x86_pkg_temp: Cleanup code some more
  thermal/x86_pkg_temp: Cleanup namespace
  thermal/x86_pkg_temp: Get rid of ref counting
  thermal/x86_pkg_temp: Sanitize callback (de)initialization
  thermal/x86_pkg_temp: Replace open coded cpu search
  thermal/x86_pkg_temp: Remove redundant package search
  thermal/x86_pkg_temp: Cleanup thermal interrupt handling
  thermal: hwmon: Properly report critical temperature in sysfs
  devfreq_cooling: pass a pointer to devfreq in the power model callbacks
  devfreq_cooling: make the structs devfreq_cooling_xxx visible for all
  dt-bindings: rockchip-thermal: fix the misleading description
  thermal: rockchip: improve the warning log
  thermal: db8500: Fix module autoload
  thermal: tango: Fix module autoload
  ...
Linus Torvalds 8 年之前
父节点
当前提交
9346116d14

+ 3 - 0
Documentation/devicetree/bindings/thermal/rockchip-thermal.txt

@@ -22,10 +22,13 @@ Required properties:
 	      TSADC controller.
 - pinctrl-2 : The "sleep" pinctrl state, it will be in for suspend.
 - #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description.
+
+Optional properties:
 - rockchip,hw-tshut-temp : The hardware-controlled shutdown temperature value.
 - rockchip,hw-tshut-mode : The hardware-controlled shutdown mode 0:CRU 1:GPIO.
 - rockchip,hw-tshut-polarity : The hardware-controlled active polarity 0:LOW
 			       1:HIGH.
+- rockchip,grf : The phandle of the syscon node for the general register file.
 
 Exiample:
 tsadc: tsadc@ff280000 {

+ 2 - 2
MAINTAINERS

@@ -9838,7 +9838,7 @@ F:	drivers/media/usb/pwc/*
 
 PWM FAN DRIVER
 M:	Kamil Debski <kamil@wypas.org>
-M:	Lukasz Majewski <l.majewski@samsung.com>
+M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	linux-hwmon@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/hwmon/pwm-fan.txt
@@ -10609,7 +10609,7 @@ L:	netdev@vger.kernel.org
 F:	drivers/net/ethernet/samsung/sxgbe/
 
 SAMSUNG THERMAL DRIVER
-M:	Lukasz Majewski <l.majewski@samsung.com>
+M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	linux-pm@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
 S:	Supported

+ 3 - 1
drivers/thermal/Kconfig

@@ -177,8 +177,10 @@ config THERMAL_EMULATION
 
 config HISI_THERMAL
 	tristate "Hisilicon thermal driver"
-	depends on (ARCH_HISI && CPU_THERMAL && OF) || COMPILE_TEST
+	depends on ARCH_HISI || COMPILE_TEST
 	depends on HAS_IOMEM
+	depends on OF
+	default y
 	help
 	  Enable this to plug hisilicon's thermal sensor driver into the Linux
 	  thermal framework. cpufreq is used as the cooling device to throttle

+ 2 - 1
drivers/thermal/Makefile

@@ -3,7 +3,8 @@
 #
 
 obj-$(CONFIG_THERMAL)		+= thermal_sys.o
-thermal_sys-y			+= thermal_core.o
+thermal_sys-y			+= thermal_core.o thermal_sysfs.o \
+					thermal_helpers.o
 
 # interface to/from other layers providing sensors
 thermal_sys-$(CONFIG_THERMAL_HWMON)		+= thermal_hwmon.o

+ 1 - 0
drivers/thermal/db8500_thermal.c

@@ -512,6 +512,7 @@ static const struct of_device_id db8500_thermal_match[] = {
 	{ .compatible = "stericsson,db8500-thermal" },
 	{},
 };
+MODULE_DEVICE_TABLE(of, db8500_thermal_match);
 #endif
 
 static struct platform_driver db8500_thermal_driver = {

+ 3 - 2
drivers/thermal/devfreq_cooling.c

@@ -238,7 +238,7 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
 		return 0;
 	}
 
-	return dfc->power_ops->get_static_power(voltage);
+	return dfc->power_ops->get_static_power(df, voltage);
 }
 
 /**
@@ -262,7 +262,8 @@ get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq,
 	struct devfreq_cooling_power *dfc_power = dfc->power_ops;
 
 	if (dfc_power->get_dynamic_power)
-		return dfc_power->get_dynamic_power(freq, voltage);
+		return dfc_power->get_dynamic_power(dfc->devfreq, freq,
+						    voltage);
 
 	freq_mhz = freq / 1000000;
 	power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage;

+ 1 - 1
drivers/thermal/int340x_thermal/int3400_thermal.c

@@ -96,7 +96,7 @@ static ssize_t current_uuid_store(struct device *dev,
 	return -EINVAL;
 }
 
-static DEVICE_ATTR(current_uuid, 0644, current_uuid_show, current_uuid_store);
+static DEVICE_ATTR_RW(current_uuid);
 static DEVICE_ATTR_RO(available_uuids);
 static struct attribute *uuid_attrs[] = {
 	&dev_attr_available_uuids.attr,

+ 39 - 25
drivers/thermal/intel_pch_thermal.c

@@ -29,6 +29,7 @@
 #define PCH_THERMAL_DID_HSW_2	0x8C24 /* Haswell PCH */
 #define PCH_THERMAL_DID_WPT	0x9CA4 /* Wildcat Point */
 #define PCH_THERMAL_DID_SKL	0x9D31 /* Skylake PCH */
+#define PCH_THERMAL_DID_SKL_H	0xA131 /* Skylake PCH 100 series */
 
 /* Wildcat Point-LP  PCH Thermal registers */
 #define WPT_TEMP	0x0000	/* Temperature */
@@ -273,37 +274,44 @@ static struct thermal_zone_device_ops tzd_ops = {
 	.get_trip_temp = pch_get_trip_temp,
 };
 
+enum board_ids {
+	board_hsw,
+	board_wpt,
+	board_skl,
+};
+
+static const struct board_info {
+	const char *name;
+	const struct pch_dev_ops *ops;
+} board_info[] = {
+	[board_hsw] = {
+		.name = "pch_haswell",
+		.ops = &pch_dev_ops_wpt,
+	},
+	[board_wpt] = {
+		.name = "pch_wildcat_point",
+		.ops = &pch_dev_ops_wpt,
+	},
+	[board_skl] = {
+		.name = "pch_skylake",
+		.ops = &pch_dev_ops_wpt,
+	},
+};
 
 static int intel_pch_thermal_probe(struct pci_dev *pdev,
 				   const struct pci_device_id *id)
 {
+	enum board_ids board_id = id->driver_data;
+	const struct board_info *bi = &board_info[board_id];
 	struct pch_thermal_device *ptd;
 	int err;
 	int nr_trips;
-	char *dev_name;
 
 	ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL);
 	if (!ptd)
 		return -ENOMEM;
 
-	switch (pdev->device) {
-	case PCH_THERMAL_DID_WPT:
-		ptd->ops = &pch_dev_ops_wpt;
-		dev_name = "pch_wildcat_point";
-		break;
-	case PCH_THERMAL_DID_SKL:
-		ptd->ops = &pch_dev_ops_wpt;
-		dev_name = "pch_skylake";
-		break;
-	case PCH_THERMAL_DID_HSW_1:
-	case PCH_THERMAL_DID_HSW_2:
-		ptd->ops = &pch_dev_ops_wpt;
-		dev_name = "pch_haswell";
-		break;
-	default:
-		dev_err(&pdev->dev, "unknown pch thermal device\n");
-		return -ENODEV;
-	}
+	ptd->ops = bi->ops;
 
 	pci_set_drvdata(pdev, ptd);
 	ptd->pdev = pdev;
@@ -331,11 +339,11 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev,
 	if (err)
 		goto error_cleanup;
 
-	ptd->tzd = thermal_zone_device_register(dev_name, nr_trips, 0, ptd,
+	ptd->tzd = thermal_zone_device_register(bi->name, nr_trips, 0, ptd,
 						&tzd_ops, NULL, 0, 0);
 	if (IS_ERR(ptd->tzd)) {
 		dev_err(&pdev->dev, "Failed to register thermal zone %s\n",
-			dev_name);
+			bi->name);
 		err = PTR_ERR(ptd->tzd);
 		goto error_cleanup;
 	}
@@ -380,10 +388,16 @@ static int intel_pch_thermal_resume(struct device *device)
 }
 
 static struct pci_device_id intel_pch_thermal_id[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1),
+		.driver_data = board_hsw, },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2),
+		.driver_data = board_hsw, },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT),
+		.driver_data = board_wpt, },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL),
+		.driver_data = board_skl, },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL_H),
+		.driver_data = board_skl, },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);

+ 1 - 0
drivers/thermal/max77620_thermal.c

@@ -149,6 +149,7 @@ static struct platform_device_id max77620_thermal_devtype[] = {
 	{ .name = "max77620-thermal", },
 	{},
 };
+MODULE_DEVICE_TABLE(platform, max77620_thermal_devtype);
 
 static struct platform_driver max77620_thermal_driver = {
 	.driver = {

+ 3 - 3
drivers/thermal/qcom-spmi-temp-alarm.c

@@ -200,7 +200,7 @@ static int qpnp_tm_probe(struct platform_device *pdev)
 	struct qpnp_tm_chip *chip;
 	struct device_node *node;
 	u8 type, subtype;
-	u32 res[2];
+	u32 res;
 	int ret, irq;
 
 	node = pdev->dev.of_node;
@@ -215,7 +215,7 @@ static int qpnp_tm_probe(struct platform_device *pdev)
 	if (!chip->map)
 		return -ENXIO;
 
-	ret = of_property_read_u32_array(node, "reg", res, 2);
+	ret = of_property_read_u32(node, "reg", &res);
 	if (ret < 0)
 		return ret;
 
@@ -228,7 +228,7 @@ static int qpnp_tm_probe(struct platform_device *pdev)
 	if (PTR_ERR(chip->adc) == -EPROBE_DEFER)
 		return PTR_ERR(chip->adc);
 
-	chip->base = res[0];
+	chip->base = res;
 
 	ret = qpnp_tm_read(chip, QPNP_TM_REG_TYPE, &type);
 	if (ret < 0) {

+ 2 - 5
drivers/thermal/rockchip_thermal.c

@@ -524,11 +524,6 @@ static void rk_tsadcv2_initialize(struct regmap *grf, void __iomem *regs,
 		       regs + TSADCV2_AUTO_PERIOD_HT);
 	writel_relaxed(TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT,
 		       regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
-
-	if (IS_ERR(grf)) {
-		pr_warn("%s: Missing rockchip,grf property\n", __func__);
-		return;
-	}
 }
 
 /**
@@ -971,6 +966,8 @@ static int rockchip_configure_from_dt(struct device *dev,
 	 * need this property.
 	 */
 	thermal->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
+	if (IS_ERR(thermal->grf))
+		dev_warn(dev, "Missing rockchip,grf property\n");
 
 	return 0;
 }

+ 1 - 0
drivers/thermal/tango_thermal.c

@@ -107,6 +107,7 @@ static const struct of_device_id tango_sensor_ids[] = {
 	},
 	{ /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, tango_sensor_ids);
 
 static struct platform_driver tango_thermal_driver = {
 	.probe	= tango_thermal_probe,

+ 307 - 1139
drivers/thermal/thermal_core.c

@@ -5,22 +5,9 @@
  *  Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
  *  Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
  *
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -64,6 +51,13 @@ static atomic_t in_suspend;
 
 static struct thermal_governor *def_governor;
 
+/*
+ * Governor section: set of functions to handle thermal governors
+ *
+ * Functions to help in the life cycle of thermal governors within
+ * the thermal core and by the thermal governor code.
+ */
+
 static struct thermal_governor *__find_governor(const char *name)
 {
 	struct thermal_governor *pos;
@@ -142,11 +136,16 @@ int thermal_register_governor(struct thermal_governor *governor)
 	mutex_lock(&thermal_governor_lock);
 
 	err = -EBUSY;
-	if (__find_governor(governor->name) == NULL) {
+	if (!__find_governor(governor->name)) {
+		bool match_default;
+
 		err = 0;
 		list_add(&governor->governor_list, &thermal_governor_list);
-		if (!def_governor && !strncmp(governor->name,
-			DEFAULT_THERMAL_GOVERNOR, THERMAL_NAME_LENGTH))
+		match_default = !strncmp(governor->name,
+					 DEFAULT_THERMAL_GOVERNOR,
+					 THERMAL_NAME_LENGTH);
+
+		if (!def_governor && match_default)
 			def_governor = governor;
 	}
 
@@ -188,14 +187,14 @@ void thermal_unregister_governor(struct thermal_governor *governor)
 
 	mutex_lock(&thermal_governor_lock);
 
-	if (__find_governor(governor->name) == NULL)
+	if (!__find_governor(governor->name))
 		goto exit;
 
 	mutex_lock(&thermal_list_lock);
 
 	list_for_each_entry(pos, &thermal_tz_list, node) {
 		if (!strncasecmp(pos->governor->name, governor->name,
-						THERMAL_NAME_LENGTH))
+				 THERMAL_NAME_LENGTH))
 			thermal_set_governor(pos, NULL);
 	}
 
@@ -203,195 +202,92 @@ void thermal_unregister_governor(struct thermal_governor *governor)
 	list_del(&governor->governor_list);
 exit:
 	mutex_unlock(&thermal_governor_lock);
-	return;
-}
-
-static int get_idr(struct idr *idr, struct mutex *lock, int *id)
-{
-	int ret;
-
-	if (lock)
-		mutex_lock(lock);
-	ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
-	if (lock)
-		mutex_unlock(lock);
-	if (unlikely(ret < 0))
-		return ret;
-	*id = ret;
-	return 0;
-}
-
-static void release_idr(struct idr *idr, struct mutex *lock, int id)
-{
-	if (lock)
-		mutex_lock(lock);
-	idr_remove(idr, id);
-	if (lock)
-		mutex_unlock(lock);
-}
-
-int get_tz_trend(struct thermal_zone_device *tz, int trip)
-{
-	enum thermal_trend trend;
-
-	if (tz->emul_temperature || !tz->ops->get_trend ||
-	    tz->ops->get_trend(tz, trip, &trend)) {
-		if (tz->temperature > tz->last_temperature)
-			trend = THERMAL_TREND_RAISING;
-		else if (tz->temperature < tz->last_temperature)
-			trend = THERMAL_TREND_DROPPING;
-		else
-			trend = THERMAL_TREND_STABLE;
-	}
-
-	return trend;
 }
-EXPORT_SYMBOL(get_tz_trend);
 
-struct thermal_instance *get_thermal_instance(struct thermal_zone_device *tz,
-			struct thermal_cooling_device *cdev, int trip)
+int thermal_zone_device_set_policy(struct thermal_zone_device *tz,
+				   char *policy)
 {
-	struct thermal_instance *pos = NULL;
-	struct thermal_instance *target_instance = NULL;
+	struct thermal_governor *gov;
+	int ret = -EINVAL;
 
+	mutex_lock(&thermal_governor_lock);
 	mutex_lock(&tz->lock);
-	mutex_lock(&cdev->lock);
 
-	list_for_each_entry(pos, &tz->thermal_instances, tz_node) {
-		if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) {
-			target_instance = pos;
-			break;
-		}
-	}
+	gov = __find_governor(strim(policy));
+	if (!gov)
+		goto exit;
 
-	mutex_unlock(&cdev->lock);
-	mutex_unlock(&tz->lock);
+	ret = thermal_set_governor(tz, gov);
 
-	return target_instance;
-}
-EXPORT_SYMBOL(get_thermal_instance);
+exit:
+	mutex_unlock(&tz->lock);
+	mutex_unlock(&thermal_governor_lock);
 
-static void print_bind_err_msg(struct thermal_zone_device *tz,
-			struct thermal_cooling_device *cdev, int ret)
-{
-	dev_err(&tz->device, "binding zone %s with cdev %s failed:%d\n",
-				tz->type, cdev->type, ret);
+	return ret;
 }
 
-static void __bind(struct thermal_zone_device *tz, int mask,
-			struct thermal_cooling_device *cdev,
-			unsigned long *limits,
-			unsigned int weight)
+int thermal_build_list_of_policies(char *buf)
 {
-	int i, ret;
+	struct thermal_governor *pos;
+	ssize_t count = 0;
+	ssize_t size = PAGE_SIZE;
 
-	for (i = 0; i < tz->trips; i++) {
-		if (mask & (1 << i)) {
-			unsigned long upper, lower;
+	mutex_lock(&thermal_governor_lock);
 
-			upper = THERMAL_NO_LIMIT;
-			lower = THERMAL_NO_LIMIT;
-			if (limits) {
-				lower = limits[i * 2];
-				upper = limits[i * 2 + 1];
-			}
-			ret = thermal_zone_bind_cooling_device(tz, i, cdev,
-							       upper, lower,
-							       weight);
-			if (ret)
-				print_bind_err_msg(tz, cdev, ret);
-		}
+	list_for_each_entry(pos, &thermal_governor_list, governor_list) {
+		size = PAGE_SIZE - count;
+		count += scnprintf(buf + count, size, "%s ", pos->name);
 	}
-}
+	count += scnprintf(buf + count, size, "\n");
 
-static void __unbind(struct thermal_zone_device *tz, int mask,
-			struct thermal_cooling_device *cdev)
-{
-	int i;
+	mutex_unlock(&thermal_governor_lock);
 
-	for (i = 0; i < tz->trips; i++)
-		if (mask & (1 << i))
-			thermal_zone_unbind_cooling_device(tz, i, cdev);
+	return count;
 }
 
-static void bind_cdev(struct thermal_cooling_device *cdev)
+static int __init thermal_register_governors(void)
 {
-	int i, ret;
-	const struct thermal_zone_params *tzp;
-	struct thermal_zone_device *pos = NULL;
-
-	mutex_lock(&thermal_list_lock);
+	int result;
 
-	list_for_each_entry(pos, &thermal_tz_list, node) {
-		if (!pos->tzp && !pos->ops->bind)
-			continue;
+	result = thermal_gov_step_wise_register();
+	if (result)
+		return result;
 
-		if (pos->ops->bind) {
-			ret = pos->ops->bind(pos, cdev);
-			if (ret)
-				print_bind_err_msg(pos, cdev, ret);
-			continue;
-		}
+	result = thermal_gov_fair_share_register();
+	if (result)
+		return result;
 
-		tzp = pos->tzp;
-		if (!tzp || !tzp->tbp)
-			continue;
+	result = thermal_gov_bang_bang_register();
+	if (result)
+		return result;
 
-		for (i = 0; i < tzp->num_tbps; i++) {
-			if (tzp->tbp[i].cdev || !tzp->tbp[i].match)
-				continue;
-			if (tzp->tbp[i].match(pos, cdev))
-				continue;
-			tzp->tbp[i].cdev = cdev;
-			__bind(pos, tzp->tbp[i].trip_mask, cdev,
-			       tzp->tbp[i].binding_limits,
-			       tzp->tbp[i].weight);
-		}
-	}
+	result = thermal_gov_user_space_register();
+	if (result)
+		return result;
 
-	mutex_unlock(&thermal_list_lock);
+	return thermal_gov_power_allocator_register();
 }
 
-static void bind_tz(struct thermal_zone_device *tz)
+static void thermal_unregister_governors(void)
 {
-	int i, ret;
-	struct thermal_cooling_device *pos = NULL;
-	const struct thermal_zone_params *tzp = tz->tzp;
-
-	if (!tzp && !tz->ops->bind)
-		return;
-
-	mutex_lock(&thermal_list_lock);
-
-	/* If there is ops->bind, try to use ops->bind */
-	if (tz->ops->bind) {
-		list_for_each_entry(pos, &thermal_cdev_list, node) {
-			ret = tz->ops->bind(tz, pos);
-			if (ret)
-				print_bind_err_msg(tz, pos, ret);
-		}
-		goto exit;
-	}
-
-	if (!tzp || !tzp->tbp)
-		goto exit;
-
-	list_for_each_entry(pos, &thermal_cdev_list, node) {
-		for (i = 0; i < tzp->num_tbps; i++) {
-			if (tzp->tbp[i].cdev || !tzp->tbp[i].match)
-				continue;
-			if (tzp->tbp[i].match(tz, pos))
-				continue;
-			tzp->tbp[i].cdev = pos;
-			__bind(tz, tzp->tbp[i].trip_mask, pos,
-			       tzp->tbp[i].binding_limits,
-			       tzp->tbp[i].weight);
-		}
-	}
-exit:
-	mutex_unlock(&thermal_list_lock);
+	thermal_gov_step_wise_unregister();
+	thermal_gov_fair_share_unregister();
+	thermal_gov_bang_bang_unregister();
+	thermal_gov_user_space_unregister();
+	thermal_gov_power_allocator_unregister();
 }
 
+/*
+ * Zone update section: main control loop applied to each zone while monitoring
+ *
+ * in polling mode. The monitoring is done using a workqueue.
+ * Same update may be done on a zone by calling thermal_zone_device_update().
+ *
+ * An update means:
+ * - Non-critical trips will invoke the governor responsible for that zone;
+ * - Hot trips will produce a notification to userspace;
+ * - Critical trip point will cause a system shutdown.
+ */
 static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
 					    int delay)
 {
@@ -420,14 +316,15 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
 }
 
 static void handle_non_critical_trips(struct thermal_zone_device *tz,
-			int trip, enum thermal_trip_type trip_type)
+				      int trip,
+				      enum thermal_trip_type trip_type)
 {
 	tz->governor ? tz->governor->throttle(tz, trip) :
 		       def_governor->throttle(tz, trip);
 }
 
 static void handle_critical_trips(struct thermal_zone_device *tz,
-				int trip, enum thermal_trip_type trip_type)
+				  int trip, enum thermal_trip_type trip_type)
 {
 	int trip_temp;
 
@@ -471,105 +368,6 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
 	monitor_thermal_zone(tz);
 }
 
-/**
- * thermal_zone_get_temp() - returns the temperature of a thermal zone
- * @tz: a valid pointer to a struct thermal_zone_device
- * @temp: a valid pointer to where to store the resulting temperature.
- *
- * When a valid thermal zone reference is passed, it will fetch its
- * temperature and fill @temp.
- *
- * Return: On success returns 0, an error code otherwise
- */
-int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp)
-{
-	int ret = -EINVAL;
-	int count;
-	int crit_temp = INT_MAX;
-	enum thermal_trip_type type;
-
-	if (!tz || IS_ERR(tz) || !tz->ops->get_temp)
-		goto exit;
-
-	mutex_lock(&tz->lock);
-
-	ret = tz->ops->get_temp(tz, temp);
-
-	if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {
-		for (count = 0; count < tz->trips; count++) {
-			ret = tz->ops->get_trip_type(tz, count, &type);
-			if (!ret && type == THERMAL_TRIP_CRITICAL) {
-				ret = tz->ops->get_trip_temp(tz, count,
-						&crit_temp);
-				break;
-			}
-		}
-
-		/*
-		 * Only allow emulating a temperature when the real temperature
-		 * is below the critical temperature so that the emulation code
-		 * cannot hide critical conditions.
-		 */
-		if (!ret && *temp < crit_temp)
-			*temp = tz->emul_temperature;
-	}
- 
-	mutex_unlock(&tz->lock);
-exit:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(thermal_zone_get_temp);
-
-void thermal_zone_set_trips(struct thermal_zone_device *tz)
-{
-	int low = -INT_MAX;
-	int high = INT_MAX;
-	int trip_temp, hysteresis;
-	int i, ret;
-
-	mutex_lock(&tz->lock);
-
-	if (!tz->ops->set_trips || !tz->ops->get_trip_hyst)
-		goto exit;
-
-	for (i = 0; i < tz->trips; i++) {
-		int trip_low;
-
-		tz->ops->get_trip_temp(tz, i, &trip_temp);
-		tz->ops->get_trip_hyst(tz, i, &hysteresis);
-
-		trip_low = trip_temp - hysteresis;
-
-		if (trip_low < tz->temperature && trip_low > low)
-			low = trip_low;
-
-		if (trip_temp > tz->temperature && trip_temp < high)
-			high = trip_temp;
-	}
-
-	/* No need to change trip points */
-	if (tz->prev_low_trip == low && tz->prev_high_trip == high)
-		goto exit;
-
-	tz->prev_low_trip = low;
-	tz->prev_high_trip = high;
-
-	dev_dbg(&tz->device,
-		"new temperature boundaries: %d < x < %d\n", low, high);
-
-	/*
-	 * Set a temperature window. When this window is left the driver
-	 * must inform the thermal core via thermal_zone_device_update.
-	 */
-	ret = tz->ops->set_trips(tz, low, high);
-	if (ret)
-		dev_err(&tz->device, "Failed to set trips: %d\n", ret);
-
-exit:
-	mutex_unlock(&tz->lock);
-}
-EXPORT_SYMBOL_GPL(thermal_zone_set_trips);
-
 static void update_temperature(struct thermal_zone_device *tz)
 {
 	int temp, ret;
@@ -629,6 +427,24 @@ void thermal_zone_device_update(struct thermal_zone_device *tz,
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_update);
 
+/**
+ * thermal_notify_framework - Sensor drivers use this API to notify framework
+ * @tz:		thermal zone device
+ * @trip:	indicates which trip point has been crossed
+ *
+ * This function handles the trip events from sensor drivers. It starts
+ * throttling the cooling devices according to the policy configured.
+ * For CRITICAL and HOT trip points, this notifies the respective drivers,
+ * and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
+ * The throttling policy is based on the configured platform data; if no
+ * platform data is provided, this uses the step_wise throttling policy.
+ */
+void thermal_notify_framework(struct thermal_zone_device *tz, int trip)
+{
+	handle_thermal_trip(tz, trip);
+}
+EXPORT_SYMBOL_GPL(thermal_notify_framework);
+
 static void thermal_zone_device_check(struct work_struct *work)
 {
 	struct thermal_zone_device *tz = container_of(work, struct
@@ -637,445 +453,12 @@ static void thermal_zone_device_check(struct work_struct *work)
 	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
 }
 
-/* sys I/F for thermal zone */
-
-#define to_thermal_zone(_dev) \
-	container_of(_dev, struct thermal_zone_device, device)
-
-static ssize_t
-type_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-
-	return sprintf(buf, "%s\n", tz->type);
-}
-
-static ssize_t
-temp_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int temperature, ret;
-
-	ret = thermal_zone_get_temp(tz, &temperature);
-
-	if (ret)
-		return ret;
-
-	return sprintf(buf, "%d\n", temperature);
-}
-
-static ssize_t
-mode_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	enum thermal_device_mode mode;
-	int result;
-
-	if (!tz->ops->get_mode)
-		return -EPERM;
-
-	result = tz->ops->get_mode(tz, &mode);
-	if (result)
-		return result;
-
-	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED ? "enabled"
-		       : "disabled");
-}
-
-static ssize_t
-mode_store(struct device *dev, struct device_attribute *attr,
-	   const char *buf, size_t count)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int result;
-
-	if (!tz->ops->set_mode)
-		return -EPERM;
-
-	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
-		result = tz->ops->set_mode(tz, THERMAL_DEVICE_ENABLED);
-	else if (!strncmp(buf, "disabled", sizeof("disabled") - 1))
-		result = tz->ops->set_mode(tz, THERMAL_DEVICE_DISABLED);
-	else
-		result = -EINVAL;
-
-	if (result)
-		return result;
-
-	return count;
-}
-
-static ssize_t
-trip_point_type_show(struct device *dev, struct device_attribute *attr,
-		     char *buf)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	enum thermal_trip_type type;
-	int trip, result;
-
-	if (!tz->ops->get_trip_type)
-		return -EPERM;
-
-	if (!sscanf(attr->attr.name, "trip_point_%d_type", &trip))
-		return -EINVAL;
-
-	result = tz->ops->get_trip_type(tz, trip, &type);
-	if (result)
-		return result;
-
-	switch (type) {
-	case THERMAL_TRIP_CRITICAL:
-		return sprintf(buf, "critical\n");
-	case THERMAL_TRIP_HOT:
-		return sprintf(buf, "hot\n");
-	case THERMAL_TRIP_PASSIVE:
-		return sprintf(buf, "passive\n");
-	case THERMAL_TRIP_ACTIVE:
-		return sprintf(buf, "active\n");
-	default:
-		return sprintf(buf, "unknown\n");
-	}
-}
-
-static ssize_t
-trip_point_temp_store(struct device *dev, struct device_attribute *attr,
-		     const char *buf, size_t count)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int trip, ret;
-	int temperature;
-
-	if (!tz->ops->set_trip_temp)
-		return -EPERM;
-
-	if (!sscanf(attr->attr.name, "trip_point_%d_temp", &trip))
-		return -EINVAL;
-
-	if (kstrtoint(buf, 10, &temperature))
-		return -EINVAL;
-
-	ret = tz->ops->set_trip_temp(tz, trip, temperature);
-	if (ret)
-		return ret;
-
-	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
-
-	return count;
-}
-
-static ssize_t
-trip_point_temp_show(struct device *dev, struct device_attribute *attr,
-		     char *buf)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int trip, ret;
-	int temperature;
-
-	if (!tz->ops->get_trip_temp)
-		return -EPERM;
-
-	if (!sscanf(attr->attr.name, "trip_point_%d_temp", &trip))
-		return -EINVAL;
-
-	ret = tz->ops->get_trip_temp(tz, trip, &temperature);
-
-	if (ret)
-		return ret;
-
-	return sprintf(buf, "%d\n", temperature);
-}
-
-static ssize_t
-trip_point_hyst_store(struct device *dev, struct device_attribute *attr,
-			const char *buf, size_t count)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int trip, ret;
-	int temperature;
-
-	if (!tz->ops->set_trip_hyst)
-		return -EPERM;
-
-	if (!sscanf(attr->attr.name, "trip_point_%d_hyst", &trip))
-		return -EINVAL;
-
-	if (kstrtoint(buf, 10, &temperature))
-		return -EINVAL;
-
-	/*
-	 * We are not doing any check on the 'temperature' value
-	 * here. The driver implementing 'set_trip_hyst' has to
-	 * take care of this.
-	 */
-	ret = tz->ops->set_trip_hyst(tz, trip, temperature);
-
-	if (!ret)
-		thermal_zone_set_trips(tz);
-
-	return ret ? ret : count;
-}
-
-static ssize_t
-trip_point_hyst_show(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int trip, ret;
-	int temperature;
-
-	if (!tz->ops->get_trip_hyst)
-		return -EPERM;
-
-	if (!sscanf(attr->attr.name, "trip_point_%d_hyst", &trip))
-		return -EINVAL;
-
-	ret = tz->ops->get_trip_hyst(tz, trip, &temperature);
-
-	return ret ? ret : sprintf(buf, "%d\n", temperature);
-}
-
-static ssize_t
-passive_store(struct device *dev, struct device_attribute *attr,
-		    const char *buf, size_t count)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	struct thermal_cooling_device *cdev = NULL;
-	int state;
-
-	if (!sscanf(buf, "%d\n", &state))
-		return -EINVAL;
-
-	/* sanity check: values below 1000 millicelcius don't make sense
-	 * and can cause the system to go into a thermal heart attack
-	 */
-	if (state && state < 1000)
-		return -EINVAL;
-
-	if (state && !tz->forced_passive) {
-		mutex_lock(&thermal_list_lock);
-		list_for_each_entry(cdev, &thermal_cdev_list, node) {
-			if (!strncmp("Processor", cdev->type,
-				     sizeof("Processor")))
-				thermal_zone_bind_cooling_device(tz,
-						THERMAL_TRIPS_NONE, cdev,
-						THERMAL_NO_LIMIT,
-						THERMAL_NO_LIMIT,
-						THERMAL_WEIGHT_DEFAULT);
-		}
-		mutex_unlock(&thermal_list_lock);
-		if (!tz->passive_delay)
-			tz->passive_delay = 1000;
-	} else if (!state && tz->forced_passive) {
-		mutex_lock(&thermal_list_lock);
-		list_for_each_entry(cdev, &thermal_cdev_list, node) {
-			if (!strncmp("Processor", cdev->type,
-				     sizeof("Processor")))
-				thermal_zone_unbind_cooling_device(tz,
-								   THERMAL_TRIPS_NONE,
-								   cdev);
-		}
-		mutex_unlock(&thermal_list_lock);
-		tz->passive_delay = 0;
-	}
-
-	tz->forced_passive = state;
-
-	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
-
-	return count;
-}
-
-static ssize_t
-passive_show(struct device *dev, struct device_attribute *attr,
-		   char *buf)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-
-	return sprintf(buf, "%d\n", tz->forced_passive);
-}
-
-static ssize_t
-policy_store(struct device *dev, struct device_attribute *attr,
-		    const char *buf, size_t count)
-{
-	int ret = -EINVAL;
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	struct thermal_governor *gov;
-	char name[THERMAL_NAME_LENGTH];
-
-	snprintf(name, sizeof(name), "%s", buf);
-
-	mutex_lock(&thermal_governor_lock);
-	mutex_lock(&tz->lock);
-
-	gov = __find_governor(strim(name));
-	if (!gov)
-		goto exit;
-
-	ret = thermal_set_governor(tz, gov);
-	if (!ret)
-		ret = count;
-
-exit:
-	mutex_unlock(&tz->lock);
-	mutex_unlock(&thermal_governor_lock);
-	return ret;
-}
-
-static ssize_t
-policy_show(struct device *dev, struct device_attribute *devattr, char *buf)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-
-	return sprintf(buf, "%s\n", tz->governor->name);
-}
-
-static ssize_t
-available_policies_show(struct device *dev, struct device_attribute *devattr,
-			char *buf)
-{
-	struct thermal_governor *pos;
-	ssize_t count = 0;
-	ssize_t size = PAGE_SIZE;
-
-	mutex_lock(&thermal_governor_lock);
-
-	list_for_each_entry(pos, &thermal_governor_list, governor_list) {
-		size = PAGE_SIZE - count;
-		count += scnprintf(buf + count, size, "%s ", pos->name);
-	}
-	count += scnprintf(buf + count, size, "\n");
-
-	mutex_unlock(&thermal_governor_lock);
-
-	return count;
-}
-
-static ssize_t
-emul_temp_store(struct device *dev, struct device_attribute *attr,
-		     const char *buf, size_t count)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int ret = 0;
-	int temperature;
-
-	if (kstrtoint(buf, 10, &temperature))
-		return -EINVAL;
-
-	if (!tz->ops->set_emul_temp) {
-		mutex_lock(&tz->lock);
-		tz->emul_temperature = temperature;
-		mutex_unlock(&tz->lock);
-	} else {
-		ret = tz->ops->set_emul_temp(tz, temperature);
-	}
-
-	if (!ret)
-		thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
-
-	return ret ? ret : count;
-}
-static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
-
-static ssize_t
-sustainable_power_show(struct device *dev, struct device_attribute *devattr,
-		       char *buf)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-
-	if (tz->tzp)
-		return sprintf(buf, "%u\n", tz->tzp->sustainable_power);
-	else
-		return -EIO;
-}
-
-static ssize_t
-sustainable_power_store(struct device *dev, struct device_attribute *devattr,
-			const char *buf, size_t count)
-{
-	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	u32 sustainable_power;
-
-	if (!tz->tzp)
-		return -EIO;
-
-	if (kstrtou32(buf, 10, &sustainable_power))
-		return -EINVAL;
-
-	tz->tzp->sustainable_power = sustainable_power;
-
-	return count;
-}
-static DEVICE_ATTR(sustainable_power, S_IWUSR | S_IRUGO, sustainable_power_show,
-		sustainable_power_store);
-
-#define create_s32_tzp_attr(name)					\
-	static ssize_t							\
-	name##_show(struct device *dev, struct device_attribute *devattr, \
-		char *buf)						\
-	{								\
-	struct thermal_zone_device *tz = to_thermal_zone(dev);		\
-									\
-	if (tz->tzp)							\
-		return sprintf(buf, "%d\n", tz->tzp->name);		\
-	else								\
-		return -EIO;						\
-	}								\
-									\
-	static ssize_t							\
-	name##_store(struct device *dev, struct device_attribute *devattr, \
-		const char *buf, size_t count)				\
-	{								\
-		struct thermal_zone_device *tz = to_thermal_zone(dev);	\
-		s32 value;						\
-									\
-		if (!tz->tzp)						\
-			return -EIO;					\
-									\
-		if (kstrtos32(buf, 10, &value))				\
-			return -EINVAL;					\
-									\
-		tz->tzp->name = value;					\
-									\
-		return count;						\
-	}								\
-	static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, name##_show, name##_store)
-
-create_s32_tzp_attr(k_po);
-create_s32_tzp_attr(k_pu);
-create_s32_tzp_attr(k_i);
-create_s32_tzp_attr(k_d);
-create_s32_tzp_attr(integral_cutoff);
-create_s32_tzp_attr(slope);
-create_s32_tzp_attr(offset);
-#undef create_s32_tzp_attr
-
-static struct device_attribute *dev_tzp_attrs[] = {
-	&dev_attr_sustainable_power,
-	&dev_attr_k_po,
-	&dev_attr_k_pu,
-	&dev_attr_k_i,
-	&dev_attr_k_d,
-	&dev_attr_integral_cutoff,
-	&dev_attr_slope,
-	&dev_attr_offset,
-};
-
-static int create_tzp_attrs(struct device *dev)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(dev_tzp_attrs); i++) {
-		int ret;
-		struct device_attribute *dev_attr = dev_tzp_attrs[i];
-
-		ret = device_create_file(dev, dev_attr);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
+/*
+ * Power actor section: interface to power actors to estimate power
+ *
+ * Set of functions used to interact to cooling devices that know
+ * how to estimate their devices power consumption.
+ */
 
 /**
  * power_actor_get_max_power() - get the maximum power that a cdev can consume
@@ -1127,12 +510,13 @@ int power_actor_get_min_power(struct thermal_cooling_device *cdev,
 }
 
 /**
- * power_actor_set_power() - limit the maximum power that a cooling device can consume
+ * power_actor_set_power() - limit the maximum power a cooling device consumes
  * @cdev:	pointer to &thermal_cooling_device
  * @instance:	thermal instance to update
  * @power:	the power in milliwatts
  *
- * Set the cooling device to consume at most @power milliwatts.
+ * Set the cooling device to consume at most @power milliwatts. The limit is
+ * expected to be a cap at the maximum power consumption.
  *
  * Return: 0 on success, -EINVAL if the cooling device does not
  * implement the power actor API or -E* for other failures.
@@ -1159,143 +543,75 @@ int power_actor_set_power(struct thermal_cooling_device *cdev,
 	return 0;
 }
 
-static DEVICE_ATTR(type, 0444, type_show, NULL);
-static DEVICE_ATTR(temp, 0444, temp_show, NULL);
-static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
-static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show, passive_store);
-static DEVICE_ATTR(policy, S_IRUGO | S_IWUSR, policy_show, policy_store);
-static DEVICE_ATTR(available_policies, S_IRUGO, available_policies_show, NULL);
-
-/* sys I/F for cooling device */
-#define to_cooling_device(_dev)	\
-	container_of(_dev, struct thermal_cooling_device, device)
-
-static ssize_t
-thermal_cooling_device_type_show(struct device *dev,
-				 struct device_attribute *attr, char *buf)
+void thermal_zone_device_rebind_exception(struct thermal_zone_device *tz,
+					  const char *cdev_type, size_t size)
 {
-	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+	struct thermal_cooling_device *cdev = NULL;
 
-	return sprintf(buf, "%s\n", cdev->type);
+	mutex_lock(&thermal_list_lock);
+	list_for_each_entry(cdev, &thermal_cdev_list, node) {
+		/* skip non matching cdevs */
+		if (strncmp(cdev_type, cdev->type, size))
+			continue;
+
+		/* re binding the exception matching the type pattern */
+		thermal_zone_bind_cooling_device(tz, THERMAL_TRIPS_NONE, cdev,
+						 THERMAL_NO_LIMIT,
+						 THERMAL_NO_LIMIT,
+						 THERMAL_WEIGHT_DEFAULT);
+	}
+	mutex_unlock(&thermal_list_lock);
 }
 
-static ssize_t
-thermal_cooling_device_max_state_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
+void thermal_zone_device_unbind_exception(struct thermal_zone_device *tz,
+					  const char *cdev_type, size_t size)
 {
-	struct thermal_cooling_device *cdev = to_cooling_device(dev);
-	unsigned long state;
-	int ret;
+	struct thermal_cooling_device *cdev = NULL;
 
-	ret = cdev->ops->get_max_state(cdev, &state);
-	if (ret)
-		return ret;
-	return sprintf(buf, "%ld\n", state);
+	mutex_lock(&thermal_list_lock);
+	list_for_each_entry(cdev, &thermal_cdev_list, node) {
+		/* skip non matching cdevs */
+		if (strncmp(cdev_type, cdev->type, size))
+			continue;
+		/* unbinding the exception matching the type pattern */
+		thermal_zone_unbind_cooling_device(tz, THERMAL_TRIPS_NONE,
+						   cdev);
+	}
+	mutex_unlock(&thermal_list_lock);
 }
 
-static ssize_t
-thermal_cooling_device_cur_state_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
+/*
+ * Device management section: cooling devices, zones devices, and binding
+ *
+ * Set of functions provided by the thermal core for:
+ * - cooling devices lifecycle: registration, unregistration,
+ *				binding, and unbinding.
+ * - thermal zone devices lifecycle: registration, unregistration,
+ *				     binding, and unbinding.
+ */
+static int get_idr(struct idr *idr, struct mutex *lock, int *id)
 {
-	struct thermal_cooling_device *cdev = to_cooling_device(dev);
-	unsigned long state;
 	int ret;
 
-	ret = cdev->ops->get_cur_state(cdev, &state);
-	if (ret)
+	if (lock)
+		mutex_lock(lock);
+	ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
+	if (lock)
+		mutex_unlock(lock);
+	if (unlikely(ret < 0))
 		return ret;
-	return sprintf(buf, "%ld\n", state);
-}
-
-static ssize_t
-thermal_cooling_device_cur_state_store(struct device *dev,
-				       struct device_attribute *attr,
-				       const char *buf, size_t count)
-{
-	struct thermal_cooling_device *cdev = to_cooling_device(dev);
-	unsigned long state;
-	int result;
-
-	if (!sscanf(buf, "%ld\n", &state))
-		return -EINVAL;
-
-	if ((long)state < 0)
-		return -EINVAL;
-
-	result = cdev->ops->set_cur_state(cdev, state);
-	if (result)
-		return result;
-	return count;
-}
-
-static struct device_attribute dev_attr_cdev_type =
-__ATTR(type, 0444, thermal_cooling_device_type_show, NULL);
-static DEVICE_ATTR(max_state, 0444,
-		   thermal_cooling_device_max_state_show, NULL);
-static DEVICE_ATTR(cur_state, 0644,
-		   thermal_cooling_device_cur_state_show,
-		   thermal_cooling_device_cur_state_store);
-
-static ssize_t
-thermal_cooling_device_trip_point_show(struct device *dev,
-				       struct device_attribute *attr, char *buf)
-{
-	struct thermal_instance *instance;
-
-	instance =
-	    container_of(attr, struct thermal_instance, attr);
-
-	if (instance->trip == THERMAL_TRIPS_NONE)
-		return sprintf(buf, "-1\n");
-	else
-		return sprintf(buf, "%d\n", instance->trip);
-}
-
-static struct attribute *cooling_device_attrs[] = {
-	&dev_attr_cdev_type.attr,
-	&dev_attr_max_state.attr,
-	&dev_attr_cur_state.attr,
-	NULL,
-};
-
-static const struct attribute_group cooling_device_attr_group = {
-	.attrs = cooling_device_attrs,
-};
-
-static const struct attribute_group *cooling_device_attr_groups[] = {
-	&cooling_device_attr_group,
-	NULL,
-};
-
-static ssize_t
-thermal_cooling_device_weight_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	struct thermal_instance *instance;
-
-	instance = container_of(attr, struct thermal_instance, weight_attr);
-
-	return sprintf(buf, "%d\n", instance->weight);
+	*id = ret;
+	return 0;
 }
 
-static ssize_t
-thermal_cooling_device_weight_store(struct device *dev,
-				    struct device_attribute *attr,
-				    const char *buf, size_t count)
-{
-	struct thermal_instance *instance;
-	int ret, weight;
-
-	ret = kstrtoint(buf, 0, &weight);
-	if (ret)
-		return ret;
-
-	instance = container_of(attr, struct thermal_instance, weight_attr);
-	instance->weight = weight;
-
-	return count;
+static void release_idr(struct idr *idr, struct mutex *lock, int id)
+{
+	if (lock)
+		mutex_lock(lock);
+	idr_remove(idr, id);
+	if (lock)
+		mutex_unlock(lock);
 }
-/* Device management */
 
 /**
  * thermal_zone_bind_cooling_device() - bind a cooling device to a thermal zone
@@ -1358,8 +674,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
 	if (lower > upper || upper > max_state)
 		return -EINVAL;
 
-	dev =
-	    kzalloc(sizeof(struct thermal_instance), GFP_KERNEL);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
 	dev->tz = tz;
@@ -1402,10 +717,10 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
 	mutex_lock(&tz->lock);
 	mutex_lock(&cdev->lock);
 	list_for_each_entry(pos, &tz->thermal_instances, tz_node)
-	    if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) {
-		result = -EEXIST;
-		break;
-	}
+		if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) {
+			result = -EEXIST;
+			break;
+		}
 	if (!result) {
 		list_add_tail(&dev->tz_node, &tz->thermal_instances);
 		list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
@@ -1485,8 +800,8 @@ static void thermal_release(struct device *dev)
 		     sizeof("thermal_zone") - 1)) {
 		tz = to_thermal_zone(dev);
 		kfree(tz);
-	} else if(!strncmp(dev_name(dev), "cooling_device",
-			sizeof("cooling_device") - 1)){
+	} else if (!strncmp(dev_name(dev), "cooling_device",
+			    sizeof("cooling_device") - 1)) {
 		cdev = to_cooling_device(dev);
 		kfree(cdev);
 	}
@@ -1497,6 +812,78 @@ static struct class thermal_class = {
 	.dev_release = thermal_release,
 };
 
+static inline
+void print_bind_err_msg(struct thermal_zone_device *tz,
+			struct thermal_cooling_device *cdev, int ret)
+{
+	dev_err(&tz->device, "binding zone %s with cdev %s failed:%d\n",
+		tz->type, cdev->type, ret);
+}
+
+static void __bind(struct thermal_zone_device *tz, int mask,
+		   struct thermal_cooling_device *cdev,
+		   unsigned long *limits,
+		   unsigned int weight)
+{
+	int i, ret;
+
+	for (i = 0; i < tz->trips; i++) {
+		if (mask & (1 << i)) {
+			unsigned long upper, lower;
+
+			upper = THERMAL_NO_LIMIT;
+			lower = THERMAL_NO_LIMIT;
+			if (limits) {
+				lower = limits[i * 2];
+				upper = limits[i * 2 + 1];
+			}
+			ret = thermal_zone_bind_cooling_device(tz, i, cdev,
+							       upper, lower,
+							       weight);
+			if (ret)
+				print_bind_err_msg(tz, cdev, ret);
+		}
+	}
+}
+
+static void bind_cdev(struct thermal_cooling_device *cdev)
+{
+	int i, ret;
+	const struct thermal_zone_params *tzp;
+	struct thermal_zone_device *pos = NULL;
+
+	mutex_lock(&thermal_list_lock);
+
+	list_for_each_entry(pos, &thermal_tz_list, node) {
+		if (!pos->tzp && !pos->ops->bind)
+			continue;
+
+		if (pos->ops->bind) {
+			ret = pos->ops->bind(pos, cdev);
+			if (ret)
+				print_bind_err_msg(pos, cdev, ret);
+			continue;
+		}
+
+		tzp = pos->tzp;
+		if (!tzp || !tzp->tbp)
+			continue;
+
+		for (i = 0; i < tzp->num_tbps; i++) {
+			if (tzp->tbp[i].cdev || !tzp->tbp[i].match)
+				continue;
+			if (tzp->tbp[i].match(pos, cdev))
+				continue;
+			tzp->tbp[i].cdev = cdev;
+			__bind(pos, tzp->tbp[i].trip_mask, cdev,
+			       tzp->tbp[i].binding_limits,
+			       tzp->tbp[i].weight);
+		}
+	}
+
+	mutex_unlock(&thermal_list_lock);
+}
+
 /**
  * __thermal_cooling_device_register() - register a new thermal cooling device
  * @np:		a pointer to a device tree node.
@@ -1529,7 +916,7 @@ __thermal_cooling_device_register(struct device_node *np,
 	    !ops->set_cur_state)
 		return ERR_PTR(-EINVAL);
 
-	cdev = kzalloc(sizeof(struct thermal_cooling_device), GFP_KERNEL);
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
 	if (!cdev)
 		return ERR_PTR(-ENOMEM);
 
@@ -1546,7 +933,7 @@ __thermal_cooling_device_register(struct device_node *np,
 	cdev->ops = ops;
 	cdev->updated = false;
 	cdev->device.class = &thermal_class;
-	cdev->device.groups = cooling_device_attr_groups;
+	thermal_cooling_device_setup_sysfs(cdev);
 	cdev->devdata = devdata;
 	dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
 	result = device_register(&cdev->device);
@@ -1619,12 +1006,22 @@ thermal_of_cooling_device_register(struct device_node *np,
 }
 EXPORT_SYMBOL_GPL(thermal_of_cooling_device_register);
 
+static void __unbind(struct thermal_zone_device *tz, int mask,
+		     struct thermal_cooling_device *cdev)
+{
+	int i;
+
+	for (i = 0; i < tz->trips; i++)
+		if (mask & (1 << i))
+			thermal_zone_unbind_cooling_device(tz, i, cdev);
+}
+
 /**
- * thermal_cooling_device_unregister - removes the registered thermal cooling device
+ * thermal_cooling_device_unregister - removes a thermal cooling device
  * @cdev:	the thermal cooling device to remove.
  *
- * thermal_cooling_device_unregister() must be called when the device is no
- * longer needed.
+ * thermal_cooling_device_unregister() must be called when a registered
+ * thermal cooling device is no longer needed.
  */
 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
 {
@@ -1638,8 +1035,8 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
 
 	mutex_lock(&thermal_list_lock);
 	list_for_each_entry(pos, &thermal_cdev_list, node)
-	    if (pos == cdev)
-		break;
+		if (pos == cdev)
+			break;
 	if (pos != cdev) {
 		/* thermal cooling device not found */
 		mutex_unlock(&thermal_list_lock);
@@ -1668,171 +1065,49 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
 
 	mutex_unlock(&thermal_list_lock);
 
-	if (cdev->type[0])
-		device_remove_file(&cdev->device, &dev_attr_cdev_type);
-	device_remove_file(&cdev->device, &dev_attr_max_state);
-	device_remove_file(&cdev->device, &dev_attr_cur_state);
-
 	release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id);
 	device_unregister(&cdev->device);
-	return;
 }
 EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister);
 
-void thermal_cdev_update(struct thermal_cooling_device *cdev)
+static void bind_tz(struct thermal_zone_device *tz)
 {
-	struct thermal_instance *instance;
-	unsigned long target = 0;
+	int i, ret;
+	struct thermal_cooling_device *pos = NULL;
+	const struct thermal_zone_params *tzp = tz->tzp;
 
-	mutex_lock(&cdev->lock);
-	/* cooling device is updated*/
-	if (cdev->updated) {
-		mutex_unlock(&cdev->lock);
+	if (!tzp && !tz->ops->bind)
 		return;
-	}
-
-	/* Make sure cdev enters the deepest cooling state */
-	list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
-		dev_dbg(&cdev->device, "zone%d->target=%lu\n",
-				instance->tz->id, instance->target);
-		if (instance->target == THERMAL_NO_TARGET)
-			continue;
-		if (instance->target > target)
-			target = instance->target;
-	}
-	cdev->ops->set_cur_state(cdev, target);
-	cdev->updated = true;
-	mutex_unlock(&cdev->lock);
-	trace_cdev_update(cdev, target);
-	dev_dbg(&cdev->device, "set to state %lu\n", target);
-}
-EXPORT_SYMBOL(thermal_cdev_update);
-
-/**
- * thermal_notify_framework - Sensor drivers use this API to notify framework
- * @tz:		thermal zone device
- * @trip:	indicates which trip point has been crossed
- *
- * This function handles the trip events from sensor drivers. It starts
- * throttling the cooling devices according to the policy configured.
- * For CRITICAL and HOT trip points, this notifies the respective drivers,
- * and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
- * The throttling policy is based on the configured platform data; if no
- * platform data is provided, this uses the step_wise throttling policy.
- */
-void thermal_notify_framework(struct thermal_zone_device *tz, int trip)
-{
-	handle_thermal_trip(tz, trip);
-}
-EXPORT_SYMBOL_GPL(thermal_notify_framework);
-
-/**
- * create_trip_attrs() - create attributes for trip points
- * @tz:		the thermal zone device
- * @mask:	Writeable trip point bitmap.
- *
- * helper function to instantiate sysfs entries for every trip
- * point and its properties of a struct thermal_zone_device.
- *
- * Return: 0 on success, the proper error value otherwise.
- */
-static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
-{
-	int indx;
-	int size = sizeof(struct thermal_attr) * tz->trips;
 
-	tz->trip_type_attrs = kzalloc(size, GFP_KERNEL);
-	if (!tz->trip_type_attrs)
-		return -ENOMEM;
-
-	tz->trip_temp_attrs = kzalloc(size, GFP_KERNEL);
-	if (!tz->trip_temp_attrs) {
-		kfree(tz->trip_type_attrs);
-		return -ENOMEM;
-	}
+	mutex_lock(&thermal_list_lock);
 
-	if (tz->ops->get_trip_hyst) {
-		tz->trip_hyst_attrs = kzalloc(size, GFP_KERNEL);
-		if (!tz->trip_hyst_attrs) {
-			kfree(tz->trip_type_attrs);
-			kfree(tz->trip_temp_attrs);
-			return -ENOMEM;
+	/* If there is ops->bind, try to use ops->bind */
+	if (tz->ops->bind) {
+		list_for_each_entry(pos, &thermal_cdev_list, node) {
+			ret = tz->ops->bind(tz, pos);
+			if (ret)
+				print_bind_err_msg(tz, pos, ret);
 		}
+		goto exit;
 	}
 
+	if (!tzp || !tzp->tbp)
+		goto exit;
 
-	for (indx = 0; indx < tz->trips; indx++) {
-		/* create trip type attribute */
-		snprintf(tz->trip_type_attrs[indx].name, THERMAL_NAME_LENGTH,
-			 "trip_point_%d_type", indx);
-
-		sysfs_attr_init(&tz->trip_type_attrs[indx].attr.attr);
-		tz->trip_type_attrs[indx].attr.attr.name =
-						tz->trip_type_attrs[indx].name;
-		tz->trip_type_attrs[indx].attr.attr.mode = S_IRUGO;
-		tz->trip_type_attrs[indx].attr.show = trip_point_type_show;
-
-		device_create_file(&tz->device,
-				   &tz->trip_type_attrs[indx].attr);
-
-		/* create trip temp attribute */
-		snprintf(tz->trip_temp_attrs[indx].name, THERMAL_NAME_LENGTH,
-			 "trip_point_%d_temp", indx);
-
-		sysfs_attr_init(&tz->trip_temp_attrs[indx].attr.attr);
-		tz->trip_temp_attrs[indx].attr.attr.name =
-						tz->trip_temp_attrs[indx].name;
-		tz->trip_temp_attrs[indx].attr.attr.mode = S_IRUGO;
-		tz->trip_temp_attrs[indx].attr.show = trip_point_temp_show;
-		if (IS_ENABLED(CONFIG_THERMAL_WRITABLE_TRIPS) &&
-		    mask & (1 << indx)) {
-			tz->trip_temp_attrs[indx].attr.attr.mode |= S_IWUSR;
-			tz->trip_temp_attrs[indx].attr.store =
-							trip_point_temp_store;
-		}
-
-		device_create_file(&tz->device,
-				   &tz->trip_temp_attrs[indx].attr);
-
-		/* create Optional trip hyst attribute */
-		if (!tz->ops->get_trip_hyst)
-			continue;
-		snprintf(tz->trip_hyst_attrs[indx].name, THERMAL_NAME_LENGTH,
-			 "trip_point_%d_hyst", indx);
-
-		sysfs_attr_init(&tz->trip_hyst_attrs[indx].attr.attr);
-		tz->trip_hyst_attrs[indx].attr.attr.name =
-					tz->trip_hyst_attrs[indx].name;
-		tz->trip_hyst_attrs[indx].attr.attr.mode = S_IRUGO;
-		tz->trip_hyst_attrs[indx].attr.show = trip_point_hyst_show;
-		if (tz->ops->set_trip_hyst) {
-			tz->trip_hyst_attrs[indx].attr.attr.mode |= S_IWUSR;
-			tz->trip_hyst_attrs[indx].attr.store =
-					trip_point_hyst_store;
+	list_for_each_entry(pos, &thermal_cdev_list, node) {
+		for (i = 0; i < tzp->num_tbps; i++) {
+			if (tzp->tbp[i].cdev || !tzp->tbp[i].match)
+				continue;
+			if (tzp->tbp[i].match(tz, pos))
+				continue;
+			tzp->tbp[i].cdev = pos;
+			__bind(tz, tzp->tbp[i].trip_mask, pos,
+			       tzp->tbp[i].binding_limits,
+			       tzp->tbp[i].weight);
 		}
-
-		device_create_file(&tz->device,
-				   &tz->trip_hyst_attrs[indx].attr);
-	}
-	return 0;
-}
-
-static void remove_trip_attrs(struct thermal_zone_device *tz)
-{
-	int indx;
-
-	for (indx = 0; indx < tz->trips; indx++) {
-		device_remove_file(&tz->device,
-				   &tz->trip_type_attrs[indx].attr);
-		device_remove_file(&tz->device,
-				   &tz->trip_temp_attrs[indx].attr);
-		if (tz->ops->get_trip_hyst)
-			device_remove_file(&tz->device,
-				  &tz->trip_hyst_attrs[indx].attr);
 	}
-	kfree(tz->trip_type_attrs);
-	kfree(tz->trip_temp_attrs);
-	kfree(tz->trip_hyst_attrs);
+exit:
+	mutex_unlock(&thermal_list_lock);
 }
 
 /**
@@ -1859,20 +1134,22 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
  * in case of error, an ERR_PTR. Caller must check return value with
  * IS_ERR*() helpers.
  */
-struct thermal_zone_device *thermal_zone_device_register(const char *type,
-	int trips, int mask, void *devdata,
-	struct thermal_zone_device_ops *ops,
-	struct thermal_zone_params *tzp,
-	int passive_delay, int polling_delay)
+struct thermal_zone_device *
+thermal_zone_device_register(const char *type, int trips, int mask,
+			     void *devdata, struct thermal_zone_device_ops *ops,
+			     struct thermal_zone_params *tzp, int passive_delay,
+			     int polling_delay)
 {
 	struct thermal_zone_device *tz;
 	enum thermal_trip_type trip_type;
 	int trip_temp;
 	int result;
 	int count;
-	int passive = 0;
 	struct thermal_governor *governor;
 
+	if (!type || strlen(type) == 0)
+		return ERR_PTR(-EINVAL);
+
 	if (type && strlen(type) >= THERMAL_NAME_LENGTH)
 		return ERR_PTR(-EINVAL);
 
@@ -1885,7 +1162,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp))
 		return ERR_PTR(-EINVAL);
 
-	tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL);
+	tz = kzalloc(sizeof(*tz), GFP_KERNEL);
 	if (!tz)
 		return ERR_PTR(-ENOMEM);
 
@@ -1898,7 +1175,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 		return ERR_PTR(result);
 	}
 
-	strlcpy(tz->type, type ? : "", sizeof(tz->type));
+	strlcpy(tz->type, type, sizeof(tz->type));
 	tz->ops = ops;
 	tz->tzp = tzp;
 	tz->device.class = &thermal_class;
@@ -1906,6 +1183,13 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	tz->trips = trips;
 	tz->passive_delay = passive_delay;
 	tz->polling_delay = polling_delay;
+
+	/* sys I/F */
+	/* Add nodes that are always present via .groups */
+	result = thermal_zone_create_device_groups(tz, mask);
+	if (result)
+		goto unregister;
+
 	/* A new thermal zone needs to be updated anyway. */
 	atomic_set(&tz->need_update, 1);
 
@@ -1917,32 +1201,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 		return ERR_PTR(result);
 	}
 
-	/* sys I/F */
-	if (type) {
-		result = device_create_file(&tz->device, &dev_attr_type);
-		if (result)
-			goto unregister;
-	}
-
-	result = device_create_file(&tz->device, &dev_attr_temp);
-	if (result)
-		goto unregister;
-
-	if (ops->get_mode) {
-		result = device_create_file(&tz->device, &dev_attr_mode);
-		if (result)
-			goto unregister;
-	}
-
-	result = create_trip_attrs(tz, mask);
-	if (result)
-		goto unregister;
-
 	for (count = 0; count < trips; count++) {
 		if (tz->ops->get_trip_type(tz, count, &trip_type))
 			set_bit(count, &tz->trips_disabled);
-		if (trip_type == THERMAL_TRIP_PASSIVE)
-			passive = 1;
 		if (tz->ops->get_trip_temp(tz, count, &trip_temp))
 			set_bit(count, &tz->trips_disabled);
 		/* Check for bogus trip points */
@@ -1950,33 +1211,6 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 			set_bit(count, &tz->trips_disabled);
 	}
 
-	if (!passive) {
-		result = device_create_file(&tz->device, &dev_attr_passive);
-		if (result)
-			goto unregister;
-	}
-
-	if (IS_ENABLED(CONFIG_THERMAL_EMULATION)) {
-		result = device_create_file(&tz->device, &dev_attr_emul_temp);
-		if (result)
-			goto unregister;
-	}
-
-	/* Create policy attribute */
-	result = device_create_file(&tz->device, &dev_attr_policy);
-	if (result)
-		goto unregister;
-
-	/* Add thermal zone params */
-	result = create_tzp_attrs(&tz->device);
-	if (result)
-		goto unregister;
-
-	/* Create available_policies attribute */
-	result = device_create_file(&tz->device, &dev_attr_available_policies);
-	if (result)
-		goto unregister;
-
 	/* Update 'this' zone's governor information */
 	mutex_lock(&thermal_governor_lock);
 
@@ -2006,7 +1240,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	/* Bind cooling devices for this zone */
 	bind_tz(tz);
 
-	INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
+	INIT_DELAYED_WORK(&tz->poll_queue, thermal_zone_device_check);
 
 	thermal_zone_device_reset(tz);
 	/* Update the new thermal zone and mark it as already updated. */
@@ -2040,8 +1274,8 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 
 	mutex_lock(&thermal_list_lock);
 	list_for_each_entry(pos, &thermal_tz_list, node)
-	    if (pos == tz)
-		break;
+		if (pos == tz)
+			break;
 	if (pos != tz) {
 		/* thermal zone device not found */
 		mutex_unlock(&thermal_list_lock);
@@ -2071,14 +1305,10 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 
 	thermal_zone_device_set_polling(tz, 0);
 
-	if (tz->type[0])
-		device_remove_file(&tz->device, &dev_attr_type);
-	device_remove_file(&tz->device, &dev_attr_temp);
-	if (tz->ops->get_mode)
-		device_remove_file(&tz->device, &dev_attr_mode);
-	device_remove_file(&tz->device, &dev_attr_policy);
-	device_remove_file(&tz->device, &dev_attr_available_policies);
-	remove_trip_attrs(tz);
+	kfree(tz->trip_type_attrs);
+	kfree(tz->trip_temp_attrs);
+	kfree(tz->trip_hyst_attrs);
+	kfree(tz->trips_attribute_group.attrs);
 	thermal_set_governor(tz, NULL);
 
 	thermal_remove_hwmon_sysfs(tz);
@@ -2086,7 +1316,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 	idr_destroy(&tz->idr);
 	mutex_destroy(&tz->lock);
 	device_unregister(&tz->device);
-	return;
+	kfree(tz->device.groups);
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_unregister);
 
@@ -2128,36 +1358,6 @@ exit:
 }
 EXPORT_SYMBOL_GPL(thermal_zone_get_zone_by_name);
 
-/**
- * thermal_zone_get_slope - return the slope attribute of the thermal zone
- * @tz: thermal zone device with the slope attribute
- *
- * Return: If the thermal zone device has a slope attribute, return it, else
- * return 1.
- */
-int thermal_zone_get_slope(struct thermal_zone_device *tz)
-{
-	if (tz && tz->tzp)
-		return tz->tzp->slope;
-	return 1;
-}
-EXPORT_SYMBOL_GPL(thermal_zone_get_slope);
-
-/**
- * thermal_zone_get_offset - return the offset attribute of the thermal zone
- * @tz: thermal zone device with the offset attribute
- *
- * Return: If the thermal zone device has a offset attribute, return it, else
- * return 0.
- */
-int thermal_zone_get_offset(struct thermal_zone_device *tz)
-{
-	if (tz && tz->tzp)
-		return tz->tzp->offset;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(thermal_zone_get_offset);
-
 #ifdef CONFIG_NET
 static const struct genl_multicast_group thermal_event_mcgrps[] = {
 	{ .name = THERMAL_GENL_MCAST_GROUP_NAME, },
@@ -2173,7 +1373,7 @@ static struct genl_family thermal_event_genl_family __ro_after_init = {
 };
 
 int thermal_generate_netlink_event(struct thermal_zone_device *tz,
-					enum events event)
+				   enum events event)
 {
 	struct sk_buff *skb;
 	struct nlattr *attr;
@@ -2249,40 +1449,8 @@ static inline int genetlink_init(void) { return 0; }
 static inline void genetlink_exit(void) {}
 #endif /* !CONFIG_NET */
 
-static int __init thermal_register_governors(void)
-{
-	int result;
-
-	result = thermal_gov_step_wise_register();
-	if (result)
-		return result;
-
-	result = thermal_gov_fair_share_register();
-	if (result)
-		return result;
-
-	result = thermal_gov_bang_bang_register();
-	if (result)
-		return result;
-
-	result = thermal_gov_user_space_register();
-	if (result)
-		return result;
-
-	return thermal_gov_power_allocator_register();
-}
-
-static void thermal_unregister_governors(void)
-{
-	thermal_gov_step_wise_unregister();
-	thermal_gov_fair_share_unregister();
-	thermal_gov_bang_bang_unregister();
-	thermal_gov_user_space_unregister();
-	thermal_gov_power_allocator_unregister();
-}
-
 static int thermal_pm_notify(struct notifier_block *nb,
-				unsigned long mode, void *_unused)
+			     unsigned long mode, void *_unused)
 {
 	struct thermal_zone_device *tz;
 

+ 26 - 0
drivers/thermal/thermal_core.h

@@ -54,8 +54,34 @@ struct thermal_instance {
 	unsigned int weight; /* The weight of the cooling device */
 };
 
+#define to_thermal_zone(_dev) \
+	container_of(_dev, struct thermal_zone_device, device)
+
+#define to_cooling_device(_dev)	\
+	container_of(_dev, struct thermal_cooling_device, device)
+
 int thermal_register_governor(struct thermal_governor *);
 void thermal_unregister_governor(struct thermal_governor *);
+void thermal_zone_device_rebind_exception(struct thermal_zone_device *,
+					  const char *, size_t);
+void thermal_zone_device_unbind_exception(struct thermal_zone_device *,
+					  const char *, size_t);
+int thermal_zone_device_set_policy(struct thermal_zone_device *, char *);
+int thermal_build_list_of_policies(char *buf);
+
+/* sysfs I/F */
+int thermal_zone_create_device_groups(struct thermal_zone_device *, int);
+void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *);
+/* used only at binding time */
+ssize_t
+thermal_cooling_device_trip_point_show(struct device *,
+				       struct device_attribute *, char *);
+ssize_t thermal_cooling_device_weight_show(struct device *,
+					   struct device_attribute *, char *);
+
+ssize_t thermal_cooling_device_weight_store(struct device *,
+					    struct device_attribute *,
+					    const char *, size_t);
 
 #ifdef CONFIG_THERMAL_GOV_STEP_WISE
 int thermal_gov_step_wise_register(void);

+ 226 - 0
drivers/thermal/thermal_helpers.c

@@ -0,0 +1,226 @@
+/*
+ *  thermal_helpers.c - helper functions to handle thermal devices
+ *
+ *  Copyright (C) 2016 Eduardo Valentin <edubezval@gmail.com>
+ *
+ *  Highly based on original thermal_core.c
+ *  Copyright (C) 2008 Intel Corp
+ *  Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
+ *  Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/sysfs.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <trace/events/thermal.h>
+
+#include "thermal_core.h"
+
+int get_tz_trend(struct thermal_zone_device *tz, int trip)
+{
+	enum thermal_trend trend;
+
+	if (tz->emul_temperature || !tz->ops->get_trend ||
+	    tz->ops->get_trend(tz, trip, &trend)) {
+		if (tz->temperature > tz->last_temperature)
+			trend = THERMAL_TREND_RAISING;
+		else if (tz->temperature < tz->last_temperature)
+			trend = THERMAL_TREND_DROPPING;
+		else
+			trend = THERMAL_TREND_STABLE;
+	}
+
+	return trend;
+}
+EXPORT_SYMBOL(get_tz_trend);
+
+struct thermal_instance *
+get_thermal_instance(struct thermal_zone_device *tz,
+		     struct thermal_cooling_device *cdev, int trip)
+{
+	struct thermal_instance *pos = NULL;
+	struct thermal_instance *target_instance = NULL;
+
+	mutex_lock(&tz->lock);
+	mutex_lock(&cdev->lock);
+
+	list_for_each_entry(pos, &tz->thermal_instances, tz_node) {
+		if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) {
+			target_instance = pos;
+			break;
+		}
+	}
+
+	mutex_unlock(&cdev->lock);
+	mutex_unlock(&tz->lock);
+
+	return target_instance;
+}
+EXPORT_SYMBOL(get_thermal_instance);
+
+/**
+ * thermal_zone_get_temp() - returns the temperature of a thermal zone
+ * @tz: a valid pointer to a struct thermal_zone_device
+ * @temp: a valid pointer to where to store the resulting temperature.
+ *
+ * When a valid thermal zone reference is passed, it will fetch its
+ * temperature and fill @temp.
+ *
+ * Return: On success returns 0, an error code otherwise
+ */
+int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp)
+{
+	int ret = -EINVAL;
+	int count;
+	int crit_temp = INT_MAX;
+	enum thermal_trip_type type;
+
+	if (!tz || IS_ERR(tz) || !tz->ops->get_temp)
+		goto exit;
+
+	mutex_lock(&tz->lock);
+
+	ret = tz->ops->get_temp(tz, temp);
+
+	if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {
+		for (count = 0; count < tz->trips; count++) {
+			ret = tz->ops->get_trip_type(tz, count, &type);
+			if (!ret && type == THERMAL_TRIP_CRITICAL) {
+				ret = tz->ops->get_trip_temp(tz, count,
+						&crit_temp);
+				break;
+			}
+		}
+
+		/*
+		 * Only allow emulating a temperature when the real temperature
+		 * is below the critical temperature so that the emulation code
+		 * cannot hide critical conditions.
+		 */
+		if (!ret && *temp < crit_temp)
+			*temp = tz->emul_temperature;
+	}
+
+	mutex_unlock(&tz->lock);
+exit:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_get_temp);
+
+void thermal_zone_set_trips(struct thermal_zone_device *tz)
+{
+	int low = -INT_MAX;
+	int high = INT_MAX;
+	int trip_temp, hysteresis;
+	int i, ret;
+
+	mutex_lock(&tz->lock);
+
+	if (!tz->ops->set_trips || !tz->ops->get_trip_hyst)
+		goto exit;
+
+	for (i = 0; i < tz->trips; i++) {
+		int trip_low;
+
+		tz->ops->get_trip_temp(tz, i, &trip_temp);
+		tz->ops->get_trip_hyst(tz, i, &hysteresis);
+
+		trip_low = trip_temp - hysteresis;
+
+		if (trip_low < tz->temperature && trip_low > low)
+			low = trip_low;
+
+		if (trip_temp > tz->temperature && trip_temp < high)
+			high = trip_temp;
+	}
+
+	/* No need to change trip points */
+	if (tz->prev_low_trip == low && tz->prev_high_trip == high)
+		goto exit;
+
+	tz->prev_low_trip = low;
+	tz->prev_high_trip = high;
+
+	dev_dbg(&tz->device,
+		"new temperature boundaries: %d < x < %d\n", low, high);
+
+	/*
+	 * Set a temperature window. When this window is left the driver
+	 * must inform the thermal core via thermal_zone_device_update.
+	 */
+	ret = tz->ops->set_trips(tz, low, high);
+	if (ret)
+		dev_err(&tz->device, "Failed to set trips: %d\n", ret);
+
+exit:
+	mutex_unlock(&tz->lock);
+}
+EXPORT_SYMBOL_GPL(thermal_zone_set_trips);
+
+void thermal_cdev_update(struct thermal_cooling_device *cdev)
+{
+	struct thermal_instance *instance;
+	unsigned long target = 0;
+
+	mutex_lock(&cdev->lock);
+	/* cooling device is updated*/
+	if (cdev->updated) {
+		mutex_unlock(&cdev->lock);
+		return;
+	}
+
+	/* Make sure cdev enters the deepest cooling state */
+	list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
+		dev_dbg(&cdev->device, "zone%d->target=%lu\n",
+			instance->tz->id, instance->target);
+		if (instance->target == THERMAL_NO_TARGET)
+			continue;
+		if (instance->target > target)
+			target = instance->target;
+	}
+	cdev->ops->set_cur_state(cdev, target);
+	cdev->updated = true;
+	mutex_unlock(&cdev->lock);
+	trace_cdev_update(cdev, target);
+	dev_dbg(&cdev->device, "set to state %lu\n", target);
+}
+EXPORT_SYMBOL(thermal_cdev_update);
+
+/**
+ * thermal_zone_get_slope - return the slope attribute of the thermal zone
+ * @tz: thermal zone device with the slope attribute
+ *
+ * Return: If the thermal zone device has a slope attribute, return it, else
+ * return 1.
+ */
+int thermal_zone_get_slope(struct thermal_zone_device *tz)
+{
+	if (tz && tz->tzp)
+		return tz->tzp->slope;
+	return 1;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_get_slope);
+
+/**
+ * thermal_zone_get_offset - return the offset attribute of the thermal zone
+ * @tz: thermal zone device with the offset attribute
+ *
+ * Return: If the thermal zone device has a offset attribute, return it, else
+ * return 0.
+ */
+int thermal_zone_get_offset(struct thermal_zone_device *tz)
+{
+	if (tz && tz->tzp)
+		return tz->tzp->offset;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_get_offset);

+ 2 - 2
drivers/thermal/thermal_hwmon.c

@@ -64,7 +64,7 @@ name_show(struct device *dev, struct device_attribute *attr, char *buf)
 	struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev);
 	return sprintf(buf, "%s\n", hwmon->type);
 }
-static DEVICE_ATTR(name, 0444, name_show, NULL);
+static DEVICE_ATTR_RO(name);
 
 static ssize_t
 temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
@@ -98,7 +98,7 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf)
 	int temperature;
 	int ret;
 
-	ret = tz->ops->get_trip_temp(tz, 0, &temperature);
+	ret = tz->ops->get_crit_temp(tz, &temperature);
 	if (ret)
 		return ret;
 

+ 771 - 0
drivers/thermal/thermal_sysfs.c

@@ -0,0 +1,771 @@
+/*
+ *  thermal.c - sysfs interface of thermal devices
+ *
+ *  Copyright (C) 2016 Eduardo Valentin <edubezval@gmail.com>
+ *
+ *  Highly based on original thermal_core.c
+ *  Copyright (C) 2008 Intel Corp
+ *  Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
+ *  Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/sysfs.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "thermal_core.h"
+
+/* sys I/F for thermal zone */
+
+static ssize_t
+type_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	return sprintf(buf, "%s\n", tz->type);
+}
+
+static ssize_t
+temp_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int temperature, ret;
+
+	ret = thermal_zone_get_temp(tz, &temperature);
+
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%d\n", temperature);
+}
+
+static ssize_t
+mode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	enum thermal_device_mode mode;
+	int result;
+
+	if (!tz->ops->get_mode)
+		return -EPERM;
+
+	result = tz->ops->get_mode(tz, &mode);
+	if (result)
+		return result;
+
+	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED ? "enabled"
+		       : "disabled");
+}
+
+static ssize_t
+mode_store(struct device *dev, struct device_attribute *attr,
+	   const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int result;
+
+	if (!tz->ops->set_mode)
+		return -EPERM;
+
+	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
+		result = tz->ops->set_mode(tz, THERMAL_DEVICE_ENABLED);
+	else if (!strncmp(buf, "disabled", sizeof("disabled") - 1))
+		result = tz->ops->set_mode(tz, THERMAL_DEVICE_DISABLED);
+	else
+		result = -EINVAL;
+
+	if (result)
+		return result;
+
+	return count;
+}
+
+static ssize_t
+trip_point_type_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	enum thermal_trip_type type;
+	int trip, result;
+
+	if (!tz->ops->get_trip_type)
+		return -EPERM;
+
+	if (sscanf(attr->attr.name, "trip_point_%d_type", &trip) != 1)
+		return -EINVAL;
+
+	result = tz->ops->get_trip_type(tz, trip, &type);
+	if (result)
+		return result;
+
+	switch (type) {
+	case THERMAL_TRIP_CRITICAL:
+		return sprintf(buf, "critical\n");
+	case THERMAL_TRIP_HOT:
+		return sprintf(buf, "hot\n");
+	case THERMAL_TRIP_PASSIVE:
+		return sprintf(buf, "passive\n");
+	case THERMAL_TRIP_ACTIVE:
+		return sprintf(buf, "active\n");
+	default:
+		return sprintf(buf, "unknown\n");
+	}
+}
+
+static ssize_t
+trip_point_temp_store(struct device *dev, struct device_attribute *attr,
+		      const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int trip, ret;
+	int temperature;
+
+	if (!tz->ops->set_trip_temp)
+		return -EPERM;
+
+	if (sscanf(attr->attr.name, "trip_point_%d_temp", &trip) != 1)
+		return -EINVAL;
+
+	if (kstrtoint(buf, 10, &temperature))
+		return -EINVAL;
+
+	ret = tz->ops->set_trip_temp(tz, trip, temperature);
+	if (ret)
+		return ret;
+
+	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
+
+	return count;
+}
+
+static ssize_t
+trip_point_temp_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int trip, ret;
+	int temperature;
+
+	if (!tz->ops->get_trip_temp)
+		return -EPERM;
+
+	if (sscanf(attr->attr.name, "trip_point_%d_temp", &trip) != 1)
+		return -EINVAL;
+
+	ret = tz->ops->get_trip_temp(tz, trip, &temperature);
+
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%d\n", temperature);
+}
+
+static ssize_t
+trip_point_hyst_store(struct device *dev, struct device_attribute *attr,
+		      const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int trip, ret;
+	int temperature;
+
+	if (!tz->ops->set_trip_hyst)
+		return -EPERM;
+
+	if (sscanf(attr->attr.name, "trip_point_%d_hyst", &trip) != 1)
+		return -EINVAL;
+
+	if (kstrtoint(buf, 10, &temperature))
+		return -EINVAL;
+
+	/*
+	 * We are not doing any check on the 'temperature' value
+	 * here. The driver implementing 'set_trip_hyst' has to
+	 * take care of this.
+	 */
+	ret = tz->ops->set_trip_hyst(tz, trip, temperature);
+
+	if (!ret)
+		thermal_zone_set_trips(tz);
+
+	return ret ? ret : count;
+}
+
+static ssize_t
+trip_point_hyst_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int trip, ret;
+	int temperature;
+
+	if (!tz->ops->get_trip_hyst)
+		return -EPERM;
+
+	if (sscanf(attr->attr.name, "trip_point_%d_hyst", &trip) != 1)
+		return -EINVAL;
+
+	ret = tz->ops->get_trip_hyst(tz, trip, &temperature);
+
+	return ret ? ret : sprintf(buf, "%d\n", temperature);
+}
+
+static ssize_t
+passive_store(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int state;
+
+	if (sscanf(buf, "%d\n", &state) != 1)
+		return -EINVAL;
+
+	/* sanity check: values below 1000 millicelcius don't make sense
+	 * and can cause the system to go into a thermal heart attack
+	 */
+	if (state && state < 1000)
+		return -EINVAL;
+
+	if (state && !tz->forced_passive) {
+		if (!tz->passive_delay)
+			tz->passive_delay = 1000;
+		thermal_zone_device_rebind_exception(tz, "Processor",
+						     sizeof("Processor"));
+	} else if (!state && tz->forced_passive) {
+		tz->passive_delay = 0;
+		thermal_zone_device_unbind_exception(tz, "Processor",
+						     sizeof("Processor"));
+	}
+
+	tz->forced_passive = state;
+
+	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
+
+	return count;
+}
+
+static ssize_t
+passive_show(struct device *dev, struct device_attribute *attr,
+	     char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	return sprintf(buf, "%d\n", tz->forced_passive);
+}
+
+static ssize_t
+policy_store(struct device *dev, struct device_attribute *attr,
+	     const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	char name[THERMAL_NAME_LENGTH];
+	int ret;
+
+	snprintf(name, sizeof(name), "%s", buf);
+
+	ret = thermal_zone_device_set_policy(tz, name);
+	if (!ret)
+		ret = count;
+
+	return ret;
+}
+
+static ssize_t
+policy_show(struct device *dev, struct device_attribute *devattr, char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	return sprintf(buf, "%s\n", tz->governor->name);
+}
+
+static ssize_t
+available_policies_show(struct device *dev, struct device_attribute *devattr,
+			char *buf)
+{
+	return thermal_build_list_of_policies(buf);
+}
+
+#if (IS_ENABLED(CONFIG_THERMAL_EMULATION))
+static ssize_t
+emul_temp_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int ret = 0;
+	int temperature;
+
+	if (kstrtoint(buf, 10, &temperature))
+		return -EINVAL;
+
+	if (!tz->ops->set_emul_temp) {
+		mutex_lock(&tz->lock);
+		tz->emul_temperature = temperature;
+		mutex_unlock(&tz->lock);
+	} else {
+		ret = tz->ops->set_emul_temp(tz, temperature);
+	}
+
+	if (!ret)
+		thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
+
+	return ret ? ret : count;
+}
+static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
+#endif
+
+static ssize_t
+sustainable_power_show(struct device *dev, struct device_attribute *devattr,
+		       char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	if (tz->tzp)
+		return sprintf(buf, "%u\n", tz->tzp->sustainable_power);
+	else
+		return -EIO;
+}
+
+static ssize_t
+sustainable_power_store(struct device *dev, struct device_attribute *devattr,
+			const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	u32 sustainable_power;
+
+	if (!tz->tzp)
+		return -EIO;
+
+	if (kstrtou32(buf, 10, &sustainable_power))
+		return -EINVAL;
+
+	tz->tzp->sustainable_power = sustainable_power;
+
+	return count;
+}
+
+#define create_s32_tzp_attr(name)					\
+	static ssize_t							\
+	name##_show(struct device *dev, struct device_attribute *devattr, \
+		char *buf)						\
+	{								\
+	struct thermal_zone_device *tz = to_thermal_zone(dev);		\
+									\
+	if (tz->tzp)							\
+		return sprintf(buf, "%d\n", tz->tzp->name);		\
+	else								\
+		return -EIO;						\
+	}								\
+									\
+	static ssize_t							\
+	name##_store(struct device *dev, struct device_attribute *devattr, \
+		const char *buf, size_t count)				\
+	{								\
+		struct thermal_zone_device *tz = to_thermal_zone(dev);	\
+		s32 value;						\
+									\
+		if (!tz->tzp)						\
+			return -EIO;					\
+									\
+		if (kstrtos32(buf, 10, &value))				\
+			return -EINVAL;					\
+									\
+		tz->tzp->name = value;					\
+									\
+		return count;						\
+	}								\
+	static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, name##_show, name##_store)
+
+create_s32_tzp_attr(k_po);
+create_s32_tzp_attr(k_pu);
+create_s32_tzp_attr(k_i);
+create_s32_tzp_attr(k_d);
+create_s32_tzp_attr(integral_cutoff);
+create_s32_tzp_attr(slope);
+create_s32_tzp_attr(offset);
+#undef create_s32_tzp_attr
+
+/*
+ * These are thermal zone device attributes that will always be present.
+ * All the attributes created for tzp (create_s32_tzp_attr) also are always
+ * present on the sysfs interface.
+ */
+static DEVICE_ATTR(type, 0444, type_show, NULL);
+static DEVICE_ATTR(temp, 0444, temp_show, NULL);
+static DEVICE_ATTR(policy, S_IRUGO | S_IWUSR, policy_show, policy_store);
+static DEVICE_ATTR(available_policies, S_IRUGO, available_policies_show, NULL);
+static DEVICE_ATTR(sustainable_power, S_IWUSR | S_IRUGO, sustainable_power_show,
+		   sustainable_power_store);
+
+/* These thermal zone device attributes are created based on conditions */
+static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
+static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show, passive_store);
+
+/* These attributes are unconditionally added to a thermal zone */
+static struct attribute *thermal_zone_dev_attrs[] = {
+	&dev_attr_type.attr,
+	&dev_attr_temp.attr,
+#if (IS_ENABLED(CONFIG_THERMAL_EMULATION))
+	&dev_attr_emul_temp.attr,
+#endif
+	&dev_attr_policy.attr,
+	&dev_attr_available_policies.attr,
+	&dev_attr_sustainable_power.attr,
+	&dev_attr_k_po.attr,
+	&dev_attr_k_pu.attr,
+	&dev_attr_k_i.attr,
+	&dev_attr_k_d.attr,
+	&dev_attr_integral_cutoff.attr,
+	&dev_attr_slope.attr,
+	&dev_attr_offset.attr,
+	NULL,
+};
+
+static struct attribute_group thermal_zone_attribute_group = {
+	.attrs = thermal_zone_dev_attrs,
+};
+
+/* We expose mode only if .get_mode is present */
+static struct attribute *thermal_zone_mode_attrs[] = {
+	&dev_attr_mode.attr,
+	NULL,
+};
+
+static umode_t thermal_zone_mode_is_visible(struct kobject *kobj,
+					    struct attribute *attr,
+					    int attrno)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct thermal_zone_device *tz;
+
+	tz = container_of(dev, struct thermal_zone_device, device);
+
+	if (tz->ops->get_mode)
+		return attr->mode;
+
+	return 0;
+}
+
+static struct attribute_group thermal_zone_mode_attribute_group = {
+	.attrs = thermal_zone_mode_attrs,
+	.is_visible = thermal_zone_mode_is_visible,
+};
+
+/* We expose passive only if passive trips are present */
+static struct attribute *thermal_zone_passive_attrs[] = {
+	&dev_attr_passive.attr,
+	NULL,
+};
+
+static umode_t thermal_zone_passive_is_visible(struct kobject *kobj,
+					       struct attribute *attr,
+					       int attrno)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct thermal_zone_device *tz;
+	enum thermal_trip_type trip_type;
+	int count, passive = 0;
+
+	tz = container_of(dev, struct thermal_zone_device, device);
+
+	for (count = 0; count < tz->trips && !passive; count++) {
+		tz->ops->get_trip_type(tz, count, &trip_type);
+
+		if (trip_type == THERMAL_TRIP_PASSIVE)
+			passive = 1;
+	}
+
+	if (!passive)
+		return attr->mode;
+
+	return 0;
+}
+
+static struct attribute_group thermal_zone_passive_attribute_group = {
+	.attrs = thermal_zone_passive_attrs,
+	.is_visible = thermal_zone_passive_is_visible,
+};
+
+static const struct attribute_group *thermal_zone_attribute_groups[] = {
+	&thermal_zone_attribute_group,
+	&thermal_zone_mode_attribute_group,
+	&thermal_zone_passive_attribute_group,
+	/* This is not NULL terminated as we create the group dynamically */
+};
+
+/**
+ * create_trip_attrs() - create attributes for trip points
+ * @tz:		the thermal zone device
+ * @mask:	Writeable trip point bitmap.
+ *
+ * helper function to instantiate sysfs entries for every trip
+ * point and its properties of a struct thermal_zone_device.
+ *
+ * Return: 0 on success, the proper error value otherwise.
+ */
+static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
+{
+	struct attribute **attrs;
+	int indx;
+
+	/* This function works only for zones with at least one trip */
+	if (tz->trips <= 0)
+		return -EINVAL;
+
+	tz->trip_type_attrs = kcalloc(tz->trips, sizeof(*tz->trip_type_attrs),
+				      GFP_KERNEL);
+	if (!tz->trip_type_attrs)
+		return -ENOMEM;
+
+	tz->trip_temp_attrs = kcalloc(tz->trips, sizeof(*tz->trip_temp_attrs),
+				      GFP_KERNEL);
+	if (!tz->trip_temp_attrs) {
+		kfree(tz->trip_type_attrs);
+		return -ENOMEM;
+	}
+
+	if (tz->ops->get_trip_hyst) {
+		tz->trip_hyst_attrs = kcalloc(tz->trips,
+					      sizeof(*tz->trip_hyst_attrs),
+					      GFP_KERNEL);
+		if (!tz->trip_hyst_attrs) {
+			kfree(tz->trip_type_attrs);
+			kfree(tz->trip_temp_attrs);
+			return -ENOMEM;
+		}
+	}
+
+	attrs = kcalloc(tz->trips * 3 + 1, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs) {
+		kfree(tz->trip_type_attrs);
+		kfree(tz->trip_temp_attrs);
+		if (tz->ops->get_trip_hyst)
+			kfree(tz->trip_hyst_attrs);
+		return -ENOMEM;
+	}
+
+	for (indx = 0; indx < tz->trips; indx++) {
+		/* create trip type attribute */
+		snprintf(tz->trip_type_attrs[indx].name, THERMAL_NAME_LENGTH,
+			 "trip_point_%d_type", indx);
+
+		sysfs_attr_init(&tz->trip_type_attrs[indx].attr.attr);
+		tz->trip_type_attrs[indx].attr.attr.name =
+						tz->trip_type_attrs[indx].name;
+		tz->trip_type_attrs[indx].attr.attr.mode = S_IRUGO;
+		tz->trip_type_attrs[indx].attr.show = trip_point_type_show;
+		attrs[indx] = &tz->trip_type_attrs[indx].attr.attr;
+
+		/* create trip temp attribute */
+		snprintf(tz->trip_temp_attrs[indx].name, THERMAL_NAME_LENGTH,
+			 "trip_point_%d_temp", indx);
+
+		sysfs_attr_init(&tz->trip_temp_attrs[indx].attr.attr);
+		tz->trip_temp_attrs[indx].attr.attr.name =
+						tz->trip_temp_attrs[indx].name;
+		tz->trip_temp_attrs[indx].attr.attr.mode = S_IRUGO;
+		tz->trip_temp_attrs[indx].attr.show = trip_point_temp_show;
+		if (IS_ENABLED(CONFIG_THERMAL_WRITABLE_TRIPS) &&
+		    mask & (1 << indx)) {
+			tz->trip_temp_attrs[indx].attr.attr.mode |= S_IWUSR;
+			tz->trip_temp_attrs[indx].attr.store =
+							trip_point_temp_store;
+		}
+		attrs[indx + tz->trips] = &tz->trip_temp_attrs[indx].attr.attr;
+
+		/* create Optional trip hyst attribute */
+		if (!tz->ops->get_trip_hyst)
+			continue;
+		snprintf(tz->trip_hyst_attrs[indx].name, THERMAL_NAME_LENGTH,
+			 "trip_point_%d_hyst", indx);
+
+		sysfs_attr_init(&tz->trip_hyst_attrs[indx].attr.attr);
+		tz->trip_hyst_attrs[indx].attr.attr.name =
+					tz->trip_hyst_attrs[indx].name;
+		tz->trip_hyst_attrs[indx].attr.attr.mode = S_IRUGO;
+		tz->trip_hyst_attrs[indx].attr.show = trip_point_hyst_show;
+		if (tz->ops->set_trip_hyst) {
+			tz->trip_hyst_attrs[indx].attr.attr.mode |= S_IWUSR;
+			tz->trip_hyst_attrs[indx].attr.store =
+					trip_point_hyst_store;
+		}
+		attrs[indx + tz->trips * 2] =
+					&tz->trip_hyst_attrs[indx].attr.attr;
+	}
+	attrs[tz->trips * 3] = NULL;
+
+	tz->trips_attribute_group.attrs = attrs;
+
+	return 0;
+}
+
+int thermal_zone_create_device_groups(struct thermal_zone_device *tz,
+				      int mask)
+{
+	const struct attribute_group **groups;
+	int i, size, result;
+
+	/* we need one extra for trips and the NULL to terminate the array */
+	size = ARRAY_SIZE(thermal_zone_attribute_groups) + 2;
+	/* This also takes care of API requirement to be NULL terminated */
+	groups = kcalloc(size, sizeof(*groups), GFP_KERNEL);
+	if (!groups)
+		return -ENOMEM;
+
+	for (i = 0; i < size - 2; i++)
+		groups[i] = thermal_zone_attribute_groups[i];
+
+	if (tz->trips) {
+		result = create_trip_attrs(tz, mask);
+		if (result) {
+			kfree(groups);
+
+			return result;
+		}
+
+		groups[size - 2] = &tz->trips_attribute_group;
+	}
+
+	tz->device.groups = groups;
+
+	return 0;
+}
+
+/* sys I/F for cooling device */
+static ssize_t
+thermal_cooling_device_type_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+
+	return sprintf(buf, "%s\n", cdev->type);
+}
+
+static ssize_t
+thermal_cooling_device_max_state_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+	unsigned long state;
+	int ret;
+
+	ret = cdev->ops->get_max_state(cdev, &state);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%ld\n", state);
+}
+
+static ssize_t
+thermal_cooling_device_cur_state_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+	unsigned long state;
+	int ret;
+
+	ret = cdev->ops->get_cur_state(cdev, &state);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%ld\n", state);
+}
+
+static ssize_t
+thermal_cooling_device_cur_state_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+	unsigned long state;
+	int result;
+
+	if (sscanf(buf, "%ld\n", &state) != 1)
+		return -EINVAL;
+
+	if ((long)state < 0)
+		return -EINVAL;
+
+	result = cdev->ops->set_cur_state(cdev, state);
+	if (result)
+		return result;
+	return count;
+}
+
+static struct device_attribute dev_attr_cdev_type =
+__ATTR(type, 0444, thermal_cooling_device_type_show, NULL);
+static DEVICE_ATTR(max_state, 0444,
+		   thermal_cooling_device_max_state_show, NULL);
+static DEVICE_ATTR(cur_state, 0644,
+		   thermal_cooling_device_cur_state_show,
+		   thermal_cooling_device_cur_state_store);
+
+static struct attribute *cooling_device_attrs[] = {
+	&dev_attr_cdev_type.attr,
+	&dev_attr_max_state.attr,
+	&dev_attr_cur_state.attr,
+	NULL,
+};
+
+static const struct attribute_group cooling_device_attr_group = {
+	.attrs = cooling_device_attrs,
+};
+
+static const struct attribute_group *cooling_device_attr_groups[] = {
+	&cooling_device_attr_group,
+	NULL,
+};
+
+void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *cdev)
+{
+	cdev->device.groups = cooling_device_attr_groups;
+}
+
+/* these helper will be used only at the time of bindig */
+ssize_t
+thermal_cooling_device_trip_point_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct thermal_instance *instance;
+
+	instance =
+	    container_of(attr, struct thermal_instance, attr);
+
+	if (instance->trip == THERMAL_TRIPS_NONE)
+		return sprintf(buf, "-1\n");
+	else
+		return sprintf(buf, "%d\n", instance->trip);
+}
+
+ssize_t
+thermal_cooling_device_weight_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct thermal_instance *instance;
+
+	instance = container_of(attr, struct thermal_instance, weight_attr);
+
+	return sprintf(buf, "%d\n", instance->weight);
+}
+
+ssize_t
+thermal_cooling_device_weight_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct thermal_instance *instance;
+	int ret, weight;
+
+	ret = kstrtoint(buf, 0, &weight);
+	if (ret)
+		return ret;
+
+	instance = container_of(attr, struct thermal_instance, weight_attr);
+	instance->weight = weight;
+
+	return count;
+}

+ 3 - 2
drivers/thermal/ti-soc-thermal/ti-bandgap.c

@@ -1298,7 +1298,7 @@ int ti_bandgap_probe(struct platform_device *pdev)
 	if (IS_ERR(bgp->div_clk)) {
 		dev_err(&pdev->dev, "failed to request div_ts_ck clock ref\n");
 		ret = PTR_ERR(bgp->div_clk);
-		goto free_irqs;
+		goto put_fclock;
 	}
 
 	for (i = 0; i < bgp->conf->sensor_count; i++) {
@@ -1430,8 +1430,9 @@ disable_clk:
 	if (TI_BANDGAP_HAS(bgp, CLK_CTRL))
 		clk_disable_unprepare(bgp->fclock);
 put_clks:
-	clk_put(bgp->fclock);
 	clk_put(bgp->div_clk);
+put_fclock:
+	clk_put(bgp->fclock);
 free_irqs:
 	if (TI_BANDGAP_HAS(bgp, TSHUT)) {
 		free_irq(gpio_to_irq(bgp->tshut_gpio), NULL);

+ 246 - 341
drivers/thermal/x86_pkg_temp_thermal.c

@@ -54,37 +54,33 @@ MODULE_PARM_DESC(notify_delay_ms,
 * is some wrong values returned by cpuid for number of thresholds.
 */
 #define MAX_NUMBER_OF_TRIPS	2
-/* Limit number of package temp zones */
-#define MAX_PKG_TEMP_ZONE_IDS	256
-
-struct phy_dev_entry {
-	struct list_head list;
-	u16 phys_proc_id;
-	u16 first_cpu;
-	u32 tj_max;
-	int ref_cnt;
-	u32 start_pkg_therm_low;
-	u32 start_pkg_therm_high;
-	struct thermal_zone_device *tzone;
+
+struct pkg_device {
+	int				cpu;
+	bool				work_scheduled;
+	u32				tj_max;
+	u32				msr_pkg_therm_low;
+	u32				msr_pkg_therm_high;
+	struct delayed_work		work;
+	struct thermal_zone_device	*tzone;
+	struct cpumask			cpumask;
 };
 
 static struct thermal_zone_params pkg_temp_tz_params = {
 	.no_hwmon	= true,
 };
 
-/* List maintaining number of package instances */
-static LIST_HEAD(phy_dev_list);
-static DEFINE_MUTEX(phy_dev_list_mutex);
-
-/* Interrupt to work function schedule queue */
-static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work);
+/* Keep track of how many package pointers we allocated in init() */
+static int max_packages __read_mostly;
+/* Array of package pointers */
+static struct pkg_device **packages;
+/* Serializes interrupt notification, work and hotplug */
+static DEFINE_SPINLOCK(pkg_temp_lock);
+/* Protects zone operation in the work function against hotplug removal */
+static DEFINE_MUTEX(thermal_zone_mutex);
 
-/* To track if the work is already scheduled on a package */
-static u8 *pkg_work_scheduled;
-
-/* Spin lock to prevent races with pkg_work_scheduled */
-static spinlock_t pkg_work_lock;
-static u16 max_phy_id;
+/* The dynamically assigned cpu hotplug state for module_exit() */
+static enum cpuhp_state pkg_thermal_hp_state __read_mostly;
 
 /* Debug counters to show using debugfs */
 static struct dentry *debugfs;
@@ -116,22 +112,20 @@ err_out:
 	return -ENOENT;
 }
 
-static struct phy_dev_entry
-			*pkg_temp_thermal_get_phy_entry(unsigned int cpu)
+/*
+ * Protection:
+ *
+ * - cpu hotplug: Read serialized by cpu hotplug lock
+ *		  Write must hold pkg_temp_lock
+ *
+ * - Other callsites: Must hold pkg_temp_lock
+ */
+static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu)
 {
-	u16 phys_proc_id = topology_physical_package_id(cpu);
-	struct phy_dev_entry *phy_ptr;
-
-	mutex_lock(&phy_dev_list_mutex);
-
-	list_for_each_entry(phy_ptr, &phy_dev_list, list)
-		if (phy_ptr->phys_proc_id == phys_proc_id) {
-			mutex_unlock(&phy_dev_list_mutex);
-			return phy_ptr;
-		}
-
-	mutex_unlock(&phy_dev_list_mutex);
+	int pkgid = topology_logical_package_id(cpu);
 
+	if (pkgid >= 0 && pkgid < max_packages)
+		return packages[pkgid];
 	return NULL;
 }
 
@@ -141,61 +135,44 @@ static struct phy_dev_entry
 */
 static int get_tj_max(int cpu, u32 *tj_max)
 {
-	u32 eax, edx;
-	u32 val;
+	u32 eax, edx, val;
 	int err;
 
 	err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
 	if (err)
-		goto err_ret;
-	else {
-		val = (eax >> 16) & 0xff;
-		if (val)
-			*tj_max = val * 1000;
-		else {
-			err = -EINVAL;
-			goto err_ret;
-		}
-	}
+		return err;
 
-	return 0;
-err_ret:
-	*tj_max = 0;
-	return err;
+	val = (eax >> 16) & 0xff;
+	*tj_max = val * 1000;
+
+	return val ? 0 : -EINVAL;
 }
 
 static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
 {
+	struct pkg_device *pkgdev = tzd->devdata;
 	u32 eax, edx;
-	struct phy_dev_entry *phy_dev_entry;
 
-	phy_dev_entry = tzd->devdata;
-	rdmsr_on_cpu(phy_dev_entry->first_cpu, MSR_IA32_PACKAGE_THERM_STATUS,
-			&eax, &edx);
+	rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_STATUS, &eax, &edx);
 	if (eax & 0x80000000) {
-		*temp = phy_dev_entry->tj_max -
-				((eax >> 16) & 0x7f) * 1000;
+		*temp = pkgdev->tj_max - ((eax >> 16) & 0x7f) * 1000;
 		pr_debug("sys_get_curr_temp %d\n", *temp);
 		return 0;
 	}
-
 	return -EINVAL;
 }
 
 static int sys_get_trip_temp(struct thermal_zone_device *tzd,
-		int trip, int *temp)
+			     int trip, int *temp)
 {
-	u32 eax, edx;
-	struct phy_dev_entry *phy_dev_entry;
-	u32 mask, shift;
+	struct pkg_device *pkgdev = tzd->devdata;
 	unsigned long thres_reg_value;
+	u32 mask, shift, eax, edx;
 	int ret;
 
 	if (trip >= MAX_NUMBER_OF_TRIPS)
 		return -EINVAL;
 
-	phy_dev_entry = tzd->devdata;
-
 	if (trip) {
 		mask = THERM_MASK_THRESHOLD1;
 		shift = THERM_SHIFT_THRESHOLD1;
@@ -204,14 +181,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
 		shift = THERM_SHIFT_THRESHOLD0;
 	}
 
-	ret = rdmsr_on_cpu(phy_dev_entry->first_cpu,
-				MSR_IA32_PACKAGE_THERM_INTERRUPT, &eax, &edx);
+	ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+			   &eax, &edx);
 	if (ret < 0)
-		return -EINVAL;
+		return ret;
 
 	thres_reg_value = (eax & mask) >> shift;
 	if (thres_reg_value)
-		*temp = phy_dev_entry->tj_max - thres_reg_value * 1000;
+		*temp = pkgdev->tj_max - thres_reg_value * 1000;
 	else
 		*temp = 0;
 	pr_debug("sys_get_trip_temp %d\n", *temp);
@@ -219,24 +196,20 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
 	return 0;
 }
 
-static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
-							int temp)
+static int
+sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
 {
-	u32 l, h;
-	struct phy_dev_entry *phy_dev_entry;
-	u32 mask, shift, intr;
+	struct pkg_device *pkgdev = tzd->devdata;
+	u32 l, h, mask, shift, intr;
 	int ret;
 
-	phy_dev_entry = tzd->devdata;
-
-	if (trip >= MAX_NUMBER_OF_TRIPS || temp >= phy_dev_entry->tj_max)
+	if (trip >= MAX_NUMBER_OF_TRIPS || temp >= pkgdev->tj_max)
 		return -EINVAL;
 
-	ret = rdmsr_on_cpu(phy_dev_entry->first_cpu,
-					MSR_IA32_PACKAGE_THERM_INTERRUPT,
-					&l, &h);
+	ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+			   &l, &h);
 	if (ret < 0)
-		return -EINVAL;
+		return ret;
 
 	if (trip) {
 		mask = THERM_MASK_THRESHOLD1;
@@ -252,24 +225,20 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
 	* When users space sets a trip temperature == 0, which is indication
 	* that, it is no longer interested in receiving notifications.
 	*/
-	if (!temp)
+	if (!temp) {
 		l &= ~intr;
-	else {
-		l |= (phy_dev_entry->tj_max - temp)/1000 << shift;
+	} else {
+		l |= (pkgdev->tj_max - temp)/1000 << shift;
 		l |= intr;
 	}
 
-	return wrmsr_on_cpu(phy_dev_entry->first_cpu,
-					MSR_IA32_PACKAGE_THERM_INTERRUPT,
-					l, h);
+	return wrmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
 }
 
-static int sys_get_trip_type(struct thermal_zone_device *thermal,
-		int trip, enum thermal_trip_type *type)
+static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
+			     enum thermal_trip_type *type)
 {
-
 	*type = THERMAL_TRIP_PASSIVE;
-
 	return 0;
 }
 
@@ -281,7 +250,7 @@ static struct thermal_zone_device_ops tzone_ops = {
 	.set_trip_temp = sys_set_trip_temp,
 };
 
-static bool pkg_temp_thermal_platform_thermal_rate_control(void)
+static bool pkg_thermal_rate_control(void)
 {
 	return true;
 }
@@ -289,8 +258,8 @@ static bool pkg_temp_thermal_platform_thermal_rate_control(void)
 /* Enable threshold interrupt on local package/cpu */
 static inline void enable_pkg_thres_interrupt(void)
 {
-	u32 l, h;
 	u8 thres_0, thres_1;
+	u32 l, h;
 
 	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
 	/* only enable/disable if it had valid threshold value */
@@ -307,271 +276,232 @@ static inline void enable_pkg_thres_interrupt(void)
 static inline void disable_pkg_thres_interrupt(void)
 {
 	u32 l, h;
+
 	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
-			l & (~THERM_INT_THRESHOLD0_ENABLE) &
-				(~THERM_INT_THRESHOLD1_ENABLE), h);
+
+	l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
+	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
 }
 
 static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
 {
-	__u64 msr_val;
+	struct thermal_zone_device *tzone = NULL;
 	int cpu = smp_processor_id();
-	int phy_id = topology_physical_package_id(cpu);
-	struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu);
-	bool notify = false;
-	unsigned long flags;
-
-	if (!phdev)
-		return;
+	struct pkg_device *pkgdev;
+	u64 msr_val, wr_val;
 
-	spin_lock_irqsave(&pkg_work_lock, flags);
+	mutex_lock(&thermal_zone_mutex);
+	spin_lock_irq(&pkg_temp_lock);
 	++pkg_work_cnt;
-	if (unlikely(phy_id > max_phy_id)) {
-		spin_unlock_irqrestore(&pkg_work_lock, flags);
+
+	pkgdev = pkg_temp_thermal_get_dev(cpu);
+	if (!pkgdev) {
+		spin_unlock_irq(&pkg_temp_lock);
+		mutex_unlock(&thermal_zone_mutex);
 		return;
 	}
-	pkg_work_scheduled[phy_id] = 0;
-	spin_unlock_irqrestore(&pkg_work_lock, flags);
+	pkgdev->work_scheduled = false;
 
-	enable_pkg_thres_interrupt();
 	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
-	if (msr_val & THERM_LOG_THRESHOLD0) {
-		wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS,
-				msr_val & ~THERM_LOG_THRESHOLD0);
-		notify = true;
-	}
-	if (msr_val & THERM_LOG_THRESHOLD1) {
-		wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS,
-				msr_val & ~THERM_LOG_THRESHOLD1);
-		notify = true;
-	}
-	if (notify) {
-		pr_debug("thermal_zone_device_update\n");
-		thermal_zone_device_update(phdev->tzone,
-					   THERMAL_EVENT_UNSPECIFIED);
+	wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
+	if (wr_val != msr_val) {
+		wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
+		tzone = pkgdev->tzone;
 	}
+
+	enable_pkg_thres_interrupt();
+	spin_unlock_irq(&pkg_temp_lock);
+
+	/*
+	 * If tzone is not NULL, then thermal_zone_mutex will prevent the
+	 * concurrent removal in the cpu offline callback.
+	 */
+	if (tzone)
+		thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);
+
+	mutex_unlock(&thermal_zone_mutex);
 }
 
-static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
+static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
+{
+	unsigned long ms = msecs_to_jiffies(notify_delay_ms);
+
+	schedule_delayed_work_on(cpu, work, ms);
+}
+
+static int pkg_thermal_notify(u64 msr_val)
 {
-	unsigned long flags;
 	int cpu = smp_processor_id();
-	int phy_id = topology_physical_package_id(cpu);
+	struct pkg_device *pkgdev;
+	unsigned long flags;
 
-	/*
-	* When a package is in interrupted state, all CPU's in that package
-	* are in the same interrupt state. So scheduling on any one CPU in
-	* the package is enough and simply return for others.
-	*/
-	spin_lock_irqsave(&pkg_work_lock, flags);
+	spin_lock_irqsave(&pkg_temp_lock, flags);
 	++pkg_interrupt_cnt;
-	if (unlikely(phy_id > max_phy_id) || unlikely(!pkg_work_scheduled) ||
-			pkg_work_scheduled[phy_id]) {
-		disable_pkg_thres_interrupt();
-		spin_unlock_irqrestore(&pkg_work_lock, flags);
-		return -EINVAL;
-	}
-	pkg_work_scheduled[phy_id] = 1;
-	spin_unlock_irqrestore(&pkg_work_lock, flags);
 
 	disable_pkg_thres_interrupt();
-	schedule_delayed_work_on(cpu,
-				&per_cpu(pkg_temp_thermal_threshold_work, cpu),
-				msecs_to_jiffies(notify_delay_ms));
-	return 0;
-}
 
-static int find_siblings_cpu(int cpu)
-{
-	int i;
-	int id = topology_physical_package_id(cpu);
-
-	for_each_online_cpu(i)
-		if (i != cpu && topology_physical_package_id(i) == id)
-			return i;
+	/* Work is per package, so scheduling it once is enough. */
+	pkgdev = pkg_temp_thermal_get_dev(cpu);
+	if (pkgdev && !pkgdev->work_scheduled) {
+		pkgdev->work_scheduled = true;
+		pkg_thermal_schedule_work(pkgdev->cpu, &pkgdev->work);
+	}
 
+	spin_unlock_irqrestore(&pkg_temp_lock, flags);
 	return 0;
 }
 
 static int pkg_temp_thermal_device_add(unsigned int cpu)
 {
-	int err;
-	u32 tj_max;
-	struct phy_dev_entry *phy_dev_entry;
-	int thres_count;
-	u32 eax, ebx, ecx, edx;
-	u8 *temp;
-	unsigned long flags;
+	int pkgid = topology_logical_package_id(cpu);
+	u32 tj_max, eax, ebx, ecx, edx;
+	struct pkg_device *pkgdev;
+	int thres_count, err;
+
+	if (pkgid >= max_packages)
+		return -ENOMEM;
 
 	cpuid(6, &eax, &ebx, &ecx, &edx);
 	thres_count = ebx & 0x07;
 	if (!thres_count)
 		return -ENODEV;
 
-	if (topology_physical_package_id(cpu) > MAX_PKG_TEMP_ZONE_IDS)
-		return -ENODEV;
-
 	thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
 
 	err = get_tj_max(cpu, &tj_max);
 	if (err)
-		goto err_ret;
-
-	mutex_lock(&phy_dev_list_mutex);
+		return err;
 
-	phy_dev_entry = kzalloc(sizeof(*phy_dev_entry), GFP_KERNEL);
-	if (!phy_dev_entry) {
-		err = -ENOMEM;
-		goto err_ret_unlock;
-	}
+	pkgdev = kzalloc(sizeof(*pkgdev), GFP_KERNEL);
+	if (!pkgdev)
+		return -ENOMEM;
 
-	spin_lock_irqsave(&pkg_work_lock, flags);
-	if (topology_physical_package_id(cpu) > max_phy_id)
-		max_phy_id = topology_physical_package_id(cpu);
-	temp = krealloc(pkg_work_scheduled,
-			(max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
-	if (!temp) {
-		spin_unlock_irqrestore(&pkg_work_lock, flags);
-		err = -ENOMEM;
-		goto err_ret_free;
-	}
-	pkg_work_scheduled = temp;
-	pkg_work_scheduled[topology_physical_package_id(cpu)] = 0;
-	spin_unlock_irqrestore(&pkg_work_lock, flags);
-
-	phy_dev_entry->phys_proc_id = topology_physical_package_id(cpu);
-	phy_dev_entry->first_cpu = cpu;
-	phy_dev_entry->tj_max = tj_max;
-	phy_dev_entry->ref_cnt = 1;
-	phy_dev_entry->tzone = thermal_zone_device_register("x86_pkg_temp",
+	INIT_DELAYED_WORK(&pkgdev->work, pkg_temp_thermal_threshold_work_fn);
+	pkgdev->cpu = cpu;
+	pkgdev->tj_max = tj_max;
+	pkgdev->tzone = thermal_zone_device_register("x86_pkg_temp",
 			thres_count,
-			(thres_count == MAX_NUMBER_OF_TRIPS) ?
-				0x03 : 0x01,
-			phy_dev_entry, &tzone_ops, &pkg_temp_tz_params, 0, 0);
-	if (IS_ERR(phy_dev_entry->tzone)) {
-		err = PTR_ERR(phy_dev_entry->tzone);
-		goto err_ret_free;
+			(thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
+			pkgdev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
+	if (IS_ERR(pkgdev->tzone)) {
+		err = PTR_ERR(pkgdev->tzone);
+		kfree(pkgdev);
+		return err;
 	}
 	/* Store MSR value for package thermal interrupt, to restore at exit */
-	rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
-				&phy_dev_entry->start_pkg_therm_low,
-				&phy_dev_entry->start_pkg_therm_high);
-
-	list_add_tail(&phy_dev_entry->list, &phy_dev_list);
-	pr_debug("pkg_temp_thermal_device_add :phy_id %d cpu %d\n",
-			phy_dev_entry->phys_proc_id, cpu);
-
-	mutex_unlock(&phy_dev_list_mutex);
+	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, pkgdev->msr_pkg_therm_low,
+	      pkgdev->msr_pkg_therm_high);
 
+	cpumask_set_cpu(cpu, &pkgdev->cpumask);
+	spin_lock_irq(&pkg_temp_lock);
+	packages[pkgid] = pkgdev;
+	spin_unlock_irq(&pkg_temp_lock);
 	return 0;
-
-err_ret_free:
-	kfree(phy_dev_entry);
-err_ret_unlock:
-	mutex_unlock(&phy_dev_list_mutex);
-
-err_ret:
-	return err;
 }
 
-static int pkg_temp_thermal_device_remove(unsigned int cpu)
+static int pkg_thermal_cpu_offline(unsigned int cpu)
 {
-	struct phy_dev_entry *n;
-	u16 phys_proc_id = topology_physical_package_id(cpu);
-	struct phy_dev_entry *phdev =
-			pkg_temp_thermal_get_phy_entry(cpu);
+	struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
+	bool lastcpu, was_target;
+	int target;
 
-	if (!phdev)
-		return -ENODEV;
+	if (!pkgdev)
+		return 0;
 
-	mutex_lock(&phy_dev_list_mutex);
-	/* If we are loosing the first cpu for this package, we need change */
-	if (phdev->first_cpu == cpu) {
-		phdev->first_cpu = find_siblings_cpu(cpu);
-		pr_debug("thermal_device_remove: first cpu switched %d\n",
-					phdev->first_cpu);
+	target = cpumask_any_but(&pkgdev->cpumask, cpu);
+	cpumask_clear_cpu(cpu, &pkgdev->cpumask);
+	lastcpu = target >= nr_cpu_ids;
+	/*
+	 * Remove the sysfs files, if this is the last cpu in the package
+	 * before doing further cleanups.
+	 */
+	if (lastcpu) {
+		struct thermal_zone_device *tzone = pkgdev->tzone;
+
+		/*
+		 * We must protect against a work function calling
+		 * thermal_zone_update, after/while unregister. We null out
+		 * the pointer under the zone mutex, so the worker function
+		 * won't try to call.
+		 */
+		mutex_lock(&thermal_zone_mutex);
+		pkgdev->tzone = NULL;
+		mutex_unlock(&thermal_zone_mutex);
+
+		thermal_zone_device_unregister(tzone);
 	}
+
+	/* Protect against work and interrupts */
+	spin_lock_irq(&pkg_temp_lock);
+
 	/*
-	* It is possible that no siblings left as this was the last cpu
-	* going offline. We don't need to worry about this assignment
-	* as the phydev entry will be removed in this case and
-	* thermal zone is removed.
-	*/
-	--phdev->ref_cnt;
-	pr_debug("thermal_device_remove: pkg: %d cpu %d ref_cnt %d\n",
-					phys_proc_id, cpu, phdev->ref_cnt);
-	if (!phdev->ref_cnt)
-		list_for_each_entry_safe(phdev, n, &phy_dev_list, list) {
-			if (phdev->phys_proc_id == phys_proc_id) {
-				thermal_zone_device_unregister(phdev->tzone);
-				list_del(&phdev->list);
-				kfree(phdev);
-				break;
-			}
-		}
-	mutex_unlock(&phy_dev_list_mutex);
+	 * Check whether this cpu was the current target and store the new
+	 * one. When we drop the lock, then the interrupt notify function
+	 * will see the new target.
+	 */
+	was_target = pkgdev->cpu == cpu;
+	pkgdev->cpu = target;
 
-	return 0;
-}
+	/*
+	 * If this is the last CPU in the package remove the package
+	 * reference from the array and restore the interrupt MSR. When we
+	 * drop the lock neither the interrupt notify function nor the
+	 * worker will see the package anymore.
+	 */
+	if (lastcpu) {
+		packages[topology_logical_package_id(cpu)] = NULL;
+		/* After this point nothing touches the MSR anymore. */
+		wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+		      pkgdev->msr_pkg_therm_low, pkgdev->msr_pkg_therm_high);
+	}
 
-static int get_core_online(unsigned int cpu)
-{
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu);
-
-	/* Check if there is already an instance for this package */
-	if (!phdev) {
-		if (!cpu_has(c, X86_FEATURE_DTHERM) ||
-					!cpu_has(c, X86_FEATURE_PTS))
-			return -ENODEV;
-		if (pkg_temp_thermal_device_add(cpu))
-			return -ENODEV;
-	} else {
-		mutex_lock(&phy_dev_list_mutex);
-		++phdev->ref_cnt;
-		pr_debug("get_core_online: cpu %d ref_cnt %d\n",
-						cpu, phdev->ref_cnt);
-		mutex_unlock(&phy_dev_list_mutex);
+	/*
+	 * Check whether there is work scheduled and whether the work is
+	 * targeted at the outgoing CPU.
+	 */
+	if (pkgdev->work_scheduled && was_target) {
+		/*
+		 * To cancel the work we need to drop the lock, otherwise
+		 * we might deadlock if the work needs to be flushed.
+		 */
+		spin_unlock_irq(&pkg_temp_lock);
+		cancel_delayed_work_sync(&pkgdev->work);
+		spin_lock_irq(&pkg_temp_lock);
+		/*
+		 * If this is not the last cpu in the package and the work
+		 * did not run after we dropped the lock above, then we
+		 * need to reschedule the work, otherwise the interrupt
+		 * stays disabled forever.
+		 */
+		if (!lastcpu && pkgdev->work_scheduled)
+			pkg_thermal_schedule_work(target, &pkgdev->work);
 	}
-	INIT_DELAYED_WORK(&per_cpu(pkg_temp_thermal_threshold_work, cpu),
-			pkg_temp_thermal_threshold_work_fn);
 
-	pr_debug("get_core_online: cpu %d successful\n", cpu);
+	spin_unlock_irq(&pkg_temp_lock);
 
+	/* Final cleanup if this is the last cpu */
+	if (lastcpu)
+		kfree(pkgdev);
 	return 0;
 }
 
-static void put_core_offline(unsigned int cpu)
+static int pkg_thermal_cpu_online(unsigned int cpu)
 {
-	if (!pkg_temp_thermal_device_remove(cpu))
-		cancel_delayed_work_sync(
-			&per_cpu(pkg_temp_thermal_threshold_work, cpu));
+	struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	pr_debug("put_core_offline: cpu %d\n", cpu);
-}
+	/* Paranoia check */
+	if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
+		return -ENODEV;
 
-static int pkg_temp_thermal_cpu_callback(struct notifier_block *nfb,
-				 unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long) hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		get_core_online(cpu);
-		break;
-	case CPU_DOWN_PREPARE:
-		put_core_offline(cpu);
-		break;
+	/* If the package exists, nothing to do */
+	if (pkgdev) {
+		cpumask_set_cpu(cpu, &pkgdev->cpumask);
+		return 0;
 	}
-	return NOTIFY_OK;
+	return pkg_temp_thermal_device_add(cpu);
 }
 
-static struct notifier_block pkg_temp_thermal_notifier __refdata = {
-	.notifier_call = pkg_temp_thermal_cpu_callback,
-};
-
 static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
 	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS },
 	{}
@@ -580,71 +510,46 @@ MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
 
 static int __init pkg_temp_thermal_init(void)
 {
-	int i;
+	int ret;
 
 	if (!x86_match_cpu(pkg_temp_thermal_ids))
 		return -ENODEV;
 
-	spin_lock_init(&pkg_work_lock);
-	platform_thermal_package_notify =
-			pkg_temp_thermal_platform_thermal_notify;
-	platform_thermal_package_rate_control =
-			pkg_temp_thermal_platform_thermal_rate_control;
+	max_packages = topology_max_packages();
+	packages = kzalloc(max_packages * sizeof(struct pkg_device *), GFP_KERNEL);
+	if (!packages)
+		return -ENOMEM;
 
-	cpu_notifier_register_begin();
-	for_each_online_cpu(i)
-		if (get_core_online(i))
-			goto err_ret;
-	__register_hotcpu_notifier(&pkg_temp_thermal_notifier);
-	cpu_notifier_register_done();
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
+				pkg_thermal_cpu_online,	pkg_thermal_cpu_offline);
+	if (ret < 0)
+		goto err;
 
-	pkg_temp_debugfs_init(); /* Don't care if fails */
+	/* Store the state for module exit */
+	pkg_thermal_hp_state = ret;
 
-	return 0;
+	platform_thermal_package_notify = pkg_thermal_notify;
+	platform_thermal_package_rate_control = pkg_thermal_rate_control;
 
-err_ret:
-	for_each_online_cpu(i)
-		put_core_offline(i);
-	cpu_notifier_register_done();
-	kfree(pkg_work_scheduled);
-	platform_thermal_package_notify = NULL;
-	platform_thermal_package_rate_control = NULL;
+	 /* Don't care if it fails */
+	pkg_temp_debugfs_init();
+	return 0;
 
-	return -ENODEV;
+err:
+	kfree(packages);
+	return ret;
 }
+module_init(pkg_temp_thermal_init)
 
 static void __exit pkg_temp_thermal_exit(void)
 {
-	struct phy_dev_entry *phdev, *n;
-	int i;
-
-	cpu_notifier_register_begin();
-	__unregister_hotcpu_notifier(&pkg_temp_thermal_notifier);
-	mutex_lock(&phy_dev_list_mutex);
-	list_for_each_entry_safe(phdev, n, &phy_dev_list, list) {
-		/* Retore old MSR value for package thermal interrupt */
-		wrmsr_on_cpu(phdev->first_cpu,
-			MSR_IA32_PACKAGE_THERM_INTERRUPT,
-			phdev->start_pkg_therm_low,
-			phdev->start_pkg_therm_high);
-		thermal_zone_device_unregister(phdev->tzone);
-		list_del(&phdev->list);
-		kfree(phdev);
-	}
-	mutex_unlock(&phy_dev_list_mutex);
 	platform_thermal_package_notify = NULL;
 	platform_thermal_package_rate_control = NULL;
-	for_each_online_cpu(i)
-		cancel_delayed_work_sync(
-			&per_cpu(pkg_temp_thermal_threshold_work, i));
-	cpu_notifier_register_done();
-
-	kfree(pkg_work_scheduled);
 
+	cpuhp_remove_state(pkg_thermal_hp_state);
 	debugfs_remove_recursive(debugfs);
+	kfree(packages);
 }
-
-module_init(pkg_temp_thermal_init)
 module_exit(pkg_temp_thermal_exit)
 
 MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");

+ 6 - 3
include/linux/devfreq_cooling.h

@@ -20,7 +20,6 @@
 #include <linux/devfreq.h>
 #include <linux/thermal.h>
 
-#ifdef CONFIG_DEVFREQ_THERMAL
 
 /**
  * struct devfreq_cooling_power - Devfreq cooling power ops
@@ -37,12 +36,16 @@
  *			@dyn_power_coeff * frequency * voltage^2
  */
 struct devfreq_cooling_power {
-	unsigned long (*get_static_power)(unsigned long voltage);
-	unsigned long (*get_dynamic_power)(unsigned long freq,
+	unsigned long (*get_static_power)(struct devfreq *devfreq,
+					  unsigned long voltage);
+	unsigned long (*get_dynamic_power)(struct devfreq *devfreq,
+					   unsigned long freq,
 					   unsigned long voltage);
 	unsigned long dyn_power_coeff;
 };
 
+#ifdef CONFIG_DEVFREQ_THERMAL
+
 struct thermal_cooling_device *
 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 				  struct devfreq_cooling_power *dfc_power);

+ 2 - 0
include/linux/thermal.h

@@ -28,6 +28,7 @@
 #include <linux/of.h>
 #include <linux/idr.h>
 #include <linux/device.h>
+#include <linux/sysfs.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/thermal.h>
 
@@ -204,6 +205,7 @@ struct thermal_zone_device {
 	int id;
 	char type[THERMAL_NAME_LENGTH];
 	struct device device;
+	struct attribute_group trips_attribute_group;
 	struct thermal_attr *trip_temp_attrs;
 	struct thermal_attr *trip_type_attrs;
 	struct thermal_attr *trip_hyst_attrs;