Browse Source

greybus: svc_watchdog: Add sysfs file to change the behavior of bite

Currently, AP performs unipro_reset if SVC fails to respond to its
ping. While this error recovery is best suited for the end-user
experience, errors in the UniPro network could potentially go unnoticed
by the QA and fishfooders in the development phase of the project. This
patch adds an option to trigger a kernel panic so logs can be collected
for analysis.

Testing Done:
 - Reproduce issue and observe kernel panic when
   watchdog_action is changed to 'panic'

Signed-off-by: David Lin <dtwlin@google.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
David Lin 9 years ago
parent
commit
7c4a0edb38

+ 16 - 0
drivers/staging/greybus/Documentation/sysfs-bus-greybus

@@ -257,3 +257,19 @@ Contact:	Greg Kroah-Hartman <greg@kroah.com>
 Description:
 		If the SVC watchdog is enabled or not.  Writing 0 to this
 		file will disable the watchdog, writing 1 will enable it.
+
+What:		/sys/bus/greybus/devices/N-svc/watchdog_action
+Date:		July 2016
+KernelVersion:	4.XX
+Contact:	Greg Kroah-Hartman <greg@kroah.com>
+Description:
+		This attribute indicates the action to be performed upon SVC
+		watchdog bite.
+
+		The action can be either "reset" or "panic". Writing either
+		"reset" or "panic" to this file will change the behavior of
+		the SVC watchdog bite. The default value is "reset".
+
+		"reset" means the UniPro subsystem is to be reset.
+
+		"panic" means SVC watchdog bite will cause kernel to panic.

+ 31 - 0
drivers/staging/greybus/svc.c

@@ -100,6 +100,36 @@ static ssize_t watchdog_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(watchdog);
 
+static ssize_t watchdog_action_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct gb_svc *svc = to_gb_svc(dev);
+
+	if (svc->action == GB_SVC_WATCHDOG_BITE_PANIC_KERNEL)
+		return sprintf(buf, "panic\n");
+	else if (svc->action == GB_SVC_WATCHDOG_BITE_RESET_UNIPRO)
+		return sprintf(buf, "reset\n");
+
+	return -EINVAL;
+}
+
+static ssize_t watchdog_action_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t len)
+{
+	struct gb_svc *svc = to_gb_svc(dev);
+
+	if (sysfs_streq(buf, "panic"))
+		svc->action = GB_SVC_WATCHDOG_BITE_PANIC_KERNEL;
+	else if (sysfs_streq(buf, "reset"))
+		svc->action = GB_SVC_WATCHDOG_BITE_RESET_UNIPRO;
+	else
+		return -EINVAL;
+
+	return len;
+}
+static DEVICE_ATTR_RW(watchdog_action);
+
 static int gb_svc_pwrmon_rail_count_get(struct gb_svc *svc, u8 *value)
 {
 	struct gb_svc_pwrmon_rail_count_get_response response;
@@ -222,6 +252,7 @@ static struct attribute *svc_attrs[] = {
 	&dev_attr_ap_intf_id.attr,
 	&dev_attr_intf_eject.attr,
 	&dev_attr_watchdog.attr,
+	&dev_attr_watchdog_action.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(svc);

+ 6 - 0
drivers/staging/greybus/svc.h

@@ -20,6 +20,11 @@ enum gb_svc_state {
 	GB_SVC_STATE_SVC_HELLO,
 };
 
+enum gb_svc_watchdog_bite {
+	GB_SVC_WATCHDOG_BITE_RESET_UNIPRO = 0,
+	GB_SVC_WATCHDOG_BITE_PANIC_KERNEL,
+};
+
 struct gb_svc_watchdog;
 
 struct svc_debugfs_pwrmon_rail {
@@ -43,6 +48,7 @@ struct gb_svc {
 	u8 protocol_minor;
 
 	struct gb_svc_watchdog	*watchdog;
+	enum gb_svc_watchdog_bite action;
 
 	struct dentry *debugfs_dentry;
 	struct svc_debugfs_pwrmon_rail *pwrmon_rails;

+ 13 - 8
drivers/staging/greybus/svc_watchdog.c

@@ -83,16 +83,21 @@ static void do_work(struct work_struct *work)
 		dev_err(&svc->dev,
 			"SVC ping has returned %d, something is wrong!!!\n",
 			retval);
-		dev_err(&svc->dev, "Resetting the greybus network, watch out!!!\n");
 
-		INIT_DELAYED_WORK(&reset_work, greybus_reset);
-		queue_delayed_work(system_wq, &reset_work, HZ/2);
+		if (svc->action == GB_SVC_WATCHDOG_BITE_PANIC_KERNEL) {
+			panic("SVC is not responding\n");
+		} else if (svc->action == GB_SVC_WATCHDOG_BITE_RESET_UNIPRO) {
+			dev_err(&svc->dev, "Resetting the greybus network, watch out!!!\n");
 
-		/*
-		 * Disable ourselves, we don't want to trip again unless
-		 * userspace wants us to.
-		 */
-		watchdog->enabled = false;
+			INIT_DELAYED_WORK(&reset_work, greybus_reset);
+			queue_delayed_work(system_wq, &reset_work, HZ / 2);
+
+			/*
+			 * Disable ourselves, we don't want to trip again unless
+			 * userspace wants us to.
+			 */
+			watchdog->enabled = false;
+		}
 	}
 
 	/* resubmit our work to happen again, if we are still "alive" */