@@ -0,0 +1,633 @@
+/*
+ * trace_hwlatdetect.c - A simple Hardware Latency detector.
+ *
+ * Use this tracer to detect large system latencies induced by the behavior of
+ * certain underlying system hardware or firmware, independent of Linux itself.
+ * The code was originally developed to detect the presence of SMIs on Intel
+ * and AMD systems, although there is no dependency upon x86 herein.
+ *
+ * The classical example usage of this tracer is in detecting the presence of
+ * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
+ * somewhat special form of hardware interrupt spawned from earlier CPU debug
+ * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
+ * LPC (or other device) to generate a special interrupt under certain
+ * circumstances, for example, upon expiration of a special SMI timer device,
+ * due to certain external thermal readings, on certain I/O address accesses,
+ * and other situations. An SMI hits a special CPU pin, triggers a special
+ * SMI mode (complete with special memory map), and the OS is unaware.
+ *
+ * Although certain hardware-induced latencies are necessary (for example,
+ * a modern system often requires an SMI handler for correct thermal control
+ * and remote management), they can wreak havoc upon any OS-level low-latency
+ * performance guarantee, especially when the OS is not even made aware of
+ * their presence. For this reason, we need a somewhat brute-force mechanism
+ * to detect these interrupts. In this case, we do it by hogging all of the
+ * CPU(s) for configurable timer intervals, sampling the built-in CPU timer
+ * and looking for discontiguous readings.
+ *
+ * WARNING: This implementation necessarily introduces latencies. Therefore,
+ *          you should NEVER use this tracer while running in a production
+ *          environment requiring any kind of low-latency performance
+ *          guarantee(s).
+ *
+ * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
+ * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
+ *
+ * Includes useful feedback from Clark Williams <clark@redhat.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
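+
+/*
+ * A typical session, as a sketch only (paths assume tracefs is mounted
+ * at /sys/kernel/tracing; the "hwlat_detector" directory, the "hwlat"
+ * tracer name and the microsecond units are the ones created/used below):
+ *
+ *	# echo 1000000 > /sys/kernel/tracing/hwlat_detector/window
+ *	# echo 500000 > /sys/kernel/tracing/hwlat_detector/width
+ *	# echo 10 > /sys/kernel/tracing/tracing_thresh
+ *	# echo hwlat > /sys/kernel/tracing/current_tracer
+ *	# cat /sys/kernel/tracing/trace
+ *
+ * Only samples whose inner or outer duration exceeds tracing_thresh
+ * (in usecs) are recorded.
+ */
+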
+#include <linux/kthread.h>
+#include <linux/tracefs.h>
+#include <linux/uaccess.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include "trace.h"
+
+static struct trace_array	*hwlat_trace;
+
+#define U64STR_SIZE		22			/* 20 digits max */
+
+#define BANNER			"hwlat_detector: "
+#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
+#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
+#define DEFAULT_LAT_THRESHOLD	10			/* 10us */
+
+/* sampling thread */
+static struct task_struct	*hwlat_kthread;
+
+static struct dentry		*hwlat_sample_width;	/* sample width us */
+static struct dentry		*hwlat_sample_window;	/* sample window us */
+
+/* Save the previous tracing_thresh value */
+static unsigned long		save_tracing_thresh;
+
+/* NMI timestamp counters */
+static u64			nmi_ts_start;
+static u64			nmi_total_ts;
+static int			nmi_count;
+static int			nmi_cpu;
+
+/* Tells NMIs to call back to the hwlat tracer to record timestamps */
+bool				trace_hwlat_callback_enabled;
+
+/* If the user changed threshold, remember it */
+static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC;
+
+/* Individual latency samples are stored here when detected. */
+struct hwlat_sample {
+	u64		seqnum;		/* unique sequence */
+	u64		duration;	/* delta */
+	u64		outer_duration;	/* delta (outer loop) */
+	u64		nmi_total_ts;	/* Total time spent in NMIs */
+	struct timespec	timestamp;	/* wall time */
+	int		nmi_count;	/* # NMIs during this sample */
+};
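+
+/*
+ * The trace output formatting for this entry lives elsewhere (see the
+ * TRACE_HWLAT handling in trace_output.c); roughly, and only as a
+ * sketch rather than the authoritative format, a detected sample is
+ * reported as:
+ *
+ *	#1     inner/outer(us): 12/14	ts:1474939352.812971626
+ *
+ * with the NMI totals appended when NMIs fired during the sample.
+ */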
+
+/* keep the global state somewhere. */
+static struct hwlat_data {
+
+	struct mutex lock;		/* protect changes */
+
+	u64	count;			/* total since reset */
+
+	u64	sample_window;		/* total sampling window (on+off) */
+	u64	sample_width;		/* active sampling portion of window */
+
+} hwlat_data = {
+	.sample_window		= DEFAULT_SAMPLE_WINDOW,
+	.sample_width		= DEFAULT_SAMPLE_WIDTH,
+};
+
+static void trace_hwlat_sample(struct hwlat_sample *sample)
+{
+	struct trace_array *tr = hwlat_trace;
+	struct trace_event_call *call = &event_hwlat;
+	struct ring_buffer *buffer = tr->trace_buffer.buffer;
+	struct ring_buffer_event *event;
+	struct hwlat_entry *entry;
+	unsigned long flags;
+	int pc;
+
+	pc = preempt_count();
+	local_save_flags(flags);
+
+	event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry),
+					  flags, pc);
+	if (!event)
+		return;
+	entry = ring_buffer_event_data(event);
+	entry->seqnum		= sample->seqnum;
+	entry->duration		= sample->duration;
+	entry->outer_duration	= sample->outer_duration;
+	entry->timestamp	= sample->timestamp;
+	entry->nmi_total_ts	= sample->nmi_total_ts;
+	entry->nmi_count	= sample->nmi_count;
+
+	if (!call_filter_check_discard(call, entry, buffer, event))
+		__buffer_unlock_commit(buffer, event);
+}
+
+/* Macros to encapsulate the time capturing infrastructure */
+#define time_type	u64
+#define time_get()	trace_clock_local()
+#define time_to_us(x)	div_u64(x, 1000)
+#define time_sub(a, b)	((a) - (b))
+#define init_time(a, b)	(a = b)
+#define time_u64(a)	a
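+
+/*
+ * Purely as an illustration (this mirrors the inner-loop check in
+ * get_sample() below), a single inner-loop delta in microseconds is
+ * obtained through these macros as:
+ *
+ *	time_type t1 = time_get();
+ *	time_type t2 = time_get();
+ *	s64 diff = time_to_us(time_sub(t2, t1));
+ */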
+
+void trace_hwlat_callback(bool enter)
+{
+	if (smp_processor_id() != nmi_cpu)
+		return;
+
+	/*
+	 * Currently trace_clock_local() calls sched_clock() and the
+	 * generic version is not NMI safe.
+	 */
+	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
+		if (enter)
+			nmi_ts_start = time_get();
+		else
+			nmi_total_ts = time_get() - nmi_ts_start;
+	}
+
+	if (enter)
+		nmi_count++;
+}
+
+/**
+ * get_sample - sample the CPU TSC and look for likely hardware latencies
+ *
+ * Used to repeatedly capture the CPU TSC (or similar), looking for potential
+ * hardware-induced latency. Called with interrupts disabled and with
+ * hwlat_data.lock held.
+ */
+static int get_sample(void)
+{
+	struct trace_array *tr = hwlat_trace;
+	time_type start, t1, t2, last_t2;
+	s64 diff, total, last_total = 0;
+	u64 sample = 0;
+	u64 thresh = tracing_thresh;
+	u64 outer_sample = 0;
+	int ret = -1;
+
+	do_div(thresh, NSEC_PER_USEC); /* modifies thresh value */
+
+	nmi_cpu = smp_processor_id();
+	nmi_total_ts = 0;
+	nmi_count = 0;
+	/* Make sure NMIs see this first */
+	barrier();
+
+	trace_hwlat_callback_enabled = true;
+
+	init_time(last_t2, 0);
+	start = time_get(); /* start timestamp */
+
+	do {
+
+		t1 = time_get();	/* we'll look for a discontinuity */
+		t2 = time_get();
+
+		if (time_u64(last_t2)) {
+			/* Check the delta from outer loop (t2 to next t1) */
+			diff = time_to_us(time_sub(t1, last_t2));
+			/* This shouldn't happen */
+			if (diff < 0) {
+				pr_err(BANNER "time running backwards\n");
+				goto out;
+			}
+			if (diff > outer_sample)
+				outer_sample = diff;
+		}
+		last_t2 = t2;
+
+		total = time_to_us(time_sub(t2, start)); /* sample width */
+
+		/* Check for possible overflows */
+		if (total < last_total) {
+			pr_err(BANNER "Time total overflowed\n");
+			break;
+		}
+		last_total = total;
+
+		/* This checks the inner loop (t1 to t2) */
+		diff = time_to_us(time_sub(t2, t1)); /* current diff */
+
+		/* This shouldn't happen */
+		if (diff < 0) {
+			pr_err(BANNER "time running backwards\n");
+			goto out;
+		}
+
+		if (diff > sample)
+			sample = diff; /* only want highest value */
+
+	} while (total <= hwlat_data.sample_width);
+
+	barrier(); /* finish the above in the view of NMIs */
+	trace_hwlat_callback_enabled = false;
+	barrier(); /* Make sure nmi_total_ts is no longer updated */
+
+	ret = 0;
+
+	/* If we exceed the threshold value, we have found a hardware latency */
+	if (sample > thresh || outer_sample > thresh) {
+		struct hwlat_sample s;
+
+		ret = 1;
+
+		/* We read in microseconds */
+		if (nmi_total_ts)
+			do_div(nmi_total_ts, NSEC_PER_USEC);
+
+		hwlat_data.count++;
+		s.seqnum = hwlat_data.count;
+		s.duration = sample;
+		s.outer_duration = outer_sample;
+		s.timestamp = CURRENT_TIME;
+		s.nmi_total_ts = nmi_total_ts;
+		s.nmi_count = nmi_count;
+		trace_hwlat_sample(&s);
+
+		/* Keep a running maximum ever recorded hardware latency */
+		if (sample > tr->max_latency)
+			tr->max_latency = sample;
+	}
+
+out:
+	return ret;
+}
+
+static struct cpumask save_cpumask;
+static bool disable_migrate;
+
+static void move_to_next_cpu(void)
+{
+	static struct cpumask *current_mask;
+	int next_cpu;
+
+	if (disable_migrate)
+		return;
+
+	/* Just pick the first CPU on first iteration */
+	if (!current_mask) {
+		current_mask = &save_cpumask;
+		get_online_cpus();
+		cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask);
+		put_online_cpus();
+		next_cpu = cpumask_first(current_mask);
+		goto set_affinity;
+	}
+
+	/*
+	 * If for some reason the user modifies the CPU affinity
+	 * of this thread, then stop migrating for the duration
+	 * of the current test.
+	 */
+	if (!cpumask_equal(current_mask, &current->cpus_allowed))
+		goto disable;
+
+	get_online_cpus();
+	cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask);
+	next_cpu = cpumask_next(smp_processor_id(), current_mask);
+	put_online_cpus();
+
+	if (next_cpu >= nr_cpu_ids)
+		next_cpu = cpumask_first(current_mask);
+
+ set_affinity:
+	if (next_cpu >= nr_cpu_ids) /* Shouldn't happen! */
+		goto disable;
+
+	cpumask_clear(current_mask);
+	cpumask_set_cpu(next_cpu, current_mask);
+
+	sched_setaffinity(0, current_mask);
+	return;
+
+ disable:
+	disable_migrate = true;
+}
+
+/*
+ * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
+ *
+ * Used to periodically sample the CPU TSC via a call to get_sample. We
+ * disable interrupts, which does (intentionally) introduce latency since we
+ * need to ensure nothing else might be running (and thus preempting).
+ * Obviously this should never be used in production environments.
+ *
+ * Currently this runs on whichever CPU it was scheduled on; since most
+ * real-world hardware latency situations occur across several CPUs at once,
+ * this suffices, but we might later generalize it if we find there are any
+ * actual systems with alternate SMI delivery or other hardware latencies.
+ */
+static int kthread_fn(void *data)
+{
+	u64 interval;
+
+	while (!kthread_should_stop()) {
+
+		move_to_next_cpu();
+
+		local_irq_disable();
+		get_sample();
+		local_irq_enable();
+
+		mutex_lock(&hwlat_data.lock);
+		interval = hwlat_data.sample_window - hwlat_data.sample_width;
+		mutex_unlock(&hwlat_data.lock);
+
+		do_div(interval, USEC_PER_MSEC); /* modifies interval value */
+
+		/* Always sleep for at least 1ms */
+		if (interval < 1)
+			interval = 1;
+
+		if (msleep_interruptible(interval))
+			break;
+	}
+
+	return 0;
+}
+
+/**
+ * start_kthread - Kick off the hardware latency sampling/detector kthread
+ *
+ * This starts the kernel thread that will sit and sample the CPU timestamp
+ * counter (TSC or similar) and look for potential hardware latencies.
+ */
+static int start_kthread(struct trace_array *tr)
+{
+	struct task_struct *kthread;
+
+	kthread = kthread_create(kthread_fn, NULL, "hwlatd");
+	if (IS_ERR(kthread)) {
+		pr_err(BANNER "could not start sampling thread\n");
+		return -ENOMEM;
+	}
+	hwlat_kthread = kthread;
+	wake_up_process(kthread);
+
+	return 0;
+}
+
+/**
+ * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
+ *
+ * This kicks the running hardware latency sampling/detector kernel thread and
+ * tells it to stop sampling now. Use this on unload and at system shutdown.
+ */
+static void stop_kthread(void)
+{
+	if (!hwlat_kthread)
+		return;
+	kthread_stop(hwlat_kthread);
+	hwlat_kthread = NULL;
+}
+
+/**
+ * hwlat_read - Wrapper read function for reading both window and width
+ * @filp: The active open file structure
+ * @ubuf: The userspace provided buffer to read value into
+ * @cnt: The maximum number of bytes to read
+ * @ppos: The current "file" position
+ *
+ * This function provides a generic read implementation for the global state
+ * "hwlat_data" structure filesystem entries.
+ */
+static ssize_t hwlat_read(struct file *filp, char __user *ubuf,
+			  size_t cnt, loff_t *ppos)
+{
+	char buf[U64STR_SIZE];
+	u64 *entry = filp->private_data;
+	u64 val;
+	int len;
+
+	if (!entry)
+		return -EFAULT;
+
+	if (cnt > sizeof(buf))
+		cnt = sizeof(buf);
+
+	val = *entry;
+
+	len = snprintf(buf, sizeof(buf), "%llu\n", val);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
+}
+
+/**
+ * hwlat_width_write - Write function for "width" entry
+ * @filp: The active open file structure
+ * @ubuf: The user buffer that contains the value to write
+ * @cnt: The maximum number of bytes to write to "file"
+ * @ppos: The current position in @file
+ *
+ * This function provides a write implementation for the "width" interface
+ * to the hardware latency detector. It can be used to configure for how
+ * many us of the total window we will actively sample for any
+ * hardware-induced latency periods. Obviously, it is not possible to sample
+ * constantly and still have the system respond to a sample reader, or,
+ * worse, avoid having the system appear to have gone out to lunch. It is
+ * enforced that the width is less than the total window size.
+ */
+static ssize_t
+hwlat_width_write(struct file *filp, const char __user *ubuf,
+		  size_t cnt, loff_t *ppos)
+{
+	u64 val;
+	int err;
+
+	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
+	if (err)
+		return err;
+
+	mutex_lock(&hwlat_data.lock);
+	if (val < hwlat_data.sample_window)
+		hwlat_data.sample_width = val;
+	else
+		err = -EINVAL;
+	mutex_unlock(&hwlat_data.lock);
+
+	if (err)
+		return err;
+
+	return cnt;
+}
+
+/**
+ * hwlat_window_write - Write function for "window" entry
+ * @filp: The active open file structure
+ * @ubuf: The user buffer that contains the value to write
+ * @cnt: The maximum number of bytes to write to "file"
+ * @ppos: The current position in @file
+ *
+ * This function provides a write implementation for the "window" interface
+ * to the hardware latency detector. The window is the total time
+ * in us that will be considered one sample period. Conceptually, windows
+ * occur back-to-back and contain a sample width period during which
+ * actual sampling occurs. Can be used to write a new total window size. It
+ * is enforced that any value written must be greater than the sample width
+ * size, or an error results.
+ */
+static ssize_t
+hwlat_window_write(struct file *filp, const char __user *ubuf,
+		   size_t cnt, loff_t *ppos)
+{
+	u64 val;
+	int err;
+
+	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
+	if (err)
+		return err;
+
+	mutex_lock(&hwlat_data.lock);
+	if (hwlat_data.sample_width < val)
+		hwlat_data.sample_window = val;
+	else
+		err = -EINVAL;
+	mutex_unlock(&hwlat_data.lock);
+
+	if (err)
+		return err;
+
+	return cnt;
+}
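+
+/*
+ * For example (a sketch, assuming tracefs is mounted at
+ * /sys/kernel/tracing and the defaults of a 1s window and 0.5s width):
+ *
+ *	# echo 2000000 > /sys/kernel/tracing/hwlat_detector/window
+ *	# echo 2500000 > /sys/kernel/tracing/hwlat_detector/width
+ *	write error: Invalid argument
+ *
+ * The second write fails with -EINVAL because the width must remain
+ * strictly less than the window.
+ */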
+
+static const struct file_operations width_fops = {
+	.open		= tracing_open_generic,
+	.read		= hwlat_read,
+	.write		= hwlat_width_write,
+};
+
+static const struct file_operations window_fops = {
+	.open		= tracing_open_generic,
+	.read		= hwlat_read,
+	.write		= hwlat_window_write,
+};
+
+/**
+ * init_tracefs - A function to initialize the tracefs interface files
+ *
+ * This function creates entries in tracefs for "hwlat_detector".
+ * It creates the hwlat_detector directory in the tracing directory,
+ * and within that directory are the window and width files used to
+ * change and view those values.
+ */
+static int init_tracefs(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *top_dir;
+
+	d_tracer = tracing_init_dentry();
+	if (IS_ERR(d_tracer))
+		return -ENOMEM;
+
+	top_dir = tracefs_create_dir("hwlat_detector", d_tracer);
+	if (!top_dir)
+		return -ENOMEM;
+
+	hwlat_sample_window = tracefs_create_file("window", 0640,
+						  top_dir,
+						  &hwlat_data.sample_window,
+						  &window_fops);
+	if (!hwlat_sample_window)
+		goto err;
+
+	hwlat_sample_width = tracefs_create_file("width", 0640,
+						 top_dir,
+						 &hwlat_data.sample_width,
+						 &width_fops);
+	if (!hwlat_sample_width)
+		goto err;
+
+	return 0;
+
+ err:
+	tracefs_remove_recursive(top_dir);
+	return -ENOMEM;
+}
+
+static void hwlat_tracer_start(struct trace_array *tr)
+{
+	int err;
+
+	err = start_kthread(tr);
+	if (err)
+		pr_err(BANNER "Cannot start hwlat kthread\n");
+}
+
+static void hwlat_tracer_stop(struct trace_array *tr)
+{
+	stop_kthread();
+}
+
+static bool hwlat_busy;
+
+static int hwlat_tracer_init(struct trace_array *tr)
+{
+	/* Only allow one instance to enable this */
+	if (hwlat_busy)
+		return -EBUSY;
+
+	hwlat_trace = tr;
+
+	disable_migrate = false;
+	hwlat_data.count = 0;
+	tr->max_latency = 0;
+	save_tracing_thresh = tracing_thresh;
+
+	/* tracing_thresh is in nsecs, we speak in usecs */
+	if (!tracing_thresh)
+		tracing_thresh = last_tracing_thresh;
+
+	if (tracer_tracing_is_on(tr))
+		hwlat_tracer_start(tr);
+
+	hwlat_busy = true;
+
+	return 0;
+}
+
+static void hwlat_tracer_reset(struct trace_array *tr)
+{
+	stop_kthread();
+
+	/* the tracing threshold is static between runs */
+	last_tracing_thresh = tracing_thresh;
+
+	tracing_thresh = save_tracing_thresh;
+	hwlat_busy = false;
+}
+
+static struct tracer hwlat_tracer __read_mostly =
+{
+	.name		= "hwlat",
+	.init		= hwlat_tracer_init,
+	.reset		= hwlat_tracer_reset,
+	.start		= hwlat_tracer_start,
+	.stop		= hwlat_tracer_stop,
+	.allow_instances = true,
+};
+
+__init static int init_hwlat_tracer(void)
+{
+	int ret;
+
+	mutex_init(&hwlat_data.lock);
+
+	ret = register_tracer(&hwlat_tracer);
+	if (ret)
+		return ret;
+
+	init_tracefs();
+
+	return 0;
+}
+late_initcall(init_hwlat_tracer);
|