@@ -25,7 +25,7 @@
 #include <linux/hardirq.h>
 #include <linux/linkage.h>
 #include <linux/uaccess.h>
-#include <linux/kprobes.h>
+#include <linux/vmalloc.h>
 #include <linux/ftrace.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
@@ -319,6 +319,258 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec,
 	return 0;
 }
 
+void trace_free_pid_list(struct trace_pid_list *pid_list)
+{
+	vfree(pid_list->pids);
+	kfree(pid_list);
+}
+
+/**
+ * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
+ * @filtered_pids: The list of pids to check
+ * @search_pid: The PID to find in @filtered_pids
+ *
+ * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
+ */
+bool
+trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
+{
+	/*
+	 * If pid_max changed after filtered_pids was created, we
+	 * by default ignore all pids greater than the previous pid_max.
+	 */
+	if (search_pid >= filtered_pids->pid_max)
+		return false;
+
+	return test_bit(search_pid, filtered_pids->pids);
+}
+
+/**
+ * trace_ignore_this_task - should a task be ignored for tracing
+ * @filtered_pids: The list of pids to check
+ * @task: The task that should be ignored if not filtered
+ *
+ * Checks if @task should be traced or not from @filtered_pids.
+ * Returns true if @task should *NOT* be traced.
+ * Returns false if @task should be traced.
+ */
+bool
+trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
+{
+	/*
+	 * Return false, because if filtered_pids does not exist,
+	 * all pids are good to trace.
+	 */
+	if (!filtered_pids)
+		return false;
+
+	return !trace_find_filtered_pid(filtered_pids, task->pid);
+}
+
+/**
+ * trace_filter_add_remove_task - Add or remove a task from a pid_list
+ * @pid_list: The list to modify
+ * @self: The current task for fork or NULL for exit
+ * @task: The task to add or remove
+ *
+ * If adding a task, if @self is defined, the task is only added if @self
+ * is also included in @pid_list. This happens on fork and tasks should
+ * only be added when the parent is listed. If @self is NULL, then the
+ * @task pid will be removed from the list, which would happen on exit
+ * of a task.
+ */
+void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+				  struct task_struct *self,
+				  struct task_struct *task)
+{
+	if (!pid_list)
+		return;
+
+	/* For forks, we only add if the forking task is listed */
+	if (self) {
+		if (!trace_find_filtered_pid(pid_list, self->pid))
+			return;
+	}
+
+	/* Sorry, but we don't support pid_max changing after setting */
+	if (task->pid >= pid_list->pid_max)
+		return;
+
+	/* "self" is set for forks, and NULL for exits */
+	if (self)
+		set_bit(task->pid, pid_list->pids);
+	else
+		clear_bit(task->pid, pid_list->pids);
+}
+
+/**
+ * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
+ * @pid_list: The pid list to show
+ * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
+ * @pos: The position of the file
+ *
+ * This is used by the seq_file "next" operation to iterate the pids
+ * listed in a trace_pid_list structure.
+ *
+ * Returns the pid+1 as we want to display pid of zero, but NULL would
+ * stop the iteration.
+ */
+void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
+{
+	unsigned long pid = (unsigned long)v;
+
+	(*pos)++;
+
+	/* pid already is +1 of the actual previous bit */
+	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
+
+	/* Return pid + 1 to allow zero to be represented */
+	if (pid < pid_list->pid_max)
+		return (void *)(pid + 1);
+
+	return NULL;
+}
+
+/**
+ * trace_pid_start - Used for seq_file to start reading pid lists
+ * @pid_list: The pid list to show
+ * @pos: The position of the file
+ *
+ * This is used by seq_file "start" operation to start the iteration
+ * of listing pids.
+ *
+ * Returns the pid+1 as we want to display pid of zero, but NULL would
+ * stop the iteration.
+ */
+void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
+{
+	unsigned long pid;
+	loff_t l = 0;
+
+	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
+	if (pid >= pid_list->pid_max)
+		return NULL;
+
+	/* Return pid + 1 so that zero can be the exit value */
+	for (pid++; pid && l < *pos;
+	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
+		;
+	return (void *)pid;
+}
+
+/**
+ * trace_pid_show - show the current pid in seq_file processing
+ * @m: The seq_file structure to write into
+ * @v: A void pointer of the pid (+1) value to display
+ *
+ * Can be directly used by seq_file operations to display the current
+ * pid value.
+ */
+int trace_pid_show(struct seq_file *m, void *v)
+{
+	unsigned long pid = (unsigned long)v - 1;
+
+	seq_printf(m, "%lu\n", pid);
+	return 0;
+}
+
+/* 128 should be much more than enough */
+#define PID_BUF_SIZE 127
+
+int trace_pid_write(struct trace_pid_list *filtered_pids,
+		    struct trace_pid_list **new_pid_list,
+		    const char __user *ubuf, size_t cnt)
+{
+	struct trace_pid_list *pid_list;
+	struct trace_parser parser;
+	unsigned long val;
+	int nr_pids = 0;
+	ssize_t read = 0;
+	ssize_t ret = 0;
+	loff_t pos;
+	pid_t pid;
+
+	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
+		return -ENOMEM;
+
+	/*
+	 * Always recreate a new array. The write is an all or nothing
+	 * operation. Always create a new array when adding new pids by
+	 * the user. If the operation fails, then the current list is
+	 * not modified.
+	 */
+	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
+	if (!pid_list)
+		return -ENOMEM;
+
+	pid_list->pid_max = READ_ONCE(pid_max);
+
+	/* Only truncating will shrink pid_max */
+	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
+		pid_list->pid_max = filtered_pids->pid_max;
+
+	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
+	if (!pid_list->pids) {
+		kfree(pid_list);
+		return -ENOMEM;
+	}
+
+	if (filtered_pids) {
+		/* copy the current bits to the new max */
+		for_each_set_bit(pid, filtered_pids->pids,
+				 filtered_pids->pid_max) {
+			set_bit(pid, pid_list->pids);
+			nr_pids++;
+		}
+	}
+
+	while (cnt > 0) {
+
+		pos = 0;
+
+		ret = trace_get_user(&parser, ubuf, cnt, &pos);
+		if (ret < 0 || !trace_parser_loaded(&parser))
+			break;
+
+		read += ret;
+		ubuf += ret;
+		cnt -= ret;
+
+		parser.buffer[parser.idx] = 0;
+
+		ret = -EINVAL;
+		if (kstrtoul(parser.buffer, 0, &val))
+			break;
+		if (val >= pid_list->pid_max)
+			break;
+
+		pid = (pid_t)val;
+
+		set_bit(pid, pid_list->pids);
+		nr_pids++;
+
+		trace_parser_clear(&parser);
+		ret = 0;
+	}
+	trace_parser_put(&parser);
+
+	if (ret < 0) {
+		trace_free_pid_list(pid_list);
+		return ret;
+	}
+
+	if (!nr_pids) {
+		/* Cleared the list of pids */
+		trace_free_pid_list(pid_list);
+		read = ret;
+		pid_list = NULL;
+	}
+
+	*new_pid_list = pid_list;
+
+	return read;
+}
+
 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
 	u64 ts;
@@ -1862,7 +2114,17 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 {
 	__buffer_unlock_commit(buffer, event);
 
-	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
+	/*
+	 * If regs is not set, then skip the following callers:
+	 *   trace_buffer_unlock_commit_regs
+	 *   event_trigger_unlock_commit
+	 *   trace_event_buffer_commit
+	 *   trace_event_raw_event_sched_switch
+	 * Note, we can still get here via blktrace, wakeup tracer
+	 * and mmiotrace, but that's ok if they lose a function or
+	 * two. They are not that meaningful.
+	 */
+	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
 
@@ -1912,6 +2174,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	trace.nr_entries = 0;
 	trace.skip = skip;
 
+	/*
+	 * Add two, for this function and the call to save_stack_trace()
+	 * If regs is set, then these functions will not be in the way.
+	 */
+	if (!regs)
+		trace.skip += 2;
+
 	/*
 	 * Since events can happen in NMIs there's no safe way to
 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
@@ -2083,83 +2352,41 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
 
 /* created for use with alloc_percpu */
 struct trace_buffer_struct {
-	char buffer[TRACE_BUF_SIZE];
+	int nesting;
+	char buffer[4][TRACE_BUF_SIZE];
 };
 
 static struct trace_buffer_struct *trace_percpu_buffer;
-static struct trace_buffer_struct *trace_percpu_sirq_buffer;
-static struct trace_buffer_struct *trace_percpu_irq_buffer;
-static struct trace_buffer_struct *trace_percpu_nmi_buffer;
 
 /*
- * The buffer used is dependent on the context. There is a per cpu
- * buffer for normal context, softirq contex, hard irq context and
- * for NMI context. Thise allows for lockless recording.
- *
- * Note, if the buffers failed to be allocated, then this returns NULL
+ * This allows for lockless recording. If we're nested too deeply, then
+ * this returns NULL.
  */
 static char *get_trace_buf(void)
 {
-	struct trace_buffer_struct *percpu_buffer;
-
-	/*
-	 * If we have allocated per cpu buffers, then we do not
-	 * need to do any locking.
-	 */
-	if (in_nmi())
-		percpu_buffer = trace_percpu_nmi_buffer;
-	else if (in_irq())
-		percpu_buffer = trace_percpu_irq_buffer;
-	else if (in_softirq())
-		percpu_buffer = trace_percpu_sirq_buffer;
-	else
-		percpu_buffer = trace_percpu_buffer;
+	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
 
-	if (!percpu_buffer)
+	if (!buffer || buffer->nesting >= 4)
 		return NULL;
 
-	return this_cpu_ptr(&percpu_buffer->buffer[0]);
+	return &buffer->buffer[buffer->nesting++][0];
+}
+
+static void put_trace_buf(void)
+{
+	this_cpu_dec(trace_percpu_buffer->nesting);
 }
 
 static int alloc_percpu_trace_buffer(void)
 {
 	struct trace_buffer_struct *buffers;
-	struct trace_buffer_struct *sirq_buffers;
-	struct trace_buffer_struct *irq_buffers;
-	struct trace_buffer_struct *nmi_buffers;
 
 	buffers = alloc_percpu(struct trace_buffer_struct);
-	if (!buffers)
-		goto err_warn;
-
-	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
-	if (!sirq_buffers)
-		goto err_sirq;
-
-	irq_buffers = alloc_percpu(struct trace_buffer_struct);
-	if (!irq_buffers)
-		goto err_irq;
-
-	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
-	if (!nmi_buffers)
-		goto err_nmi;
+	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
+		return -ENOMEM;
 
 	trace_percpu_buffer = buffers;
-	trace_percpu_sirq_buffer = sirq_buffers;
-	trace_percpu_irq_buffer = irq_buffers;
-	trace_percpu_nmi_buffer = nmi_buffers;
-
 	return 0;
-
- err_nmi:
-	free_percpu(irq_buffers);
- err_irq:
-	free_percpu(sirq_buffers);
- err_sirq:
-	free_percpu(buffers);
- err_warn:
-	WARN(1, "Could not allocate percpu trace_printk buffer");
-	return -ENOMEM;
 }
 
 static int buffers_allocated;
@@ -2250,7 +2477,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	tbuffer = get_trace_buf();
 	if (!tbuffer) {
 		len = 0;
-		goto out;
+		goto out_nobuffer;
 	}
 
 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
@@ -2276,6 +2503,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	}
 
 out:
+	put_trace_buf();
+
+out_nobuffer:
 	preempt_enable_notrace();
 	unpause_graph_tracing();
 
@@ -2307,7 +2537,7 @@ __trace_array_vprintk(struct ring_buffer *buffer,
 	tbuffer = get_trace_buf();
 	if (!tbuffer) {
 		len = 0;
-		goto out;
+		goto out_nobuffer;
 	}
 
 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
@@ -2326,7 +2556,11 @@ __trace_array_vprintk(struct ring_buffer *buffer,
 		__buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
 	}
- out:
+
+out:
+	put_trace_buf();
+
+out_nobuffer:
 	preempt_enable_notrace();
 	unpause_graph_tracing();
 
@@ -6977,6 +7211,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 	for_each_tracing_cpu(cpu)
 		tracing_init_tracefs_percpu(tr, cpu);
 
+	ftrace_init_tracefs(tr, d_tracer);
 }
 
 static struct vfsmount *trace_automount(void *ingore)
@@ -7130,6 +7365,7 @@ static __init int tracer_init_tracefs(void)
 		return 0;
 
 	init_tracer_tracefs(&global_trace, d_tracer);
+	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
 
 	trace_create_file("tracing_thresh", 0644, d_tracer,
 			&global_trace, &tracing_thresh_fops);