
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
Linus Torvalds, 13 years ago
parent
commit
83c2f912b4

+ 8 - 0
Documentation/kernel-parameters.txt

@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
 
+	stacktrace_filter=[function-list]
+			[FTRACE] Limit the functions that the stack tracer
+			will trace at boot up. function-list is a comma separated
+			list of functions. This list can be changed at run
+			time by the stack_trace_filter file in the debugfs
+			tracing directory. Note, this enables stack tracing
+			and the stacktrace above is not needed.
+
 	sti=		[PARISC,HW]
 			Format: <num>
 			Set the STI (builtin display/keyboard on the HP-PARISC

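For illustration only (not part of this commit): a minimal user-space sketch of driving the run-time side of this parameter through the stack_trace_filter file named above, assuming debugfs is mounted at /sys/kernel/debug.

/* Hypothetical sketch: update the stack tracer filter at run time via
 * the debugfs file described in the documentation hunk above. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/stack_trace_filter", "w");

	if (!f)
		return 1;
	/* Same comma-separated function list as the stacktrace_filter= boot option. */
	fprintf(f, "schedule,do_IRQ\n");
	return fclose(f) ? 1 : 0;
}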
+ 22 - 0
arch/x86/include/asm/debugreg.h

@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
 
 extern void hw_breakpoint_restore(void);
 
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(int, debug_stack_usage);
+static inline void debug_stack_usage_inc(void)
+{
+	__get_cpu_var(debug_stack_usage)++;
+}
+static inline void debug_stack_usage_dec(void)
+{
+	__get_cpu_var(debug_stack_usage)--;
+}
+int is_debug_stack(unsigned long addr);
+void debug_stack_set_zero(void);
+void debug_stack_reset(void);
+#else /* !X86_64 */
+static inline int is_debug_stack(unsigned long addr) { return 0; }
+static inline void debug_stack_set_zero(void) { }
+static inline void debug_stack_reset(void) { }
+static inline void debug_stack_usage_inc(void) { }
+static inline void debug_stack_usage_dec(void) { }
+#endif /* X86_64 */
+
+
 #endif	/* __KERNEL__ */
 
 #endif /* _ASM_X86_DEBUGREG_H */

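A minimal sketch (not from the commit; the function name is invented) of the usage pattern these helpers are designed for, mirroring the do_int3()/do_debug() changes further down in this series: any path that may keep running while the debug stack is live brackets itself with the per-cpu counter so that is_debug_stack() reports it to the NMI code.

/* Illustrative sketch: tell the NMI path that the debug stack may be
 * in use across this region, even if we later move off the IST stack. */
static void example_debug_stack_section(void)
{
	debug_stack_usage_inc();

	/* ... breakpoint handling that may enable interrupts ... */

	debug_stack_usage_dec();
}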
+ 12 - 0
arch/x86/include/asm/desc.h

@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
+extern struct desc_ptr nmi_idt_descr;
+extern gate_desc nmi_idt_table[];
 
 struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
 	desc->limit = (limit >> 16) & 0xf;
 }
 
+#ifdef CONFIG_X86_64
+static inline void set_nmi_gate(int gate, void *addr)
+{
+	gate_desc s;
+
+	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+	write_idt_entry(nmi_idt_table, gate, &s);
+}
+#endif
+
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
 {

+ 24 - 0
arch/x86/kernel/cpu/common.c

@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
+				    (unsigned long) nmi_idt_table };
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE);
@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
+static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+DEFINE_PER_CPU(int, debug_stack_usage);
+
+int is_debug_stack(unsigned long addr)
+{
+	return __get_cpu_var(debug_stack_usage) ||
+		(addr <= __get_cpu_var(debug_stack_addr) &&
+		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
+}
+
+void debug_stack_set_zero(void)
+{
+	load_idt((const struct desc_ptr *)&nmi_idt_descr);
+}
+
+void debug_stack_reset(void)
+{
+	load_idt((const struct desc_ptr *)&idt_descr);
+}
+
 #else	/* CONFIG_X86_64 */
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
 			estacks += exception_stack_sizes[v];
 			oist->ist[v] = t->x86_tss.ist[v] =
 					(unsigned long)estacks;
+			if (v == DEBUG_STACK-1)
+				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
 		}
 	}
 

+ 185 - 33
arch/x86/kernel/entry_64.S

@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
 	CFI_ENDPROC
 END(error_exit)
 
+/*
+ * Test if a given stack is an NMI stack or not.
+ */
+	.macro test_in_nmi reg stack nmi_ret normal_ret
+	cmpq %\reg, \stack
+	ja \normal_ret
+	subq $EXCEPTION_STKSZ, %\reg
+	cmpq %\reg, \stack
+	jb \normal_ret
+	jmp \nmi_ret
+	.endm
 
 	/* runs on exception stack */
 ENTRY(nmi)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq_cfi $-1
+	/*
+	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
+	 * the iretq it performs will take us out of NMI context.
+	 * This means that we can have nested NMIs where the next
+	 * NMI is using the top of the stack of the previous NMI. We
+	 * can't let it execute because the nested NMI will corrupt the
+	 * stack of the previous NMI. NMI handlers are not re-entrant
+	 * anyway.
+	 *
+	 * To handle this case we do the following:
+	 *  Check a special location on the stack that contains
+	 *  a variable that is set when NMIs are executing.
+	 *  The interrupted task's stack is also checked to see if it
+	 *  is an NMI stack.
+	 *  If the variable is not set and the stack is not the NMI
+	 *  stack then:
+	 *    o Set the special variable on the stack
+	 *    o Copy the interrupt frame into a "saved" location on the stack
+	 *    o Copy the interrupt frame into a "copy" location on the stack
+	 *    o Continue processing the NMI
+	 *  If the variable is set or the previous stack is the NMI stack:
+	 *    o Modify the "copy" location to jump to repeat_nmi
+	 *    o Return back to the first NMI
+	 *
+	 * Now on exit of the first NMI, we first clear the stack variable.
+	 * The NMI stack will tell any nested NMIs at that point that it is
+	 * nested. Then we pop the stack normally with iret, and if there was
+	 * a nested NMI that updated the copy interrupt stack frame, a
+	 * jump will be made to the repeat_nmi code that will handle the second
+	 * NMI.
+	 */
+
+	/* Use %rdx as our temp variable throughout */
+	pushq_cfi %rdx
+
+	/*
+	 * Check the special variable on the stack to see if NMIs are
+	 * executing.
+	 */
+	cmp $1, -8(%rsp)
+	je nested_nmi
+
+	/*
+	 * Now test if the previous stack was an NMI stack.
+	 * We need the double check. We check the NMI stack to satisfy the
+	 * race when the first NMI clears the variable before returning.
+	 * We check the variable because the first NMI could be in a
+	 * breakpoint routine using a breakpoint stack.
+	 */
+	lea 6*8(%rsp), %rdx
+	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+
+nested_nmi:
+	/*
+	 * Do nothing if we interrupted the fixup in repeat_nmi.
+	 * It's about to repeat the NMI handler, so we are fine
+	 * with ignoring this one.
+	 */
+	movq $repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja 1f
+	movq $end_repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja nested_nmi_out
+
+1:
+	/* Set up the interrupted NMI's stack to jump to repeat_nmi */
+	leaq -6*8(%rsp), %rdx
+	movq %rdx, %rsp
+	CFI_ADJUST_CFA_OFFSET 6*8
+	pushq_cfi $__KERNEL_DS
+	pushq_cfi %rdx
+	pushfq_cfi
+	pushq_cfi $__KERNEL_CS
+	pushq_cfi $repeat_nmi
+
+	/* Put stack back */
+	addq $(11*8), %rsp
+	CFI_ADJUST_CFA_OFFSET -11*8
+
+nested_nmi_out:
+	popq_cfi %rdx
+
+	/* No need to check faults here */
+	INTERRUPT_RETURN
+
+first_nmi:
+	/*
+	 * Because nested NMIs will use the pushed location that we
+	 * stored in rdx, we must keep that space available.
+	 * Here's what our stack frame will look like:
+	 * +-------------------------+
+	 * | original SS             |
+	 * | original Return RSP     |
+	 * | original RFLAGS         |
+	 * | original CS             |
+	 * | original RIP            |
+	 * +-------------------------+
+	 * | temp storage for rdx    |
+	 * +-------------------------+
+	 * | NMI executing variable  |
+	 * +-------------------------+
+	 * | Saved SS                |
+	 * | Saved Return RSP        |
+	 * | Saved RFLAGS            |
+	 * | Saved CS                |
+	 * | Saved RIP               |
+	 * +-------------------------+
+	 * | copied SS               |
+	 * | copied Return RSP       |
+	 * | copied RFLAGS           |
+	 * | copied CS               |
+	 * | copied RIP              |
+	 * +-------------------------+
+	 * | pt_regs                 |
+	 * +-------------------------+
+	 *
+	 * The saved RIP is used to fix up the copied RIP that a nested
+	 * NMI may zero out. The original stack frame and the temp storage
+	 * are also used by nested NMIs and cannot be trusted on exit.
+	 */
+	/* Set the NMI executing variable on the stack. */
+	pushq_cfi $1
+
+	/* Copy the stack frame to the Saved frame */
+	.rept 5
+	pushq_cfi 6*8(%rsp)
+	.endr
+
+	/* Make another copy, this one may be modified by nested NMIs */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	/* Do not pop rdx, nested NMIs will corrupt it */
+	movq 11*8(%rsp), %rdx
+
+	/*
+	 * Everything below this point can be preempted by a nested
+	 * NMI if the first NMI took an exception. Repeated NMIs
+	 * caused by an exception and nested NMI will start here, and
+	 * can still be preempted by another NMI.
+	 */
+restart_nmi:
+	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	/*
+	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+	 * as we should not be calling schedule in NMI context,
+	 * even with normal interrupts enabled. An NMI should not be
+	 * setting NEED_RESCHED or anything that normal interrupts and
+	 * exceptions might do.
+	 */
 	call save_paranoid
 	DEFAULT_FRAME 0
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq %rsp,%rdi
 	movq $-1,%rsi
 	call do_nmi
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* paranoidexit; without TRACE_IRQS_OFF */
-	/* ebx:	no swapgs flag */
-	DISABLE_INTERRUPTS(CLBR_NONE)
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz nmi_restore
-	testl $3,CS(%rsp)
-	jnz nmi_userspace
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
 	RESTORE_ALL 8
+	/* Clear the NMI executing stack variable */
+	movq $0, 10*8(%rsp)
 	jmp irq_return
-nmi_userspace:
-	GET_THREAD_INFO(%rcx)
-	movl TI_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz nmi_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz nmi_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	xorl %esi,%esi 			/* arg2: oldset */
-	movq %rsp,%rdi 			/* arg1: &pt_regs */
-	call do_notify_resume
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	jmp nmi_userspace
-nmi_schedule:
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	jmp nmi_userspace
-	CFI_ENDPROC
-#else
-	jmp paranoid_exit
 	CFI_ENDPROC
-#endif
 END(nmi)
 
+	/*
+	 * If an NMI hit an iret because of an exception or breakpoint,
+	 * it can lose its NMI context, and a nested NMI may come in.
+	 * In that case, the nested NMI will change the preempted NMI's
+	 * stack to jump to here when it does the final iret.
+	 */
+repeat_nmi:
+	INTR_FRAME
+	/* Update the stack variable to say we are still in NMI */
+	movq $1, 5*8(%rsp)
+
+	/* copy the saved stack back to copy stack */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	jmp restart_nmi
+	CFI_ENDPROC
+end_repeat_nmi:
+
 ENTRY(ignore_sysret)
 	CFI_STARTPROC
 	mov $-ENOSYS,%eax

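The test_in_nmi macro above is just an unsigned range check against the NMI exception stack; a C rendering of the same test, as a sketch (the function name is invented, EXCEPTION_STKSZ comes from the kernel headers):

/* Returns non-zero when "sp" (the interrupted stack pointer) lies within
 * the NMI exception stack whose highest address is "nmi_stack_top",
 * matching the cmpq/ja/jb sequence in the test_in_nmi macro. */
static inline int sp_in_nmi_stack(unsigned long sp, unsigned long nmi_stack_top)
{
	return sp <= nmi_stack_top &&
	       sp >= nmi_stack_top - EXCEPTION_STKSZ;
}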
+ 4 - 0
arch/x86/kernel/head_64.S

@@ -417,6 +417,10 @@ ENTRY(phys_base)
 ENTRY(idt_table)
 	.skip IDT_ENTRIES * 16
 
+	.align L1_CACHE_BYTES
+ENTRY(nmi_idt_table)
+	.skip IDT_ENTRIES * 16
+
 	__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 ENTRY(empty_zero_page)

+ 102 - 0
arch/x86/kernel/nmi.c

@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		unknown_nmi_error(reason, regs);
 }
 
+/*
+ * NMIs can hit breakpoints, which will cause them to lose their
+ * NMI context with the CPU when the breakpoint does an iret.
+ */
+#ifdef CONFIG_X86_32
+/*
+ * For i386, NMIs use the same stack as the kernel, and we can
+ * add a workaround to the iret problem in C. Simply have 3 states
+ * the NMI can be in.
+ *
+ *  1) not running
+ *  2) executing
+ *  3) latched
+ *
+ * When no NMI is in progress, it is in the "not running" state.
+ * When an NMI comes in, it goes into the "executing" state.
+ * Normally, if another NMI is triggered, it does not interrupt
+ * the running NMI and the HW will simply latch it so that when
+ * the first NMI finishes, it will restart the second NMI.
+ * (Note, the latch is binary, thus multiple NMIs triggering,
+ *  when one is running, are ignored. Only one NMI is restarted.)
+ *
+ * If an NMI hits a breakpoint that executes an iret, another
+ * NMI can preempt it. We do not want to allow this new NMI
+ * to run, but we want to execute it when the first one finishes.
+ * We set the state to "latched", and the first NMI will perform
+ * a cmpxchg on the state, and if it doesn't successfully
+ * reset the state to "not running" it will restart the next
+ * NMI.
+ */
+enum nmi_states {
+	NMI_NOT_RUNNING,
+	NMI_EXECUTING,
+	NMI_LATCHED,
+};
+static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+
+#define nmi_nesting_preprocess(regs)					\
+	do {								\
+		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
+			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
+			return;						\
+		}							\
+	nmi_restart:							\
+		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
+	} while (0)
+
+#define nmi_nesting_postprocess()					\
+	do {								\
+		if (cmpxchg(&__get_cpu_var(nmi_state),			\
+		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
+			goto nmi_restart;				\
+	} while (0)
+#else /* x86_64 */
+/*
+ * In x86_64 things are a bit more difficult. This has the same problem
+ * where an NMI hitting a breakpoint that calls iret will remove the
+ * NMI context, allowing a nested NMI to enter. What makes this more
+ * difficult is that both NMIs and breakpoints have their own stack.
+ * When a new NMI or breakpoint is executed, the stack is set to a fixed
+ * point. If an NMI is nested, it will have its stack set at that same
+ * fixed address that the first NMI had, and will start corrupting the
+ * stack. This is handled in entry_64.S, but the same problem exists with
+ * the breakpoint stack.
+ *
+ * If a breakpoint is being processed, and the debug stack is being used,
+ * if an NMI comes in and also hits a breakpoint, the stack pointer
+ * will be set to the same fixed address as the breakpoint that was
+ * interrupted, causing that stack to be corrupted. To handle this case,
+ * check if the stack that was interrupted is the debug stack, and if
+ * so, change the IDT so that new breakpoints will use the current stack
+ * and not switch to the fixed address. On return of the NMI, switch back
+ * to the original IDT.
+ */
+static DEFINE_PER_CPU(int, update_debug_stack);
+
+static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+{
+	/*
+	 * If we interrupted a breakpoint, it is possible that
+	 * the nmi handler will have breakpoints too. We need to
+	 * change the IDT such that breakpoints that happen here
+	 * continue to use the NMI stack.
+	 */
+	if (unlikely(is_debug_stack(regs->sp))) {
+		debug_stack_set_zero();
+		__get_cpu_var(update_debug_stack) = 1;
+	}
+}
+
+static inline void nmi_nesting_postprocess(void)
+{
+	if (unlikely(__get_cpu_var(update_debug_stack)))
+		debug_stack_reset();
+}
+#endif
+
 dotraplinkage notrace __kprobes void
 do_nmi(struct pt_regs *regs, long error_code)
 {
+	nmi_nesting_preprocess(regs);
+
 	nmi_enter();
 
 	inc_irq_stat(__nmi_count);
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
 		default_do_nmi(regs);
 
 	nmi_exit();
+
+	/* On i386, may loop back to preprocess */
+	nmi_nesting_postprocess();
 }
 
 void stop_nmi(void)

+ 20 - 0
arch/x86/kernel/traps.c

@@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 			== NOTIFY_STOP)
 		return;
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 }
 
 #ifdef CONFIG_X86_64
@@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 							SIGTRAP) == NOTIFY_STOP)
 		return;
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
+
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
@@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 		handle_vm86_trap((struct kernel_vm86_regs *) regs,
 				error_code, 1);
 		preempt_conditional_cli(regs);
+		debug_stack_usage_dec();
 		return;
 	}
 
@@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 
 	return;
 }
@@ -718,4 +732,10 @@ void __init trap_init(void)
 	cpu_init();
 
 	x86_init.irqs.trap_init();
+
+#ifdef CONFIG_X86_64
+	memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
+	set_nmi_gate(1, &debug);
+	set_nmi_gate(3, &int3);
+#endif
 }

+ 5 - 0
include/linux/compiler-gcc.h

@@ -50,6 +50,11 @@
 # define inline		inline		__attribute__((always_inline))
 # define __inline__	__inline__	__attribute__((always_inline))
 # define __inline	__inline	__attribute__((always_inline))
+#else
+/* A lot of inline functions can cause havoc with function tracing */
+# define inline		inline		notrace
+# define __inline__	__inline__	notrace
+# define __inline	__inline	notrace
 #endif
 
 #define __deprecated			__attribute__((deprecated))

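The one-line comment in that hunk is terse; the point is that when gcc decides not to honor a plain inline hint, the resulting out-of-line function would otherwise get an mcount call site of its own. A hedged sketch of what the redefinition effectively produces (the helper is hypothetical; notrace is the kernel's existing no-instrumentation annotation):

/* With the #else branch above, a plain "inline" helper that gcc emits
 * out of line still carries notrace, so it never becomes an ftrace
 * call site. */
static inline int example_helper(int x)	/* treated as: inline notrace */
{
	return x * 2;
}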
+ 72 - 5
include/linux/ftrace.h

@@ -133,6 +133,8 @@ struct ftrace_func_command {
 int ftrace_arch_code_modify_prepare(void);
 int ftrace_arch_code_modify_post_process(void);
 
+void ftrace_bug(int err, unsigned long ip);
+
 struct seq_file;
 
 struct ftrace_probe_ops {
@@ -161,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end);
 
 enum {
 	FTRACE_FL_ENABLED	= (1 << 30),
-	FTRACE_FL_FREE		= (1 << 31),
 };
 
 #define FTRACE_FL_MASK		(0x3UL << 30)
@@ -172,10 +173,7 @@ struct dyn_ftrace {
 		unsigned long		ip; /* address of mcount call-site */
 		struct dyn_ftrace	*freelist;
 	};
-	union {
-		unsigned long		flags;
-		struct dyn_ftrace	*newlist;
-	};
+	unsigned long		flags;
 	struct dyn_arch_ftrace		arch;
 };
 
@@ -190,6 +188,56 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
 
+enum {
+	FTRACE_UPDATE_CALLS		= (1 << 0),
+	FTRACE_DISABLE_CALLS		= (1 << 1),
+	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
+	FTRACE_START_FUNC_RET		= (1 << 3),
+	FTRACE_STOP_FUNC_RET		= (1 << 4),
+};
+
+enum {
+	FTRACE_UPDATE_IGNORE,
+	FTRACE_UPDATE_MAKE_CALL,
+	FTRACE_UPDATE_MAKE_NOP,
+};
+
+enum {
+	FTRACE_ITER_FILTER	= (1 << 0),
+	FTRACE_ITER_NOTRACE	= (1 << 1),
+	FTRACE_ITER_PRINTALL	= (1 << 2),
+	FTRACE_ITER_DO_HASH	= (1 << 3),
+	FTRACE_ITER_HASH	= (1 << 4),
+	FTRACE_ITER_ENABLED	= (1 << 5),
+};
+
+void arch_ftrace_update_code(int command);
+
+struct ftrace_rec_iter;
+
+struct ftrace_rec_iter *ftrace_rec_iter_start(void);
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
+
+int ftrace_update_record(struct dyn_ftrace *rec, int enable);
+int ftrace_test_record(struct dyn_ftrace *rec, int enable);
+void ftrace_run_stop_machine(int command);
+int ftrace_location(unsigned long ip);
+
+extern ftrace_func_t ftrace_trace_function;
+
+int ftrace_regex_open(struct ftrace_ops *ops, int flag,
+		  struct inode *inode, struct file *file);
+ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos);
+ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos);
+loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
+int ftrace_regex_release(struct inode *inode, struct file *file);
+
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
+
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
@@ -284,6 +332,25 @@ static inline int ftrace_text_reserved(void *start, void *end)
 {
 	return 0;
 }
+
+/*
+ * Again users of functions that have ftrace_ops may not
+ * have them defined when ftrace is not enabled, but these
+ * functions may still be called. Use a macro instead of inline.
+ */
+#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
+#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
+
+static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+{
+	return -ENODEV;
+}
+static inline int
+ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */

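The new iterator and record-update declarations above are intended for arch code; a hedged sketch of how an architecture might walk all call sites from its arch_ftrace_update_code() override, modelled on __ftrace_replace_code() in the ftrace.c hunk below (the actual instruction patching is elided and marked as such; this is not code from the commit):

#include <linux/ftrace.h>

/* Hypothetical arch-side override: iterate every record with the new
 * iterator API and apply the decision returned by ftrace_update_record().
 * Runs under ftrace_lock, which is what makes the single static iterator
 * in ftrace_rec_iter_start() safe. A real arch must also provide its own
 * synchronization in place of stop_machine(). */
void arch_ftrace_update_code(int command)
{
	int enable = !!(command & FTRACE_UPDATE_CALLS);
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;

	if (!(command & (FTRACE_UPDATE_CALLS | FTRACE_DISABLE_CALLS)))
		return;	/* only the call-update commands are sketched here */

	for (iter = ftrace_rec_iter_start(); iter;
	     iter = ftrace_rec_iter_next(iter)) {
		rec = ftrace_rec_iter_record(iter);

		switch (ftrace_update_record(rec, enable)) {
		case FTRACE_UPDATE_IGNORE:
			break;
		case FTRACE_UPDATE_MAKE_CALL:
			/* arch-specific: patch rec->ip to call the tracer */
			break;
		case FTRACE_UPDATE_MAKE_NOP:
			/* arch-specific: patch rec->ip back to a nop */
			break;
		}
	}
}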
+ 512 - 203
kernel/trace/ftrace.c

@@ -22,11 +22,13 @@
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
+#include <linux/bsearch.h>
 #include <linux/module.h>
 #include <linux/ftrace.h>
 #include <linux/sysctl.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/sort.h>
 #include <linux/list.h>
 #include <linux/hash.h>
 #include <linux/rcupdate.h>
@@ -947,13 +949,6 @@ struct ftrace_func_probe {
 	struct rcu_head		rcu;
 };
 
-enum {
-	FTRACE_ENABLE_CALLS		= (1 << 0),
-	FTRACE_DISABLE_CALLS		= (1 << 1),
-	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
-	FTRACE_START_FUNC_RET		= (1 << 3),
-	FTRACE_STOP_FUNC_RET		= (1 << 4),
-};
 struct ftrace_func_entry {
 	struct hlist_node hlist;
 	unsigned long ip;
@@ -984,18 +979,19 @@ static struct ftrace_ops global_ops = {
 	.filter_hash		= EMPTY_HASH,
 };
 
-static struct dyn_ftrace *ftrace_new_addrs;
-
 static DEFINE_MUTEX(ftrace_regex_lock);
 
 struct ftrace_page {
 	struct ftrace_page	*next;
+	struct dyn_ftrace	*records;
 	int			index;
-	struct dyn_ftrace	records[];
+	int			size;
 };
 
-#define ENTRIES_PER_PAGE \
-  ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
+static struct ftrace_page *ftrace_new_pgs;
+
+#define ENTRY_SIZE sizeof(struct dyn_ftrace)
+#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
 
 /* estimate from running different kernels */
 #define NR_TO_INIT		10000
@@ -1003,7 +999,10 @@ struct ftrace_page {
 static struct ftrace_page	*ftrace_pages_start;
 static struct ftrace_page	*ftrace_pages;
 
-static struct dyn_ftrace *ftrace_free_records;
+static bool ftrace_hash_empty(struct ftrace_hash *hash)
+{
+	return !hash || !hash->count;
+}
 
 static struct ftrace_func_entry *
 ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
@@ -1013,7 +1012,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 	struct hlist_head *hhd;
 	struct hlist_node *n;
 
-	if (!hash->count)
+	if (ftrace_hash_empty(hash))
 		return NULL;
 
 	if (hash->size_bits > 0)
@@ -1157,7 +1156,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
 		return NULL;
 
 	/* Empty hash? */
-	if (!hash || !hash->count)
+	if (ftrace_hash_empty(hash))
 		return new_hash;
 
 	size = 1 << hash->size_bits;
@@ -1282,9 +1281,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 	filter_hash = rcu_dereference_raw(ops->filter_hash);
 	notrace_hash = rcu_dereference_raw(ops->notrace_hash);
 
-	if ((!filter_hash || !filter_hash->count ||
+	if ((ftrace_hash_empty(filter_hash) ||
 	     ftrace_lookup_ip(filter_hash, ip)) &&
-	    (!notrace_hash || !notrace_hash->count ||
+	    (ftrace_hash_empty(notrace_hash) ||
 	     !ftrace_lookup_ip(notrace_hash, ip)))
 		ret = 1;
 	else
@@ -1307,6 +1306,47 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 		}				\
 	}
 
+
+static int ftrace_cmp_recs(const void *a, const void *b)
+{
+	const struct dyn_ftrace *reca = a;
+	const struct dyn_ftrace *recb = b;
+
+	if (reca->ip > recb->ip)
+		return 1;
+	if (reca->ip < recb->ip)
+		return -1;
+	return 0;
+}
+
+/**
+ * ftrace_location - return true if the ip given is a traced location
+ * @ip: the instruction pointer to check
+ *
+ * Returns 1 if @ip given is a pointer to a ftrace location.
+ * That is, the instruction that is either a NOP or call to
+ * the function tracer. It checks the ftrace internal tables to
+ * determine if the address belongs or not.
+ */
+int ftrace_location(unsigned long ip)
+{
+	struct ftrace_page *pg;
+	struct dyn_ftrace *rec;
+	struct dyn_ftrace key;
+
+	key.ip = ip;
+
+	for (pg = ftrace_pages_start; pg; pg = pg->next) {
+		rec = bsearch(&key, pg->records, pg->index,
+			      sizeof(struct dyn_ftrace),
+			      ftrace_cmp_recs);
+		if (rec)
+			return 1;
+	}
+
+	return 0;
+}
+
 static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 				     int filter_hash,
 				     bool inc)
@@ -1336,7 +1376,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 	if (filter_hash) {
 		hash = ops->filter_hash;
 		other_hash = ops->notrace_hash;
-		if (!hash || !hash->count)
+		if (ftrace_hash_empty(hash))
 			all = 1;
 	} else {
 		inc = !inc;
@@ -1346,7 +1386,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 		 * If the notrace hash has no items,
 		 * then there's nothing to do.
 		 */
-		if (hash && !hash->count)
+		if (ftrace_hash_empty(hash))
 			return;
 	}
 
@@ -1363,8 +1403,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 			if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
 				match = 1;
 		} else {
-			in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip);
-			in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip);
+			in_hash = !!ftrace_lookup_ip(hash, rec->ip);
+			in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
 
 			/*
 			 *
@@ -1372,7 +1412,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 			if (filter_hash && in_hash && !in_other_hash)
 				match = 1;
 			else if (!filter_hash && in_hash &&
-				 (in_other_hash || !other_hash->count))
+				 (in_other_hash || ftrace_hash_empty(other_hash)))
 				match = 1;
 		}
 		if (!match)
@@ -1406,40 +1446,12 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
 	__ftrace_hash_rec_update(ops, filter_hash, 1);
 }
 
-static void ftrace_free_rec(struct dyn_ftrace *rec)
-{
-	rec->freelist = ftrace_free_records;
-	ftrace_free_records = rec;
-	rec->flags |= FTRACE_FL_FREE;
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
-	struct dyn_ftrace *rec;
-
-	/* First check for freed records */
-	if (ftrace_free_records) {
-		rec = ftrace_free_records;
-
-		if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
-			FTRACE_WARN_ON_ONCE(1);
-			ftrace_free_records = NULL;
+	if (ftrace_pages->index == ftrace_pages->size) {
+		/* We should have allocated enough */
+		if (WARN_ON(!ftrace_pages->next))
 			return NULL;
-		}
-
-		ftrace_free_records = rec->freelist;
-		memset(rec, 0, sizeof(*rec));
-		return rec;
-	}
-
-	if (ftrace_pages->index == ENTRIES_PER_PAGE) {
-		if (!ftrace_pages->next) {
-			/* allocate another page */
-			ftrace_pages->next =
-				(void *)get_zeroed_page(GFP_KERNEL);
-			if (!ftrace_pages->next)
-				return NULL;
-		}
 		ftrace_pages = ftrace_pages->next;
 	}
 
@@ -1459,8 +1471,6 @@ ftrace_record_ip(unsigned long ip)
 		return NULL;
 
 	rec->ip = ip;
-	rec->newlist = ftrace_new_addrs;
-	ftrace_new_addrs = rec;
 
 	return rec;
 }
@@ -1475,7 +1485,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
 		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
 }
 
-static void ftrace_bug(int failed, unsigned long ip)
+/**
+ * ftrace_bug - report and shutdown function tracer
+ * @failed: The failed type (EFAULT, EINVAL, EPERM)
+ * @ip: The address that failed
+ *
+ * The arch code that enables or disables the function tracing
+ * can call ftrace_bug() when it has detected a problem in
+ * modifying the code. @failed should be one of either:
+ * EFAULT - if the problem happens on reading the @ip address
+ * EINVAL - if what is read at @ip is not what was expected
+ * EPERM - if the problem happens on writing to the @ip address
+ */
+void ftrace_bug(int failed, unsigned long ip)
 {
 	switch (failed) {
 	case -EFAULT:
@@ -1517,24 +1539,19 @@ int ftrace_text_reserved(void *start, void *end)
 	return 0;
 }
 
-
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 {
-	unsigned long ftrace_addr;
 	unsigned long flag = 0UL;
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
-
 	/*
-	 * If we are enabling tracing:
+	 * If we are updating calls:
 	 *
 	 *   If the record has a ref count, then we need to enable it
 	 *   because someone is using it.
 	 *
 	 *   Otherwise we make sure its disabled.
 	 *
-	 * If we are disabling tracing, then disable all records that
+	 * If we are disabling calls, then disable all records that
 	 * are enabled.
 	 */
 	if (enable && (rec->flags & ~FTRACE_FL_MASK))
@@ -1542,18 +1559,72 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 
 	/* If the state of this record hasn't changed, then do nothing */
 	if ((rec->flags & FTRACE_FL_ENABLED) == flag)
-		return 0;
+		return FTRACE_UPDATE_IGNORE;
 
 	if (flag) {
-		rec->flags |= FTRACE_FL_ENABLED;
+		if (update)
+			rec->flags |= FTRACE_FL_ENABLED;
+		return FTRACE_UPDATE_MAKE_CALL;
+	}
+
+	if (update)
+		rec->flags &= ~FTRACE_FL_ENABLED;
+
+	return FTRACE_UPDATE_MAKE_NOP;
+}
+
+/**
+ * ftrace_update_record, set a record that now is tracing or not
+ * @rec: the record to update
+ * @enable: set to 1 if the record is tracing, zero to force disable
+ *
+ * The records that represent all functions that can be traced need
+ * to be updated when tracing has been enabled.
+ */
+int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+{
+	return ftrace_check_record(rec, enable, 1);
+}
+
+/**
+ * ftrace_test_record, check if the record has been enabled or not
+ * @rec: the record to test
+ * @enable: set to 1 to check if enabled, 0 if it is disabled
+ *
+ * The arch code may need to test if a record is already set to
+ * tracing to determine how to modify the function code that it
+ * represents.
+ */
+int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+{
+	return ftrace_check_record(rec, enable, 0);
+}
+
+static int
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	ret = ftrace_update_record(rec, enable);
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
 		return ftrace_make_call(rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		return ftrace_make_nop(NULL, rec, ftrace_addr);
 	}
 
-	rec->flags &= ~FTRACE_FL_ENABLED;
-	return ftrace_make_nop(NULL, rec, ftrace_addr);
+	return -1; /* unknown ftrace bug */
 }
 
-static void ftrace_replace_code(int enable)
+static void ftrace_replace_code(int update)
 {
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
@@ -1563,11 +1634,7 @@ static void ftrace_replace_code(int enable)
 		return;
 
 	do_for_each_ftrace_rec(pg, rec) {
-		/* Skip over free records */
-		if (rec->flags & FTRACE_FL_FREE)
-			continue;
-
-		failed = __ftrace_replace_code(rec, enable);
+		failed = __ftrace_replace_code(rec, update);
 		if (failed) {
 			ftrace_bug(failed, rec->ip);
 			/* Stop processing */
@@ -1576,6 +1643,78 @@ static void ftrace_replace_code(int enable)
 	} while_for_each_ftrace_rec();
 }
 
+struct ftrace_rec_iter {
+	struct ftrace_page	*pg;
+	int			index;
+};
+
+/**
+ * ftrace_rec_iter_start, start up iterating over traced functions
+ *
+ * Returns an iterator handle that is used to iterate over all
+ * the records that represent address locations where functions
+ * are traced.
+ *
+ * May return NULL if no records are available.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_start(void)
+{
+	/*
+	 * We only use a single iterator.
+	 * Protected by the ftrace_lock mutex.
+	 */
+	static struct ftrace_rec_iter ftrace_rec_iter;
+	struct ftrace_rec_iter *iter = &ftrace_rec_iter;
+
+	iter->pg = ftrace_pages_start;
+	iter->index = 0;
+
+	/* Could have empty pages */
+	while (iter->pg && !iter->pg->index)
+		iter->pg = iter->pg->next;
+
+	if (!iter->pg)
+		return NULL;
+
+	return iter;
+}
+
+/**
+ * ftrace_rec_iter_next, get the next record to process.
+ * @iter: The handle to the iterator.
+ *
+ * Returns the next iterator after the given iterator @iter.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
+{
+	iter->index++;
+
+	if (iter->index >= iter->pg->index) {
+		iter->pg = iter->pg->next;
+		iter->index = 0;
+
+		/* Could have empty pages */
+		while (iter->pg && !iter->pg->index)
+			iter->pg = iter->pg->next;
+	}
+
+	if (!iter->pg)
+		return NULL;
+
+	return iter;
+}
+
+/**
+ * ftrace_rec_iter_record, get the record at the iterator location
+ * @iter: The current iterator location
+ *
+ * Returns the record that the current @iter is at.
+ */
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
+{
+	return &iter->pg->records[iter->index];
+}
+
 static int
 ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
 {
@@ -1617,13 +1756,7 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	/*
-	 * Do not call function tracer while we update the code.
-	 * We are in stop machine, no worrying about races.
-	 */
-	function_trace_stop++;
-
-	if (*command & FTRACE_ENABLE_CALLS)
+	if (*command & FTRACE_UPDATE_CALLS)
 		ftrace_replace_code(1);
 	else if (*command & FTRACE_DISABLE_CALLS)
 		ftrace_replace_code(0);
@@ -1636,21 +1769,33 @@ static int __ftrace_modify_code(void *data)
 	else if (*command & FTRACE_STOP_FUNC_RET)
 		ftrace_disable_ftrace_graph_caller();
 
-#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
-	/*
-	 * For archs that call ftrace_test_stop_func(), we must
-	 * wait till after we update all the function callers
-	 * before we update the callback. This keeps different
-	 * ops that record different functions from corrupting
-	 * each other.
-	 */
-	__ftrace_trace_function = __ftrace_trace_function_delay;
-#endif
-	function_trace_stop--;
-
 	return 0;
 }
 
+/**
+ * ftrace_run_stop_machine, go back to the stop machine method
+ * @command: The command to tell ftrace what to do
+ *
+ * If an arch needs to fall back to the stop machine method, then
+ * it can call this function.
+ */
+void ftrace_run_stop_machine(int command)
+{
+	stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
+/**
+ * arch_ftrace_update_code, modify the code to trace or not trace
+ * @command: The command that needs to be done
+ *
+ * Archs can override this function if they do not need to
+ * run stop_machine() to modify code.
+ */
+void __weak arch_ftrace_update_code(int command)
+{
+	ftrace_run_stop_machine(command);
+}
+
 static void ftrace_run_update_code(int command)
 {
 	int ret;
@@ -1659,8 +1804,31 @@ static void ftrace_run_update_code(int command)
 	FTRACE_WARN_ON(ret);
 	if (ret)
 		return;
+	/*
+	 * Do not call function tracer while we update the code.
+	 * We are in stop machine.
+	 */
+	function_trace_stop++;
 
-	stop_machine(__ftrace_modify_code, &command, NULL);
+	/*
+	 * By default we use stop_machine() to modify the code.
+	 * But archs can do what ever they want as long as it
+	 * But archs can do whatever they want as long as it
+	 * produces the most overhead.
+	 */
+	arch_ftrace_update_code(command);
+
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	/*
+	 * For archs that call ftrace_test_stop_func(), we must
+	 * wait till after we update all the function callers
+	 * before we update the callback. This keeps different
+	 * ops that record different functions from corrupting
+	 * each other.
+	 */
+	__ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+	function_trace_stop--;
 
 	ret = ftrace_arch_code_modify_post_process();
 	FTRACE_WARN_ON(ret);
@@ -1691,7 +1859,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
 		return -ENODEV;
 
 	ftrace_start_up++;
-	command |= FTRACE_ENABLE_CALLS;
+	command |= FTRACE_UPDATE_CALLS;
 
 	/* ops marked global share the filter hashes */
 	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
@@ -1743,8 +1911,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
 	if (ops != &global_ops || !global_start_up)
 		ops->flags &= ~FTRACE_OPS_FL_ENABLED;
 
-	if (!ftrace_start_up)
-		command |= FTRACE_DISABLE_CALLS;
+	command |= FTRACE_UPDATE_CALLS;
 
 	if (saved_ftrace_func != ftrace_trace_function) {
 		saved_ftrace_func = ftrace_trace_function;
@@ -1766,7 +1933,7 @@ static void ftrace_startup_sysctl(void)
 	saved_ftrace_func = NULL;
 	/* ftrace_start_up is true if we want ftrace running */
 	if (ftrace_start_up)
-		ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+		ftrace_run_update_code(FTRACE_UPDATE_CALLS);
 }
 
 static void ftrace_shutdown_sysctl(void)
@@ -1788,14 +1955,16 @@ static int ops_traces_mod(struct ftrace_ops *ops)
 	struct ftrace_hash *hash;
 
 	hash = ops->filter_hash;
-	return !!(!hash || !hash->count);
+	return ftrace_hash_empty(hash);
 }
 
 static int ftrace_update_code(struct module *mod)
 {
+	struct ftrace_page *pg;
 	struct dyn_ftrace *p;
 	cycle_t start, stop;
 	unsigned long ref = 0;
+	int i;
 
 	/*
 	 * When adding a module, we need to check if tracers are
@@ -1817,46 +1986,44 @@ static int ftrace_update_code(struct module *mod)
 	start = ftrace_now(raw_smp_processor_id());
 	ftrace_update_cnt = 0;
 
-	while (ftrace_new_addrs) {
+	for (pg = ftrace_new_pgs; pg; pg = pg->next) {
 
-		/* If something went wrong, bail without enabling anything */
-		if (unlikely(ftrace_disabled))
-			return -1;
+		for (i = 0; i < pg->index; i++) {
+			/* If something went wrong, bail without enabling anything */
+			if (unlikely(ftrace_disabled))
+				return -1;
 
-		p = ftrace_new_addrs;
-		ftrace_new_addrs = p->newlist;
-		p->flags = ref;
+			p = &pg->records[i];
+			p->flags = ref;
 
-		/*
-		 * Do the initial record conversion from mcount jump
-		 * to the NOP instructions.
-		 */
-		if (!ftrace_code_disable(mod, p)) {
-			ftrace_free_rec(p);
-			/* Game over */
-			break;
-		}
+			/*
+			 * Do the initial record conversion from mcount jump
+			 * to the NOP instructions.
+			 */
+			if (!ftrace_code_disable(mod, p))
+				break;
 
-		ftrace_update_cnt++;
+			ftrace_update_cnt++;
 
-		/*
-		 * If the tracing is enabled, go ahead and enable the record.
-		 *
-		 * The reason not to enable the record immediatelly is the
-		 * inherent check of ftrace_make_nop/ftrace_make_call for
-		 * correct previous instructions.  Making first the NOP
-		 * conversion puts the module to the correct state, thus
-		 * passing the ftrace_make_call check.
-		 */
-		if (ftrace_start_up && ref) {
-			int failed = __ftrace_replace_code(p, 1);
-			if (failed) {
-				ftrace_bug(failed, p->ip);
-				ftrace_free_rec(p);
+			/*
+			 * If the tracing is enabled, go ahead and enable the record.
+			 *
+			 * The reason not to enable the record immediatelly is the
+			 * inherent check of ftrace_make_nop/ftrace_make_call for
+			 * correct previous instructions.  Making first the NOP
+			 * conversion puts the module to the correct state, thus
+			 * passing the ftrace_make_call check.
+			 */
+			if (ftrace_start_up && ref) {
+				int failed = __ftrace_replace_code(p, 1);
+				if (failed)
+					ftrace_bug(failed, p->ip);
 			}
 		}
 	}
 
+	ftrace_new_pgs = NULL;
+
 	stop = ftrace_now(raw_smp_processor_id());
 	ftrace_update_time = stop - start;
 	ftrace_update_tot_cnt += ftrace_update_cnt;
@@ -1864,57 +2031,108 @@ static int ftrace_update_code(struct module *mod)
 	return 0;
 }
 
-static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
+static int ftrace_allocate_records(struct ftrace_page *pg, int count)
 {
-	struct ftrace_page *pg;
+	int order;
 	int cnt;
-	int i;
 
-	/* allocate a few pages */
-	ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!ftrace_pages_start)
-		return -1;
+	if (WARN_ON(!count))
+		return -EINVAL;
+
+	order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
 
 	/*
-	 * Allocate a few more pages.
-	 *
-	 * TODO: have some parser search vmlinux before
-	 *   final linking to find all calls to ftrace.
-	 *   Then we can:
-	 *    a) know how many pages to allocate.
-	 *     and/or
-	 *    b) set up the table then.
-	 *
-	 *  The dynamic code is still necessary for
-	 *  modules.
+	 * We want to fill as much as possible. No more than a page
+	 * may be empty.
 	 */
+	while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
+		order--;
 
-	pg = ftrace_pages = ftrace_pages_start;
+ again:
+	pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
 
-	cnt = num_to_init / ENTRIES_PER_PAGE;
-	pr_info("ftrace: allocating %ld entries in %d pages\n",
-		num_to_init, cnt + 1);
+	if (!pg->records) {
+		/* if we can't allocate this size, try something smaller */
+		if (!order)
+			return -ENOMEM;
+		order >>= 1;
+		goto again;
+	}
 
-	for (i = 0; i < cnt; i++) {
-		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+	cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
+	pg->size = cnt;
 
-		/* If we fail, we'll try later anyway */
-		if (!pg->next)
+	if (cnt > count)
+		cnt = count;
+
+	return cnt;
+}
+
+static struct ftrace_page *
+ftrace_allocate_pages(unsigned long num_to_init)
+{
+	struct ftrace_page *start_pg;
+	struct ftrace_page *pg;
+	int order;
+	int cnt;
+
+	if (!num_to_init)
+		return 0;
+
+	start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
+	if (!pg)
+		return NULL;
+
+	/*
+	 * Try to allocate as much as possible in one contiguous
+	 * location that fills in all of the space. We want to
+	 * waste as little space as possible.
+	 */
+	for (;;) {
+		cnt = ftrace_allocate_records(pg, num_to_init);
+		if (cnt < 0)
+			goto free_pages;
+
+		num_to_init -= cnt;
+		if (!num_to_init)
 			break;
 
+		pg->next = kzalloc(sizeof(*pg), GFP_KERNEL);
+		if (!pg->next)
+			goto free_pages;
+
 		pg = pg->next;
 	}
 
-	return 0;
+	return start_pg;
+
+ free_pages:
+	while (start_pg) {
+		order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+		free_pages((unsigned long)pg->records, order);
+		start_pg = pg->next;
+		kfree(pg);
+		pg = start_pg;
+	}
+	pr_info("ftrace: FAILED to allocate memory for functions\n");
+	return NULL;
 }
 
-enum {
-	FTRACE_ITER_FILTER	= (1 << 0),
-	FTRACE_ITER_NOTRACE	= (1 << 1),
-	FTRACE_ITER_PRINTALL	= (1 << 2),
-	FTRACE_ITER_HASH	= (1 << 3),
-	FTRACE_ITER_ENABLED	= (1 << 4),
-};
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
+{
+	int cnt;
+
+	if (!num_to_init) {
+		pr_info("ftrace: No functions to be traced?\n");
+		return -1;
+	}
+
+	cnt = num_to_init / ENTRIES_PER_PAGE;
+	pr_info("ftrace: allocating %ld entries in %d pages\n",
+		num_to_init, cnt + 1);
+
+	return 0;
+}
 
 #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
 
@@ -1980,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
 	void *p = NULL;
 	loff_t l;
 
+	if (!(iter->flags & FTRACE_ITER_DO_HASH))
+		return NULL;
+
 	if (iter->func_pos > *pos)
 		return NULL;
 
@@ -2023,7 +2244,7 @@ static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
-	struct ftrace_ops *ops = &global_ops;
+	struct ftrace_ops *ops = iter->ops;
 	struct dyn_ftrace *rec = NULL;
 
 	if (unlikely(ftrace_disabled))
@@ -2047,9 +2268,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		}
 	} else {
 		rec = &iter->pg->records[iter->idx++];
-		if ((rec->flags & FTRACE_FL_FREE) ||
-
-		    ((iter->flags & FTRACE_ITER_FILTER) &&
+		if (((iter->flags & FTRACE_ITER_FILTER) &&
 		     !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
 
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
@@ -2081,7 +2300,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
-	struct ftrace_ops *ops = &global_ops;
+	struct ftrace_ops *ops = iter->ops;
 	void *p = NULL;
 	loff_t l;
 
@@ -2101,7 +2320,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	 * off, we can short cut and just print out that all
 	 * functions are enabled.
 	 */
-	if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) {
+	if (iter->flags & FTRACE_ITER_FILTER &&
+	    ftrace_hash_empty(ops->filter_hash)) {
 		if (*pos > 0)
 			return t_hash_start(m, pos);
 		iter->flags |= FTRACE_ITER_PRINTALL;
@@ -2126,12 +2346,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 			break;
 	}
 
-	if (!p) {
-		if (iter->flags & FTRACE_ITER_FILTER)
-			return t_hash_start(m, pos);
-
-		return NULL;
-	}
+	if (!p)
+		return t_hash_start(m, pos);
 
 	return iter;
 }
@@ -2189,6 +2405,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	iter->pg = ftrace_pages_start;
+	iter->ops = &global_ops;
 
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
@@ -2217,6 +2434,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
 
 	iter->pg = ftrace_pages_start;
 	iter->flags = FTRACE_ITER_ENABLED;
+	iter->ops = &global_ops;
 
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
@@ -2237,7 +2455,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
 	mutex_unlock(&ftrace_lock);
 }
 
-static int
+/**
+ * ftrace_regex_open - initialize function tracer filter files
+ * @ops: The ftrace_ops that hold the hash filters
+ * @flag: The type of filter to process
+ * @inode: The inode, usually passed in to your open routine
+ * @file: The file, usually passed in to your open routine
+ *
+ * ftrace_regex_open() initializes the filter files for the
+ * @ops. Depending on @flag it may process the filter hash or
+ * the notrace hash of @ops. With this called from the open
+ * routine, you can use ftrace_filter_write() for the write
+ * routine if @flag has FTRACE_ITER_FILTER set, or
+ * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
+ * ftrace_regex_lseek() should be used as the lseek routine, and
+ * release must call ftrace_regex_release().
+ */
+int
 ftrace_regex_open(struct ftrace_ops *ops, int flag,
 		  struct inode *inode, struct file *file)
 {
@@ -2306,8 +2540,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 static int
 ftrace_filter_open(struct inode *inode, struct file *file)
 {
-	return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
-				 inode, file);
+	return ftrace_regex_open(&global_ops,
+			FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
+			inode, file);
 }
 
 static int
@@ -2317,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 				 inode, file);
 }
 
-static loff_t
+loff_t
 ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
 {
 	loff_t ret;
@@ -2426,7 +2661,6 @@ match_records(struct ftrace_hash *hash, char *buff,
 		goto out_unlock;
 
 	do_for_each_ftrace_rec(pg, rec) {
-
 		if (ftrace_match_record(rec, mod, search, search_len, type)) {
 			ret = enter_record(hash, rec, not);
 			if (ret < 0) {
@@ -2871,14 +3105,14 @@ out_unlock:
 	return ret;
 }
 
-static ssize_t
+ssize_t
 ftrace_filter_write(struct file *file, const char __user *ubuf,
 		    size_t cnt, loff_t *ppos)
 {
 	return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
 }
 
-static ssize_t
+ssize_t
 ftrace_notrace_write(struct file *file, const char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
 {
@@ -2919,7 +3153,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
 	ret = ftrace_hash_move(ops, enable, orig_hash, hash);
 	if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
 	    && ftrace_enabled)
-		ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+		ftrace_run_update_code(FTRACE_UPDATE_CALLS);
 
 	mutex_unlock(&ftrace_lock);
 
@@ -3045,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf)
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-static void __init
-set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 {
 	char *func;
 
@@ -3059,17 +3293,16 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 static void __init set_ftrace_early_filters(void)
 {
 	if (ftrace_filter_buf[0])
-		set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
+		ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
 	if (ftrace_notrace_buf[0])
-		set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
+		ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (ftrace_graph_buf[0])
 		set_ftrace_early_graph(ftrace_graph_buf);
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 }
 
-static int
-ftrace_regex_release(struct inode *inode, struct file *file)
+int ftrace_regex_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = (struct seq_file *)file->private_data;
 	struct ftrace_iterator *iter;
@@ -3107,7 +3340,7 @@ ftrace_regex_release(struct inode *inode, struct file *file)
 				       orig_hash, iter->hash);
 		if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
 		    && ftrace_enabled)
-			ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+			ftrace_run_update_code(FTRACE_UPDATE_CALLS);
 
 		mutex_unlock(&ftrace_lock);
 	}
@@ -3270,9 +3503,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 
 	do_for_each_ftrace_rec(pg, rec) {
 
-		if (rec->flags & FTRACE_FL_FREE)
-			continue;
-
 		if (ftrace_match_record(rec, NULL, search, search_len, type)) {
 			/* if it is in the array */
 			exists = false;
@@ -3381,15 +3611,62 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 	return 0;
 }
 
+static void ftrace_swap_recs(void *a, void *b, int size)
+{
+	struct dyn_ftrace *reca = a;
+	struct dyn_ftrace *recb = b;
+	struct dyn_ftrace t;
+
+	t = *reca;
+	*reca = *recb;
+	*recb = t;
+}
+
 static int ftrace_process_locs(struct module *mod,
 			       unsigned long *start,
 			       unsigned long *end)
 {
+	struct ftrace_page *pg;
+	unsigned long count;
 	unsigned long *p;
 	unsigned long addr;
 	unsigned long flags = 0; /* Shut up gcc */
+	int ret = -ENOMEM;
+
+	count = end - start;
+
+	if (!count)
+		return 0;
+
+	pg = ftrace_allocate_pages(count);
+	if (!pg)
+		return -ENOMEM;
 
 	mutex_lock(&ftrace_lock);
+
+	/*
+	 * Core and each module needs their own pages, as
+	 * modules will free them when they are removed.
+	 * Force a new page to be allocated for modules.
+	 */
+	if (!mod) {
+		WARN_ON(ftrace_pages || ftrace_pages_start);
+		/* First initialization */
+		ftrace_pages = ftrace_pages_start = pg;
+	} else {
+		if (!ftrace_pages)
+			goto out;
+
+		if (WARN_ON(ftrace_pages->next)) {
+			/* Hmm, we have free pages? */
+			while (ftrace_pages->next)
+				ftrace_pages = ftrace_pages->next;
+		}
+
+		ftrace_pages->next = pg;
+		ftrace_pages = pg;
+	}
+
 	p = start;
 	while (p < end) {
 		addr = ftrace_call_adjust(*p++);
@@ -3401,9 +3678,18 @@ static int ftrace_process_locs(struct module *mod,
 		 */
 		if (!addr)
 			continue;
-		ftrace_record_ip(addr);
+		if (!ftrace_record_ip(addr))
+			break;
 	}
 
+	/* These new locations need to be initialized */
+	ftrace_new_pgs = pg;
+
+	/* Make each individual set of pages sorted by ips */
+	for (; pg; pg = pg->next)
+		sort(pg->records, pg->index, sizeof(struct dyn_ftrace),
+		     ftrace_cmp_recs, ftrace_swap_recs);
+
 	/*
 	 * We only need to disable interrupts on start up
 	 * because we are modifying code that an interrupt
@@ -3417,32 +3703,55 @@ static int ftrace_process_locs(struct module *mod,
 	ftrace_update_code(mod);
 	if (!mod)
 		local_irq_restore(flags);
+	ret = 0;
+ out:
 	mutex_unlock(&ftrace_lock);
 
-	return 0;
+	return ret;
 }
 
 #ifdef CONFIG_MODULES
+
+#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
+
 void ftrace_release_mod(struct module *mod)
 {
 	struct dyn_ftrace *rec;
+	struct ftrace_page **last_pg;
 	struct ftrace_page *pg;
+	int order;
 
 	mutex_lock(&ftrace_lock);
 
 	if (ftrace_disabled)
 		goto out_unlock;
 
-	do_for_each_ftrace_rec(pg, rec) {
+	/*
+	 * Each module has its own ftrace_pages, remove
+	 * them from the list.
+	 */
+	last_pg = &ftrace_pages_start;
+	for (pg = ftrace_pages_start; pg; pg = *last_pg) {
+		rec = &pg->records[0];
 		if (within_module_core(rec->ip, mod)) {
 			/*
-			 * rec->ip is changed in ftrace_free_rec()
-			 * It should not between s and e if record was freed.
+			 * As core pages are first, the first
+			 * page should never be a module page.
 			 */
-			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
-			ftrace_free_rec(rec);
-		}
-	} while_for_each_ftrace_rec();
+			if (WARN_ON(pg == ftrace_pages_start))
+				goto out_unlock;
+
+			/* Check if we are deleting the last page */
+			if (pg == ftrace_pages)
+				ftrace_pages = next_to_ftrace_page(last_pg);
+
+			*last_pg = pg->next;
+			order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+			free_pages((unsigned long)pg->records, order);
+			kfree(pg);
+		} else
+			last_pg = &pg->next;
+	}
  out_unlock:
 	mutex_unlock(&ftrace_lock);
 }

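Because each ftrace_page now keeps its records sorted by ip (the sort() call in ftrace_process_locs() above), ftrace_location() resolves an address with one bsearch() per page. A hedged sketch of a caller (the function and its use are purely illustrative, not from the commit):

#include <linux/ftrace.h>

/* Illustrative only: refuse to patch over an address that ftrace owns.
 * ftrace_location() returns 1 when "addr" is a traced mcount call site. */
static int example_can_patch(unsigned long addr)
{
	return !ftrace_location(addr);
}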
+ 142 - 141
kernel/trace/trace_events_filter.c

@@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system,
 	return -ENOMEM;
 }
 
+static int create_filter_start(char *filter_str, bool set_str,
+			       struct filter_parse_state **psp,
+			       struct event_filter **filterp)
+{
+	struct event_filter *filter;
+	struct filter_parse_state *ps = NULL;
+	int err = 0;
+
+	WARN_ON_ONCE(*psp || *filterp);
+
+	/* allocate everything, and if any fails, free all and fail */
+	filter = __alloc_filter();
+	if (filter && set_str)
+		err = replace_filter_string(filter, filter_str);
+
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+
+	if (!filter || !ps || err) {
+		kfree(ps);
+		__free_filter(filter);
+		return -ENOMEM;
+	}
+
+	/* we're committed to creating a new filter */
+	*filterp = filter;
+	*psp = ps;
+
+	parse_init(ps, filter_ops, filter_str);
+	err = filter_parse(ps);
+	if (err && set_str)
+		append_filter_err(ps, filter);
+	return err;
+}
+
+static void create_filter_finish(struct filter_parse_state *ps)
+{
+	if (ps) {
+		filter_opstack_clear(ps);
+		postfix_clear(ps);
+		kfree(ps);
+	}
+}
+
+/**
+ * create_filter - create a filter for a ftrace_event_call
+ * @call: ftrace_event_call to create a filter for
+ * @filter_str: filter string
+ * @set_str: remember @filter_str and enable detailed error in filter
+ * @filterp: out param for created filter (always updated on return)
+ *
+ * Creates a filter for @call with @filter_str.  If @set_str is %true,
+ * @filter_str is copied and recorded in the new filter.
+ *
+ * On success, returns 0 and *@filterp points to the new filter.  On
+ * failure, returns -errno and *@filterp may point to %NULL or to a new
+ * filter.  In the latter case, the returned filter contains error
+ * information if @set_str is %true and the caller is responsible for
+ * freeing it.
+ */
+static int create_filter(struct ftrace_event_call *call,
+			 char *filter_str, bool set_str,
+			 struct event_filter **filterp)
+{
+	struct event_filter *filter = NULL;
+	struct filter_parse_state *ps = NULL;
+	int err;
+
+	err = create_filter_start(filter_str, set_str, &ps, &filter);
+	if (!err) {
+		err = replace_preds(call, filter, ps, filter_str, false);
+		if (err && set_str)
+			append_filter_err(ps, filter);
+	}
+	create_filter_finish(ps);
+
+	*filterp = filter;
+	return err;
+}
+
+/**
+ * create_system_filter - create a filter for an event_subsystem
+ * @system: event_subsystem to create a filter for
+ * @filter_str: filter string
+ * @filterp: out param for created filter (always updated on return)
+ *
+ * Identical to create_filter() except that it creates a subsystem filter
+ * and always remembers @filter_str.
+ */
+static int create_system_filter(struct event_subsystem *system,
+				char *filter_str, struct event_filter **filterp)
+{
+	struct event_filter *filter = NULL;
+	struct filter_parse_state *ps = NULL;
+	int err;
+
+	err = create_filter_start(filter_str, true, &ps, &filter);
+	if (!err) {
+		err = replace_system_preds(system, ps, filter_str);
+		if (!err) {
+			/* System filters just show a default message */
+			kfree(filter->filter_string);
+			filter->filter_string = NULL;
+		} else {
+			append_filter_err(ps, filter);
+		}
+	}
+	create_filter_finish(ps);
+
+	*filterp = filter;
+	return err;
+}
+
 int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 {
-	struct filter_parse_state *ps;
 	struct event_filter *filter;
-	struct event_filter *tmp;
 	int err = 0;
 
 	mutex_lock(&event_mutex);
@@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 		goto out_unlock;
 	}
 
-	err = -ENOMEM;
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		goto out_unlock;
-
-	filter = __alloc_filter();
-	if (!filter) {
-		kfree(ps);
-		goto out_unlock;
-	}
-
-	replace_filter_string(filter, filter_string);
-
-	parse_init(ps, filter_ops, filter_string);
-	err = filter_parse(ps);
-	if (err) {
-		append_filter_err(ps, filter);
-		goto out;
-	}
+	err = create_filter(call, filter_string, true, &filter);
 
-	err = replace_preds(call, filter, ps, filter_string, false);
-	if (err) {
-		filter_disable(call);
-		append_filter_err(ps, filter);
-	} else
-		call->flags |= TRACE_EVENT_FL_FILTERED;
-out:
 	/*
 	 * Always swap the call filter with the new filter
 	 * even if there was an error. If there was an error
 	 * in the filter, we disable the filter and show the error
 	 * string
 	 */
-	tmp = call->filter;
-	rcu_assign_pointer(call->filter, filter);
-	if (tmp) {
-		/* Make sure the call is done with the filter */
-		synchronize_sched();
-		__free_filter(tmp);
+	if (filter) {
+		struct event_filter *tmp = call->filter;
+
+		if (!err)
+			call->flags |= TRACE_EVENT_FL_FILTERED;
+		else
+			filter_disable(call);
+
+		rcu_assign_pointer(call->filter, filter);
+
+		if (tmp) {
+			/* Make sure the call is done with the filter */
+			synchronize_sched();
+			__free_filter(tmp);
+		}
 	}
-	filter_opstack_clear(ps);
-	postfix_clear(ps);
-	kfree(ps);
 out_unlock:
 	mutex_unlock(&event_mutex);
 
@@ -1811,7 +1902,6 @@ out_unlock:
 int apply_subsystem_event_filter(struct event_subsystem *system,
 				 char *filter_string)
 {
-	struct filter_parse_state *ps;
 	struct event_filter *filter;
 	int err = 0;
 
@@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
 		goto out_unlock;
 	}
 
-	err = -ENOMEM;
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		goto out_unlock;
-
-	filter = __alloc_filter();
-	if (!filter)
-		goto out;
-
-	/* System filters just show a default message */
-	kfree(filter->filter_string);
-	filter->filter_string = NULL;
-
-	/*
-	 * No event actually uses the system filter
-	 * we can free it without synchronize_sched().
-	 */
-	__free_filter(system->filter);
-	system->filter = filter;
-
-	parse_init(ps, filter_ops, filter_string);
-	err = filter_parse(ps);
-	if (err)
-		goto err_filter;
-
-	err = replace_system_preds(system, ps, filter_string);
-	if (err)
-		goto err_filter;
-
-out:
-	filter_opstack_clear(ps);
-	postfix_clear(ps);
-	kfree(ps);
+	err = create_system_filter(system, filter_string, &filter);
+	if (filter) {
+		/*
+		 * No event actually uses the system filter,
+		 * so we can free it without synchronize_sched().
+		 */
+		__free_filter(system->filter);
+		system->filter = filter;
+	}
 out_unlock:
 	mutex_unlock(&event_mutex);
 
 	return err;
-
-err_filter:
-	replace_filter_string(filter, filter_string);
-	append_filter_err(ps, system->filter);
-	goto out;
 }
 
 #ifdef CONFIG_PERF_EVENTS
@@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 {
 	int err;
 	struct event_filter *filter;
-	struct filter_parse_state *ps;
 	struct ftrace_event_call *call;
 
 	mutex_lock(&event_mutex);
@@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 	if (event->filter)
 		goto out_unlock;
 
-	filter = __alloc_filter();
-	if (!filter) {
-		err = PTR_ERR(filter);
-		goto out_unlock;
-	}
-
-	err = -ENOMEM;
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		goto free_filter;
-
-	parse_init(ps, filter_ops, filter_str);
-	err = filter_parse(ps);
-	if (err)
-		goto free_ps;
-
-	err = replace_preds(call, filter, ps, filter_str, false);
+	err = create_filter(call, filter_str, false, &filter);
 	if (!err)
 		event->filter = filter;
-
-free_ps:
-	filter_opstack_clear(ps);
-	postfix_clear(ps);
-	kfree(ps);
-
-free_filter:
-	if (err)
+	else
 		__free_filter(filter);
 
 out_unlock:
@@ -1954,43 +1991,6 @@ out_unlock:
 #define CREATE_TRACE_POINTS
 #include "trace_events_filter_test.h"
 
-static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
-			   struct event_filter **pfilter)
-{
-	struct event_filter *filter;
-	struct filter_parse_state *ps;
-	int err = -ENOMEM;
-
-	filter = __alloc_filter();
-	if (!filter)
-		goto out;
-
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		goto free_filter;
-
-	parse_init(ps, filter_ops, filter_str);
-	err = filter_parse(ps);
-	if (err)
-		goto free_ps;
-
-	err = replace_preds(call, filter, ps, filter_str, false);
-	if (!err)
-		*pfilter = filter;
-
- free_ps:
-	filter_opstack_clear(ps);
-	postfix_clear(ps);
-	kfree(ps);
-
- free_filter:
-	if (err)
-		__free_filter(filter);
-
- out:
-	return err;
-}
-
 #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
 { \
 	.filter = FILTER, \
@@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void)
 		struct test_filter_data_t *d = &test_filter_data[i];
 		int err;
 
-		err = test_get_filter(d->filter, &event_ftrace_test_filter,
-				      &filter);
+		err = create_filter(&event_ftrace_test_filter, d->filter,
+				    false, &filter);
 		if (err) {
 			printk(KERN_INFO
 			       "Failed to get filter for '%s', err %d\n",
 			       d->filter, err);
+			__free_filter(filter);
 			break;
 		}
 

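The refactor funnels apply_event_filter(), apply_subsystem_event_filter(), ftrace_profile_set_filter() and the self-test through one create_filter_start()/create_filter_finish() pair. The subtle point, spelled out in the new kernel-doc, is that the out parameter is always updated, so the caller owns whatever comes back even on error. A toy model of that contract (types, names and the fake parse error are illustrative, not ftrace's):

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Toy stand-ins; not the kernel's types or functions. */
	struct event_filter { char *filter_string; };

	static void free_filter(struct event_filter *filter)
	{
		if (filter)
			free(filter->filter_string);
		free(filter);
	}

	/*
	 * Models the documented contract: *filterp is always updated on return,
	 * so the caller owns whatever comes back even when an error is reported
	 * (the filter may then carry error text for display).
	 */
	static int create_filter(const char *str, struct event_filter **filterp)
	{
		struct event_filter *filter = calloc(1, sizeof(*filter));

		*filterp = NULL;
		if (!filter || !(filter->filter_string = strdup(str))) {
			free_filter(filter);
			return -ENOMEM;
		}

		*filterp = filter;
		if (strchr(str, '~'))			/* pretend '~' is a parse error */
			return -EINVAL;
		return 0;
	}

	int main(void)
	{
		struct event_filter *filter = NULL;
		int err = create_filter("common_pid != 0 ~", &filter);

		printf("err=%d, filter %s\n", err, filter ? "returned" : "NULL");
		free_filter(filter);			/* caller frees it in either case */
		return 0;
	}
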
+ 29 - 1
kernel/trace/trace_stack.c

@@ -13,6 +13,9 @@
 #include <linux/sysctl.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+
+#include <asm/setup.h>
+
 #include "trace.h"
 
 #define STACK_TRACE_ENTRIES 500
@@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
 static struct ftrace_ops trace_ops __read_mostly =
 {
 	.func = stack_trace_call,
-	.flags = FTRACE_OPS_FL_GLOBAL,
 };
 
 static ssize_t
@@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = {
 	.release	= seq_release,
 };
 
+static int
+stack_trace_filter_open(struct inode *inode, struct file *file)
+{
+	return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
+				 inode, file);
+}
+
+static const struct file_operations stack_trace_filter_fops = {
+	.open = stack_trace_filter_open,
+	.read = seq_read,
+	.write = ftrace_filter_write,
+	.llseek = ftrace_regex_lseek,
+	.release = ftrace_regex_release,
+};
+
 int
 stack_trace_sysctl(struct ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp,
@@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write,
 	return ret;
 }
 
+static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
+
 static __init int enable_stacktrace(char *str)
 {
+	if (strncmp(str, "_filter=", 8) == 0)
+		strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
+
 	stack_tracer_enabled = 1;
 	last_stack_tracer_enabled = 1;
 	return 1;
@@ -358,6 +380,12 @@ static __init int stack_trace_init(void)
 	trace_create_file("stack_trace", 0444, d_tracer,
 			NULL, &stack_trace_fops);
 
+	trace_create_file("stack_trace_filter", 0444, d_tracer,
+			NULL, &stack_trace_filter_fops);
+
+	if (stack_trace_filter_buf[0])
+		ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
+
 	if (stack_tracer_enabled)
 		register_ftrace_function(&trace_ops);
 

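enable_stacktrace() receives whatever text follows the matched boot-parameter prefix (presumably via the existing __setup("stacktrace", ...) hook, which is outside this hunk), so a "stacktrace_filter=..." parameter arrives as "_filter=...". The function peels that off into stack_trace_filter_buf, and stack_trace_init() later hands it to ftrace_set_early_filter(). A small stand-alone sketch of the prefix handling, with assumed buffer names:

	#include <stdio.h>
	#include <string.h>

	#define CMDLINE_MAX 256				/* stand-in for COMMAND_LINE_SIZE */

	static char stack_filter_buf[CMDLINE_MAX + 1];
	static int stack_tracer_enabled;

	/* Mimics enable_stacktrace(): 'str' is what follows the matched "stacktrace" prefix. */
	static int enable_stacktrace(const char *str)
	{
		if (strncmp(str, "_filter=", 8) == 0)
			strncpy(stack_filter_buf, str + 8, CMDLINE_MAX);

		stack_tracer_enabled = 1;
		return 1;
	}

	int main(void)
	{
		/* booting with stacktrace_filter=sys_read,sys_write would arrive as: */
		enable_stacktrace("_filter=sys_read,sys_write");
		printf("enabled=%d filter=\"%s\"\n", stack_tracer_enabled, stack_filter_buf);
		return 0;
	}
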
+ 1 - 1
scripts/recordmcount.h

@@ -462,7 +462,7 @@ __has_rel_mcount(Elf_Shdr const *const relhdr,  /* is SHT_REL or SHT_RELA */
 		succeed_file();
 	}
 	if (w(txthdr->sh_type) != SHT_PROGBITS ||
-	    !(w(txthdr->sh_flags) & SHF_EXECINSTR))
+	    !(_w(txthdr->sh_flags) & SHF_EXECINSTR))
 		return NULL;
 	return txtname;
 }

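The one-character fix matters because sh_flags is a 64-bit field in ELF64 section headers, while recordmcount's w() accessor reads a 32-bit word; on a big-endian elf64 object those 32 bits are the all-zero high half, so SHF_EXECINSTR is never seen and the text section is wrongly rejected. _w() reads the full native-width value. A plain-C illustration of the failure mode (independent of recordmcount's helpers):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define SHF_EXECINSTR 0x4

	int main(void)
	{
		/* sh_flags = SHF_ALLOC | SHF_EXECINSTR (0x6) as stored in a big-endian ELF64 file */
		unsigned char raw[8] = { 0, 0, 0, 0, 0, 0, 0, 0x6 };
		uint32_t half;
		uint64_t full = 0;
		int i;

		memcpy(&half, raw, 4);			/* a 32-bit read sees only the (all-zero) high half */
		for (i = 0; i < 8; i++)			/* a width-aware, byte-order-aware read */
			full = (full << 8) | raw[i];

		printf("32-bit read: %#x  -> exec? %s\n", (unsigned)half,
		       (half & SHF_EXECINSTR) ? "yes" : "no");
		printf("64-bit read: %#llx -> exec? %s\n", (unsigned long long)full,
		       (full & SHF_EXECINSTR) ? "yes" : "no");
		return 0;
	}
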
+ 2 - 0
tools/perf/Documentation/perf-list.txt

@@ -21,6 +21,8 @@ EVENT MODIFIERS
 Events can optionally have a modifier by appending a colon and one or
 more modifiers.  Modifiers allow the user to restrict when events are
 counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
+Additional modifiers are 'G' for guest counting (in KVM guests) and 'H'
+for host counting (not in KVM guests).
 
 The 'p' modifier can be used for specifying how precise the instruction
 address should be. The 'p' modifier is currently only implemented for

+ 1 - 0
tools/perf/MANIFEST

@@ -1,4 +1,5 @@
 tools/perf
+include/linux/const.h
 include/linux/perf_event.h
 include/linux/rbtree.h
 include/linux/list.h

+ 1 - 6
tools/perf/builtin-annotate.c

@@ -235,7 +235,7 @@ out_delete:
 }
 
 static const char * const annotate_usage[] = {
-	"perf annotate [<options>] <command>",
+	"perf annotate [<options>]",
 	NULL
 };
 
@@ -313,10 +313,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
 		annotate.sym_hist_filter = argv[0];
 	}
 
-	if (field_sep && *field_sep == '.') {
-		pr_err("'.' is the only non valid --field-separator argument\n");
-		return -1;
-	}
-
 	return __cmd_annotate(&annotate);
 }

+ 3 - 0
tools/perf/builtin-kmem.c

@@ -108,7 +108,9 @@ static void setup_cpunode_map(void)
 				continue;
 			cpunode_map[cpu] = mem;
 		}
+		closedir(dir2);
 	}
+	closedir(dir1);
 }
 
 static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
@@ -645,6 +647,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
 			break;
 		if (sort_dimension__add(tok, sort_list) < 0) {
 			error("Unknown --sort key: '%s'", tok);
+			free(str);
 			return -1;
 		}
 	}

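Both builtin-kmem.c hunks are leak fixes: the nested opendir() loops in setup_cpunode_map() never closed their directory handles, and setup_sorting() dropped its strdup'd copy on the error path. A stand-alone sketch of the directory-walk shape with the cleanup in place (paths and names are illustrative):

	#include <dirent.h>
	#include <limits.h>
	#include <stdio.h>
	#include <string.h>

	/* Walk /sys/devices/system/node/node* and close every handle we open. */
	static int walk_nodes(void)
	{
		struct dirent *d1, *d2;
		DIR *dir1, *dir2;

		dir1 = opendir("/sys/devices/system/node");
		if (!dir1)
			return -1;

		while ((d1 = readdir(dir1)) != NULL) {
			char path[PATH_MAX];

			if (strncmp(d1->d_name, "node", 4))
				continue;
			snprintf(path, sizeof(path), "/sys/devices/system/node/%s", d1->d_name);
			dir2 = opendir(path);
			if (!dir2)
				continue;
			while ((d2 = readdir(dir2)) != NULL)
				;			/* per-CPU bookkeeping would go here */
			closedir(dir2);			/* the fix: release the inner handle */
		}
		closedir(dir1);				/* ...and the outer one */
		return 0;
	}

	int main(void)
	{
		return walk_nodes() ? 1 : 0;
	}
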
+ 2 - 4
tools/perf/builtin-kvm.c

@@ -22,9 +22,6 @@
 static const char		*file_name;
 static char			name_buffer[256];
 
-bool				perf_host = 1;
-bool				perf_guest;
-
 static const char * const kvm_usage[] = {
 	"perf kvm [<options>] {top|record|report|diff|buildid-list}",
 	NULL
@@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv)
 
 int cmd_kvm(int argc, const char **argv, const char *prefix __used)
 {
-	perf_host = perf_guest = 0;
+	perf_host  = 0;
+	perf_guest = 1;
 
 	argc = parse_options(argc, argv, kvm_options, kvm_usage,
 			PARSE_OPT_STOP_AT_NON_OPTION);

+ 4 - 0
tools/perf/builtin-script.c

@@ -1018,13 +1018,17 @@ static char *get_script_path(const char *script_root, const char *suffix)
 			__script_root = get_script_root(&script_dirent, suffix);
 			if (__script_root && !strcmp(script_root, __script_root)) {
 				free(__script_root);
+				closedir(lang_dir);
+				closedir(scripts_dir);
 				snprintf(script_path, MAXPATHLEN, "%s/%s",
 					 lang_path, script_dirent.d_name);
 				return strdup(script_path);
 			}
 			free(__script_root);
 		}
+		closedir(lang_dir);
 	}
+	closedir(scripts_dir);
 
 	return NULL;
 }

+ 1 - 1
tools/perf/builtin-test.c

@@ -1396,7 +1396,7 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
 	NULL,
 	};
 	const struct option test_options[] = {
-	OPT_INTEGER('v', "verbose", &verbose,
+	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
 	OPT_END()
 	};

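OPT_INCR makes each '-v' bump the verbosity counter, where OPT_INTEGER had demanded a numeric argument; that matches how the other perf commands treat '-v'. Roughly the same behaviour with plain getopt, for comparison only:

	#include <stdio.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		int verbose = 0;			/* OPT_INCR-style: -v -v -v => 3 */
		int c;

		while ((c = getopt(argc, argv, "v")) != -1)
			if (c == 'v')
				verbose++;

		printf("verbose = %d\n", verbose);
		return 0;
	}
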
+ 4 - 1
tools/perf/builtin-top.c

@@ -235,7 +235,6 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 	if (he == NULL)
 		return NULL;
 
-	evsel->hists.stats.total_period += sample->period;
 	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
 	return he;
 }
@@ -889,6 +888,10 @@ try_again:
 				ui__warning("The %s event is not supported.\n",
 					    event_name(counter));
 				goto out_err;
+			} else if (err == EMFILE) {
+				ui__warning("Too many events are opened.\n"
+					    "Try again after reducing the number of events\n");
+				goto out_err;
 			}
 
 			ui__warning("The sys_perf_event_open() syscall "

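sys_perf_event_open() hands back one file descriptor per event instance, so a long event list on a machine with many CPUs can hit the per-process descriptor limit; the new branch turns that EMFILE into an actionable message instead of the generic fallback. A tiny demonstration of provoking EMFILE (nothing perf-specific):

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>

	int main(void)
	{
		int n = 0;

		while (open("/dev/null", O_RDONLY) >= 0)	/* burn descriptors until the limit */
			n++;

		if (errno == EMFILE)
			fprintf(stderr, "EMFILE after %d opens: use fewer events "
					"or raise RLIMIT_NOFILE\n", n);
		return 0;
	}
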
+ 4 - 1
tools/perf/util/evlist.c

@@ -111,8 +111,11 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
 		.type = PERF_TYPE_HARDWARE,
 		.config = PERF_COUNT_HW_CPU_CYCLES,
 	};
-	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
+	struct perf_evsel *evsel;
+
+	event_attr_init(&attr);
 
+	evsel = perf_evsel__new(&attr, 0);
 	if (evsel == NULL)
 		goto error;
 

+ 68 - 63
tools/perf/util/hist.c

@@ -76,21 +76,21 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	}
 }
 
-static void hist_entry__add_cpumode_period(struct hist_entry *self,
+static void hist_entry__add_cpumode_period(struct hist_entry *he,
 					   unsigned int cpumode, u64 period)
 {
 	switch (cpumode) {
 	case PERF_RECORD_MISC_KERNEL:
-		self->period_sys += period;
+		he->period_sys += period;
 		break;
 	case PERF_RECORD_MISC_USER:
-		self->period_us += period;
+		he->period_us += period;
 		break;
 	case PERF_RECORD_MISC_GUEST_KERNEL:
-		self->period_guest_sys += period;
+		he->period_guest_sys += period;
 		break;
 	case PERF_RECORD_MISC_GUEST_USER:
-		self->period_guest_us += period;
+		he->period_guest_us += period;
 		break;
 	default:
 		break;
@@ -165,18 +165,18 @@ void hists__decay_entries_threaded(struct hists *hists,
 static struct hist_entry *hist_entry__new(struct hist_entry *template)
 {
 	size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
-	struct hist_entry *self = malloc(sizeof(*self) + callchain_size);
+	struct hist_entry *he = malloc(sizeof(*he) + callchain_size);
 
-	if (self != NULL) {
-		*self = *template;
-		self->nr_events = 1;
-		if (self->ms.map)
-			self->ms.map->referenced = true;
+	if (he != NULL) {
+		*he = *template;
+		he->nr_events = 1;
+		if (he->ms.map)
+			he->ms.map->referenced = true;
 		if (symbol_conf.use_callchain)
-			callchain_init(self->callchain);
+			callchain_init(he->callchain);
 	}
 
-	return self;
+	return he;
 }
 
 static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
@@ -677,15 +677,16 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
 	return ret;
 }
 
-static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
-					    u64 total_samples, int left_margin)
+static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
+					    u64 total_samples, int left_margin,
+					    FILE *fp)
 {
 	struct rb_node *rb_node;
 	struct callchain_node *chain;
 	size_t ret = 0;
 	u32 entries_printed = 0;
 
-	rb_node = rb_first(&self->sorted_chain);
+	rb_node = rb_first(&he->sorted_chain);
 	while (rb_node) {
 		double percent;
 
@@ -730,35 +731,35 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows)
 	}
 }
 
-static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
+static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
 				     size_t size, struct hists *pair_hists,
 				     bool show_displacement, long displacement,
-				     bool color, u64 session_total)
+				     bool color, u64 total_period)
 {
 	u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
 	u64 nr_events;
 	const char *sep = symbol_conf.field_sep;
 	int ret;
 
-	if (symbol_conf.exclude_other && !self->parent)
+	if (symbol_conf.exclude_other && !he->parent)
 		return 0;
 
 	if (pair_hists) {
-		period = self->pair ? self->pair->period : 0;
-		nr_events = self->pair ? self->pair->nr_events : 0;
+		period = he->pair ? he->pair->period : 0;
+		nr_events = he->pair ? he->pair->nr_events : 0;
 		total = pair_hists->stats.total_period;
-		period_sys = self->pair ? self->pair->period_sys : 0;
-		period_us = self->pair ? self->pair->period_us : 0;
-		period_guest_sys = self->pair ? self->pair->period_guest_sys : 0;
-		period_guest_us = self->pair ? self->pair->period_guest_us : 0;
+		period_sys = he->pair ? he->pair->period_sys : 0;
+		period_us = he->pair ? he->pair->period_us : 0;
+		period_guest_sys = he->pair ? he->pair->period_guest_sys : 0;
+		period_guest_us = he->pair ? he->pair->period_guest_us : 0;
 	} else {
-		period = self->period;
-		nr_events = self->nr_events;
-		total = session_total;
-		period_sys = self->period_sys;
-		period_us = self->period_us;
-		period_guest_sys = self->period_guest_sys;
-		period_guest_us = self->period_guest_us;
+		period = he->period;
+		nr_events = he->nr_events;
+		total = total_period;
+		period_sys = he->period_sys;
+		period_us = he->period_us;
+		period_guest_sys = he->period_guest_sys;
+		period_guest_us = he->period_guest_us;
 	}
 
 	if (total) {
@@ -812,8 +813,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
 
 		if (total > 0)
 			old_percent = (period * 100.0) / total;
-		if (session_total > 0)
-			new_percent = (self->period * 100.0) / session_total;
+		if (total_period > 0)
+			new_percent = (he->period * 100.0) / total_period;
 
 		diff = new_percent - old_percent;
 
@@ -862,9 +863,10 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size,
 	return ret;
 }
 
-int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
-			struct hists *pair_hists, bool show_displacement,
-			long displacement, FILE *fp, u64 session_total)
+static int hist_entry__fprintf(struct hist_entry *he, size_t size,
+			       struct hists *hists, struct hists *pair_hists,
+			       bool show_displacement, long displacement,
+			       u64 total_period, FILE *fp)
 {
 	char bf[512];
 	int ret;
@@ -874,14 +876,14 @@ int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
 
 	ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists,
 					show_displacement, displacement,
-					true, session_total);
+					true, total_period);
 	hist_entry__snprintf(he, bf + ret, size - ret, hists);
 	return fprintf(fp, "%s\n", bf);
 }
 
-static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
-					    struct hists *hists, FILE *fp,
-					    u64 session_total)
+static size_t hist_entry__fprintf_callchain(struct hist_entry *he,
+					    struct hists *hists,
+					    u64 total_period, FILE *fp)
 {
 	int left_margin = 0;
 
@@ -889,11 +891,10 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
 		struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
 							 typeof(*se), list);
 		left_margin = hists__col_len(hists, se->se_width_idx);
-		left_margin -= thread__comm_len(self->thread);
+		left_margin -= thread__comm_len(he->thread);
 	}
 
-	return hist_entry_callchain__fprintf(fp, self, session_total,
-					     left_margin);
+	return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
 }
 
 size_t hists__fprintf(struct hists *hists, struct hists *pair,
@@ -903,6 +904,7 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 	struct sort_entry *se;
 	struct rb_node *nd;
 	size_t ret = 0;
+	u64 total_period;
 	unsigned long position = 1;
 	long displacement = 0;
 	unsigned int width;
@@ -917,20 +919,6 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 
 	fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
 
-	if (symbol_conf.show_nr_samples) {
-		if (sep)
-			fprintf(fp, "%cSamples", *sep);
-		else
-			fputs("  Samples  ", fp);
-	}
-
-	if (symbol_conf.show_total_period) {
-		if (sep)
-			ret += fprintf(fp, "%cPeriod", *sep);
-		else
-			ret += fprintf(fp, "   Period    ");
-	}
-
 	if (symbol_conf.show_cpu_utilization) {
 		if (sep) {
 			ret += fprintf(fp, "%csys", *sep);
@@ -940,8 +928,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 				ret += fprintf(fp, "%cguest us", *sep);
 			}
 		} else {
-			ret += fprintf(fp, "  sys  ");
-			ret += fprintf(fp, "  us  ");
+			ret += fprintf(fp, "     sys  ");
+			ret += fprintf(fp, "      us  ");
 			if (perf_guest) {
 				ret += fprintf(fp, "  guest sys  ");
 				ret += fprintf(fp, "  guest us  ");
@@ -949,6 +937,20 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 		}
 	}
 
+	if (symbol_conf.show_nr_samples) {
+		if (sep)
+			fprintf(fp, "%cSamples", *sep);
+		else
+			fputs("  Samples  ", fp);
+	}
+
+	if (symbol_conf.show_total_period) {
+		if (sep)
+			ret += fprintf(fp, "%cPeriod", *sep);
+		else
+			ret += fprintf(fp, "   Period    ");
+	}
+
 	if (pair) {
 		if (sep)
 			ret += fprintf(fp, "%cDelta", *sep);
@@ -993,6 +995,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 		goto print_entries;
 
 	fprintf(fp, "# ........");
+	if (symbol_conf.show_cpu_utilization)
+		fprintf(fp, "   .......   .......");
 	if (symbol_conf.show_nr_samples)
 		fprintf(fp, " ..........");
 	if (symbol_conf.show_total_period)
@@ -1025,6 +1029,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 		goto out;
 
 print_entries:
+	total_period = hists->stats.total_period;
+
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
@@ -1040,11 +1046,10 @@ print_entries:
 			++position;
 		}
 		ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement,
-					   displacement, fp, hists->stats.total_period);
+					   displacement, total_period, fp);
 
 		if (symbol_conf.use_callchain)
-			ret += hist_entry__fprintf_callchain(h, hists, fp,
-							     hists->stats.total_period);
+			ret += hist_entry__fprintf_callchain(h, hists, total_period, fp);
 		if (max_rows && ++nr_rows >= max_rows)
 			goto out;
 

+ 2 - 5
tools/perf/util/hist.h

@@ -66,11 +66,8 @@ struct hists {
 struct hist_entry *__hists__add_entry(struct hists *self,
 				      struct addr_location *al,
 				      struct symbol *parent, u64 period);
-extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
-extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
-int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
-			struct hists *pair_hists, bool show_displacement,
-			long displacement, FILE *fp, u64 session_total);
+int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
 			 struct hists *hists);
 void hist_entry__free(struct hist_entry *);

+ 13 - 2
tools/perf/util/parse-events.c

@@ -735,8 +735,8 @@ static int
 parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
-	int exclude = 0;
-	int eu = 0, ek = 0, eh = 0, precise = 0;
+	int exclude = 0, exclude_GH = 0;
+	int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0;
 
 	if (!*str)
 		return 0;
@@ -760,6 +760,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 			if (!exclude)
 				exclude = eu = ek = eh = 1;
 			eh = 0;
+		} else if (*str == 'G') {
+			if (!exclude_GH)
+				exclude_GH = eG = eH = 1;
+			eG = 0;
+		} else if (*str == 'H') {
+			if (!exclude_GH)
+				exclude_GH = eG = eH = 1;
+			eH = 0;
 		} else if (*str == 'p') {
 			precise++;
 		} else
@@ -776,6 +784,8 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 	attr->exclude_kernel = ek;
 	attr->exclude_hv     = eh;
 	attr->precise_ip     = precise;
+	attr->exclude_host   = eH;
+	attr->exclude_guest  = eG;
 
 	return 0;
 }
@@ -838,6 +848,7 @@ int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
 	for (;;) {
 		ostr = str;
 		memset(&attr, 0, sizeof(attr));
+		event_attr_init(&attr);
 		ret = parse_event_symbols(evlist, &str, &attr);
 		if (ret == EVT_FAILED)
 			return -1;

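The parser treats 'G'/'H' the way it treats 'u'/'k'/'h': the first guest/host modifier seen sets both exclude bits, and each named side then clears its own, so "cycles:G" ends up with exclude_host=1 and exclude_guest=0. A condensed stand-alone version of just that logic (toy attr struct, not perf's):

	#include <stdio.h>

	struct attr { int exclude_host, exclude_guest; };

	/* Apply a modifier string such as "G", "H" or "GH" to the exclusion bits. */
	static void apply_guest_host(const char *mods, struct attr *a)
	{
		int exclude_GH = 0, eG = 0, eH = 0;

		for (; *mods; mods++) {
			if (*mods == 'G') {
				if (!exclude_GH)
					exclude_GH = eG = eH = 1;	/* first G/H excludes both... */
				eG = 0;					/* ...then G re-enables guest */
			} else if (*mods == 'H') {
				if (!exclude_GH)
					exclude_GH = eG = eH = 1;
				eH = 0;					/* H re-enables host */
			}
		}
		a->exclude_host  = eH;
		a->exclude_guest = eG;
	}

	int main(void)
	{
		struct attr a = { 0, 0 };

		apply_guest_host("G", &a);
		printf("cycles:G -> exclude_host=%d exclude_guest=%d\n",
		       a.exclude_host, a.exclude_guest);
		return 0;
	}

With no G/H modifier both counters stay at zero, so the default behaviour of counting host and guest is unchanged.
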
+ 0 - 1
tools/perf/util/trace-event-info.c

@@ -18,7 +18,6 @@
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
-#include <ctype.h>
 #include "util.h"
 #include <dirent.h>
 #include <mntent.h>

+ 15 - 0
tools/perf/util/util.c

@@ -1,6 +1,21 @@
+#include "../perf.h"
 #include "util.h"
 #include <sys/mman.h>
 
+/*
+ * XXX We need to find a better place for these things...
+ */
+bool perf_host  = true;
+bool perf_guest = true;
+
+void event_attr_init(struct perf_event_attr *attr)
+{
+	if (!perf_host)
+		attr->exclude_host  = 1;
+	if (!perf_guest)
+		attr->exclude_guest = 1;
+}
+
 int mkdir_p(char *path, mode_t mode)
 {
 	struct stat st;

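perf_host and perf_guest now live here, defaulting to true, and event_attr_init() folds them into every new perf_event_attr; builtin-kvm.c flips them so 'perf kvm' counts guest-only by default, and evlist.c/parse-events.c call event_attr_init() before creating events so the flags actually reach the attr. A compressed model of that interaction (toy types again, not perf's):

	#include <stdbool.h>
	#include <stdio.h>

	struct attr { int exclude_host, exclude_guest; };

	static bool perf_host  = true;	/* defaults as in util.c above */
	static bool perf_guest = true;

	static void event_attr_init(struct attr *attr)
	{
		if (!perf_host)
			attr->exclude_host = 1;
		if (!perf_guest)
			attr->exclude_guest = 1;
	}

	int main(void)
	{
		struct attr a = { 0, 0 };

		/* what cmd_kvm() now does before any event is created */
		perf_host  = false;
		perf_guest = true;

		event_attr_init(&a);
		printf("perf kvm default: exclude_host=%d exclude_guest=%d\n",
		       a.exclude_host, a.exclude_guest);
		return 0;
	}
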
+ 4 - 0
tools/perf/util/util.h

@@ -242,6 +242,10 @@ int strtailcmp(const char *s1, const char *s2);
 unsigned long convert_unit(unsigned long value, char *unit);
 int readn(int fd, void *buf, size_t size);
 
+struct perf_event_attr;
+
+void event_attr_init(struct perf_event_attr *attr);
+
 #define _STR(x) #x
 #define STR(x) _STR(x)