14 жил өмнө · 4d4abdcb1d
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -22,14 +22,15 @@ current_tracer. Instead of that, add probe points via
 
				 
			
 
				 Synopsis of kprobe_events
			
 
				 -------------------------
			
 
				-  p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS]	: Set a probe
			
 
				-  r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS]		: Set a return probe
			
 
				+  p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]	: Set a probe
			
 
				+  r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]		: Set a return probe
			
 
				   -:[GRP/]EVENT						: Clear a probe
			
 
				 
			
 
				  GRP		: Group name. If omitted, use "kprobes" for it.
			
 
				  EVENT		: Event name. If omitted, the event name is generated
			
 
				-		  based on SYMBOL+offs or MEMADDR.
			
 
				- SYMBOL[+offs]	: Symbol+offset where the probe is inserted.
			
 
				+		  based on SYM+offs or MEMADDR.
			
 
				+ MOD		: Module name which has given SYM.
			
 
				+ SYM[+offs]	: Symbol+offset where the probe is inserted.
			
 
				  MEMADDR	: Address where the probe is inserted.
			
 
				 
			
 
				  FETCHARGS	: Arguments. Each probe can have up to 128 args.
			
--- a/Makefile
+++ b/Makefile
@@ -1290,6 +1290,7 @@ help:
 
				 	@echo  '  make O=dir [targets] Locate all output files in "dir", including .config'
			
 
				 	@echo  '  make C=1   [targets] Check all c source with $$CHECK (sparse by default)'
			
 
				 	@echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
			
 
				+	@echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
			
 
				 	@echo  '  make W=n   [targets] Enable extra gcc checks, n=1,2,3 where'
			
 
				 	@echo  '		1: warnings which may be relevant and do not occur too often'
			
 
				 	@echo  '		2: warnings which occur quite often but may still be relevant'
			
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 
				 	data.period = event->hw.last_period;
			
 
				 
			
 
				 	if (alpha_perf_event_set_period(event, hwc, idx)) {
			
 
				-		if (perf_event_overflow(event, 1, &data, regs)) {
			
 
				+		if (perf_event_overflow(event, &data, regs)) {
			
 
				 			/* Interrupts coming too quickly; "throttle" the
			
 
				 			 * counter, i.e., disable it for a little while.
			
 
				 			 */
			
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -91,7 +91,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);
 
				 #define test_irq_work_pending()      __get_cpu_var(irq_work_pending)
			
 
				 #define clear_irq_work_pending()     __get_cpu_var(irq_work_pending) = 0
			
 
				 
			
 
				-void set_irq_work_pending(void)
			
 
				+void arch_irq_work_raise(void)
			
 
				 {
			
 
				 	set_irq_work_pending_flag();
			
 
				 }
			
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -173,6 +173,20 @@ static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
				 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				 		},
			
 
				 	},
			
 
				+	[C(NODE)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 enum armv6mpcore_perf_types {
			
@@ -310,6 +324,20 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
				 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
			
 
				 		},
			
 
				 	},
			
 
				+	[C(NODE)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static inline unsigned long
			
@@ -479,7 +507,7 @@ armv6pmu_handle_irq(int irq_num,
 
				 		if (!armpmu_event_set_period(event, hwc, idx))
			
 
				 			continue;
			
 
				 
			
 
				-		if (perf_event_overflow(event, 0, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			armpmu->disable(hwc, idx);
			
 
				 	}
			
 
				 
			
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -255,6 +255,20 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
				 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				 		},
			
 
				 	},
			
 
				+	[C(NODE)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -371,6 +385,20 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
				 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				 		},
			
 
				 	},
			
 
				+	[C(NODE)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -787,7 +815,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 
				 		if (!armpmu_event_set_period(event, hwc, idx))
			
 
				 			continue;
			
 
				 
			
 
				-		if (perf_event_overflow(event, 0, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			armpmu->disable(hwc, idx);
			
 
				 	}
			
 
				 
			
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -144,6 +144,20 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
				 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				 		},
			
 
				 	},
			
 
				+	[C(NODE)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
			
 
				+		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 #define	XSCALE_PMU_ENABLE	0x001
			
@@ -251,7 +265,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 
				 		if (!armpmu_event_set_period(event, hwc, idx))
			
 
				 			continue;
			
 
				 
			
 
				-		if (perf_event_overflow(event, 0, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			armpmu->disable(hwc, idx);
			
 
				 	}
			
 
				 
			
@@ -583,7 +597,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 
				 		if (!armpmu_event_set_period(event, hwc, idx))
			
 
				 			continue;
			
 
				 
			
 
				-		if (perf_event_overflow(event, 0, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			armpmu->disable(hwc, idx);
			
 
				 	}
			
 
				 
			
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -396,7 +396,7 @@ static long ptrace_hbp_idx_to_num(int idx)
 
				 /*
			
 
				  * Handle hitting a HW-breakpoint.
			
 
				  */
			
 
				-static void ptrace_hbptriggered(struct perf_event *bp, int unused,
			
 
				+static void ptrace_hbptriggered(struct perf_event *bp,
			
 
				 				     struct perf_sample_data *data,
			
 
				 				     struct pt_regs *regs)
			
 
				 {
			
@@ -479,7 +479,8 @@ static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type)
 
				 	attr.bp_type	= type;
			
 
				 	attr.disabled	= 1;
			
 
				 
			
 
				-	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk);
			
 
				+	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL,
			
 
				+					   tsk);
			
 
				 }
			
 
				 
			
 
				 static int ptrace_gethbpregs(struct task_struct *tsk, long num,
			
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -183,7 +183,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
 
				 	unsigned int address, destreg, data, type;
			
 
				 	unsigned int res = 0;
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
			
 
				 
			
 
				 	if (current->pid != previous_pid) {
			
 
				 		pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
			
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -318,11 +318,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
				 	fault = __do_page_fault(mm, addr, fsr, tsk);
			
 
				 	up_read(&mm->mmap_sem);
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr);
			
 
				+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
			
 
				 	if (fault & VM_FAULT_MAJOR)
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr);
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr);
			
 
				 	else if (fault & VM_FAULT_MINOR)
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr);
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr);
			
 
				 
			
 
				 	/*
			
 
				 	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
			
--- a/arch/mips/include/asm/stacktrace.h
+++ b/arch/mips/include/asm/stacktrace.h
@@ -7,6 +7,10 @@
 
				 extern int raw_show_trace;
			
 
				 extern unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
			
 
				 				  unsigned long pc, unsigned long *ra);
			
 
				+extern unsigned long unwind_stack_by_address(unsigned long stack_page,
			
 
				+					     unsigned long *sp,
			
 
				+					     unsigned long pc,
			
 
				+					     unsigned long *ra);
			
 
				 #else
			
 
				 #define raw_show_trace 1
			
 
				 static inline unsigned long unwind_stack(struct task_struct *task,
			
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -527,7 +527,7 @@ handle_associated_event(struct cpu_hw_events *cpuc,
 
				 	if (!mipspmu_event_set_period(event, hwc, idx))
			
 
				 		return;
			
 
				 
			
 
				-	if (perf_event_overflow(event, 0, data, regs))
			
 
				+	if (perf_event_overflow(event, data, regs))
			
 
				 		mipspmu->disable_event(idx);
			
 
				 }
			
 
				 
			
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -377,6 +377,20 @@ static const struct mips_perf_event mipsxxcore_cache_map
 
				 		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				 	},
			
 
				 },
			
 
				+[C(NODE)] = {
			
 
				+	[C(OP_READ)] = {
			
 
				+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+	},
			
 
				+	[C(OP_WRITE)] = {
			
 
				+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+	},
			
 
				+	[C(OP_PREFETCH)] = {
			
 
				+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+	},
			
 
				+},
			
 
				 };
			
 
				 
			
 
				 /* 74K core has completely different cache event map. */
			
@@ -480,6 +494,20 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map
 
				 		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				 	},
			
 
				 },
			
 
				+[C(NODE)] = {
			
 
				+	[C(OP_READ)] = {
			
 
				+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+	},
			
 
				+	[C(OP_WRITE)] = {
			
 
				+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+	},
			
 
				+	[C(OP_PREFETCH)] = {
			
 
				+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
			
 
				+	},
			
 
				+},
			
 
				 };
			
 
				 
			
 
				 #ifdef CONFIG_MIPS_MT_SMP
			
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -373,18 +373,18 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 
				 
			
 
				 
			
 
				 #ifdef CONFIG_KALLSYMS
			
 
				-/* used by show_backtrace() */
			
 
				-unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
			
 
				-			   unsigned long pc, unsigned long *ra)
			
 
				+/* generic stack unwinding function */
			
 
				+unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
			
 
				+					      unsigned long *sp,
			
 
				+					      unsigned long pc,
			
 
				+					      unsigned long *ra)
			
 
				 {
			
 
				-	unsigned long stack_page;
			
 
				 	struct mips_frame_info info;
			
 
				 	unsigned long size, ofs;
			
 
				 	int leaf;
			
 
				 	extern void ret_from_irq(void);
			
 
				 	extern void ret_from_exception(void);
			
 
				 
			
 
				-	stack_page = (unsigned long)task_stack_page(task);
			
 
				 	if (!stack_page)
			
 
				 		return 0;
			
 
				 
			
@@ -443,6 +443,15 @@ unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
 
				 	*ra = 0;
			
 
				 	return __kernel_text_address(pc) ? pc : 0;
			
 
				 }
			
 
				+EXPORT_SYMBOL(unwind_stack_by_address);
			
 
				+
			
 
				+/* used by show_backtrace() */
			
 
				+unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
				+			   unsigned long pc, unsigned long *ra)
				+{
				+	/* Look up the task's stack page and hand over to the generic unwinder. */
				+	return unwind_stack_by_address((unsigned long)task_stack_page(task),
				+				       sp, pc, ra);
				+}
			
 
				 #endif
			
 
				 
			
 
				 /*
			
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -578,12 +578,12 @@ static int simulate_llsc(struct pt_regs *regs, unsigned int opcode)
 
				 {
			
 
				 	if ((opcode & OPCODE) == LL) {
			
 
				 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
			
 
				-				1, 0, regs, 0);
			
 
				+				1, regs, 0);
			
 
				 		return simulate_ll(regs, opcode);
			
 
				 	}
			
 
				 	if ((opcode & OPCODE) == SC) {
			
 
				 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
			
 
				-				1, 0, regs, 0);
			
 
				+				1, regs, 0);
			
 
				 		return simulate_sc(regs, opcode);
			
 
				 	}
			
 
				 
			
@@ -602,7 +602,7 @@ static int simulate_rdhwr(struct pt_regs *regs, unsigned int opcode)
 
				 		int rd = (opcode & RD) >> 11;
			
 
				 		int rt = (opcode & RT) >> 16;
			
 
				 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
			
 
				-				1, 0, regs, 0);
			
 
				+				1, regs, 0);
			
 
				 		switch (rd) {
			
 
				 		case 0:		/* CPU number */
			
 
				 			regs->regs[rt] = smp_processor_id();
			
@@ -640,7 +640,7 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
 
				 {
			
 
				 	if ((opcode & OPCODE) == SPEC0 && (opcode & FUNC) == SYNC) {
			
 
				 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
			
 
				-				1, 0, regs, 0);
			
 
				+				1, regs, 0);
			
 
				 		return 0;
			
 
				 	}
			
 
				 
			
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -111,8 +111,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 
				 	unsigned long value;
			
 
				 	unsigned int res;
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
			
 
				-		      1, 0, regs, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
			
 
				 
			
 
				 	/*
			
 
				 	 * This load never faults.
			
@@ -517,7 +516,7 @@ asmlinkage void do_ade(struct pt_regs *regs)
 
				 	mm_segment_t seg;
			
 
				 
			
 
				 	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,
			
 
				-			1, 0, regs, regs->cp0_badvaddr);
			
 
				+			1, regs, regs->cp0_badvaddr);
			
 
				 	/*
			
 
				 	 * Did we catch a fault trying to load an instruction?
			
 
				 	 * Or are we running in MIPS16 mode?
			
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -272,8 +272,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 
				 	}
			
 
				 
			
 
				       emul:
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
			
 
				-			1, 0, xcp, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0);
			
 
				 	MIPS_FPU_EMU_INC_STATS(emulated);
			
 
				 	switch (MIPSInst_OPCODE(ir)) {
			
 
				 	case ldc1_op:{
			
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -145,7 +145,7 @@ good_area:
 
				 	 * the fault.
			
 
				 	 */
			
 
				 	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
			
 
				-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
			
 
				 	if (unlikely(fault & VM_FAULT_ERROR)) {
			
 
				 		if (fault & VM_FAULT_OOM)
			
 
				 			goto out_of_memory;
			
@@ -154,12 +154,10 @@ good_area:
 
				 		BUG();
			
 
				 	}
			
 
				 	if (fault & VM_FAULT_MAJOR) {
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
			
 
				-				1, 0, regs, address);
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
			
 
				 		tsk->maj_flt++;
			
 
				 	} else {
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
			
 
				-				1, 0, regs, address);
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
			
 
				 		tsk->min_flt++;
			
 
				 	}
			
 
				 
			
--- a/arch/mips/oprofile/Makefile
+++ b/arch/mips/oprofile/Makefile
@@ -8,7 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 
				 		oprofilefs.o oprofile_stats.o \
			
 
				 		timer_int.o )
			
 
				 
			
 
				-oprofile-y				:= $(DRIVER_OBJS) common.o
			
 
				+oprofile-y				:= $(DRIVER_OBJS) common.o backtrace.o
			
 
				 
			
 
				 oprofile-$(CONFIG_CPU_MIPS32)		+= op_model_mipsxx.o
			
 
				 oprofile-$(CONFIG_CPU_MIPS64)		+= op_model_mipsxx.o
			
--- a/arch/mips/oprofile/backtrace.c
+++ b/arch/mips/oprofile/backtrace.c
@@ -0,0 +1,175 @@
 
				+#include <linux/oprofile.h>
			
 
				+#include <linux/sched.h>
			
 
				+#include <linux/mm.h>
			
 
				+#include <linux/uaccess.h>
			
 
				+#include <asm/ptrace.h>
			
 
				+#include <asm/stacktrace.h>
			
 
				+#include <linux/stacktrace.h>
			
 
				+#include <linux/kernel.h>
			
 
				+#include <asm/sections.h>
			
 
				+#include <asm/inst.h>
			
 
				+
			
 
				+/* Register state describing one frame while a call chain is walked. */
				+struct stackframe {
				+	unsigned long sp;	/* stack pointer */
				+	unsigned long pc;	/* program counter */
				+	unsigned long ra;	/* return address */
				+};
			
 
				+
			
 
				+/*
				+ * Copy one unsigned long from the user address @addr into @result.
				+ *
				+ * Returns 0 on success, -1 when access_ok() rejects the range and -3
				+ * when the copy itself fails.
				+ */
				+static inline int get_mem(unsigned long addr, unsigned long *result)
				+{
				+	unsigned long *src = (unsigned long *) addr;
				+
				+	if (!access_ok(VERIFY_READ, addr, sizeof(unsigned long)))
				+		return -1;
				+
				+	return __copy_from_user_inatomic(result, src,
				+					 sizeof(unsigned long)) ? -3 : 0;
				+}
			
 
				+
			
 
				+/*
			
 
				+ * These two instruction helpers were taken from process.c
			
 
				+ */
			
 
				+static inline int is_ra_save_ins(union mips_instruction *ip)
				+{
				+	/* Matches "sw $ra, offset($sp)" and "sd $ra, offset($sp)". */
				+	if (ip->i_format.rs != 29 || ip->i_format.rt != 31)
				+		return 0;
				+
				+	switch (ip->i_format.opcode) {
				+	case sw_op:
				+	case sd_op:
				+		return 1;
				+	default:
				+		return 0;
				+	}
				+}
			
 
				+
			
 
				+static inline int is_sp_move_ins(union mips_instruction *ip)
				+{
				+	/*
				+	 * Matches "addiu sp,sp,imm" and "daddiu sp,sp,imm".  The sign of
				+	 * the immediate is not examined here.
				+	 */
				+	const int on_sp = ip->i_format.rs == 29 && ip->i_format.rt == 29;
				+	const int is_add = ip->i_format.opcode == addiu_op ||
				+			   ip->i_format.opcode == daddiu_op;
				+
				+	return on_sp && is_add;
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Looks for specific instructions that mark the end of a function.
			
 
				+ * This usually means we ran into the code area of the previous function.
			
 
				+ */
			
 
				+static inline int is_end_of_function_marker(union mips_instruction *ip)
				+{
				+	/*
				+	 * "jr ra"
				+	 * NOTE(review): only the func and rs fields are compared; the
				+	 * major opcode is not checked - confirm that this is intended.
				+	 */
				+	const int is_jr_ra = ip->r_format.func == jr_op &&
				+			     ip->r_format.rs == 31;
				+	/* "lui gp" */
				+	const int is_lui_gp = ip->i_format.opcode == lui_op &&
				+			      ip->i_format.rt == 28;
				+
				+	return is_jr_ra || is_lui_gp;
				+}
			
 
				+
			
 
				+/*
			
 
				+ * TODO for userspace stack unwinding:
			
 
				+ * - handle cases where the stack is adjusted inside a function
			
 
				+ *     (generally doesn't happen)
			
 
				+ * - find optimal value for max_instr_check
			
 
				+ * - try to find a way to handle leaf functions
			
 
				+ */
			
 
				+
			
 
				+/*
				+ * Step @old_frame one level up a user-space call chain.
				+ *
				+ * Instructions are scanned backwards from the frame's pc for a stack
				+ * adjustment ("addiu/daddiu sp,sp,imm") and a $ra spill
				+ * ("sw/sd ra,offset(sp)").  When both are found the frame is replaced
				+ * by the next one; on any failure @old_frame is left untouched.
				+ *
				+ * @max_instr_check bounds how far back from pc the scan may go.  It is
				+ * applied as a byte distance, not as an instruction count.
				+ *
				+ * Returns 0 on success, or a negative value:
				+ *   -9   pc, sp or ra of the incoming frame is zero
				+ *   -11  an instruction could not be read
				+ *   -1   the scan ended without finding both instructions
				+ *   -13  the saved ra could not be read
				+ *   -14  the new sp could not be read
				+ *   -2   the new sp is greater than the old one
				+ */
				+static inline int unwind_user_frame(struct stackframe *old_frame,
				+				    const unsigned int max_instr_check)
				+{
				+	struct stackframe new_frame = *old_frame;
				+	off_t ra_offset = 0;
				+	size_t stack_size = 0;
				+	unsigned long addr;
				+
				+	if (old_frame->pc == 0 || old_frame->sp == 0 || old_frame->ra == 0)
				+		return -9;
				+
				+	/* Walk back one whole instruction at a time, never a single byte. */
				+	for (addr = new_frame.pc;
				+	     (addr + max_instr_check > new_frame.pc) &&
				+	     (!ra_offset || !stack_size);
				+	     addr -= sizeof(union mips_instruction)) {
				+		/*
				+		 * get_mem() always stores a full unsigned long, which on
				+		 * 64-bit kernels is wider than a 32-bit MIPS instruction.
				+		 * Fetch into storage of that size and decode its leading
				+		 * bytes, so nothing is written past the instruction.
				+		 */
				+		union {
				+			unsigned long word;
				+			union mips_instruction insn;
				+		} fetch;
				+		union mips_instruction *ip = &fetch.insn;
				+
				+		if (get_mem(addr, &fetch.word))
				+			return -11;
				+
				+		if (is_sp_move_ins(ip)) {
				+			int stack_adjustment = ip->i_format.simmediate;
				+			if (stack_adjustment > 0)
				+				/* This marks the end of the previous function,
				+				   which means we overran. */
				+				break;
				+			/*
				+			 * Convert at full width: going through a plain
				+			 * "unsigned" would zero-extend the negative
				+			 * adjustment on 64-bit kernels.
				+			 */
				+			stack_size = (unsigned long) stack_adjustment;
				+		} else if (is_ra_save_ins(ip)) {
				+			int ra_slot = ip->i_format.simmediate;
				+			if (ra_slot < 0)
				+				/* This shouldn't happen. */
				+				break;
				+			ra_offset = ra_slot;
				+		} else if (is_end_of_function_marker(ip))
				+			break;
				+	}
				+
				+	if (!ra_offset || !stack_size)
				+		return -1;
				+
				+	/* Both values are known to be non-zero from here on. */
				+	if (get_mem(old_frame->sp + ra_offset, &new_frame.ra))
				+		return -13;
				+
				+	/*
				+	 * NOTE(review): the word stored at sp + stack_size is taken as the
				+	 * new sp, and a larger sp is then rejected - confirm both against
				+	 * the intended frame layout.
				+	 */
				+	if (get_mem(old_frame->sp + stack_size, &new_frame.sp))
				+		return -14;
				+
				+	if (new_frame.sp > old_frame->sp)
				+		return -2;
				+
				+	new_frame.pc = old_frame->ra;
				+	*old_frame = new_frame;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Report up to @depth return addresses of a user-space call chain via
				+ * oprofile_add_trace().  The walk stops when a frame cannot be unwound
				+ * or, after reporting it, when its sp lies outside
				+ * [low_addr, low_addr + THREAD_SIZE].
				+ */
				+static inline void do_user_backtrace(unsigned long low_addr,
				+				     struct stackframe *frame,
				+				     unsigned int depth)
				+{
				+	const unsigned int max_instr_check = 512;
				+	const unsigned long high_addr = low_addr + THREAD_SIZE;
				+
				+	for (; depth != 0; depth--) {
				+		if (unwind_user_frame(frame, max_instr_check))
				+			break;
				+
				+		oprofile_add_trace(frame->ra);
				+
				+		if (frame->sp < low_addr || frame->sp > high_addr)
				+			break;
				+	}
				+}
			
 
				+
			
 
				+#ifdef CONFIG_KALLSYMS
				+/*
				+ * Report up to @depth return addresses of a kernel call chain via
				+ * oprofile_add_trace(), unwinding with unwind_stack_by_address() until
				+ * it yields a zero pc.
				+ */
				+static inline void do_kernel_backtrace(unsigned long low_addr,
				+				       struct stackframe *frame,
				+				       unsigned int depth)
				+{
				+	for (; depth != 0 && frame->pc != 0; depth--) {
				+		frame->pc = unwind_stack_by_address(low_addr, &frame->sp,
				+						    frame->pc, &frame->ra);
				+		oprofile_add_trace(frame->ra);
				+	}
				+}
				+#else
				+/* No kernel backtrace is produced when CONFIG_KALLSYMS is not set. */
				+static inline void do_kernel_backtrace(unsigned long low_addr,
				+				       struct stackframe *frame,
				+				       unsigned int depth) { }
				+#endif
			
 
				+
			
 
				+/*
				+ * Backtrace entry point: seed a frame from the sampled registers and
				+ * walk either the user or the kernel call chain, at most @depth deep.
				+ */
				+void notrace op_mips_backtrace(struct pt_regs *const regs, unsigned int depth)
				+{
				+	struct stackframe frame;
				+	unsigned long low_addr;
				+
				+	frame.sp = regs->regs[29];
				+	frame.pc = regs->cp0_epc;
				+	frame.ra = regs->regs[31];
				+
				+	/*
				+	 * NOTE(review): ALIGN() presumably rounds sp up to a THREAD_SIZE
				+	 * boundary; confirm that this is the intended "low" bound.
				+	 */
				+	low_addr = ALIGN(frame.sp, THREAD_SIZE);
				+
				+	if (user_mode(regs))
				+		do_user_backtrace(low_addr, &frame, depth);
				+	else
				+		do_kernel_backtrace(low_addr, &frame, depth);
				+}
			
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -115,6 +115,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 
				 	ops->start		= op_mips_start;
			
 
				 	ops->stop		= op_mips_stop;
			
 
				 	ops->cpu_type		= lmodel->cpu_type;
			
 
				+	ops->backtrace		= op_mips_backtrace;
			
 
				 
			
 
				 	printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
			
 
				 	       lmodel->cpu_type);
			
--- a/arch/mips/oprofile/op_impl.h
+++ b/arch/mips/oprofile/op_impl.h
@@ -36,4 +36,6 @@ struct op_mips_model {
 
				 	unsigned char num_counters;
			
 
				 };
			
 
				 
			
 
				+void op_mips_backtrace(struct pt_regs * const regs, unsigned int depth);
			
 
				+
			
 
				 #endif
			
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -78,14 +78,14 @@ extern void ppc_warn_emulated_print(const char *type);
 
				 #define PPC_WARN_EMULATED(type, regs)					\
			
 
				 	do {								\
			
 
				 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,		\
			
 
				-			1, 0, regs, 0);					\
			
 
				+			1, regs, 0);					\
			
 
				 		__PPC_WARN_EMULATED(type);				\
			
 
				 	} while (0)
			
 
				 
			
 
				 #define PPC_WARN_ALIGNMENT(type, regs)					\
			
 
				 	do {								\
			
 
				 		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,		\
			
 
				-			1, 0, regs, regs->dar);				\
			
 
				+			1, regs, regs->dar);				\
			
 
				 		__PPC_WARN_EMULATED(type);				\
			
 
				 	} while (0)
			
 
				 
			
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -57,7 +57,7 @@ void hw_breakpoint_pmu_read(struct perf_event *bp);
 
				 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
			
 
				 
			
 
				 extern struct pmu perf_ops_bp;
			
 
				-extern void ptrace_triggered(struct perf_event *bp, int nmi,
			
 
				+extern void ptrace_triggered(struct perf_event *bp,
			
 
				 			struct perf_sample_data *data, struct pt_regs *regs);
			
 
				 static inline void hw_breakpoint_disable(void)
			
 
				 {
			
--- a/arch/powerpc/kernel/e500-pmu.c
+++ b/arch/powerpc/kernel/e500-pmu.c
@@ -75,6 +75,11 @@ static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
				 		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				 		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				 	},
			
 
				+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
			
 
				+		[C(OP_READ)] = {	-1,		-1 	},
			
 
				+		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				+		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static int num_events = 128;
			
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -388,6 +388,11 @@ static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
				 		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				 		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				 	},
			
 
				+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
			
 
				+		[C(OP_READ)] = {	-1,		-1	},
			
 
				+		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				+		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 struct power_pmu mpc7450_pmu = {
			
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1207,7 +1207,7 @@ struct pmu power_pmu = {
 
				  * here so there is no possibility of being interrupted.
			
 
				  */
			
 
				 static void record_and_restart(struct perf_event *event, unsigned long val,
			
 
				-			       struct pt_regs *regs, int nmi)
			
 
				+			       struct pt_regs *regs)
			
 
				 {
			
 
				 	u64 period = event->hw.sample_period;
			
 
				 	s64 prev, delta, left;
			
@@ -1258,7 +1258,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
				 		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
			
 
				 			perf_get_data_addr(regs, &data.addr);
			
 
				 
			
 
				-		if (perf_event_overflow(event, nmi, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			power_pmu_stop(event, 0);
			
 
				 	}
			
 
				 }
			
@@ -1346,7 +1346,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 
				 		if ((int)val < 0) {
			
 
				 			/* event has overflowed */
			
 
				 			found = 1;
			
 
				-			record_and_restart(event, val, regs, nmi);
			
 
				+			record_and_restart(event, val, regs);
			
 
				 		}
			
 
				 	}
			
 
				 
			
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -568,7 +568,7 @@ static struct pmu fsl_emb_pmu = {
 
				  * here so there is no possibility of being interrupted.
			
 
				  */
			
 
				 static void record_and_restart(struct perf_event *event, unsigned long val,
			
 
				-			       struct pt_regs *regs, int nmi)
			
 
				+			       struct pt_regs *regs)
			
 
				 {
			
 
				 	u64 period = event->hw.sample_period;
			
 
				 	s64 prev, delta, left;
			
@@ -616,7 +616,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
				 		perf_sample_data_init(&data, 0);
			
 
				 		data.period = event->hw.last_period;
			
 
				 
			
 
				-		if (perf_event_overflow(event, nmi, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			fsl_emb_pmu_stop(event, 0);
			
 
				 	}
			
 
				 }
			
@@ -644,7 +644,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 
				 			if (event) {
			
 
				 				/* event has overflowed */
			
 
				 				found = 1;
			
 
				-				record_and_restart(event, val, regs, nmi);
			
 
				+				record_and_restart(event, val, regs);
			
 
				 			} else {
			
 
				 				/*
			
 
				 				 * Disabled counter is negative,
			
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -587,6 +587,11 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
				 		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				 		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				 	},
			
 
				+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
			
 
				+		[C(OP_READ)] = {	-1,		-1	},
			
 
				+		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				+		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static struct power_pmu power4_pmu = {
			
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -653,6 +653,11 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
				 		[C(OP_WRITE)] = {	-1,		-1		},
			
 
				 		[C(OP_PREFETCH)] = {	-1,		-1		},
			
 
				 	},
			
 
				+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
			
 
				+		[C(OP_READ)] = {	-1,		-1		},
			
 
				+		[C(OP_WRITE)] = {	-1,		-1		},
			
 
				+		[C(OP_PREFETCH)] = {	-1,		-1		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static struct power_pmu power5p_pmu = {
			
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -595,6 +595,11 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
				 		[C(OP_WRITE)] = {	-1,		-1		},
			
 
				 		[C(OP_PREFETCH)] = {	-1,		-1		},
			
 
				 	},
			
 
				+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
			
 
				+		[C(OP_READ)] = {	-1,		-1		},
			
 
				+		[C(OP_WRITE)] = {	-1,		-1		},
			
 
				+		[C(OP_PREFETCH)] = {	-1,		-1		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static struct power_pmu power5_pmu = {
			
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -516,6 +516,11 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
				 		[C(OP_WRITE)] = {	-1,		-1		},
			
 
				 		[C(OP_PREFETCH)] = {	-1,		-1		},
			
 
				 	},
			
 
				+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
			
 
				+		[C(OP_READ)] = {	-1,		-1		},
			
 
				+		[C(OP_WRITE)] = {	-1,		-1		},
			
 
				+		[C(OP_PREFETCH)] = {	-1,		-1		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static struct power_pmu power6_pmu = {
			
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -342,6 +342,11 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
				 		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				 		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				 	},
			
 
				+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
			
 
				+		[C(OP_READ)] = {	-1,		-1	},
			
 
				+		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				+		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static struct power_pmu power7_pmu = {
			
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -467,6 +467,11 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
				 		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				 		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				 	},
			
 
				+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
			
 
				+		[C(OP_READ)] = {	-1,		-1	},
			
 
				+		[C(OP_WRITE)] = {	-1,		-1	},
			
 
				+		[C(OP_PREFETCH)] = {	-1,		-1	},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static struct power_pmu ppc970_pmu = {
			
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -882,7 +882,7 @@ void user_disable_single_step(struct task_struct *task)
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_HAVE_HW_BREAKPOINT
			
 
				-void ptrace_triggered(struct perf_event *bp, int nmi,
			
 
				+void ptrace_triggered(struct perf_event *bp,
			
 
				 		      struct perf_sample_data *data, struct pt_regs *regs)
			
 
				 {
			
 
				 	struct perf_event_attr attr;
			
@@ -973,7 +973,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 
				 								&attr.bp_type);
			
 
				 
			
 
				 	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
			
 
				-							ptrace_triggered, task);
			
 
				+					       ptrace_triggered, NULL, task);
			
 
				 	if (IS_ERR(bp)) {
			
 
				 		thread->ptrace_bps[0] = NULL;
			
 
				 		ptrace_put_breakpoints(task);
			
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -544,7 +544,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);
 
				 
			
 
				 #endif /* 32 vs 64 bit */
			
 
				 
			
 
				-void set_irq_work_pending(void)
			
 
				+void arch_irq_work_raise(void)
			
 
				 {
			
 
				 	preempt_disable();
			
 
				 	set_irq_work_pending_flag();
			
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -174,7 +174,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 
				 		die("Weird page fault", regs, SIGSEGV);
			
 
				 	}
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	/* When running in the kernel we expect faults to occur only to
			
 
				 	 * addresses in user space.  All other faults represent errors in the
			
@@ -320,7 +320,7 @@ good_area:
 
				 	}
			
 
				 	if (ret & VM_FAULT_MAJOR) {
			
 
				 		current->maj_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
			
 
				 				     regs, address);
			
 
				 #ifdef CONFIG_PPC_SMLPAR
			
 
				 		if (firmware_has_feature(FW_FEATURE_CMO)) {
			
@@ -331,7 +331,7 @@ good_area:
 
				 #endif
			
 
				 	} else {
			
 
				 		current->min_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
			
 
				 				     regs, address);
			
 
				 	}
			
 
				 	up_read(&mm->mmap_sem);
			
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -299,7 +299,7 @@ static inline int do_exception(struct pt_regs *regs, int access,
 
				 		goto out;
			
 
				 
			
 
				 	address = trans_exc_code & __FAIL_ADDR_MASK;
			
 
				-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
			
 
				 	flags = FAULT_FLAG_ALLOW_RETRY;
			
 
				 	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
			
 
				 		flags |= FAULT_FLAG_WRITE;
			
@@ -345,11 +345,11 @@ retry:
 
				 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
			
 
				 		if (fault & VM_FAULT_MAJOR) {
			
 
				 			tsk->maj_flt++;
			
 
				-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
			
 
				+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
			
 
				 				      regs, address);
			
 
				 		} else {
			
 
				 			tsk->min_flt++;
			
 
				-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
			
 
				+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
			
 
				 				      regs, address);
			
 
				 		}
			
 
				 		if (fault & VM_FAULT_RETRY) {
			
--- a/arch/sh/kernel/cpu/sh4/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -180,6 +180,21 @@ static const int sh7750_cache_events
 
				 			[ C(RESULT_MISS)   ] = -1,
			
 
				 		},
			
 
				 	},
			
 
				+
			
 
				+	[ C(NODE) ] = {
			
 
				+		[ C(OP_READ) ] = {
			
 
				+			[ C(RESULT_ACCESS) ] = -1,
			
 
				+			[ C(RESULT_MISS)   ] = -1,
			
 
				+		},
			
 
				+		[ C(OP_WRITE) ] = {
			
 
				+			[ C(RESULT_ACCESS) ] = -1,
			
 
				+			[ C(RESULT_MISS)   ] = -1,
			
 
				+		},
			
 
				+		[ C(OP_PREFETCH) ] = {
			
 
				+			[ C(RESULT_ACCESS) ] = -1,
			
 
				+			[ C(RESULT_MISS)   ] = -1,
			
 
				+		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static int sh7750_event_map(int event)
			
--- a/arch/sh/kernel/cpu/sh4a/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -205,6 +205,21 @@ static const int sh4a_cache_events
 
				 			[ C(RESULT_MISS)   ] = -1,
			
 
				 		},
			
 
				 	},
			
 
				+
			
 
				+	[ C(NODE) ] = {
			
 
				+		[ C(OP_READ) ] = {
			
 
				+			[ C(RESULT_ACCESS) ] = -1,
			
 
				+			[ C(RESULT_MISS)   ] = -1,
			
 
				+		},
			
 
				+		[ C(OP_WRITE) ] = {
			
 
				+			[ C(RESULT_ACCESS) ] = -1,
			
 
				+			[ C(RESULT_MISS)   ] = -1,
			
 
				+		},
			
 
				+		[ C(OP_PREFETCH) ] = {
			
 
				+			[ C(RESULT_ACCESS) ] = -1,
			
 
				+			[ C(RESULT_MISS)   ] = -1,
			
 
				+		},
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static int sh4a_event_map(int event)
			
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -63,7 +63,7 @@ static inline int put_stack_long(struct task_struct *task, int offset,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-void ptrace_triggered(struct perf_event *bp, int nmi,
			
 
				+void ptrace_triggered(struct perf_event *bp,
			
 
				 		      struct perf_sample_data *data, struct pt_regs *regs)
			
 
				 {
			
 
				 	struct perf_event_attr attr;
			
@@ -91,7 +91,8 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
 
				 		attr.bp_len = HW_BREAKPOINT_LEN_2;
			
 
				 		attr.bp_type = HW_BREAKPOINT_R;
			
 
				 
			
 
				-		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
			
 
				+		bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
			
 
				+						 NULL, tsk);
			
 
				 		if (IS_ERR(bp))
			
 
				 			return PTR_ERR(bp);
			
 
				 
			
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -393,7 +393,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 
				 	 */
			
 
				 	if (!expected) {
			
 
				 		unaligned_fixups_notify(current, instruction, regs);
			
 
				-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0,
			
 
				+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1,
			
 
				 			      regs, address);
			
 
				 	}
			
 
				 
			
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -434,7 +434,7 @@ static int misaligned_load(struct pt_regs *regs,
 
				 		return error;
			
 
				 	}
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	destreg = (opcode >> 4) & 0x3f;
			
 
				 	if (user_mode(regs)) {
			
@@ -512,7 +512,7 @@ static int misaligned_store(struct pt_regs *regs,
 
				 		return error;
			
 
				 	}
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	srcreg = (opcode >> 4) & 0x3f;
			
 
				 	if (user_mode(regs)) {
			
@@ -588,7 +588,7 @@ static int misaligned_fpu_load(struct pt_regs *regs,
 
				 		return error;
			
 
				 	}
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	destreg = (opcode >> 4) & 0x3f;
			
 
				 	if (user_mode(regs)) {
			
@@ -665,7 +665,7 @@ static int misaligned_fpu_store(struct pt_regs *regs,
 
				 		return error;
			
 
				 	}
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	srcreg = (opcode >> 4) & 0x3f;
			
 
				 	if (user_mode(regs)) {
			
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -620,7 +620,7 @@ int do_fpu_inst(unsigned short inst, struct pt_regs *regs)
 
				 	struct task_struct *tsk = current;
			
 
				 	struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu);
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
			
 
				 
			
 
				 	if (!(task_thread_info(tsk)->status & TS_USEDFPU)) {
			
 
				 		/* initialize once. */
			
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -160,7 +160,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 
				 	if ((regs->sr & SR_IMASK) != SR_IMASK)
			
 
				 		local_irq_enable();
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	/*
			
 
				 	 * If we're in an interrupt, have no user context or are running
			
@@ -210,11 +210,11 @@ good_area:
 
				 	}
			
 
				 	if (fault & VM_FAULT_MAJOR) {
			
 
				 		tsk->maj_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
			
 
				 				     regs, address);
			
 
				 	} else {
			
 
				 		tsk->min_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
			
 
				 				     regs, address);
			
 
				 	}
			
 
				 
			
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 
				 	/* Not an IO address, so reenable interrupts */
			
 
				 	local_irq_enable();
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	/*
			
 
				 	 * If we're in an interrupt or have no user
			
@@ -200,11 +200,11 @@ good_area:
 
				 
			
 
				 	if (fault & VM_FAULT_MAJOR) {
			
 
				 		tsk->maj_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
			
 
				 				     regs, address);
			
 
				 	} else {
			
 
				 		tsk->min_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
			
 
				 				     regs, address);
			
 
				 	}
			
 
				 
			
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -246,6 +246,20 @@ static const cache_map_t ultra3_cache_map = {
 
				 		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
			
 
				 	},
			
 
				 },
			
 
				+[C(NODE)] = {
			
 
				+	[C(OP_READ)] = {
			
 
				+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
			
 
				+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
			
 
				+	},
			
 
				+	[ C(OP_WRITE) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
			
 
				+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
			
 
				+	},
			
 
				+	[ C(OP_PREFETCH) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
			
 
				+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
			
 
				+	},
			
 
				+},
			
 
				 };
			
 
				 
			
 
				 static const struct sparc_pmu ultra3_pmu = {
			
@@ -361,6 +375,20 @@ static const cache_map_t niagara1_cache_map = {
 
				 		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
			
 
				 	},
			
 
				 },
			
 
				+[C(NODE)] = {
			
 
				+	[C(OP_READ)] = {
			
 
				+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
			
 
				+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
			
 
				+	},
			
 
				+	[ C(OP_WRITE) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
			
 
				+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
			
 
				+	},
			
 
				+	[ C(OP_PREFETCH) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
			
 
				+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
			
 
				+	},
			
 
				+},
			
 
				 };
			
 
				 
			
 
				 static const struct sparc_pmu niagara1_pmu = {
			
@@ -473,6 +501,20 @@ static const cache_map_t niagara2_cache_map = {
 
				 		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
			
 
				 	},
			
 
				 },
			
 
				+[C(NODE)] = {
			
 
				+	[C(OP_READ)] = {
			
 
				+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
			
 
				+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
			
 
				+	},
			
 
				+	[ C(OP_WRITE) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
			
 
				+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
			
 
				+	},
			
 
				+	[ C(OP_PREFETCH) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
			
 
				+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
			
 
				+	},
			
 
				+},
			
 
				 };
			
 
				 
			
 
				 static const struct sparc_pmu niagara2_pmu = {
			
@@ -1277,7 +1319,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 
				 		if (!sparc_perf_event_set_period(event, hwc, idx))
			
 
				 			continue;
			
 
				 
			
 
				-		if (perf_event_overflow(event, 1, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			sparc_pmu_stop(event, 0);
			
 
				 	}
			
 
				 
			
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -247,7 +247,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 
				 		unsigned long addr = compute_effective_address(regs, insn);
			
 
				 		int err;
			
 
				 
			
 
				-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
			
 
				+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
			
 
				 		switch (dir) {
			
 
				 		case load:
			
 
				 			err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
			
@@ -338,7 +338,7 @@ asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 
				 		}
			
 
				 
			
 
				 		addr = compute_effective_address(regs, insn);
			
 
				-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
			
 
				+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
			
 
				 		switch(dir) {
			
 
				 		case load:
			
 
				 			err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
			
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -317,7 +317,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 
				 
			
 
				 		addr = compute_effective_address(regs, insn,
			
 
				 						 ((insn >> 25) & 0x1f));
			
 
				-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
			
 
				+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
			
 
				 		switch (asi) {
			
 
				 		case ASI_NL:
			
 
				 		case ASI_AIUPL:
			
@@ -384,7 +384,7 @@ int handle_popc(u32 insn, struct pt_regs *regs)
 
				 	int ret, i, rd = ((insn >> 25) & 0x1f);
			
 
				 	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
			
 
				 	                        
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
			
 
				 	if (insn & 0x2000) {
			
 
				 		maybe_flush_windows(0, 0, rd, from_kernel);
			
 
				 		value = sign_extend_imm13(insn);
			
@@ -431,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
 
				 	int asi = decode_asi(insn, regs);
			
 
				 	int flag = (freg < 32) ? FPRS_DL : FPRS_DU;
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
			
 
				 
			
 
				 	save_and_clear_fpu();
			
 
				 	current_thread_info()->xfsr[0] &= ~0x1c000;
			
@@ -554,7 +554,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs)
 
				 	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
			
 
				 	unsigned long *reg;
			
 
				 	                        
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
			
 
				 
			
 
				 	maybe_flush_windows(0, 0, rd, from_kernel);
			
 
				 	reg = fetch_reg_addr(rd, regs);
			
@@ -586,7 +586,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 
				 
			
 
				 	if (tstate & TSTATE_PRIV)
			
 
				 		die_if_kernel("lddfmna from kernel", regs);
			
 
				-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar);
			
 
				+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
			
 
				 	if (test_thread_flag(TIF_32BIT))
			
 
				 		pc = (u32)pc;
			
 
				 	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
			
@@ -647,7 +647,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 
				 
			
 
				 	if (tstate & TSTATE_PRIV)
			
 
				 		die_if_kernel("stdfmna from kernel", regs);
			
 
				-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar);
			
 
				+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
			
 
				 	if (test_thread_flag(TIF_32BIT))
			
 
				 		pc = (u32)pc;
			
 
				 	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
			
--- a/arch/sparc/kernel/visemul.c
+++ b/arch/sparc/kernel/visemul.c
@@ -802,7 +802,7 @@ int vis_emul(struct pt_regs *regs, unsigned int insn)
 
				 
			
 
				 	BUG_ON(regs->tstate & TSTATE_PRIV);
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
			
 
				 
			
 
				 	if (test_thread_flag(TIF_32BIT))
			
 
				 		pc = (u32)pc;
			
--- a/arch/sparc/math-emu/math_32.c
+++ b/arch/sparc/math-emu/math_32.c
@@ -164,7 +164,7 @@ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt)
 
				 	int retcode = 0;                               /* assume all succeed */
			
 
				 	unsigned long insn;
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
			
 
				 
			
 
				 #ifdef DEBUG_MATHEMU
			
 
				 	printk("In do_mathemu()... pc is %08lx\n", regs->pc);
			
--- a/arch/sparc/math-emu/math_64.c
+++ b/arch/sparc/math-emu/math_64.c
@@ -184,7 +184,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f)
 
				 
			
 
				 	if (tstate & TSTATE_PRIV)
			
 
				 		die_if_kernel("unfinished/unimplemented FPop from kernel", regs);
			
 
				-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
			
 
				 	if (test_thread_flag(TIF_32BIT))
			
 
				 		pc = (u32)pc;
			
 
				 	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
			
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -251,7 +251,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
 
				         if (in_atomic() || !mm)
			
 
				                 goto no_context;
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	down_read(&mm->mmap_sem);
			
 
				 
			
@@ -301,12 +301,10 @@ good_area:
 
				 	}
			
 
				 	if (fault & VM_FAULT_MAJOR) {
			
 
				 		current->maj_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
			
 
				-			      regs, address);
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
			
 
				 	} else {
			
 
				 		current->min_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
			
 
				-			      regs, address);
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
			
 
				 	}
			
 
				 	up_read(&mm->mmap_sem);
			
 
				 	return;
			
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -325,7 +325,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 
				 	if (in_atomic() || !mm)
			
 
				 		goto intr_or_no_mm;
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	if (!down_read_trylock(&mm->mmap_sem)) {
			
 
				 		if ((regs->tstate & TSTATE_PRIV) &&
			
@@ -433,12 +433,10 @@ good_area:
 
				 	}
			
 
				 	if (fault & VM_FAULT_MAJOR) {
			
 
				 		current->maj_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
			
 
				-			      regs, address);
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
			
 
				 	} else {
			
 
				 		current->min_flt++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
			
 
				-			      regs, address);
			
 
				+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
			
 
				 	}
			
 
				 	up_read(&mm->mmap_sem);
			
 
				 
			
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -60,23 +60,24 @@ static inline void native_halt(void)
 
				 #include <asm/paravirt.h>
			
 
				 #else
			
 
				 #ifndef __ASSEMBLY__
			
 
				+#include <linux/types.h>
			
 
				 
			
 
				-static inline unsigned long arch_local_save_flags(void)
			
 
				+static inline notrace unsigned long arch_local_save_flags(void)
			
 
				 {
			
 
				 	return native_save_fl();
			
 
				 }
			
 
				 
			
 
				-static inline void arch_local_irq_restore(unsigned long flags)
			
 
				+static inline notrace void arch_local_irq_restore(unsigned long flags)
			
 
				 {
			
 
				 	native_restore_fl(flags);
			
 
				 }
			
 
				 
			
 
				-static inline void arch_local_irq_disable(void)
			
 
				+static inline notrace void arch_local_irq_disable(void)
			
 
				 {
			
 
				 	native_irq_disable();
			
 
				 }
			
 
				 
			
 
				-static inline void arch_local_irq_enable(void)
			
 
				+static inline notrace void arch_local_irq_enable(void)
			
 
				 {
			
 
				 	native_irq_enable();
			
 
				 }
			
@@ -102,7 +103,7 @@ static inline void halt(void)
 
				 /*
			
 
				  * For spinlocks, etc:
			
 
				  */
			
 
				-static inline unsigned long arch_local_irq_save(void)
			
 
				+static inline notrace unsigned long arch_local_irq_save(void)
			
 
				 {
			
 
				 	unsigned long flags = arch_local_save_flags();
			
 
				 	arch_local_irq_disable();
			
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -152,6 +152,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 
				 	(regs)->bp = caller_frame_pointer();			\
			
 
				 	(regs)->cs = __KERNEL_CS;				\
			
 
				 	regs->flags = 0;					\
			
 
				+	asm volatile(						\
			
 
				+		_ASM_MOV "%%"_ASM_SP ", %0\n"			\
			
 
				+		: "=m" ((regs)->sp)				\
			
 
				+		:: "memory"					\
			
 
				+	);							\
			
 
				 }
			
 
				 
			
 
				 #else
			
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -101,6 +101,14 @@
 
				 #define P4_CONFIG_HT_SHIFT		63
			
 
				 #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
			
 
				 
			
 
				+/*
			
 
				+ * If an event has an alias it should be marked
			
 
				+ * with a special bit. (Don't forget to check
			
 
				+ * P4_PEBS_CONFIG_MASK and related bits on
			
 
				+ * modification.)
			
 
				+ */
			
 
				+#define P4_CONFIG_ALIASABLE		(1 << 9)
			
 
				+
			
 
				 /*
			
 
				  * The bits we allow to pass for RAW events
			
 
				  */
			
@@ -123,6 +131,31 @@
 
				 	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR))	| \
			
 
				 	(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
			
 
				 
			
 
				+/*
			
 
				+ * In case of event aliasing we need to preserve some
			
 
				+ * caller bits, otherwise the mapping won't be complete.
			
 
				+ */
			
 
				+#define P4_CONFIG_EVENT_ALIAS_MASK			  \
			
 
				+	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR)	| \
			
 
				+	 p4_config_pack_cccr(P4_CCCR_EDGE		| \
			
 
				+			     P4_CCCR_THRESHOLD_MASK	| \
			
 
				+			     P4_CCCR_COMPLEMENT		| \
			
 
				+			     P4_CCCR_COMPARE))
			
 
				+
			
 
				+#define  P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS		  \
			
 
				+	((P4_CONFIG_HT)					| \
			
 
				+	 p4_config_pack_escr(P4_ESCR_T0_OS		| \
			
 
				+			     P4_ESCR_T0_USR		| \
			
 
				+			     P4_ESCR_T1_OS		| \
			
 
				+			     P4_ESCR_T1_USR)		| \
			
 
				+	 p4_config_pack_cccr(P4_CCCR_OVF		| \
			
 
				+			     P4_CCCR_CASCADE		| \
			
 
				+			     P4_CCCR_FORCE_OVF		| \
			
 
				+			     P4_CCCR_THREAD_ANY		| \
			
 
				+			     P4_CCCR_OVF_PMI_T0		| \
			
 
				+			     P4_CCCR_OVF_PMI_T1		| \
			
 
				+			     P4_CONFIG_ALIASABLE))
			
 
				+
			
 
				 static inline bool p4_is_event_cascaded(u64 config)
			
 
				 {
			
 
				 	u32 cccr = p4_config_unpack_cccr(config);
			
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -555,6 +555,9 @@ struct __large_struct { unsigned long buf[100]; };
 
				 
			
 
				 #endif /* CONFIG_X86_WP_WORKS_OK */
			
 
				 
			
 
				+extern unsigned long
			
 
				+copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
			
 
				+
			
 
				 /*
			
 
				  * movsl can be slow when source and dest are not both 8-byte aligned
			
 
				  */
			
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -22,7 +22,6 @@
 
				 #include <linux/sched.h>
			
 
				 #include <linux/uaccess.h>
			
 
				 #include <linux/slab.h>
			
 
				-#include <linux/highmem.h>
			
 
				 #include <linux/cpu.h>
			
 
				 #include <linux/bitops.h>
			
 
				 
			
@@ -45,38 +44,27 @@ do {								\
 
				 #endif
			
 
				 
			
 
				 /*
			
 
				- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
			
 
				+ *          |   NHM/WSM    |      SNB     |
			
 
				+ * register -------------------------------
			
 
				+ *          |  HT  | no HT |  HT  | no HT |
			
 
				+ *-----------------------------------------
			
 
				+ * offcore  | core | core  | cpu  | core  |
			
 
				+ * lbr_sel  | core | core  | cpu  | core  |
			
 
				+ * ld_lat   | cpu  | core  | cpu  | core  |
			
 
				+ *-----------------------------------------
			
 
				+ *
			
 
				+ * Given that there is a small number of shared regs,
			
 
				+ * we can pre-allocate their slot in the per-cpu
			
 
				+ * per-core reg tables.
			
 
				  */
			
 
				-static unsigned long
			
 
				-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
			
 
				-{
			
 
				-	unsigned long offset, addr = (unsigned long)from;
			
 
				-	unsigned long size, len = 0;
			
 
				-	struct page *page;
			
 
				-	void *map;
			
 
				-	int ret;
			
 
				-
			
 
				-	do {
			
 
				-		ret = __get_user_pages_fast(addr, 1, 0, &page);
			
 
				-		if (!ret)
			
 
				-			break;
			
 
				-
			
 
				-		offset = addr & (PAGE_SIZE - 1);
			
 
				-		size = min(PAGE_SIZE - offset, n - len);
			
 
				-
			
 
				-		map = kmap_atomic(page);
			
 
				-		memcpy(to, map+offset, size);
			
 
				-		kunmap_atomic(map);
			
 
				-		put_page(page);
			
 
				+enum extra_reg_type {
			
 
				+	EXTRA_REG_NONE  = -1,	/* not used */
			
 
				 
			
 
				-		len  += size;
			
 
				-		to   += size;
			
 
				-		addr += size;
			
 
				+	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
			
 
				+	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
			
 
				 
			
 
				-	} while (len < n);
			
 
				-
			
 
				-	return len;
			
 
				-}
			
 
				+	EXTRA_REG_MAX		/* number of entries needed */
			
 
				+};
			
 
				 
			
 
				 struct event_constraint {
			
 
				 	union {
			
@@ -132,11 +120,10 @@ struct cpu_hw_events {
 
				 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
			
 
				 
			
 
				 	/*
			
 
				-	 * Intel percore register state.
			
 
				-	 * Coordinate shared resources between HT threads.
			
 
				+	 * manage shared (per-core, per-cpu) registers
			
 
				+	 * used on Intel NHM/WSM/SNB
			
 
				 	 */
			
 
				-	int				percore_used; /* Used by this CPU? */
			
 
				-	struct intel_percore		*per_core;
			
 
				+	struct intel_shared_regs	*shared_regs;
			
 
				 
			
 
				 	/*
			
 
				 	 * AMD specific bits
			
@@ -186,27 +173,46 @@ struct cpu_hw_events {
 
				 #define for_each_event_constraint(e, c)	\
			
 
				 	for ((e) = (c); (e)->weight; (e)++)
			
 
				 
			
 
				+/*
			
 
				+ * Per register state.
			
 
				+ */
			
 
				+struct er_account {
			
 
				+	raw_spinlock_t		lock;	/* per-core: protect structure */
			
 
				+	u64			config;	/* extra MSR config */
			
 
				+	u64			reg;	/* extra MSR number */
			
 
				+	atomic_t		ref;	/* reference count */
			
 
				+};
			
 
				+
			
 
				 /*
			
 
				  * Extra registers for specific events.
			
 
				+ *
			
 
				  * Some events need large masks and require external MSRs.
			
 
				- * Define a mapping to these extra registers.
			
 
				+ * Those extra MSRs end up being shared for all events on
			
 
				+ * a PMU and sometimes between PMU of sibling HT threads.
			
 
				+ * In either case, the kernel needs to handle conflicting
			
 
				+ * accesses to those extra, shared, regs. The data structure
			
 
				+ * to manage those registers is stored in cpu_hw_event.
			
 
				  */
			
 
				 struct extra_reg {
			
 
				 	unsigned int		event;
			
 
				 	unsigned int		msr;
			
 
				 	u64			config_mask;
			
 
				 	u64			valid_mask;
			
 
				+	int			idx;  /* per_xxx->regs[] reg index */
			
 
				 };
			
 
				 
			
 
				-#define EVENT_EXTRA_REG(e, ms, m, vm) {	\
			
 
				+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
			
 
				 	.event = (e),		\
			
 
				 	.msr = (ms),		\
			
 
				 	.config_mask = (m),	\
			
 
				 	.valid_mask = (vm),	\
			
 
				+	.idx = EXTRA_REG_##i	\
			
 
				 	}
			
 
				-#define INTEL_EVENT_EXTRA_REG(event, msr, vm)	\
			
 
				-	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
			
 
				-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
			
 
				+
			
 
				+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
			
 
				+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
			
 
				+
			
 
				+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
			
 
				 
			
 
				 union perf_capabilities {
			
 
				 	struct {
			
@@ -252,7 +258,6 @@ struct x86_pmu {
 
				 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
			
 
				 						 struct perf_event *event);
			
 
				 	struct event_constraint *event_constraints;
			
 
				-	struct event_constraint *percore_constraints;
			
 
				 	void		(*quirks)(void);
			
 
				 	int		perfctr_second_write;
			
 
				 
			
@@ -286,8 +291,12 @@ struct x86_pmu {
 
				 	 * Extra registers for events
			
 
				 	 */
			
 
				 	struct extra_reg *extra_regs;
			
 
				+	unsigned int er_flags;
			
 
				 };
			
 
				 
			
 
				+#define ERF_NO_HT_SHARING	1
			
 
				+#define ERF_HAS_RSP_1		2
			
 
				+
			
 
				 static struct x86_pmu x86_pmu __read_mostly;
			
 
				 
			
 
				 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
			
@@ -393,10 +402,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
 
				  */
			
 
				 static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
			
 
				 {
			
 
				+	struct hw_perf_event_extra *reg;
			
 
				 	struct extra_reg *er;
			
 
				 
			
 
				-	event->hw.extra_reg = 0;
			
 
				-	event->hw.extra_config = 0;
			
 
				+	reg = &event->hw.extra_reg;
			
 
				 
			
 
				 	if (!x86_pmu.extra_regs)
			
 
				 		return 0;
			
@@ -406,8 +415,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 
				 			continue;
			
 
				 		if (event->attr.config1 & ~er->valid_mask)
			
 
				 			return -EINVAL;
			
 
				-		event->hw.extra_reg = er->msr;
			
 
				-		event->hw.extra_config = event->attr.config1;
			
 
				+
			
 
				+		reg->idx = er->idx;
			
 
				+		reg->config = event->attr.config1;
			
 
				+		reg->reg = er->msr;
			
 
				 		break;
			
 
				 	}
			
 
				 	return 0;
			
@@ -706,6 +717,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
 
				 	event->hw.last_cpu = -1;
			
 
				 	event->hw.last_tag = ~0ULL;
			
 
				 
			
 
				+	/* mark unused */
			
 
				+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
			
 
				+
			
 
				 	return x86_pmu.hw_config(event);
			
 
				 }
			
 
				 
			
@@ -747,8 +761,8 @@ static void x86_pmu_disable(struct pmu *pmu)
 
				 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
			
 
				 					  u64 enable_mask)
			
 
				 {
			
 
				-	if (hwc->extra_reg)
			
 
				-		wrmsrl(hwc->extra_reg, hwc->extra_config);
			
 
				+	if (hwc->extra_reg.reg)
			
 
				+		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
			
 
				 	wrmsrl(hwc->config_base, hwc->config | enable_mask);
			
 
				 }
			
 
				 
			
@@ -1332,7 +1346,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
				 		if (!x86_perf_event_set_period(event))
			
 
				 			continue;
			
 
				 
			
 
				-		if (perf_event_overflow(event, 1, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			x86_pmu_stop(event, 0);
			
 
				 	}
			
 
				 
			
@@ -1637,6 +1651,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
 
				 	perf_pmu_enable(pmu);
			
 
				 	return 0;
			
 
				 }
			
 
				+/*
			
 
				+ * a fake_cpuc is used to validate event groups. Due to
			
 
				+ * the extra reg logic, we need to also allocate a fake
			
 
				+ * per_core and per_cpu structure. Otherwise, group events
			
 
				+ * using extra reg may conflict without the kernel being
			
 
				+ * able to catch this when the last event gets added to
			
 
				+ * the group.
			
 
				+ */
			
 
				+static void free_fake_cpuc(struct cpu_hw_events *cpuc)
			
 
				+{
			
 
				+	kfree(cpuc->shared_regs);
			
 
				+	kfree(cpuc);
			
 
				+}
			
 
				+
			
 
				+static struct cpu_hw_events *allocate_fake_cpuc(void)
			
 
				+{
			
 
				+	struct cpu_hw_events *cpuc;
			
 
				+	int cpu = raw_smp_processor_id();
			
 
				+
			
 
				+	cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
			
 
				+	if (!cpuc)
			
 
				+		return ERR_PTR(-ENOMEM);
			
 
				+
			
 
				+	/* only needed, if we have extra_regs */
			
 
				+	if (x86_pmu.extra_regs) {
			
 
				+		cpuc->shared_regs = allocate_shared_regs(cpu);
			
 
				+		if (!cpuc->shared_regs)
			
 
				+			goto error;
			
 
				+	}
			
 
				+	return cpuc;
			
 
				+error:
			
 
				+	free_fake_cpuc(cpuc);
			
 
				+	return ERR_PTR(-ENOMEM);
			
 
				+}
			
 
				 
			
 
				 /*
			
 
				  * validate that we can schedule this event
			
@@ -1647,9 +1695,9 @@ static int validate_event(struct perf_event *event)
 
				 	struct event_constraint *c;
			
 
				 	int ret = 0;
			
 
				 
			
 
				-	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
			
 
				-	if (!fake_cpuc)
			
 
				-		return -ENOMEM;
			
 
				+	fake_cpuc = allocate_fake_cpuc();
			
 
				+	if (IS_ERR(fake_cpuc))
			
 
				+		return PTR_ERR(fake_cpuc);
			
 
				 
			
 
				 	c = x86_pmu.get_event_constraints(fake_cpuc, event);
			
 
				 
			
@@ -1659,7 +1707,7 @@ static int validate_event(struct perf_event *event)
 
				 	if (x86_pmu.put_event_constraints)
			
 
				 		x86_pmu.put_event_constraints(fake_cpuc, event);
			
 
				 
			
 
				-	kfree(fake_cpuc);
			
 
				+	free_fake_cpuc(fake_cpuc);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
@@ -1679,36 +1727,32 @@ static int validate_group(struct perf_event *event)
 
				 {
			
 
				 	struct perf_event *leader = event->group_leader;
			
 
				 	struct cpu_hw_events *fake_cpuc;
			
 
				-	int ret, n;
			
 
				-
			
 
				-	ret = -ENOMEM;
			
 
				-	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
			
 
				-	if (!fake_cpuc)
			
 
				-		goto out;
			
 
				+	int ret = -ENOSPC, n;
			
 
				 
			
 
				+	fake_cpuc = allocate_fake_cpuc();
			
 
				+	if (IS_ERR(fake_cpuc))
			
 
				+		return PTR_ERR(fake_cpuc);
			
 
				 	/*
			
 
				 	 * the event is not yet connected with its
			
 
				 	 * siblings therefore we must first collect
			
 
				 	 * existing siblings, then add the new event
			
 
				 	 * before we can simulate the scheduling
			
 
				 	 */
			
 
				-	ret = -ENOSPC;
			
 
				 	n = collect_events(fake_cpuc, leader, true);
			
 
				 	if (n < 0)
			
 
				-		goto out_free;
			
 
				+		goto out;
			
 
				 
			
 
				 	fake_cpuc->n_events = n;
			
 
				 	n = collect_events(fake_cpuc, event, false);
			
 
				 	if (n < 0)
			
 
				-		goto out_free;
			
 
				+		goto out;
			
 
				 
			
 
				 	fake_cpuc->n_events = n;
			
 
				 
			
 
				 	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
			
 
				 
			
 
				-out_free:
			
 
				-	kfree(fake_cpuc);
			
 
				 out:
			
 
				+	free_fake_cpuc(fake_cpuc);
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -89,6 +89,20 @@ static __initconst const u64 amd_hw_cache_event_ids
 
				 		[ C(RESULT_MISS)   ] = -1,
			
 
				 	},
			
 
				  },
			
 
				+ [ C(NODE) ] = {
			
 
				+	[ C(OP_READ) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
			
 
				+		[ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
			
 
				+	},
			
 
				+	[ C(OP_WRITE) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = -1,
			
 
				+		[ C(RESULT_MISS)   ] = -1,
			
 
				+	},
			
 
				+	[ C(OP_PREFETCH) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = -1,
			
 
				+		[ C(RESULT_MISS)   ] = -1,
			
 
				+	},
			
 
				+ },
			
 
				 };
			
 
				 
			
 
				 /*
			
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,25 +1,15 @@
 
				 #ifdef CONFIG_CPU_SUP_INTEL
			
 
				 
			
 
				-#define MAX_EXTRA_REGS 2
			
 
				-
			
 
				-/*
			
 
				- * Per register state.
			
 
				- */
			
 
				-struct er_account {
			
 
				-	int			ref;		/* reference count */
			
 
				-	unsigned int		extra_reg;	/* extra MSR number */
			
 
				-	u64			extra_config;	/* extra MSR config */
			
 
				-};
			
 
				-
			
 
				 /*
			
 
				- * Per core state
			
 
				- * This used to coordinate shared registers for HT threads.
			
 
				+ * Per core/cpu state
			
 
				+ *
			
 
				+ * Used to coordinate shared registers between HT threads or
			
 
				+ * among events on a single PMU.
			
 
				  */
			
 
				-struct intel_percore {
			
 
				-	raw_spinlock_t		lock;		/* protect structure */
			
 
				-	struct er_account	regs[MAX_EXTRA_REGS];
			
 
				-	int			refcnt;		/* number of threads */
			
 
				-	unsigned		core_id;
			
 
				+struct intel_shared_regs {
			
 
				+	struct er_account       regs[EXTRA_REG_MAX];
			
 
				+	int                     refcnt;		/* per-core: #HT threads */
			
 
				+	unsigned                core_id;	/* per-core: core id */
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 
				 
			
 
				 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
			
 
				 {
			
 
				-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
			
 
				+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
			
 
				 	EVENT_EXTRA_END
			
 
				 };
			
 
				 
			
 
				-static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
			
 
				-{
			
 
				-	INTEL_EVENT_CONSTRAINT(0xb7, 0),
			
 
				-	EVENT_CONSTRAINT_END
			
 
				-};
			
 
				-
			
 
				 static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
			
 
				 {
			
 
				 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
			
@@ -116,8 +100,6 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 
				 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
			
 
				 	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
			
 
				 	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
			
 
				-	INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
			
 
				-	INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
			
 
				 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
			
 
				 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
			
 
				 	EVENT_CONSTRAINT_END
			
@@ -125,15 +107,13 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 
				 
			
 
				 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
			
 
				 {
			
 
				-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
			
 
				-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
			
 
				+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
			
 
				+	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
			
 
				 	EVENT_EXTRA_END
			
 
				 };
			
 
				 
			
 
				-static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
			
 
				+static struct event_constraint intel_v1_event_constraints[] __read_mostly =
			
 
				 {
			
 
				-	INTEL_EVENT_CONSTRAINT(0xb7, 0),
			
 
				-	INTEL_EVENT_CONSTRAINT(0xbb, 0),
			
 
				 	EVENT_CONSTRAINT_END
			
 
				 };
			
 
				 
			
@@ -145,6 +125,12 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 
				 	EVENT_CONSTRAINT_END
			
 
				 };
			
 
				 
			
 
				+static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
			
 
				+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
			
 
				+	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
			
 
				+	EVENT_EXTRA_END
			
 
				+};
			
 
				+
			
 
				 static u64 intel_pmu_event_map(int hw_event)
			
 
				 {
			
 
				 	return intel_perfmon_event_map[hw_event];
			
@@ -245,6 +231,21 @@ static __initconst const u64 snb_hw_cache_event_ids
 
				 		[ C(RESULT_MISS)   ] = -1,
			
 
				 	},
			
 
				  },
			
 
				+ [ C(NODE) ] = {
			
 
				+	[ C(OP_READ) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = -1,
			
 
				+		[ C(RESULT_MISS)   ] = -1,
			
 
				+	},
			
 
				+	[ C(OP_WRITE) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = -1,
			
 
				+		[ C(RESULT_MISS)   ] = -1,
			
 
				+	},
			
 
				+	[ C(OP_PREFETCH) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = -1,
			
 
				+		[ C(RESULT_MISS)   ] = -1,
			
 
				+	},
			
 
				+ },
			
 
				+
			
 
				 };
			
 
				 
			
 
				 static __initconst const u64 westmere_hw_cache_event_ids
			
@@ -346,6 +347,20 @@ static __initconst const u64 westmere_hw_cache_event_ids
 
				 		[ C(RESULT_MISS)   ] = -1,
			
 
				 	},
			
 
				  },
			
 
				+ [ C(NODE) ] = {
			
 
				+	[ C(OP_READ) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = 0x01b7,
			
 
				+		[ C(RESULT_MISS)   ] = 0x01b7,
			
 
				+	},
			
 
				+	[ C(OP_WRITE) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = 0x01b7,
			
 
				+		[ C(RESULT_MISS)   ] = 0x01b7,
			
 
				+	},
			
 
				+	[ C(OP_PREFETCH) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = 0x01b7,
			
 
				+		[ C(RESULT_MISS)   ] = 0x01b7,
			
 
				+	},
			
 
				+ },
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -398,7 +413,21 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
 
				 		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
			
 
				 		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
			
 
				 	},
			
 
				- }
			
 
				+ },
			
 
				+ [ C(NODE) ] = {
			
 
				+	[ C(OP_READ) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
			
 
				+		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
			
 
				+	},
			
 
				+	[ C(OP_WRITE) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
			
 
				+		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
			
 
				+	},
			
 
				+	[ C(OP_PREFETCH) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
			
 
				+		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
			
 
				+	},
			
 
				+ },
			
 
				 };
			
 
				 
			
 
				 static __initconst const u64 nehalem_hw_cache_event_ids
			
@@ -500,6 +529,20 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 
				 		[ C(RESULT_MISS)   ] = -1,
			
 
				 	},
			
 
				  },
			
 
				+ [ C(NODE) ] = {
			
 
				+	[ C(OP_READ) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = 0x01b7,
			
 
				+		[ C(RESULT_MISS)   ] = 0x01b7,
			
 
				+	},
			
 
				+	[ C(OP_WRITE) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = 0x01b7,
			
 
				+		[ C(RESULT_MISS)   ] = 0x01b7,
			
 
				+	},
			
 
				+	[ C(OP_PREFETCH) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = 0x01b7,
			
 
				+		[ C(RESULT_MISS)   ] = 0x01b7,
			
 
				+	},
			
 
				+ },
			
 
				 };
			
 
				 
			
 
				 static __initconst const u64 core2_hw_cache_event_ids
			
@@ -1003,7 +1046,7 @@ again:
 
				 
			
 
				 		data.period = event->hw.last_period;
			
 
				 
			
 
				-		if (perf_event_overflow(event, 1, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			x86_pmu_stop(event, 0);
			
 
				 	}
			
 
				 
			
@@ -1037,65 +1080,121 @@ intel_bts_constraints(struct perf_event *event)
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				+static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
			
 
				+{
			
 
				+	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
			
 
				+		return false;
			
 
				+
			
 
				+	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
			
 
				+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
			
 
				+		event->hw.config |= 0x01bb;
			
 
				+		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
			
 
				+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
			
 
				+	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
			
 
				+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
			
 
				+		event->hw.config |= 0x01b7;
			
 
				+		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
			
 
				+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
			
 
				+	}
			
 
				+
			
 
				+	if (event->hw.extra_reg.idx == orig_idx)
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * manage allocation of shared extra msr for certain events
			
 
				+ *
			
 
				+ * sharing can be:
			
 
				+ * per-cpu: to be shared between the various events on a single PMU
			
 
				+ * per-core: per-cpu + shared by HT threads
			
 
				+ */
			
 
				 static struct event_constraint *
			
 
				-intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
			
 
				+__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
			
 
				+				   struct perf_event *event)
			
 
				 {
			
 
				-	struct hw_perf_event *hwc = &event->hw;
			
 
				-	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
			
 
				-	struct event_constraint *c;
			
 
				-	struct intel_percore *pc;
			
 
				+	struct event_constraint *c = &emptyconstraint;
			
 
				+	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
			
 
				 	struct er_account *era;
			
 
				-	int i;
			
 
				-	int free_slot;
			
 
				-	int found;
			
 
				+	unsigned long flags;
			
 
				+	int orig_idx = reg->idx;
			
 
				 
			
 
				-	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
			
 
				-		return NULL;
			
 
				+	/* already allocated shared msr */
			
 
				+	if (reg->alloc)
			
 
				+		return &unconstrained;
			
 
				 
			
 
				-	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
			
 
				-		if (e != c->code)
			
 
				-			continue;
			
 
				+again:
			
 
				+	era = &cpuc->shared_regs->regs[reg->idx];
			
 
				+	/*
			
 
				+	 * we use spin_lock_irqsave() to avoid lockdep issues when
			
 
				+	 * passing a fake cpuc
			
 
				+	 */
			
 
				+	raw_spin_lock_irqsave(&era->lock, flags);
			
 
				+
			
 
				+	if (!atomic_read(&era->ref) || era->config == reg->config) {
			
 
				+
			
 
				+		/* lock in msr value */
			
 
				+		era->config = reg->config;
			
 
				+		era->reg = reg->reg;
			
 
				+
			
 
				+		/* one more user */
			
 
				+		atomic_inc(&era->ref);
			
 
				+
			
 
				+		/* no need to reallocate during incremental event scheduling */
			
 
				+		reg->alloc = 1;
			
 
				 
			
 
				 		/*
			
 
				-		 * Allocate resource per core.
			
 
				+		 * All events using extra_reg are unconstrained.
			
 
				+		 * Avoids calling x86_get_event_constraints()
			
 
				+		 *
			
 
				+		 * Must revisit if extra_reg controlling events
			
 
				+		 * ever have constraints. Worst case we go through
			
 
				+		 * the regular event constraint table.
			
 
				 		 */
			
 
				-		pc = cpuc->per_core;
			
 
				-		if (!pc)
			
 
				-			break;
			
 
				-		c = &emptyconstraint;
			
 
				-		raw_spin_lock(&pc->lock);
			
 
				-		free_slot = -1;
			
 
				-		found = 0;
			
 
				-		for (i = 0; i < MAX_EXTRA_REGS; i++) {
			
 
				-			era = &pc->regs[i];
			
 
				-			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
			
 
				-				/* Allow sharing same config */
			
 
				-				if (hwc->extra_config == era->extra_config) {
			
 
				-					era->ref++;
			
 
				-					cpuc->percore_used = 1;
			
 
				-					hwc->extra_alloc = 1;
			
 
				-					c = NULL;
			
 
				-				}
			
 
				-				/* else conflict */
			
 
				-				found = 1;
			
 
				-				break;
			
 
				-			} else if (era->ref == 0 && free_slot == -1)
			
 
				-				free_slot = i;
			
 
				-		}
			
 
				-		if (!found && free_slot != -1) {
			
 
				-			era = &pc->regs[free_slot];
			
 
				-			era->ref = 1;
			
 
				-			era->extra_reg = hwc->extra_reg;
			
 
				-			era->extra_config = hwc->extra_config;
			
 
				-			cpuc->percore_used = 1;
			
 
				-			hwc->extra_alloc = 1;
			
 
				-			c = NULL;
			
 
				-		}
			
 
				-		raw_spin_unlock(&pc->lock);
			
 
				-		return c;
			
 
				+		c = &unconstrained;
			
 
				+	} else if (intel_try_alt_er(event, orig_idx)) {
			
 
				+		raw_spin_unlock(&era->lock);
			
 
				+		goto again;
			
 
				 	}
			
 
				+	raw_spin_unlock_irqrestore(&era->lock, flags);
			
 
				 
			
 
				-	return NULL;
			
 
				+	return c;
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
			
 
				+				   struct hw_perf_event_extra *reg)
			
 
				+{
			
 
				+	struct er_account *era;
			
 
				+
			
 
				+	/*
			
 
				+	 * only put constraint if extra reg was actually
			
 
				+	 * allocated. Also takes care of event which do
			
 
				+	 * not use an extra shared reg
			
 
				+	 */
			
 
				+	if (!reg->alloc)
			
 
				+		return;
			
 
				+
			
 
				+	era = &cpuc->shared_regs->regs[reg->idx];
			
 
				+
			
 
				+	/* one fewer user */
			
 
				+	atomic_dec(&era->ref);
			
 
				+
			
 
				+	/* allocate again next time */
			
 
				+	reg->alloc = 0;
			
 
				+}
			
 
				+
			
 
				+static struct event_constraint *
			
 
				+intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
			
 
				+			      struct perf_event *event)
			
 
				+{
			
 
				+	struct event_constraint *c = NULL;
			
 
				+
			
 
				+	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
			
 
				+		c = __intel_shared_reg_get_constraints(cpuc, event);
			
 
				+
			
 
				+	return c;
			
 
				 }
			
 
				 
			
 
				 static struct event_constraint *
			
@@ -1111,49 +1210,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 
				 	if (c)
			
 
				 		return c;
			
 
				 
			
 
				-	c = intel_percore_constraints(cpuc, event);
			
 
				+	c = intel_shared_regs_constraints(cpuc, event);
			
 
				 	if (c)
			
 
				 		return c;
			
 
				 
			
 
				 	return x86_get_event_constraints(cpuc, event);
			
 
				 }
			
 
				 
			
 
				-static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
			
 
				+static void
			
 
				+intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
			
 
				 					struct perf_event *event)
			
 
				 {
			
 
				-	struct extra_reg *er;
			
 
				-	struct intel_percore *pc;
			
 
				-	struct er_account *era;
			
 
				-	struct hw_perf_event *hwc = &event->hw;
			
 
				-	int i, allref;
			
 
				+	struct hw_perf_event_extra *reg;
			
 
				 
			
 
				-	if (!cpuc->percore_used)
			
 
				-		return;
			
 
				-
			
 
				-	for (er = x86_pmu.extra_regs; er->msr; er++) {
			
 
				-		if (er->event != (hwc->config & er->config_mask))
			
 
				-			continue;
			
 
				+	reg = &event->hw.extra_reg;
			
 
				+	if (reg->idx != EXTRA_REG_NONE)
			
 
				+		__intel_shared_reg_put_constraints(cpuc, reg);
			
 
				+}
			
 
				 
			
 
				-		pc = cpuc->per_core;
			
 
				-		raw_spin_lock(&pc->lock);
			
 
				-		for (i = 0; i < MAX_EXTRA_REGS; i++) {
			
 
				-			era = &pc->regs[i];
			
 
				-			if (era->ref > 0 &&
			
 
				-			    era->extra_config == hwc->extra_config &&
			
 
				-			    era->extra_reg == er->msr) {
			
 
				-				era->ref--;
			
 
				-				hwc->extra_alloc = 0;
			
 
				-				break;
			
 
				-			}
			
 
				-		}
			
 
				-		allref = 0;
			
 
				-		for (i = 0; i < MAX_EXTRA_REGS; i++)
			
 
				-			allref += pc->regs[i].ref;
			
 
				-		if (allref == 0)
			
 
				-			cpuc->percore_used = 0;
			
 
				-		raw_spin_unlock(&pc->lock);
			
 
				-		break;
			
 
				-	}
			
 
				+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
			
 
				+					struct perf_event *event)
			
 
				+{
			
 
				+	intel_put_shared_regs_event_constraints(cpuc, event);
			
 
				 }
			
 
				 
			
 
				 static int intel_pmu_hw_config(struct perf_event *event)
			
@@ -1231,20 +1309,36 @@ static __initconst const struct x86_pmu core_pmu = {
 
				 	.event_constraints	= intel_core_event_constraints,
			
 
				 };
			
 
				 
			
 
				+static struct intel_shared_regs *allocate_shared_regs(int cpu)
			
 
				+{
			
 
				+	struct intel_shared_regs *regs;
			
 
				+	int i;
			
 
				+
			
 
				+	regs = kzalloc_node(sizeof(struct intel_shared_regs),
			
 
				+			    GFP_KERNEL, cpu_to_node(cpu));
			
 
				+	if (regs) {
			
 
				+		/*
			
 
				+		 * initialize the locks to keep lockdep happy
			
 
				+		 */
			
 
				+		for (i = 0; i < EXTRA_REG_MAX; i++)
			
 
				+			raw_spin_lock_init(&regs->regs[i].lock);
			
 
				+
			
 
				+		regs->core_id = -1;
			
 
				+	}
			
 
				+	return regs;
			
 
				+}
			
 
				+
			
 
				 static int intel_pmu_cpu_prepare(int cpu)
			
 
				 {
			
 
				 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
			
 
				 
			
 
				-	if (!cpu_has_ht_siblings())
			
 
				+	if (!x86_pmu.extra_regs)
			
 
				 		return NOTIFY_OK;
			
 
				 
			
 
				-	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
			
 
				-				      GFP_KERNEL, cpu_to_node(cpu));
			
 
				-	if (!cpuc->per_core)
			
 
				+	cpuc->shared_regs = allocate_shared_regs(cpu);
			
 
				+	if (!cpuc->shared_regs)
			
 
				 		return NOTIFY_BAD;
			
 
				 
			
 
				-	raw_spin_lock_init(&cpuc->per_core->lock);
			
 
				-	cpuc->per_core->core_id = -1;
			
 
				 	return NOTIFY_OK;
			
 
				 }
			
 
				 
			
@@ -1260,32 +1354,34 @@ static void intel_pmu_cpu_starting(int cpu)
 
				 	 */
			
 
				 	intel_pmu_lbr_reset();
			
 
				 
			
 
				-	if (!cpu_has_ht_siblings())
			
 
				+	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
			
 
				 		return;
			
 
				 
			
 
				 	for_each_cpu(i, topology_thread_cpumask(cpu)) {
			
 
				-		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
			
 
				+		struct intel_shared_regs *pc;
			
 
				 
			
 
				+		pc = per_cpu(cpu_hw_events, i).shared_regs;
			
 
				 		if (pc && pc->core_id == core_id) {
			
 
				-			kfree(cpuc->per_core);
			
 
				-			cpuc->per_core = pc;
			
 
				+			kfree(cpuc->shared_regs);
			
 
				+			cpuc->shared_regs = pc;
			
 
				 			break;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	cpuc->per_core->core_id = core_id;
			
 
				-	cpuc->per_core->refcnt++;
			
 
				+	cpuc->shared_regs->core_id = core_id;
			
 
				+	cpuc->shared_regs->refcnt++;
			
 
				 }
			
 
				 
			
 
				 static void intel_pmu_cpu_dying(int cpu)
			
 
				 {
			
 
				 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
			
 
				-	struct intel_percore *pc = cpuc->per_core;
			
 
				+	struct intel_shared_regs *pc;
			
 
				 
			
 
				+	pc = cpuc->shared_regs;
			
 
				 	if (pc) {
			
 
				 		if (pc->core_id == -1 || --pc->refcnt == 0)
			
 
				 			kfree(pc);
			
 
				-		cpuc->per_core = NULL;
			
 
				+		cpuc->shared_regs = NULL;
			
 
				 	}
			
 
				 
			
 
				 	fini_debug_store_on_cpu(cpu);
			
@@ -1436,7 +1532,6 @@ static __init int intel_pmu_init(void)
 
				 
			
 
				 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
			
 
				 		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
			
 
				-		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
			
 
				 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
			
 
				 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
			
 
				 
			
@@ -1481,10 +1576,10 @@ static __init int intel_pmu_init(void)
 
				 		intel_pmu_lbr_init_nhm();
			
 
				 
			
 
				 		x86_pmu.event_constraints = intel_westmere_event_constraints;
			
 
				-		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
			
 
				 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
			
 
				 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
			
 
				 		x86_pmu.extra_regs = intel_westmere_extra_regs;
			
 
				+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
			
 
				 
			
 
				 		/* UOPS_ISSUED.STALLED_CYCLES */
			
 
				 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
			
@@ -1502,6 +1597,10 @@ static __init int intel_pmu_init(void)
 
				 
			
 
				 		x86_pmu.event_constraints = intel_snb_event_constraints;
			
 
				 		x86_pmu.pebs_constraints = intel_snb_pebs_events;
			
 
				+		x86_pmu.extra_regs = intel_snb_extra_regs;
			
 
				+		/* all extra regs are per-cpu when HT is on */
			
 
				+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
			
 
				+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
			
 
				 
			
 
				 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
			
 
				 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
			
@@ -1512,11 +1611,19 @@ static __init int intel_pmu_init(void)
 
				 		break;
			
 
				 
			
 
				 	default:
			
 
				-		/*
			
 
				-		 * default constraints for v2 and up
			
 
				-		 */
			
 
				-		x86_pmu.event_constraints = intel_gen_event_constraints;
			
 
				-		pr_cont("generic architected perfmon, ");
			
 
				+		switch (x86_pmu.version) {
			
 
				+		case 1:
			
 
				+			x86_pmu.event_constraints = intel_v1_event_constraints;
			
 
				+			pr_cont("generic architected perfmon v1, ");
			
 
				+			break;
			
 
				+		default:
			
 
				+			/*
			
 
				+			 * default constraints for v2 and up
			
 
				+			 */
			
 
				+			x86_pmu.event_constraints = intel_gen_event_constraints;
			
 
				+			pr_cont("generic architected perfmon, ");
			
 
				+			break;
			
 
				+		}
			
 
				 	}
			
 
				 	return 0;
			
 
				 }
			
@@ -1528,4 +1635,8 @@ static int intel_pmu_init(void)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static struct intel_shared_regs *allocate_shared_regs(int cpu)
			
 
				+{
			
 
				+	return NULL;
			
 
				+}
			
 
				 #endif /* CONFIG_CPU_SUP_INTEL */
			
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -340,7 +340,7 @@ static int intel_pmu_drain_bts_buffer(void)
 
				 	 */
			
 
				 	perf_prepare_sample(&header, &data, event, &regs);
			
 
				 
			
 
				-	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
			
 
				+	if (perf_output_begin(&handle, event, header.size * (top - at)))
			
 
				 		return 1;
			
 
				 
			
 
				 	for (; at < top; at++) {
			
@@ -616,7 +616,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 
				 	else
			
 
				 		regs.flags &= ~PERF_EFLAGS_EXACT;
			
 
				 
			
 
				-	if (perf_event_overflow(event, 1, &data, &regs))
			
 
				+	if (perf_event_overflow(event, &data, &regs))
			
 
				 		x86_pmu_stop(event, 0);
			
 
				 }
			
 
				 
			
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -554,13 +554,102 @@ static __initconst const u64 p4_hw_cache_event_ids
 
				 		[ C(RESULT_MISS)   ] = -1,
			
 
				 	},
			
 
				  },
			
 
				+ [ C(NODE) ] = {
			
 
				+	[ C(OP_READ) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = -1,
			
 
				+		[ C(RESULT_MISS)   ] = -1,
			
 
				+	},
			
 
				+	[ C(OP_WRITE) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = -1,
			
 
				+		[ C(RESULT_MISS)   ] = -1,
			
 
				+	},
			
 
				+	[ C(OP_PREFETCH) ] = {
			
 
				+		[ C(RESULT_ACCESS) ] = -1,
			
 
				+		[ C(RESULT_MISS)   ] = -1,
			
 
				+	},
			
 
				+ },
			
 
				 };
			
 
				 
			
 
				+/*
			
 
				+ * Because of Netburst being quite restricted in how many
			
 
				+ * identical events may run simultaneously, we introduce event aliases,
			
 
				+ * i.e. the different events which have the same functionality but
			
 
				+ * utilize non-intersected resources (ESCR/CCCR/counter registers).
			
 
				+ *
			
 
				+ * This allows us to relax restrictions a bit and run two or more
			
 
				+ * identical events together.
			
 
				+ *
			
 
				+ * Never set any custom internal bits such as P4_CONFIG_HT,
			
 
				+ * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are
			
 
				+ * either up to date automatically or not applicable at all.
			
 
				+ */
			
 
				+struct p4_event_alias {
			
 
				+	u64 original;
			
 
				+	u64 alternative;
			
 
				+} p4_event_aliases[] = {
			
 
				+	{
			
 
				+		/*
			
 
				+		 * Non-halted cycles can be substituted with non-sleeping cycles (see
			
 
				+		 * Intel SDM Vol3b for details). We need this alias to be able
			
 
				+		 * to run nmi-watchdog and 'perf top' (or any other user space tool
			
 
				+		 * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
			
 
				+		 * simultaneously.
			
 
				+		 */
			
 
				+	.original	=
			
 
				+		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)		|
			
 
				+				    P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
			
 
				+	.alternative	=
			
 
				+		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT)		|
			
 
				+				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
			
 
				+				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
			
 
				+				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
			
 
				+				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
			
 
				+				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)	|
			
 
				+				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)	|
			
 
				+				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)	|
			
 
				+				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
			
 
				+		p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT		|
			
 
				+				    P4_CCCR_COMPARE),
			
 
				+	},
			
 
				+};
			
 
				+
			
 
				+static u64 p4_get_alias_event(u64 config)
			
 
				+{
			
 
				+	u64 config_match;
			
 
				+	int i;
			
 
				+
			
 
				+	/*
			
 
				+	 * Only events carrying the P4_CONFIG_ALIASABLE mark are allowed;
			
 
				+	 * we must be sure the config did not arrive as a malformed
			
 
				+	 * RAW event.
			
 
				+	 */
			
 
				+	if (!(config & P4_CONFIG_ALIASABLE))
			
 
				+		return 0;
			
 
				+
			
 
				+	config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
			
 
				+
			
 
				+	for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
			
 
				+		if (config_match == p4_event_aliases[i].original) {
			
 
				+			config_match = p4_event_aliases[i].alternative;
			
 
				+			break;
			
 
				+		} else if (config_match == p4_event_aliases[i].alternative) {
			
 
				+			config_match = p4_event_aliases[i].original;
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (i >= ARRAY_SIZE(p4_event_aliases))
			
 
				+		return 0;
			
 
				+
			
 
				+	return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
			
 
				+}
			
 
				+
			
 
				 static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
			
 
				   /* non-halted CPU clocks */
			
 
				   [PERF_COUNT_HW_CPU_CYCLES] =
			
 
				 	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)		|
			
 
				-		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
			
 
				+		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING))	|
			
 
				+		P4_CONFIG_ALIASABLE,
			
 
				 
			
 
				   /*
			
 
				    * retired instructions
			
@@ -945,7 +1034,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 
				 
			
 
				 		if (!x86_perf_event_set_period(event))
			
 
				 			continue;
			
 
				-		if (perf_event_overflow(event, 1, &data, regs))
			
 
				+		if (perf_event_overflow(event, &data, regs))
			
 
				 			x86_pmu_stop(event, 0);
			
 
				 	}
			
 
				 
			
@@ -1120,6 +1209,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
 
				 	struct p4_event_bind *bind;
			
 
				 	unsigned int i, thread, num;
			
 
				 	int cntr_idx, escr_idx;
			
 
				+	u64 config_alias;
			
 
				+	int pass;
			
 
				 
			
 
				 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
			
 
				 	bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
			
@@ -1128,6 +1219,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
 
				 
			
 
				 		hwc = &cpuc->event_list[i]->hw;
			
 
				 		thread = p4_ht_thread(cpu);
			
 
				+		pass = 0;
			
 
				+
			
 
				+again:
			
 
				+		/*
			
 
				+		 * It's possible to hit a circular lock
			
 
				+		 * between original and alternative events
			
 
				+		 * if both are scheduled already.
			
 
				+		 */
			
 
				+		if (pass > 2)
			
 
				+			goto done;
			
 
				+
			
 
				 		bind = p4_config_get_bind(hwc->config);
			
 
				 		escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
			
 
				 		if (unlikely(escr_idx == -1))
			
@@ -1141,8 +1243,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
 
				 		}
			
 
				 
			
 
				 		cntr_idx = p4_next_cntr(thread, used_mask, bind);
			
 
				-		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
			
 
				-			goto done;
			
 
				+		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
			
 
				+			/*
			
 
				+			 * Check whether an event alias is still available.
			
 
				+			 */
			
 
				+			config_alias = p4_get_alias_event(hwc->config);
			
 
				+			if (!config_alias)
			
 
				+				goto done;
			
 
				+			hwc->config = config_alias;
			
 
				+			pass++;
			
 
				+			goto again;
			
 
				+		}
			
 
				 
			
 
				 		p4_pmu_swap_config_ts(hwc, cpu);
			
 
				 		if (assign)
			
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -104,34 +104,6 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
 
				 	return (stack >= irq_stack && stack < irq_stack_end);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * We are returning from the irq stack and go to the previous one.
			
 
				- * If the previous stack is also in the irq stack, then bp in the first
			
 
				- * frame of the irq stack points to the previous, interrupted one.
			
 
				- * Otherwise we have another level of indirection: We first save
			
 
				- * the bp of the previous stack, then we switch the stack to the irq one
			
 
				- * and save a new bp that links to the previous one.
			
 
				- * (See save_args())
			
 
				- */
			
 
				-static inline unsigned long
			
 
				-fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
			
 
				-		  unsigned long *irq_stack, unsigned long *irq_stack_end)
			
 
				-{
			
 
				-#ifdef CONFIG_FRAME_POINTER
			
 
				-	struct stack_frame *frame = (struct stack_frame *)bp;
			
 
				-	unsigned long next;
			
 
				-
			
 
				-	if (!in_irq_stack(stack, irq_stack, irq_stack_end)) {
			
 
				-		if (!probe_kernel_address(&frame->next_frame, next))
			
 
				-			return next;
			
 
				-		else
			
 
				-			WARN_ONCE(1, "Perf: bad frame pointer = %p in "
			
 
				-				  "callchain\n", &frame->next_frame);
			
 
				-	}
			
 
				-#endif
			
 
				-	return bp;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * x86-64 can have up to three kernel stacks:
			
 
				  * process stack
			
@@ -155,9 +127,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 
				 		task = current;
			
 
				 
			
 
				 	if (!stack) {
			
 
				-		stack = &dummy;
			
 
				-		if (task && task != current)
			
 
				+		if (regs)
			
 
				+			stack = (unsigned long *)regs->sp;
			
 
				+		else if (task && task != current)
			
 
				 			stack = (unsigned long *)task->thread.sp;
			
 
				+		else
			
 
				+			stack = &dummy;
			
 
				 	}
			
 
				 
			
 
				 	if (!bp)
			
@@ -205,8 +180,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 
				 				 * pointer (index -1 to end) in the IRQ stack:
			
 
				 				 */
			
 
				 				stack = (unsigned long *) (irq_stack_end[-1]);
			
 
				-				bp = fixup_bp_irq_link(bp, stack, irq_stack,
			
 
				-						       irq_stack_end);
			
 
				 				irq_stack_end = NULL;
			
 
				 				ops->stack(data, "EOI");
			
 
				 				continue;
			
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -297,27 +297,26 @@ ENDPROC(native_usergs_sysret64)
 
				 	.endm
			
 
				 
			
 
				 /* save partial stack frame */
			
 
				-	.pushsection .kprobes.text, "ax"
			
 
				-ENTRY(save_args)
			
 
				-	XCPT_FRAME
			
 
				+	.macro SAVE_ARGS_IRQ
			
 
				 	cld
			
 
				-	/*
			
 
				-	 * start from rbp in pt_regs and jump over
			
 
				-	 * return address.
			
 
				-	 */
			
 
				-	movq_cfi rdi, RDI+8-RBP
			
 
				-	movq_cfi rsi, RSI+8-RBP
			
 
				-	movq_cfi rdx, RDX+8-RBP
			
 
				-	movq_cfi rcx, RCX+8-RBP
			
 
				-	movq_cfi rax, RAX+8-RBP
			
 
				-	movq_cfi  r8,  R8+8-RBP
			
 
				-	movq_cfi  r9,  R9+8-RBP
			
 
				-	movq_cfi r10, R10+8-RBP
			
 
				-	movq_cfi r11, R11+8-RBP
			
 
				-
			
 
				-	leaq -RBP+8(%rsp),%rdi	/* arg1 for handler */
			
 
				-	movq_cfi rbp, 8		/* push %rbp */
			
 
				-	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */
			
 
				+	/* rsp points at the rbp slot in pt_regs; offsets below are relative to it */
			
 
				+	movq_cfi rdi, RDI-RBP
			
 
				+	movq_cfi rsi, RSI-RBP
			
 
				+	movq_cfi rdx, RDX-RBP
			
 
				+	movq_cfi rcx, RCX-RBP
			
 
				+	movq_cfi rax, RAX-RBP
			
 
				+	movq_cfi  r8,  R8-RBP
			
 
				+	movq_cfi  r9,  R9-RBP
			
 
				+	movq_cfi r10, R10-RBP
			
 
				+	movq_cfi r11, R11-RBP
			
 
				+
			
 
				+	/* Save rbp so that we can unwind from get_irq_regs() */
			
 
				+	movq_cfi rbp, 0
			
 
				+
			
 
				+	/* Save previous stack value */
			
 
				+	movq %rsp, %rsi
			
 
				+
			
 
				+	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
			
 
				 	testl $3, CS(%rdi)
			
 
				 	je 1f
			
 
				 	SWAPGS
			
@@ -329,19 +328,14 @@ ENTRY(save_args)
 
				 	 */
			
 
				 1:	incl PER_CPU_VAR(irq_count)
			
 
				 	jne 2f
			
 
				-	popq_cfi %rax			/* move return address... */
			
 
				 	mov PER_CPU_VAR(irq_stack_ptr),%rsp
			
 
				 	EMPTY_FRAME 0
			
 
				-	pushq_cfi %rbp			/* backlink for unwinder */
			
 
				-	pushq_cfi %rax			/* ... to the new stack */
			
 
				-	/*
			
 
				-	 * We entered an interrupt context - irqs are off:
			
 
				-	 */
			
 
				-2:	TRACE_IRQS_OFF
			
 
				-	ret
			
 
				-	CFI_ENDPROC
			
 
				-END(save_args)
			
 
				-	.popsection
			
 
				+
			
 
				+2:	/* Store previous stack value */
			
 
				+	pushq %rsi
			
 
				+	/* We entered an interrupt context - irqs are off: */
			
 
				+	TRACE_IRQS_OFF
			
 
				+	.endm
			
 
				 
			
 
				 ENTRY(save_rest)
			
 
				 	PARTIAL_FRAME 1 REST_SKIP+8
			
@@ -791,7 +785,7 @@ END(interrupt)
 
				 	/* reserve pt_regs for scratch regs and rbp */
			
 
				 	subq $ORIG_RAX-RBP, %rsp
			
 
				 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
			
 
				-	call save_args
			
 
				+	SAVE_ARGS_IRQ
			
 
				 	PARTIAL_FRAME 0
			
 
				 	call \func
			
 
				 	.endm
			
@@ -814,15 +808,14 @@ ret_from_intr:
 
				 	DISABLE_INTERRUPTS(CLBR_NONE)
			
 
				 	TRACE_IRQS_OFF
			
 
				 	decl PER_CPU_VAR(irq_count)
			
 
				-	leaveq
			
 
				 
			
 
				-	CFI_RESTORE		rbp
			
 
				+	/* Restore saved previous stack */
			
 
				+	popq %rsi
			
 
				+	leaq 16(%rsi), %rsp
			
 
				+
			
 
				 	CFI_DEF_CFA_REGISTER	rsp
			
 
				-	CFI_ADJUST_CFA_OFFSET	-8
			
 
				+	CFI_ADJUST_CFA_OFFSET	-16
			
 
				 
			
 
				-	/* we did not save rbx, restore only from ARGOFFSET */
			
 
				-	addq $8, %rsp
			
 
				-	CFI_ADJUST_CFA_OFFSET	-8
			
 
				 exit_intr:
			
 
				 	GET_THREAD_INFO(%rcx)
			
 
				 	testl $3,CS-ARGOFFSET(%rsp)
			
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -608,7 +608,7 @@ int kgdb_arch_init(void)
 
				 	return register_die_notifier(&kgdb_notifier);
			
 
				 }
			
 
				 
			
 
				-static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi,
			
 
				+static void kgdb_hw_overflow_handler(struct perf_event *event,
			
 
				 		struct perf_sample_data *data, struct pt_regs *regs)
			
 
				 {
			
 
				 	struct task_struct *tsk = current;
			
@@ -638,7 +638,7 @@ void kgdb_arch_late(void)
 
				 	for (i = 0; i < HBP_NUM; i++) {
			
 
				 		if (breakinfo[i].pev)
			
 
				 			continue;
			
 
				-		breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);
			
 
				+		breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
			
 
				 		if (IS_ERR((void * __force)breakinfo[i].pev)) {
			
 
				 			printk(KERN_ERR "kgdb: Could not allocate hw"
			
 
				 			       "breakpoints\nDisabling the kernel debugger\n");
			
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -528,7 +528,7 @@ static int genregs_set(struct task_struct *target,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static void ptrace_triggered(struct perf_event *bp, int nmi,
			
 
				+static void ptrace_triggered(struct perf_event *bp,
			
 
				 			     struct perf_sample_data *data,
			
 
				 			     struct pt_regs *regs)
			
 
				 {
			
@@ -715,7 +715,8 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 
				 		attr.bp_type = HW_BREAKPOINT_W;
			
 
				 		attr.disabled = 1;
			
 
				 
			
 
				-		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
			
 
				+		bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
			
 
				+						 NULL, tsk);
			
 
				 
			
 
				 		/*
			
 
				 		 * CHECKME: the previous code returned -EIO if the addr wasn't
			
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -66,7 +66,7 @@ void save_stack_trace(struct stack_trace *trace)
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(save_stack_trace);
			
 
				 
			
 
				-void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
			
 
				+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
			
 
				 {
			
 
				 	dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
			
 
				 	if (trace->nr_entries < trace->max_entries)
			
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 
				 
			
 
				 lib-y := delay.o
			
 
				 lib-y += thunk_$(BITS).o
			
 
				-lib-y += usercopy_$(BITS).o getuser.o putuser.o
			
 
				+lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
			
 
				 lib-y += memcpy_$(BITS).o
			
 
				 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
			
 
				 
			
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -0,0 +1,43 @@
 
				+/*
			
 
				+ * User address space access functions.
			
 
				+ *
			
 
				+ *  For licencing details see kernel-base/COPYING
			
 
				+ */
			
 
				+
			
 
				+#include <linux/highmem.h>
			
 
				+#include <linux/module.h>
			
 
				+
			
 
				+/*
			
 
				+ * best effort, GUP based copy_from_user() that is NMI-safe
			
 
				+ */
			
 
				+unsigned long
			
 
				+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
			
 
				+{
			
 
				+	unsigned long offset, addr = (unsigned long)from;
			
 
				+	unsigned long size, len = 0;
			
 
				+	struct page *page;
			
 
				+	void *map;
			
 
				+	int ret;
			
 
				+
			
 
				+	do {
			
 
				+		ret = __get_user_pages_fast(addr, 1, 0, &page);
			
 
				+		if (!ret)
			
 
				+			break;
			
 
				+
			
 
				+		offset = addr & (PAGE_SIZE - 1);
			
 
				+		size = min(PAGE_SIZE - offset, n - len);
			
 
				+
			
 
				+		map = kmap_atomic(page);
			
 
				+		memcpy(to, map+offset, size);
			
 
				+		kunmap_atomic(map);
			
 
				+		put_page(page);
			
 
				+
			
 
				+		len  += size;
			
 
				+		to   += size;
			
 
				+		addr += size;
			
 
				+
			
 
				+	} while (len < n);
			
 
				+
			
 
				+	return len;
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(copy_from_user_nmi);
			
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1059,7 +1059,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
				 	if (unlikely(error_code & PF_RSVD))
			
 
				 		pgtable_bad(regs, error_code, address);
			
 
				 
			
 
				-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
			
 
				+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
			
 
				 
			
 
				 	/*
			
 
				 	 * If we're in an interrupt, have no user context or are running
			
@@ -1161,11 +1161,11 @@ good_area:
 
				 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
			
 
				 		if (fault & VM_FAULT_MAJOR) {
			
 
				 			tsk->maj_flt++;
			
 
				-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
			
 
				+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
			
 
				 				      regs, address);
			
 
				 		} else {
			
 
				 			tsk->min_flt++;
			
 
				-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
			
 
				+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
			
 
				 				      regs, address);
			
 
				 		}
			
 
				 		if (fault & VM_FAULT_RETRY) {
			
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
 
				 	e->trace.entries = e->trace_entries;
			
 
				 	e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
			
 
				 	e->trace.skip = 0;
			
 
				-	save_stack_trace_regs(&e->trace, regs);
			
 
				+	save_stack_trace_regs(regs, &e->trace);
			
 
				 
			
 
				 	/* Round address down to nearest 16 bytes */
			
 
				 	shadow_copy = kmemcheck_shadow_lookup(address
			
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -11,10 +11,11 @@
 
				 #include <linux/oprofile.h>
			
 
				 #include <linux/sched.h>
			
 
				 #include <linux/mm.h>
			
 
				+#include <linux/compat.h>
			
 
				+#include <linux/uaccess.h>
			
 
				+
			
 
				 #include <asm/ptrace.h>
			
 
				-#include <asm/uaccess.h>
			
 
				 #include <asm/stacktrace.h>
			
 
				-#include <linux/compat.h>
			
 
				 
			
 
				 static int backtrace_stack(void *data, char *name)
			
 
				 {
			
@@ -40,13 +41,13 @@ static struct stacktrace_ops backtrace_ops = {
 
				 static struct stack_frame_ia32 *
			
 
				 dump_user_backtrace_32(struct stack_frame_ia32 *head)
			
 
				 {
			
 
				+	/* Also check accessibility of one struct stack_frame_ia32 beyond: */
			
 
				 	struct stack_frame_ia32 bufhead[2];
			
 
				 	struct stack_frame_ia32 *fp;
			
 
				+	unsigned long bytes;
			
 
				 
			
 
				-	/* Also check accessibility of one struct frame_head beyond */
			
 
				-	if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
			
 
				-		return NULL;
			
 
				-	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
			
 
				+	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
			
 
				+	if (bytes != sizeof(bufhead))
			
 
				 		return NULL;
			
 
				 
			
 
				 	fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
			
@@ -87,12 +88,12 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
 
				 
			
 
				 static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
			
 
				 {
			
 
				+	/* Also check accessibility of one struct stack_frame beyond: */
			
 
				 	struct stack_frame bufhead[2];
			
 
				+	unsigned long bytes;
			
 
				 
			
 
				-	/* Also check accessibility of one struct stack_frame beyond */
			
 
				-	if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
			
 
				-		return NULL;
			
 
				-	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
			
 
				+	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
			
 
				+	if (bytes != sizeof(bufhead))
			
 
				 		return NULL;
			
 
				 
			
 
				 	oprofile_add_trace(bufhead[0].return_address);
			
--- a/drivers/oprofile/oprofile_perf.c
+++ b/drivers/oprofile/oprofile_perf.c
@@ -31,7 +31,7 @@ static int num_counters;
 
				 /*
			
 
				  * Overflow callback for oprofile.
			
 
				  */
			
 
				-static void op_overflow_handler(struct perf_event *event, int unused,
			
 
				+static void op_overflow_handler(struct perf_event *event,
			
 
				 			struct perf_sample_data *data, struct pt_regs *regs)
			
 
				 {
			
 
				 	int id;
			
@@ -79,7 +79,7 @@ static int op_create_counter(int cpu, int event)
 
				 
			
 
				 	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
			
 
				 						  cpu, NULL,
			
 
				-						  op_overflow_handler);
			
 
				+						  op_overflow_handler, NULL);
			
 
				 
			
 
				 	if (IS_ERR(pevent))
			
 
				 		return PTR_ERR(pevent);
			
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -19,6 +19,8 @@
 
				 
			
 
				 #include <asm/ftrace.h>
			
 
				 
			
 
				+struct ftrace_hash;
			
 
				+
			
 
				 #ifdef CONFIG_FUNCTION_TRACER
			
 
				 
			
 
				 extern int ftrace_enabled;
			
@@ -29,8 +31,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
				 
			
 
				 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
			
 
				 
			
 
				-struct ftrace_hash;
			
 
				-
			
 
				 enum {
			
 
				 	FTRACE_OPS_FL_ENABLED		= 1 << 0,
			
 
				 	FTRACE_OPS_FL_GLOBAL		= 1 << 1,
			
@@ -123,7 +123,8 @@ stack_trace_sysctl(struct ctl_table *table, int write,
 
				 struct ftrace_func_command {
			
 
				 	struct list_head	list;
			
 
				 	char			*name;
			
 
				-	int			(*func)(char *func, char *cmd,
			
 
				+	int			(*func)(struct ftrace_hash *hash,
			
 
				+					char *func, char *cmd,
			
 
				 					char *params, int enable);
			
 
				 };
			
 
				 
			
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -76,6 +76,7 @@ struct trace_iterator {
 
				 	struct trace_entry	*ent;
			
 
				 	unsigned long		lost_events;
			
 
				 	int			leftover;
			
 
				+	int			ent_size;
			
 
				 	int			cpu;
			
 
				 	u64			ts;
			
 
				 
			
@@ -129,6 +130,10 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 
				 void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
			
 
				 				       struct ring_buffer_event *event,
			
 
				 					unsigned long flags, int pc);
			
 
				+void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
			
 
				+					    struct ring_buffer_event *event,
			
 
				+					    unsigned long flags, int pc,
			
 
				+					    struct pt_regs *regs);
			
 
				 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
			
 
				 					 struct ring_buffer_event *event);
			
 
				 
			
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -73,6 +73,7 @@ static inline unsigned long hw_breakpoint_len(struct perf_event *bp)
 
				 extern struct perf_event *
			
 
				 register_user_hw_breakpoint(struct perf_event_attr *attr,
			
 
				 			    perf_overflow_handler_t triggered,
			
 
				+			    void *context,
			
 
				 			    struct task_struct *tsk);
			
 
				 
			
 
				 /* FIXME: only change from the attr, and don't unregister */
			
@@ -85,11 +86,13 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);
 
				 extern struct perf_event *
			
 
				 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
			
 
				 				perf_overflow_handler_t	triggered,
			
 
				+				void *context,
			
 
				 				int cpu);
			
 
				 
			
 
				 extern struct perf_event * __percpu *
			
 
				 register_wide_hw_breakpoint(struct perf_event_attr *attr,
			
 
				-			    perf_overflow_handler_t triggered);
			
 
				+			    perf_overflow_handler_t triggered,
			
 
				+			    void *context);
			
 
				 
			
 
				 extern int register_perf_hw_breakpoint(struct perf_event *bp);
			
 
				 extern int __register_perf_hw_breakpoint(struct perf_event *bp);
			
@@ -115,6 +118,7 @@ static inline int __init init_hw_breakpoint(void) { return 0; }
 
				 static inline struct perf_event *
			
 
				 register_user_hw_breakpoint(struct perf_event_attr *attr,
			
 
				 			    perf_overflow_handler_t triggered,
			
 
				+			    void *context,
			
 
				 			    struct task_struct *tsk)	{ return NULL; }
			
 
				 static inline int
			
 
				 modify_user_hw_breakpoint(struct perf_event *bp,
			
@@ -122,10 +126,12 @@ modify_user_hw_breakpoint(struct perf_event *bp,
 
				 static inline struct perf_event *
			
 
				 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
			
 
				 				perf_overflow_handler_t	 triggered,
			
 
				+				void *context,
			
 
				 				int cpu)		{ return NULL; }
			
 
				 static inline struct perf_event * __percpu *
			
 
				 register_wide_hw_breakpoint(struct perf_event_attr *attr,
			
 
				-			    perf_overflow_handler_t triggered)	{ return NULL; }
			
 
				+			    perf_overflow_handler_t triggered,
			
 
				+			    void *context)		{ return NULL; }
			
 
				 static inline int
			
 
				 register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
			
 
				 static inline int
			
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -61,7 +61,7 @@ enum perf_hw_id {
 
				 /*
			
 
				  * Generalized hardware cache events:
			
 
				  *
			
 
				- *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
			
 
				+ *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
			
 
				  *       { read, write, prefetch } x
			
 
				  *       { accesses, misses }
			
 
				  */
			
@@ -72,6 +72,7 @@ enum perf_hw_cache_id {
 
				 	PERF_COUNT_HW_CACHE_DTLB		= 3,
			
 
				 	PERF_COUNT_HW_CACHE_ITLB		= 4,
			
 
				 	PERF_COUNT_HW_CACHE_BPU			= 5,
			
 
				+	PERF_COUNT_HW_CACHE_NODE		= 6,
			
 
				 
			
 
				 	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
			
 
				 };
			
@@ -536,6 +537,16 @@ struct perf_branch_stack {
 
				 
			
 
				 struct task_struct;
			
 
				 
			
 
				+/*
			
 
				+ * extra PMU register associated with an event
			
 
				+ */
			
 
				+struct hw_perf_event_extra {
			
 
				+	u64		config;	/* register value */
			
 
				+	unsigned int	reg;	/* register address or index */
			
 
				+	int		alloc;	/* extra register already allocated */
			
 
				+	int		idx;	/* index in shared_regs->regs[] */
			
 
				+};
			
 
				+
			
 
				 /**
			
 
				  * struct hw_perf_event - performance event hardware details:
			
 
				  */
			
@@ -549,9 +560,7 @@ struct hw_perf_event {
 
				 			unsigned long	event_base;
			
 
				 			int		idx;
			
 
				 			int		last_cpu;
			
 
				-			unsigned int	extra_reg;
			
 
				-			u64		extra_config;
			
 
				-			int		extra_alloc;
			
 
				+			struct hw_perf_event_extra extra_reg;
			
 
				 		};
			
 
				 		struct { /* software */
			
 
				 			struct hrtimer	hrtimer;
			
@@ -680,36 +689,9 @@ enum perf_event_active_state {
 
				 };
			
 
				 
			
 
				 struct file;
			
 
				-
			
 
				-#define PERF_BUFFER_WRITABLE		0x01
			
 
				-
			
 
				-struct perf_buffer {
			
 
				-	atomic_t			refcount;
			
 
				-	struct rcu_head			rcu_head;
			
 
				-#ifdef CONFIG_PERF_USE_VMALLOC
			
 
				-	struct work_struct		work;
			
 
				-	int				page_order;	/* allocation order  */
			
 
				-#endif
			
 
				-	int				nr_pages;	/* nr of data pages  */
			
 
				-	int				writable;	/* are we writable   */
			
 
				-
			
 
				-	atomic_t			poll;		/* POLL_ for wakeups */
			
 
				-
			
 
				-	local_t				head;		/* write position    */
			
 
				-	local_t				nest;		/* nested writers    */
			
 
				-	local_t				events;		/* event limit       */
			
 
				-	local_t				wakeup;		/* wakeup stamp      */
			
 
				-	local_t				lost;		/* nr records lost   */
			
 
				-
			
 
				-	long				watermark;	/* wakeup watermark  */
			
 
				-
			
 
				-	struct perf_event_mmap_page	*user_page;
			
 
				-	void				*data_pages[0];
			
 
				-};
			
 
				-
			
 
				 struct perf_sample_data;
			
 
				 
			
 
				-typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
			
 
				+typedef void (*perf_overflow_handler_t)(struct perf_event *,
			
 
				 					struct perf_sample_data *,
			
 
				 					struct pt_regs *regs);
			
 
				 
			
@@ -745,6 +727,8 @@ struct perf_cgroup {
 
				 };
			
 
				 #endif
			
 
				 
			
 
				+struct ring_buffer;
			
 
				+
			
 
				 /**
			
 
				  * struct perf_event - performance event kernel representation:
			
 
				  */
			
@@ -834,7 +818,7 @@ struct perf_event {
 
				 	atomic_t			mmap_count;
			
 
				 	int				mmap_locked;
			
 
				 	struct user_struct		*mmap_user;
			
 
				-	struct perf_buffer		*buffer;
			
 
				+	struct ring_buffer		*rb;
			
 
				 
			
 
				 	/* poll related */
			
 
				 	wait_queue_head_t		waitq;
			
@@ -855,6 +839,7 @@ struct perf_event {
 
				 	u64				id;
			
 
				 
			
 
				 	perf_overflow_handler_t		overflow_handler;
			
 
				+	void				*overflow_handler_context;
			
 
				 
			
 
				 #ifdef CONFIG_EVENT_TRACING
			
 
				 	struct ftrace_event_call	*tp_event;
			
@@ -919,8 +904,8 @@ struct perf_event_context {
 
				 	u64				parent_gen;
			
 
				 	u64				generation;
			
 
				 	int				pin_count;
			
 
				-	struct rcu_head			rcu_head;
			
 
				 	int				nr_cgroups; /* cgroup events present */
			
 
				+	struct rcu_head			rcu_head;
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -945,13 +930,11 @@ struct perf_cpu_context {
 
				 
			
 
				 struct perf_output_handle {
			
 
				 	struct perf_event		*event;
			
 
				-	struct perf_buffer		*buffer;
			
 
				+	struct ring_buffer		*rb;
			
 
				 	unsigned long			wakeup;
			
 
				 	unsigned long			size;
			
 
				 	void				*addr;
			
 
				 	int				page;
			
 
				-	int				nmi;
			
 
				-	int				sample;
			
 
				 };
			
 
				 
			
 
				 #ifdef CONFIG_PERF_EVENTS
			
@@ -972,13 +955,15 @@ extern void perf_pmu_disable(struct pmu *pmu);
 
				 extern void perf_pmu_enable(struct pmu *pmu);
			
 
				 extern int perf_event_task_disable(void);
			
 
				 extern int perf_event_task_enable(void);
			
 
				+extern int perf_event_refresh(struct perf_event *event, int refresh);
			
 
				 extern void perf_event_update_userpage(struct perf_event *event);
			
 
				 extern int perf_event_release_kernel(struct perf_event *event);
			
 
				 extern struct perf_event *
			
 
				 perf_event_create_kernel_counter(struct perf_event_attr *attr,
			
 
				 				int cpu,
			
 
				 				struct task_struct *task,
			
 
				-				perf_overflow_handler_t callback);
			
 
				+				perf_overflow_handler_t callback,
			
 
				+				void *context);
			
 
				 extern u64 perf_event_read_value(struct perf_event *event,
			
 
				 				 u64 *enabled, u64 *running);
			
 
				 
			
@@ -1018,7 +1003,7 @@ extern void perf_prepare_sample(struct perf_event_header *header,
 
				 				struct perf_event *event,
			
 
				 				struct pt_regs *regs);
			
 
				 
			
 
				-extern int perf_event_overflow(struct perf_event *event, int nmi,
			
 
				+extern int perf_event_overflow(struct perf_event *event,
			
 
				 				 struct perf_sample_data *data,
			
 
				 				 struct pt_regs *regs);
			
 
				 
			
@@ -1037,7 +1022,7 @@ static inline int is_software_event(struct perf_event *event)
 
				 
			
 
				 extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
			
 
				 
			
 
				-extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
			
 
				+extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
			
 
				 
			
 
				 #ifndef perf_arch_fetch_caller_regs
			
 
				 static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
			
@@ -1059,7 +1044,7 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 
				 }
			
 
				 
			
 
				 static __always_inline void
			
 
				-perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
			
 
				+perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
			
 
				 {
			
 
				 	struct pt_regs hot_regs;
			
 
				 
			
@@ -1068,7 +1053,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 
				 			perf_fetch_caller_regs(&hot_regs);
			
 
				 			regs = &hot_regs;
			
 
				 		}
			
 
				-		__perf_sw_event(event_id, nr, nmi, regs, addr);
			
 
				+		__perf_sw_event(event_id, nr, regs, addr);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -1082,7 +1067,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task)
 
				 
			
 
				 static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
			
 
				 {
			
 
				-	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
			
 
				+	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
			
 
				 
			
 
				 	__perf_event_task_sched_out(task, next);
			
 
				 }
			
@@ -1143,8 +1128,7 @@ extern void perf_bp_event(struct perf_event *event, void *data);
 
				 #endif
			
 
				 
			
 
				 extern int perf_output_begin(struct perf_output_handle *handle,
			
 
				-			     struct perf_event *event, unsigned int size,
			
 
				-			     int nmi, int sample);
			
 
				+			     struct perf_event *event, unsigned int size);
			
 
				 extern void perf_output_end(struct perf_output_handle *handle);
			
 
				 extern void perf_output_copy(struct perf_output_handle *handle,
			
 
				 			     const void *buf, unsigned int len);
			
@@ -1166,10 +1150,13 @@ static inline void perf_event_delayed_put(struct task_struct *task)	{ }
 
				 static inline void perf_event_print_debug(void)				{ }
			
 
				 static inline int perf_event_task_disable(void)				{ return -EINVAL; }
			
 
				 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
			
 
				+static inline int perf_event_refresh(struct perf_event *event, int refresh)
			
 
				+{
			
 
				+	return -EINVAL;
			
 
				+}
			
 
				 
			
 
				 static inline void
			
 
				-perf_sw_event(u32 event_id, u64 nr, int nmi,
			
 
				-		     struct pt_regs *regs, u64 addr)			{ }
			
 
				+perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
			
 
				 static inline void
			
 
				 perf_bp_event(struct perf_event *event, void *data)			{ }
			
 
				 
			
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -169,7 +169,7 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
 
				 size_t ring_buffer_page_len(void *page);
			
 
				 
			
 
				 
			
 
				-void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
			
 
				+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);
			
 
				 void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
			
 
				 int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
			
 
				 			  size_t len, int cpu, int full);
			
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -14,8 +14,8 @@ struct stack_trace {
 
				 };
			
 
				 
			
 
				 extern void save_stack_trace(struct stack_trace *trace);
			
 
				-extern void save_stack_trace_regs(struct stack_trace *trace,
			
 
				-				  struct pt_regs *regs);
			
 
				+extern void save_stack_trace_regs(struct pt_regs *regs,
			
 
				+				  struct stack_trace *trace);
			
 
				 extern void save_stack_trace_tsk(struct task_struct *tsk,
			
 
				 				struct stack_trace *trace);
			
 
				 
			
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -49,12 +49,13 @@ asynchronous and synchronous parts of the kernel.
 
				 */
			
 
				 
			
 
				 #include <linux/async.h>
			
 
				+#include <linux/atomic.h>
			
 
				+#include <linux/ktime.h>
			
 
				 #include <linux/module.h>
			
 
				 #include <linux/wait.h>
			
 
				 #include <linux/sched.h>
			
 
				 #include <linux/slab.h>
			
 
				 #include <linux/workqueue.h>
			
 
				-#include <asm/atomic.h>
			
 
				 
			
 
				 static async_cookie_t next_cookie = 1;
			
 
				 
			
@@ -128,7 +129,8 @@ static void async_run_entry_fn(struct work_struct *work)
 
				 
			
 
				 	/* 2) run (and print duration) */
			
 
				 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
			
 
				-		printk("calling  %lli_%pF @ %i\n", (long long)entry->cookie,
			
 
				+		printk(KERN_DEBUG "calling  %lli_%pF @ %i\n",
			
 
				+			(long long)entry->cookie,
			
 
				 			entry->func, task_pid_nr(current));
			
 
				 		calltime = ktime_get();
			
 
				 	}
			
@@ -136,7 +138,7 @@ static void async_run_entry_fn(struct work_struct *work)
 
				 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
			
 
				 		rettime = ktime_get();
			
 
				 		delta = ktime_sub(rettime, calltime);
			
 
				-		printk("initcall %lli_%pF returned 0 after %lld usecs\n",
			
 
				+		printk(KERN_DEBUG "initcall %lli_%pF returned 0 after %lld usecs\n",
			
 
				 			(long long)entry->cookie,
			
 
				 			entry->func,
			
 
				 			(long long)ktime_to_ns(delta) >> 10);
			
@@ -270,7 +272,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,
 
				 	ktime_t starttime, delta, endtime;
			
 
				 
			
 
				 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
			
 
				-		printk("async_waiting @ %i\n", task_pid_nr(current));
			
 
				+		printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current));
			
 
				 		starttime = ktime_get();
			
 
				 	}
			
 
				 
			
@@ -280,7 +282,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,
 
				 		endtime = ktime_get();
			
 
				 		delta = ktime_sub(endtime, starttime);
			
 
				 
			
 
				-		printk("async_continuing @ %i after %lli usec\n",
			
 
				+		printk(KERN_DEBUG "async_continuing @ %i after %lli usec\n",
			
 
				 			task_pid_nr(current),
			
 
				 			(long long)ktime_to_ns(delta) >> 10);
			
 
				 	}
			
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER
 
				 CFLAGS_REMOVE_core.o = -pg
			
 
				 endif
			
 
				 
			
 
				-obj-y := core.o
			
 
				+obj-y := core.o ring_buffer.o
			
 
				 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
			
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -36,6 +36,8 @@
 
				 #include <linux/ftrace_event.h>
			
 
				 #include <linux/hw_breakpoint.h>
			
 
				 
			
 
				+#include "internal.h"
			
 
				+
			
 
				 #include <asm/irq_regs.h>
			
 
				 
			
 
				 struct remote_function_call {
			
@@ -200,6 +202,22 @@ __get_cpu_context(struct perf_event_context *ctx)
 
				 	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
			
 
				 }
			
 
				 
			
 
				+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
			
 
				+			  struct perf_event_context *ctx)
			
 
				+{
			
 
				+	raw_spin_lock(&cpuctx->ctx.lock);
			
 
				+	if (ctx)
			
 
				+		raw_spin_lock(&ctx->lock);
			
 
				+}
			
 
				+
			
 
				+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
			
 
				+			    struct perf_event_context *ctx)
			
 
				+{
			
 
				+	if (ctx)
			
 
				+		raw_spin_unlock(&ctx->lock);
			
 
				+	raw_spin_unlock(&cpuctx->ctx.lock);
			
 
				+}
			
 
				+
			
 
				 #ifdef CONFIG_CGROUP_PERF
			
 
				 
			
 
				 /*
			
@@ -340,11 +358,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 
				 	rcu_read_lock();
			
 
				 
			
 
				 	list_for_each_entry_rcu(pmu, &pmus, entry) {
			
 
				-
			
 
				 		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
			
 
				 
			
 
				-		perf_pmu_disable(cpuctx->ctx.pmu);
			
 
				-
			
 
				 		/*
			
 
				 		 * perf_cgroup_events says at least one
			
 
				 		 * context on this CPU has cgroup events.
			
@@ -353,6 +368,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 
				 		 * events for a context.
			
 
				 		 */
			
 
				 		if (cpuctx->ctx.nr_cgroups > 0) {
			
 
				+			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
			
 
				+			perf_pmu_disable(cpuctx->ctx.pmu);
			
 
				 
			
 
				 			if (mode & PERF_CGROUP_SWOUT) {
			
 
				 				cpu_ctx_sched_out(cpuctx, EVENT_ALL);
			
@@ -372,9 +389,9 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 
				 				cpuctx->cgrp = perf_cgroup_from_task(task);
			
 
				 				cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
			
 
				 			}
			
 
				+			perf_pmu_enable(cpuctx->ctx.pmu);
			
 
				+			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
			
 
				 		}
			
 
				-
			
 
				-		perf_pmu_enable(cpuctx->ctx.pmu);
			
 
				 	}
			
 
				 
			
 
				 	rcu_read_unlock();
			
@@ -731,6 +748,7 @@ static u64 perf_event_time(struct perf_event *event)
 
				 
			
 
				 /*
			
 
				  * Update the total_time_enabled and total_time_running fields for a event.
			
 
				+ * The caller of this function needs to hold the ctx->lock.
			
 
				  */
			
 
				 static void update_event_times(struct perf_event *event)
			
 
				 {
			
@@ -1105,6 +1123,10 @@ static int __perf_remove_from_context(void *info)
 
				 	raw_spin_lock(&ctx->lock);
			
 
				 	event_sched_out(event, cpuctx, ctx);
			
 
				 	list_del_event(event, ctx);
			
 
				+	if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
			
 
				+		ctx->is_active = 0;
			
 
				+		cpuctx->task_ctx = NULL;
			
 
				+	}
			
 
				 	raw_spin_unlock(&ctx->lock);
			
 
				 
			
 
				 	return 0;
			
@@ -1454,8 +1476,24 @@ static void add_event_to_ctx(struct perf_event *event,
 
				 	event->tstamp_stopped = tstamp;
			
 
				 }
			
 
				 
			
 
				-static void perf_event_context_sched_in(struct perf_event_context *ctx,
			
 
				-					struct task_struct *tsk);
			
 
				+static void task_ctx_sched_out(struct perf_event_context *ctx);
			
 
				+static void
			
 
				+ctx_sched_in(struct perf_event_context *ctx,
			
 
				+	     struct perf_cpu_context *cpuctx,
			
 
				+	     enum event_type_t event_type,
			
 
				+	     struct task_struct *task);
			
 
				+
			
 
				+static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
			
 
				+				struct perf_event_context *ctx,
			
 
				+				struct task_struct *task)
			
 
				+{
			
 
				+	cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
			
 
				+	if (ctx)
			
 
				+		ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
			
 
				+	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
			
 
				+	if (ctx)
			
 
				+		ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
			
 
				+}
			
 
				 
			
 
				 /*
			
 
				  * Cross CPU call to install and enable a performance event
			
@@ -1466,20 +1504,37 @@ static int  __perf_install_in_context(void *info)
 
				 {
			
 
				 	struct perf_event *event = info;
			
 
				 	struct perf_event_context *ctx = event->ctx;
			
 
				-	struct perf_event *leader = event->group_leader;
			
 
				 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
			
 
				-	int err;
			
 
				+	struct perf_event_context *task_ctx = cpuctx->task_ctx;
			
 
				+	struct task_struct *task = current;
			
 
				+
			
 
				+	perf_ctx_lock(cpuctx, task_ctx);
			
 
				+	perf_pmu_disable(cpuctx->ctx.pmu);
			
 
				 
			
 
				 	/*
			
 
				-	 * In case we're installing a new context to an already running task,
			
 
				-	 * could also happen before perf_event_task_sched_in() on architectures
			
 
				-	 * which do context switches with IRQs enabled.
			
 
				+	 * If there was an active task_ctx schedule it out.
			
 
				 	 */
			
 
				-	if (ctx->task && !cpuctx->task_ctx)
			
 
				-		perf_event_context_sched_in(ctx, ctx->task);
			
 
				+	if (task_ctx)
			
 
				+		task_ctx_sched_out(task_ctx);
			
 
				+
			
 
				+	/*
			
 
				+	 * If the context we're installing events in is not the
			
 
				+	 * active task_ctx, flip them.
			
 
				+	 */
			
 
				+	if (ctx->task && task_ctx != ctx) {
			
 
				+		if (task_ctx)
			
 
				+			raw_spin_unlock(&task_ctx->lock);
			
 
				+		raw_spin_lock(&ctx->lock);
			
 
				+		task_ctx = ctx;
			
 
				+	}
			
 
				+
			
 
				+	if (task_ctx) {
			
 
				+		cpuctx->task_ctx = task_ctx;
			
 
				+		task = task_ctx->task;
			
 
				+	}
			
 
				+
			
 
				+	cpu_ctx_sched_out(cpuctx, EVENT_ALL);
			
 
				 
			
 
				-	raw_spin_lock(&ctx->lock);
			
 
				-	ctx->is_active = 1;
			
 
				 	update_context_time(ctx);
			
 
				 	/*
			
 
				 	 * update cgrp time only if current cgrp
			
@@ -1490,43 +1545,13 @@ static int  __perf_install_in_context(void *info)
 
				 
			
 
				 	add_event_to_ctx(event, ctx);
			
 
				 
			
 
				-	if (!event_filter_match(event))
			
 
				-		goto unlock;
			
 
				-
			
 
				-	/*
			
 
				-	 * Don't put the event on if it is disabled or if
			
 
				-	 * it is in a group and the group isn't on.
			
 
				-	 */
			
 
				-	if (event->state != PERF_EVENT_STATE_INACTIVE ||
			
 
				-	    (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE))
			
 
				-		goto unlock;
			
 
				-
			
 
				 	/*
			
 
				-	 * An exclusive event can't go on if there are already active
			
 
				-	 * hardware events, and no hardware event can go on if there
			
 
				-	 * is already an exclusive event on.
			
 
				+	 * Schedule everything back in
			
 
				 	 */
			
 
				-	if (!group_can_go_on(event, cpuctx, 1))
			
 
				-		err = -EEXIST;
			
 
				-	else
			
 
				-		err = event_sched_in(event, cpuctx, ctx);
			
 
				-
			
 
				-	if (err) {
			
 
				-		/*
			
 
				-		 * This event couldn't go on.  If it is in a group
			
 
				-		 * then we have to pull the whole group off.
			
 
				-		 * If the event group is pinned then put it in error state.
			
 
				-		 */
			
 
				-		if (leader != event)
			
 
				-			group_sched_out(leader, cpuctx, ctx);
			
 
				-		if (leader->attr.pinned) {
			
 
				-			update_group_times(leader);
			
 
				-			leader->state = PERF_EVENT_STATE_ERROR;
			
 
				-		}
			
 
				-	}
			
 
				+	perf_event_sched_in(cpuctx, task_ctx, task);
			
 
				 
			
 
				-unlock:
			
 
				-	raw_spin_unlock(&ctx->lock);
			
 
				+	perf_pmu_enable(cpuctx->ctx.pmu);
			
 
				+	perf_ctx_unlock(cpuctx, task_ctx);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -1739,7 +1764,7 @@ out:
 
				 	raw_spin_unlock_irq(&ctx->lock);
			
 
				 }
			
 
				 
			
 
				-static int perf_event_refresh(struct perf_event *event, int refresh)
			
 
				+int perf_event_refresh(struct perf_event *event, int refresh)
			
 
				 {
			
 
				 	/*
			
 
				 	 * not supported on inherited events
			
@@ -1752,36 +1777,35 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				+EXPORT_SYMBOL_GPL(perf_event_refresh);
			
 
				 
			
 
				 static void ctx_sched_out(struct perf_event_context *ctx,
			
 
				 			  struct perf_cpu_context *cpuctx,
			
 
				 			  enum event_type_t event_type)
			
 
				 {
			
 
				 	struct perf_event *event;
			
 
				+	int is_active = ctx->is_active;
			
 
				 
			
 
				-	raw_spin_lock(&ctx->lock);
			
 
				-	perf_pmu_disable(ctx->pmu);
			
 
				-	ctx->is_active = 0;
			
 
				+	ctx->is_active &= ~event_type;
			
 
				 	if (likely(!ctx->nr_events))
			
 
				-		goto out;
			
 
				+		return;
			
 
				+
			
 
				 	update_context_time(ctx);
			
 
				 	update_cgrp_time_from_cpuctx(cpuctx);
			
 
				-
			
 
				 	if (!ctx->nr_active)
			
 
				-		goto out;
			
 
				+		return;
			
 
				 
			
 
				-	if (event_type & EVENT_PINNED) {
			
 
				+	perf_pmu_disable(ctx->pmu);
			
 
				+	if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
			
 
				 		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
			
 
				 			group_sched_out(event, cpuctx, ctx);
			
 
				 	}
			
 
				 
			
 
				-	if (event_type & EVENT_FLEXIBLE) {
			
 
				+	if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
			
 
				 		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
			
 
				 			group_sched_out(event, cpuctx, ctx);
			
 
				 	}
			
 
				-out:
			
 
				 	perf_pmu_enable(ctx->pmu);
			
 
				-	raw_spin_unlock(&ctx->lock);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1929,8 +1953,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 
				 	rcu_read_unlock();
			
 
				 
			
 
				 	if (do_switch) {
			
 
				+		raw_spin_lock(&ctx->lock);
			
 
				 		ctx_sched_out(ctx, cpuctx, EVENT_ALL);
			
 
				 		cpuctx->task_ctx = NULL;
			
 
				+		raw_spin_unlock(&ctx->lock);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -1965,8 +1991,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
 
				 		perf_cgroup_sched_out(task);
			
 
				 }
			
 
				 
			
 
				-static void task_ctx_sched_out(struct perf_event_context *ctx,
			
 
				-			       enum event_type_t event_type)
			
 
				+static void task_ctx_sched_out(struct perf_event_context *ctx)
			
 
				 {
			
 
				 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
			
 
				 
			
@@ -1976,7 +2001,7 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
 
				 	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
			
 
				 		return;
			
 
				 
			
 
				-	ctx_sched_out(ctx, cpuctx, event_type);
			
 
				+	ctx_sched_out(ctx, cpuctx, EVENT_ALL);
			
 
				 	cpuctx->task_ctx = NULL;
			
 
				 }
			
 
				 
			
@@ -2055,11 +2080,11 @@ ctx_sched_in(struct perf_event_context *ctx,
 
				 	     struct task_struct *task)
			
 
				 {
			
 
				 	u64 now;
			
 
				+	int is_active = ctx->is_active;
			
 
				 
			
 
				-	raw_spin_lock(&ctx->lock);
			
 
				-	ctx->is_active = 1;
			
 
				+	ctx->is_active |= event_type;
			
 
				 	if (likely(!ctx->nr_events))
			
 
				-		goto out;
			
 
				+		return;
			
 
				 
			
 
				 	now = perf_clock();
			
 
				 	ctx->timestamp = now;
			
@@ -2068,15 +2093,12 @@ ctx_sched_in(struct perf_event_context *ctx,
 
				 	 * First go through the list and put on any pinned groups
			
 
				 	 * in order to give them the best chance of going on.
			
 
				 	 */
			
 
				-	if (event_type & EVENT_PINNED)
			
 
				+	if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
			
 
				 		ctx_pinned_sched_in(ctx, cpuctx);
			
 
				 
			
 
				 	/* Then walk through the lower prio flexible groups */
			
 
				-	if (event_type & EVENT_FLEXIBLE)
			
 
				+	if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
			
 
				 		ctx_flexible_sched_in(ctx, cpuctx);
			
 
				-
			
 
				-out:
			
 
				-	raw_spin_unlock(&ctx->lock);
			
 
				 }
			
 
				 
			
 
				 static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
			
@@ -2088,19 +2110,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 
				 	ctx_sched_in(ctx, cpuctx, event_type, task);
			
 
				 }
			
 
				 
			
 
				-static void task_ctx_sched_in(struct perf_event_context *ctx,
			
 
				-			      enum event_type_t event_type)
			
 
				-{
			
 
				-	struct perf_cpu_context *cpuctx;
			
 
				-
			
 
				-	cpuctx = __get_cpu_context(ctx);
			
 
				-	if (cpuctx->task_ctx == ctx)
			
 
				-		return;
			
 
				-
			
 
				-	ctx_sched_in(ctx, cpuctx, event_type, NULL);
			
 
				-	cpuctx->task_ctx = ctx;
			
 
				-}
			
 
				-
			
 
				 static void perf_event_context_sched_in(struct perf_event_context *ctx,
			
 
				 					struct task_struct *task)
			
 
				 {
			
@@ -2110,6 +2119,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 
				 	if (cpuctx->task_ctx == ctx)
			
 
				 		return;
			
 
				 
			
 
				+	perf_ctx_lock(cpuctx, ctx);
			
 
				 	perf_pmu_disable(ctx->pmu);
			
 
				 	/*
			
 
				 	 * We want to keep the following priority order:
			
@@ -2118,18 +2128,18 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 
				 	 */
			
 
				 	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
			
 
				 
			
 
				-	ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
			
 
				-	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
			
 
				-	ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
			
 
				+	perf_event_sched_in(cpuctx, ctx, task);
			
 
				 
			
 
				 	cpuctx->task_ctx = ctx;
			
 
				 
			
 
				+	perf_pmu_enable(ctx->pmu);
			
 
				+	perf_ctx_unlock(cpuctx, ctx);
			
 
				+
			
 
				 	/*
			
 
				 	 * Since these rotations are per-cpu, we need to ensure the
			
 
				 	 * cpu-context we got scheduled on is actually rotating.
			
 
				 	 */
			
 
				 	perf_pmu_rotate_start(ctx->pmu);
			
 
				-	perf_pmu_enable(ctx->pmu);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -2269,7 +2279,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
				 	u64 interrupts, now;
			
 
				 	s64 delta;
			
 
				 
			
 
				-	raw_spin_lock(&ctx->lock);
			
 
				 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
			
 
				 		if (event->state != PERF_EVENT_STATE_ACTIVE)
			
 
				 			continue;
			
@@ -2301,7 +2310,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
				 		if (delta > 0)
			
 
				 			perf_adjust_period(event, period, delta);
			
 
				 	}
			
 
				-	raw_spin_unlock(&ctx->lock);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -2309,16 +2317,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
				  */
			
 
				 static void rotate_ctx(struct perf_event_context *ctx)
			
 
				 {
			
 
				-	raw_spin_lock(&ctx->lock);
			
 
				-
			
 
				 	/*
			
 
				 	 * Rotate the first entry last of non-pinned groups. Rotation might be
			
 
				 	 * disabled by the inheritance code.
			
 
				 	 */
			
 
				 	if (!ctx->rotate_disable)
			
 
				 		list_rotate_left(&ctx->flexible_groups);
			
 
				-
			
 
				-	raw_spin_unlock(&ctx->lock);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -2345,6 +2349,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 
				 			rotate = 1;
			
 
				 	}
			
 
				 
			
 
				+	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
			
 
				 	perf_pmu_disable(cpuctx->ctx.pmu);
			
 
				 	perf_ctx_adjust_freq(&cpuctx->ctx, interval);
			
 
				 	if (ctx)
			
@@ -2355,21 +2360,20 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 
				 
			
 
				 	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
			
 
				 	if (ctx)
			
 
				-		task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
			
 
				+		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
			
 
				 
			
 
				 	rotate_ctx(&cpuctx->ctx);
			
 
				 	if (ctx)
			
 
				 		rotate_ctx(ctx);
			
 
				 
			
 
				-	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current);
			
 
				-	if (ctx)
			
 
				-		task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
			
 
				+	perf_event_sched_in(cpuctx, ctx, current);
			
 
				 
			
 
				 done:
			
 
				 	if (remove)
			
 
				 		list_del_init(&cpuctx->rotation_list);
			
 
				 
			
 
				 	perf_pmu_enable(cpuctx->ctx.pmu);
			
 
				+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
			
 
				 }
			
 
				 
			
 
				 void perf_event_task_tick(void)
			
@@ -2424,9 +2428,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 
				 	 * in.
			
 
				 	 */
			
 
				 	perf_cgroup_sched_out(current);
			
 
				-	task_ctx_sched_out(ctx, EVENT_ALL);
			
 
				 
			
 
				 	raw_spin_lock(&ctx->lock);
			
 
				+	task_ctx_sched_out(ctx);
			
 
				 
			
 
				 	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
			
 
				 		ret = event_enable_on_exec(event, ctx);
			
@@ -2835,16 +2839,12 @@ retry:
 
				 		unclone_ctx(ctx);
			
 
				 		++ctx->pin_count;
			
 
				 		raw_spin_unlock_irqrestore(&ctx->lock, flags);
			
 
				-	}
			
 
				-
			
 
				-	if (!ctx) {
			
 
				+	} else {
			
 
				 		ctx = alloc_perf_context(pmu, task);
			
 
				 		err = -ENOMEM;
			
 
				 		if (!ctx)
			
 
				 			goto errout;
			
 
				 
			
 
				-		get_ctx(ctx);
			
 
				-
			
 
				 		err = 0;
			
 
				 		mutex_lock(&task->perf_event_mutex);
			
 
				 		/*
			
@@ -2856,14 +2856,14 @@ retry:
 
				 		else if (task->perf_event_ctxp[ctxn])
			
 
				 			err = -EAGAIN;
			
 
				 		else {
			
 
				+			get_ctx(ctx);
			
 
				 			++ctx->pin_count;
			
 
				 			rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
			
 
				 		}
			
 
				 		mutex_unlock(&task->perf_event_mutex);
			
 
				 
			
 
				 		if (unlikely(err)) {
			
 
				-			put_task_struct(task);
			
 
				-			kfree(ctx);
			
 
				+			put_ctx(ctx);
			
 
				 
			
 
				 			if (err == -EAGAIN)
			
 
				 				goto retry;
			
@@ -2890,7 +2890,7 @@ static void free_event_rcu(struct rcu_head *head)
 
				 	kfree(event);
			
 
				 }
			
 
				 
			
 
				-static void perf_buffer_put(struct perf_buffer *buffer);
			
 
				+static void ring_buffer_put(struct ring_buffer *rb);
			
 
				 
			
 
				 static void free_event(struct perf_event *event)
			
 
				 {
			
@@ -2913,9 +2913,9 @@ static void free_event(struct perf_event *event)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (event->buffer) {
			
 
				-		perf_buffer_put(event->buffer);
			
 
				-		event->buffer = NULL;
			
 
				+	if (event->rb) {
			
 
				+		ring_buffer_put(event->rb);
			
 
				+		event->rb = NULL;
			
 
				 	}
			
 
				 
			
 
				 	if (is_cgroup_event(event))
			
@@ -2934,12 +2934,6 @@ int perf_event_release_kernel(struct perf_event *event)
 
				 {
			
 
				 	struct perf_event_context *ctx = event->ctx;
			
 
				 
			
 
				-	/*
			
 
				-	 * Remove from the PMU, can't get re-enabled since we got
			
 
				-	 * here because the last ref went.
			
 
				-	 */
			
 
				-	perf_event_disable(event);
			
 
				-
			
 
				 	WARN_ON_ONCE(ctx->parent_ctx);
			
 
				 	/*
			
 
				 	 * There are two ways this annotation is useful:
			
@@ -2956,8 +2950,8 @@ int perf_event_release_kernel(struct perf_event *event)
 
				 	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
			
 
				 	raw_spin_lock_irq(&ctx->lock);
			
 
				 	perf_group_detach(event);
			
 
				-	list_del_event(event, ctx);
			
 
				 	raw_spin_unlock_irq(&ctx->lock);
			
 
				+	perf_remove_from_context(event);
			
 
				 	mutex_unlock(&ctx->mutex);
			
 
				 
			
 
				 	free_event(event);
			
@@ -3149,13 +3143,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
				 static unsigned int perf_poll(struct file *file, poll_table *wait)
			
 
				 {
			
 
				 	struct perf_event *event = file->private_data;
			
 
				-	struct perf_buffer *buffer;
			
 
				+	struct ring_buffer *rb;
			
 
				 	unsigned int events = POLL_HUP;
			
 
				 
			
 
				 	rcu_read_lock();
			
 
				-	buffer = rcu_dereference(event->buffer);
			
 
				-	if (buffer)
			
 
				-		events = atomic_xchg(&buffer->poll, 0);
			
 
				+	rb = rcu_dereference(event->rb);
			
 
				+	if (rb)
			
 
				+		events = atomic_xchg(&rb->poll, 0);
			
 
				 	rcu_read_unlock();
			
 
				 
			
 
				 	poll_wait(file, &event->waitq, wait);
			
@@ -3358,6 +3352,18 @@ static int perf_event_index(struct perf_event *event)
 
				 	return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
			
 
				 }
			
 
				 
			
 
				+static void calc_timer_values(struct perf_event *event,
			
 
				+				u64 *running,
			
 
				+				u64 *enabled)
			
 
				+{
			
 
				+	u64 now, ctx_time;
			
 
				+
			
 
				+	now = perf_clock();
			
 
				+	ctx_time = event->shadow_ctx_time + now;
			
 
				+	*enabled = ctx_time - event->tstamp_enabled;
			
 
				+	*running = ctx_time - event->tstamp_running;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Callers need to ensure there can be no nesting of this function, otherwise
			
 
				  * the seqlock logic goes bad. We can not serialize this because the arch
			
@@ -3366,14 +3372,25 @@ static int perf_event_index(struct perf_event *event)
 
				 void perf_event_update_userpage(struct perf_event *event)
			
 
				 {
			
 
				 	struct perf_event_mmap_page *userpg;
			
 
				-	struct perf_buffer *buffer;
			
 
				+	struct ring_buffer *rb;
			
 
				+	u64 enabled, running;
			
 
				 
			
 
				 	rcu_read_lock();
			
 
				-	buffer = rcu_dereference(event->buffer);
			
 
				-	if (!buffer)
			
 
				+	/*
			
 
				+	 * compute total_time_enabled, total_time_running
			
 
				+	 * based on snapshot values taken when the event
			
 
				+	 * was last scheduled in.
			
 
				+	 *
			
 
				+	 * we cannot simply called update_context_time()
			
 
				+	 * because of locking issue as we can be called in
			
 
				+	 * NMI context
			
 
				+	 */
			
 
				+	calc_timer_values(event, &enabled, &running);
			
 
				+	rb = rcu_dereference(event->rb);
			
 
				+	if (!rb)
			
 
				 		goto unlock;
			
 
				 
			
 
				-	userpg = buffer->user_page;
			
 
				+	userpg = rb->user_page;
			
 
				 
			
 
				 	/*
			
 
				 	 * Disable preemption so as to not let the corresponding user-space
			
@@ -3387,10 +3404,10 @@ void perf_event_update_userpage(struct perf_event *event)
 
				 	if (event->state == PERF_EVENT_STATE_ACTIVE)
			
 
				 		userpg->offset -= local64_read(&event->hw.prev_count);
			
 
				 
			
 
				-	userpg->time_enabled = event->total_time_enabled +
			
 
				+	userpg->time_enabled = enabled +
			
 
				 			atomic64_read(&event->child_total_time_enabled);
			
 
				 
			
 
				-	userpg->time_running = event->total_time_running +
			
 
				+	userpg->time_running = running +
			
 
				 			atomic64_read(&event->child_total_time_running);
			
 
				 
			
 
				 	barrier();
			
@@ -3400,220 +3417,10 @@ unlock:
 
				 	rcu_read_unlock();
			
 
				 }
			
 
				 
			
 
				-static unsigned long perf_data_size(struct perf_buffer *buffer);
			
 
				-
			
 
				-static void
			
 
				-perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
			
 
				-{
			
 
				-	long max_size = perf_data_size(buffer);
			
 
				-
			
 
				-	if (watermark)
			
 
				-		buffer->watermark = min(max_size, watermark);
			
 
				-
			
 
				-	if (!buffer->watermark)
			
 
				-		buffer->watermark = max_size / 2;
			
 
				-
			
 
				-	if (flags & PERF_BUFFER_WRITABLE)
			
 
				-		buffer->writable = 1;
			
 
				-
			
 
				-	atomic_set(&buffer->refcount, 1);
			
 
				-}
			
 
				-
			
 
				-#ifndef CONFIG_PERF_USE_VMALLOC
			
 
				-
			
 
				-/*
			
 
				- * Back perf_mmap() with regular GFP_KERNEL-0 pages.
			
 
				- */
			
 
				-
			
 
				-static struct page *
			
 
				-perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
			
 
				-{
			
 
				-	if (pgoff > buffer->nr_pages)
			
 
				-		return NULL;
			
 
				-
			
 
				-	if (pgoff == 0)
			
 
				-		return virt_to_page(buffer->user_page);
			
 
				-
			
 
				-	return virt_to_page(buffer->data_pages[pgoff - 1]);
			
 
				-}
			
 
				-
			
 
				-static void *perf_mmap_alloc_page(int cpu)
			
 
				-{
			
 
				-	struct page *page;
			
 
				-	int node;
			
 
				-
			
 
				-	node = (cpu == -1) ? cpu : cpu_to_node(cpu);
			
 
				-	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
			
 
				-	if (!page)
			
 
				-		return NULL;
			
 
				-
			
 
				-	return page_address(page);
			
 
				-}
			
 
				-
			
 
				-static struct perf_buffer *
			
 
				-perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
			
 
				-{
			
 
				-	struct perf_buffer *buffer;
			
 
				-	unsigned long size;
			
 
				-	int i;
			
 
				-
			
 
				-	size = sizeof(struct perf_buffer);
			
 
				-	size += nr_pages * sizeof(void *);
			
 
				-
			
 
				-	buffer = kzalloc(size, GFP_KERNEL);
			
 
				-	if (!buffer)
			
 
				-		goto fail;
			
 
				-
			
 
				-	buffer->user_page = perf_mmap_alloc_page(cpu);
			
 
				-	if (!buffer->user_page)
			
 
				-		goto fail_user_page;
			
 
				-
			
 
				-	for (i = 0; i < nr_pages; i++) {
			
 
				-		buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
			
 
				-		if (!buffer->data_pages[i])
			
 
				-			goto fail_data_pages;
			
 
				-	}
			
 
				-
			
 
				-	buffer->nr_pages = nr_pages;
			
 
				-
			
 
				-	perf_buffer_init(buffer, watermark, flags);
			
 
				-
			
 
				-	return buffer;
			
 
				-
			
 
				-fail_data_pages:
			
 
				-	for (i--; i >= 0; i--)
			
 
				-		free_page((unsigned long)buffer->data_pages[i]);
			
 
				-
			
 
				-	free_page((unsigned long)buffer->user_page);
			
 
				-
			
 
				-fail_user_page:
			
 
				-	kfree(buffer);
			
 
				-
			
 
				-fail:
			
 
				-	return NULL;
			
 
				-}
			
 
				-
			
 
				-static void perf_mmap_free_page(unsigned long addr)
			
 
				-{
			
 
				-	struct page *page = virt_to_page((void *)addr);
			
 
				-
			
 
				-	page->mapping = NULL;
			
 
				-	__free_page(page);
			
 
				-}
			
 
				-
			
 
				-static void perf_buffer_free(struct perf_buffer *buffer)
			
 
				-{
			
 
				-	int i;
			
 
				-
			
 
				-	perf_mmap_free_page((unsigned long)buffer->user_page);
			
 
				-	for (i = 0; i < buffer->nr_pages; i++)
			
 
				-		perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
			
 
				-	kfree(buffer);
			
 
				-}
			
 
				-
			
 
				-static inline int page_order(struct perf_buffer *buffer)
			
 
				-{
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-#else
			
 
				-
			
 
				-/*
			
 
				- * Back perf_mmap() with vmalloc memory.
			
 
				- *
			
 
				- * Required for architectures that have d-cache aliasing issues.
			
 
				- */
			
 
				-
			
 
				-static inline int page_order(struct perf_buffer *buffer)
			
 
				-{
			
 
				-	return buffer->page_order;
			
 
				-}
			
 
				-
			
 
				-static struct page *
			
 
				-perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
			
 
				-{
			
 
				-	if (pgoff > (1UL << page_order(buffer)))
			
 
				-		return NULL;
			
 
				-
			
 
				-	return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
			
 
				-}
			
 
				-
			
 
				-static void perf_mmap_unmark_page(void *addr)
			
 
				-{
			
 
				-	struct page *page = vmalloc_to_page(addr);
			
 
				-
			
 
				-	page->mapping = NULL;
			
 
				-}
			
 
				-
			
 
				-static void perf_buffer_free_work(struct work_struct *work)
			
 
				-{
			
 
				-	struct perf_buffer *buffer;
			
 
				-	void *base;
			
 
				-	int i, nr;
			
 
				-
			
 
				-	buffer = container_of(work, struct perf_buffer, work);
			
 
				-	nr = 1 << page_order(buffer);
			
 
				-
			
 
				-	base = buffer->user_page;
			
 
				-	for (i = 0; i < nr + 1; i++)
			
 
				-		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
			
 
				-
			
 
				-	vfree(base);
			
 
				-	kfree(buffer);
			
 
				-}
			
 
				-
			
 
				-static void perf_buffer_free(struct perf_buffer *buffer)
			
 
				-{
			
 
				-	schedule_work(&buffer->work);
			
 
				-}
			
 
				-
			
 
				-static struct perf_buffer *
			
 
				-perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
			
 
				-{
			
 
				-	struct perf_buffer *buffer;
			
 
				-	unsigned long size;
			
 
				-	void *all_buf;
			
 
				-
			
 
				-	size = sizeof(struct perf_buffer);
			
 
				-	size += sizeof(void *);
			
 
				-
			
 
				-	buffer = kzalloc(size, GFP_KERNEL);
			
 
				-	if (!buffer)
			
 
				-		goto fail;
			
 
				-
			
 
				-	INIT_WORK(&buffer->work, perf_buffer_free_work);
			
 
				-
			
 
				-	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
			
 
				-	if (!all_buf)
			
 
				-		goto fail_all_buf;
			
 
				-
			
 
				-	buffer->user_page = all_buf;
			
 
				-	buffer->data_pages[0] = all_buf + PAGE_SIZE;
			
 
				-	buffer->page_order = ilog2(nr_pages);
			
 
				-	buffer->nr_pages = 1;
			
 
				-
			
 
				-	perf_buffer_init(buffer, watermark, flags);
			
 
				-
			
 
				-	return buffer;
			
 
				-
			
 
				-fail_all_buf:
			
 
				-	kfree(buffer);
			
 
				-
			
 
				-fail:
			
 
				-	return NULL;
			
 
				-}
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-static unsigned long perf_data_size(struct perf_buffer *buffer)
			
 
				-{
			
 
				-	return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
			
 
				-}
			
 
				-
			
 
				 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
			
 
				 {
			
 
				 	struct perf_event *event = vma->vm_file->private_data;
			
 
				-	struct perf_buffer *buffer;
			
 
				+	struct ring_buffer *rb;
			
 
				 	int ret = VM_FAULT_SIGBUS;
			
 
				 
			
 
				 	if (vmf->flags & FAULT_FLAG_MKWRITE) {
			
@@ -3623,14 +3430,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
				 	}
			
 
				 
			
 
				 	rcu_read_lock();
			
 
				-	buffer = rcu_dereference(event->buffer);
			
 
				-	if (!buffer)
			
 
				+	rb = rcu_dereference(event->rb);
			
 
				+	if (!rb)
			
 
				 		goto unlock;
			
 
				 
			
 
				 	if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
			
 
				 		goto unlock;
			
 
				 
			
 
				-	vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
			
 
				+	vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
			
 
				 	if (!vmf->page)
			
 
				 		goto unlock;
			
 
				 
			
@@ -3645,35 +3452,35 @@ unlock:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
			
 
				+static void rb_free_rcu(struct rcu_head *rcu_head)
			
 
				 {
			
 
				-	struct perf_buffer *buffer;
			
 
				+	struct ring_buffer *rb;
			
 
				 
			
 
				-	buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
			
 
				-	perf_buffer_free(buffer);
			
 
				+	rb = container_of(rcu_head, struct ring_buffer, rcu_head);
			
 
				+	rb_free(rb);
			
 
				 }
			
 
				 
			
 
				-static struct perf_buffer *perf_buffer_get(struct perf_event *event)
			
 
				+static struct ring_buffer *ring_buffer_get(struct perf_event *event)
			
 
				 {
			
 
				-	struct perf_buffer *buffer;
			
 
				+	struct ring_buffer *rb;
			
 
				 
			
 
				 	rcu_read_lock();
			
 
				-	buffer = rcu_dereference(event->buffer);
			
 
				-	if (buffer) {
			
 
				-		if (!atomic_inc_not_zero(&buffer->refcount))
			
 
				-			buffer = NULL;
			
 
				+	rb = rcu_dereference(event->rb);
			
 
				+	if (rb) {
			
 
				+		if (!atomic_inc_not_zero(&rb->refcount))
			
 
				+			rb = NULL;
			
 
				 	}
			
 
				 	rcu_read_unlock();
			
 
				 
			
 
				-	return buffer;
			
 
				+	return rb;
			
 
				 }
			
 
				 
			
 
				-static void perf_buffer_put(struct perf_buffer *buffer)
			
 
				+static void ring_buffer_put(struct ring_buffer *rb)
			
 
				 {
			
 
				-	if (!atomic_dec_and_test(&buffer->refcount))
			
 
				+	if (!atomic_dec_and_test(&rb->refcount))
			
 
				 		return;
			
 
				 
			
 
				-	call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
			
 
				+	call_rcu(&rb->rcu_head, rb_free_rcu);
			
 
				 }
			
 
				 
			
 
				 static void perf_mmap_open(struct vm_area_struct *vma)
			
@@ -3688,16 +3495,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
				 	struct perf_event *event = vma->vm_file->private_data;
			
 
				 
			
 
				 	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
			
 
				-		unsigned long size = perf_data_size(event->buffer);
			
 
				+		unsigned long size = perf_data_size(event->rb);
			
 
				 		struct user_struct *user = event->mmap_user;
			
 
				-		struct perf_buffer *buffer = event->buffer;
			
 
				+		struct ring_buffer *rb = event->rb;
			
 
				 
			
 
				 		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
			
 
				 		vma->vm_mm->locked_vm -= event->mmap_locked;
			
 
				-		rcu_assign_pointer(event->buffer, NULL);
			
 
				+		rcu_assign_pointer(event->rb, NULL);
			
 
				 		mutex_unlock(&event->mmap_mutex);
			
 
				 
			
 
				-		perf_buffer_put(buffer);
			
 
				+		ring_buffer_put(rb);
			
 
				 		free_uid(user);
			
 
				 	}
			
 
				 }
			
@@ -3715,7 +3522,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
				 	unsigned long user_locked, user_lock_limit;
			
 
				 	struct user_struct *user = current_user();
			
 
				 	unsigned long locked, lock_limit;
			
 
				-	struct perf_buffer *buffer;
			
 
				+	struct ring_buffer *rb;
			
 
				 	unsigned long vma_size;
			
 
				 	unsigned long nr_pages;
			
 
				 	long user_extra, extra;
			
@@ -3724,7 +3531,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
				 	/*
			
 
				 	 * Don't allow mmap() of inherited per-task counters. This would
			
 
				 	 * create a performance issue due to all children writing to the
			
 
				-	 * same buffer.
			
 
				+	 * same rb.
			
 
				 	 */
			
 
				 	if (event->cpu == -1 && event->attr.inherit)
			
 
				 		return -EINVAL;
			
@@ -3736,7 +3543,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
				 	nr_pages = (vma_size / PAGE_SIZE) - 1;
			
 
				 
			
 
				 	/*
			
 
				-	 * If we have buffer pages ensure they're a power-of-two number, so we
			
 
				+	 * If we have rb pages ensure they're a power-of-two number, so we
			
 
				 	 * can do bitmasks instead of modulo.
			
 
				 	 */
			
 
				 	if (nr_pages != 0 && !is_power_of_2(nr_pages))
			
@@ -3750,9 +3557,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
				 
			
 
				 	WARN_ON_ONCE(event->ctx->parent_ctx);
			
 
				 	mutex_lock(&event->mmap_mutex);
			
 
				-	if (event->buffer) {
			
 
				-		if (event->buffer->nr_pages == nr_pages)
			
 
				-			atomic_inc(&event->buffer->refcount);
			
 
				+	if (event->rb) {
			
 
				+		if (event->rb->nr_pages == nr_pages)
			
 
				+			atomic_inc(&event->rb->refcount);
			
 
				 		else
			
 
				 			ret = -EINVAL;
			
 
				 		goto unlock;
			
@@ -3782,18 +3589,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
				 		goto unlock;
			
 
				 	}
			
 
				 
			
 
				-	WARN_ON(event->buffer);
			
 
				+	WARN_ON(event->rb);
			
 
				 
			
 
				 	if (vma->vm_flags & VM_WRITE)
			
 
				-		flags |= PERF_BUFFER_WRITABLE;
			
 
				+		flags |= RING_BUFFER_WRITABLE;
			
 
				 
			
 
				-	buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
			
 
				-				   event->cpu, flags);
			
 
				-	if (!buffer) {
			
 
				+	rb = rb_alloc(nr_pages, 
			
 
				+		event->attr.watermark ? event->attr.wakeup_watermark : 0,
			
 
				+		event->cpu, flags);
			
 
				+
			
 
				+	if (!rb) {
			
 
				 		ret = -ENOMEM;
			
 
				 		goto unlock;
			
 
				 	}
			
 
				-	rcu_assign_pointer(event->buffer, buffer);
			
 
				+	rcu_assign_pointer(event->rb, rb);
			
 
				 
			
 
				 	atomic_long_add(user_extra, &user->locked_vm);
			
 
				 	event->mmap_locked = extra;
			
@@ -3892,117 +3701,6 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
			
 
				 
			
 
				-/*
			
 
				- * Output
			
 
				- */
			
 
				-static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
			
 
				-			      unsigned long offset, unsigned long head)
			
 
				-{
			
 
				-	unsigned long mask;
			
 
				-
			
 
				-	if (!buffer->writable)
			
 
				-		return true;
			
 
				-
			
 
				-	mask = perf_data_size(buffer) - 1;
			
 
				-
			
 
				-	offset = (offset - tail) & mask;
			
 
				-	head   = (head   - tail) & mask;
			
 
				-
			
 
				-	if ((int)(head - offset) < 0)
			
 
				-		return false;
			
 
				-
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static void perf_output_wakeup(struct perf_output_handle *handle)
			
 
				-{
			
 
				-	atomic_set(&handle->buffer->poll, POLL_IN);
			
 
				-
			
 
				-	if (handle->nmi) {
			
 
				-		handle->event->pending_wakeup = 1;
			
 
				-		irq_work_queue(&handle->event->pending);
			
 
				-	} else
			
 
				-		perf_event_wakeup(handle->event);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * We need to ensure a later event_id doesn't publish a head when a former
			
 
				- * event isn't done writing. However since we need to deal with NMIs we
			
 
				- * cannot fully serialize things.
			
 
				- *
			
 
				- * We only publish the head (and generate a wakeup) when the outer-most
			
 
				- * event completes.
			
 
				- */
			
 
				-static void perf_output_get_handle(struct perf_output_handle *handle)
			
 
				-{
			
 
				-	struct perf_buffer *buffer = handle->buffer;
			
 
				-
			
 
				-	preempt_disable();
			
 
				-	local_inc(&buffer->nest);
			
 
				-	handle->wakeup = local_read(&buffer->wakeup);
			
 
				-}
			
 
				-
			
 
				-static void perf_output_put_handle(struct perf_output_handle *handle)
			
 
				-{
			
 
				-	struct perf_buffer *buffer = handle->buffer;
			
 
				-	unsigned long head;
			
 
				-
			
 
				-again:
			
 
				-	head = local_read(&buffer->head);
			
 
				-
			
 
				-	/*
			
 
				-	 * IRQ/NMI can happen here, which means we can miss a head update.
			
 
				-	 */
			
 
				-
			
 
				-	if (!local_dec_and_test(&buffer->nest))
			
 
				-		goto out;
			
 
				-
			
 
				-	/*
			
 
				-	 * Publish the known good head. Rely on the full barrier implied
			
 
				-	 * by atomic_dec_and_test() order the buffer->head read and this
			
 
				-	 * write.
			
 
				-	 */
			
 
				-	buffer->user_page->data_head = head;
			
 
				-
			
 
				-	/*
			
 
				-	 * Now check if we missed an update, rely on the (compiler)
			
 
				-	 * barrier in atomic_dec_and_test() to re-read buffer->head.
			
 
				-	 */
			
 
				-	if (unlikely(head != local_read(&buffer->head))) {
			
 
				-		local_inc(&buffer->nest);
			
 
				-		goto again;
			
 
				-	}
			
 
				-
			
 
				-	if (handle->wakeup != local_read(&buffer->wakeup))
			
 
				-		perf_output_wakeup(handle);
			
 
				-
			
 
				-out:
			
 
				-	preempt_enable();
			
 
				-}
			
 
				-
			
 
				-__always_inline void perf_output_copy(struct perf_output_handle *handle,
			
 
				-		      const void *buf, unsigned int len)
			
 
				-{
			
 
				-	do {
			
 
				-		unsigned long size = min_t(unsigned long, handle->size, len);
			
 
				-
			
 
				-		memcpy(handle->addr, buf, size);
			
 
				-
			
 
				-		len -= size;
			
 
				-		handle->addr += size;
			
 
				-		buf += size;
			
 
				-		handle->size -= size;
			
 
				-		if (!handle->size) {
			
 
				-			struct perf_buffer *buffer = handle->buffer;
			
 
				-
			
 
				-			handle->page++;
			
 
				-			handle->page &= buffer->nr_pages - 1;
			
 
				-			handle->addr = buffer->data_pages[handle->page];
			
 
				-			handle->size = PAGE_SIZE << page_order(buffer);
			
 
				-		}
			
 
				-	} while (len);
			
 
				-}
			
 
				-
			
 
				 static void __perf_event_header__init_id(struct perf_event_header *header,
			
 
				 					 struct perf_sample_data *data,
			
 
				 					 struct perf_event *event)
			
@@ -4033,9 +3731,9 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void perf_event_header__init_id(struct perf_event_header *header,
			
 
				-				       struct perf_sample_data *data,
			
 
				-				       struct perf_event *event)
			
 
				+void perf_event_header__init_id(struct perf_event_header *header,
			
 
				+				struct perf_sample_data *data,
			
 
				+				struct perf_event *event)
			
 
				 {
			
 
				 	if (event->attr.sample_id_all)
			
 
				 		__perf_event_header__init_id(header, data, event);
			
@@ -4062,121 +3760,14 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle,
 
				 		perf_output_put(handle, data->cpu_entry);
			
 
				 }
			
 
				 
			
 
				-static void perf_event__output_id_sample(struct perf_event *event,
			
 
				-					 struct perf_output_handle *handle,
			
 
				-					 struct perf_sample_data *sample)
			
 
				+void perf_event__output_id_sample(struct perf_event *event,
			
 
				+				  struct perf_output_handle *handle,
			
 
				+				  struct perf_sample_data *sample)
			
 
				 {
			
 
				 	if (event->attr.sample_id_all)
			
 
				 		__perf_event__output_id_sample(handle, sample);
			
 
				 }
			
 
				 
			
 
				-int perf_output_begin(struct perf_output_handle *handle,
			
 
				-		      struct perf_event *event, unsigned int size,
			
 
				-		      int nmi, int sample)
			
 
				-{
			
 
				-	struct perf_buffer *buffer;
			
 
				-	unsigned long tail, offset, head;
			
 
				-	int have_lost;
			
 
				-	struct perf_sample_data sample_data;
			
 
				-	struct {
			
 
				-		struct perf_event_header header;
			
 
				-		u64			 id;
			
 
				-		u64			 lost;
			
 
				-	} lost_event;
			
 
				-
			
 
				-	rcu_read_lock();
			
 
				-	/*
			
 
				-	 * For inherited events we send all the output towards the parent.
			
 
				-	 */
			
 
				-	if (event->parent)
			
 
				-		event = event->parent;
			
 
				-
			
 
				-	buffer = rcu_dereference(event->buffer);
			
 
				-	if (!buffer)
			
 
				-		goto out;
			
 
				-
			
 
				-	handle->buffer	= buffer;
			
 
				-	handle->event	= event;
			
 
				-	handle->nmi	= nmi;
			
 
				-	handle->sample	= sample;
			
 
				-
			
 
				-	if (!buffer->nr_pages)
			
 
				-		goto out;
			
 
				-
			
 
				-	have_lost = local_read(&buffer->lost);
			
 
				-	if (have_lost) {
			
 
				-		lost_event.header.size = sizeof(lost_event);
			
 
				-		perf_event_header__init_id(&lost_event.header, &sample_data,
			
 
				-					   event);
			
 
				-		size += lost_event.header.size;
			
 
				-	}
			
 
				-
			
 
				-	perf_output_get_handle(handle);
			
 
				-
			
 
				-	do {
			
 
				-		/*
			
 
				-		 * Userspace could choose to issue a mb() before updating the
			
 
				-		 * tail pointer. So that all reads will be completed before the
			
 
				-		 * write is issued.
			
 
				-		 */
			
 
				-		tail = ACCESS_ONCE(buffer->user_page->data_tail);
			
 
				-		smp_rmb();
			
 
				-		offset = head = local_read(&buffer->head);
			
 
				-		head += size;
			
 
				-		if (unlikely(!perf_output_space(buffer, tail, offset, head)))
			
 
				-			goto fail;
			
 
				-	} while (local_cmpxchg(&buffer->head, offset, head) != offset);
			
 
				-
			
 
				-	if (head - local_read(&buffer->wakeup) > buffer->watermark)
			
 
				-		local_add(buffer->watermark, &buffer->wakeup);
			
 
				-
			
 
				-	handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
			
 
				-	handle->page &= buffer->nr_pages - 1;
			
 
				-	handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
			
 
				-	handle->addr = buffer->data_pages[handle->page];
			
 
				-	handle->addr += handle->size;
			
 
				-	handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
			
 
				-
			
 
				-	if (have_lost) {
			
 
				-		lost_event.header.type = PERF_RECORD_LOST;
			
 
				-		lost_event.header.misc = 0;
			
 
				-		lost_event.id          = event->id;
			
 
				-		lost_event.lost        = local_xchg(&buffer->lost, 0);
			
 
				-
			
 
				-		perf_output_put(handle, lost_event);
			
 
				-		perf_event__output_id_sample(event, handle, &sample_data);
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-
			
 
				-fail:
			
 
				-	local_inc(&buffer->lost);
			
 
				-	perf_output_put_handle(handle);
			
 
				-out:
			
 
				-	rcu_read_unlock();
			
 
				-
			
 
				-	return -ENOSPC;
			
 
				-}
			
 
				-
			
 
				-void perf_output_end(struct perf_output_handle *handle)
			
 
				-{
			
 
				-	struct perf_event *event = handle->event;
			
 
				-	struct perf_buffer *buffer = handle->buffer;
			
 
				-
			
 
				-	int wakeup_events = event->attr.wakeup_events;
			
 
				-
			
 
				-	if (handle->sample && wakeup_events) {
			
 
				-		int events = local_inc_return(&buffer->events);
			
 
				-		if (events >= wakeup_events) {
			
 
				-			local_sub(wakeup_events, &buffer->events);
			
 
				-			local_inc(&buffer->wakeup);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	perf_output_put_handle(handle);
			
 
				-	rcu_read_unlock();
			
 
				-}
			
 
				-
			
 
				 static void perf_output_read_one(struct perf_output_handle *handle,
			
 
				 				 struct perf_event *event,
			
 
				 				 u64 enabled, u64 running)
			
@@ -4197,7 +3788,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 
				 	if (read_format & PERF_FORMAT_ID)
			
 
				 		values[n++] = primary_event_id(event);
			
 
				 
			
 
				-	perf_output_copy(handle, values, n * sizeof(u64));
			
 
				+	__output_copy(handle, values, n * sizeof(u64));
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -4227,7 +3818,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 
				 	if (read_format & PERF_FORMAT_ID)
			
 
				 		values[n++] = primary_event_id(leader);
			
 
				 
			
 
				-	perf_output_copy(handle, values, n * sizeof(u64));
			
 
				+	__output_copy(handle, values, n * sizeof(u64));
			
 
				 
			
 
				 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
			
 
				 		n = 0;
			
@@ -4239,7 +3830,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 
				 		if (read_format & PERF_FORMAT_ID)
			
 
				 			values[n++] = primary_event_id(sub);
			
 
				 
			
 
				-		perf_output_copy(handle, values, n * sizeof(u64));
			
 
				+		__output_copy(handle, values, n * sizeof(u64));
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -4249,7 +3840,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 
				 static void perf_output_read(struct perf_output_handle *handle,
			
 
				 			     struct perf_event *event)
			
 
				 {
			
 
				-	u64 enabled = 0, running = 0, now, ctx_time;
			
 
				+	u64 enabled = 0, running = 0;
			
 
				 	u64 read_format = event->attr.read_format;
			
 
				 
			
 
				 	/*
			
@@ -4261,12 +3852,8 @@ static void perf_output_read(struct perf_output_handle *handle,
 
				 	 * because of locking issue as we are called in
			
 
				 	 * NMI context
			
 
				 	 */
			
 
				-	if (read_format & PERF_FORMAT_TOTAL_TIMES) {
			
 
				-		now = perf_clock();
			
 
				-		ctx_time = event->shadow_ctx_time + now;
			
 
				-		enabled = ctx_time - event->tstamp_enabled;
			
 
				-		running = ctx_time - event->tstamp_running;
			
 
				-	}
			
 
				+	if (read_format & PERF_FORMAT_TOTAL_TIMES)
			
 
				+		calc_timer_values(event, &enabled, &running);
			
 
				 
			
 
				 	if (event->attr.read_format & PERF_FORMAT_GROUP)
			
 
				 		perf_output_read_group(handle, event, enabled, running);
			
@@ -4319,7 +3906,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 
				 
			
 
				 			size *= sizeof(u64);
			
 
				 
			
 
				-			perf_output_copy(handle, data->callchain, size);
			
 
				+			__output_copy(handle, data->callchain, size);
			
 
				 		} else {
			
 
				 			u64 nr = 0;
			
 
				 			perf_output_put(handle, nr);
			
@@ -4329,8 +3916,8 @@ void perf_output_sample(struct perf_output_handle *handle,
 
				 	if (sample_type & PERF_SAMPLE_RAW) {
			
 
				 		if (data->raw) {
			
 
				 			perf_output_put(handle, data->raw->size);
			
 
				-			perf_output_copy(handle, data->raw->data,
			
 
				-					 data->raw->size);
			
 
				+			__output_copy(handle, data->raw->data,
			
 
				+					   data->raw->size);
			
 
				 		} else {
			
 
				 			struct {
			
 
				 				u32	size;
			
@@ -4342,6 +3929,20 @@ void perf_output_sample(struct perf_output_handle *handle,
 
				 			perf_output_put(handle, raw);
			
 
				 		}
			
 
				 	}
			
 
				+
			
 
				+	if (!event->attr.watermark) {
			
 
				+		int wakeup_events = event->attr.wakeup_events;
			
 
				+
			
 
				+		if (wakeup_events) {
			
 
				+			struct ring_buffer *rb = handle->rb;
			
 
				+			int events = local_inc_return(&rb->events);
			
 
				+
			
 
				+			if (events >= wakeup_events) {
			
 
				+				local_sub(wakeup_events, &rb->events);
			
 
				+				local_inc(&rb->wakeup);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 void perf_prepare_sample(struct perf_event_header *header,
			
@@ -4386,7 +3987,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void perf_event_output(struct perf_event *event, int nmi,
			
 
				+static void perf_event_output(struct perf_event *event,
			
 
				 				struct perf_sample_data *data,
			
 
				 				struct pt_regs *regs)
			
 
				 {
			
@@ -4398,7 +3999,7 @@ static void perf_event_output(struct perf_event *event, int nmi,
 
				 
			
 
				 	perf_prepare_sample(&header, data, event, regs);
			
 
				 
			
 
				-	if (perf_output_begin(&handle, event, header.size, nmi, 1))
			
 
				+	if (perf_output_begin(&handle, event, header.size))
			
 
				 		goto exit;
			
 
				 
			
 
				 	perf_output_sample(&handle, &header, data, event);
			
@@ -4438,7 +4039,7 @@ perf_event_read_event(struct perf_event *event,
 
				 	int ret;
			
 
				 
			
 
				 	perf_event_header__init_id(&read_event.header, &sample, event);
			
 
				-	ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
			
 
				+	ret = perf_output_begin(&handle, event, read_event.header.size);
			
 
				 	if (ret)
			
 
				 		return;
			
 
				 
			
@@ -4481,7 +4082,7 @@ static void perf_event_task_output(struct perf_event *event,
 
				 	perf_event_header__init_id(&task_event->event_id.header, &sample, event);
			
 
				 
			
 
				 	ret = perf_output_begin(&handle, event,
			
 
				-				task_event->event_id.header.size, 0, 0);
			
 
				+				task_event->event_id.header.size);
			
 
				 	if (ret)
			
 
				 		goto out;
			
 
				 
			
@@ -4618,7 +4219,7 @@ static void perf_event_comm_output(struct perf_event *event,
 
				 
			
 
				 	perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
			
 
				 	ret = perf_output_begin(&handle, event,
			
 
				-				comm_event->event_id.header.size, 0, 0);
			
 
				+				comm_event->event_id.header.size);
			
 
				 
			
 
				 	if (ret)
			
 
				 		goto out;
			
@@ -4627,7 +4228,7 @@ static void perf_event_comm_output(struct perf_event *event,
 
				 	comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
			
 
				 
			
 
				 	perf_output_put(&handle, comm_event->event_id);
			
 
				-	perf_output_copy(&handle, comm_event->comm,
			
 
				+	__output_copy(&handle, comm_event->comm,
			
 
				 				   comm_event->comm_size);
			
 
				 
			
 
				 	perf_event__output_id_sample(event, &handle, &sample);
			
@@ -4765,7 +4366,7 @@ static void perf_event_mmap_output(struct perf_event *event,
 
				 
			
 
				 	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
			
 
				 	ret = perf_output_begin(&handle, event,
			
 
				-				mmap_event->event_id.header.size, 0, 0);
			
 
				+				mmap_event->event_id.header.size);
			
 
				 	if (ret)
			
 
				 		goto out;
			
 
				 
			
@@ -4773,7 +4374,7 @@ static void perf_event_mmap_output(struct perf_event *event,
 
				 	mmap_event->event_id.tid = perf_event_tid(event, current);
			
 
				 
			
 
				 	perf_output_put(&handle, mmap_event->event_id);
			
 
				-	perf_output_copy(&handle, mmap_event->file_name,
			
 
				+	__output_copy(&handle, mmap_event->file_name,
			
 
				 				   mmap_event->file_size);
			
 
				 
			
 
				 	perf_event__output_id_sample(event, &handle, &sample);
			
@@ -4829,7 +4430,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 
				 
			
 
				 	if (file) {
			
 
				 		/*
			
 
				-		 * d_path works from the end of the buffer backwards, so we
			
 
				+		 * d_path works from the end of the rb backwards, so we
			
 
				 		 * need to add enough zero bytes after the string to handle
			
 
				 		 * the 64bit alignment we do later.
			
 
				 		 */
			
@@ -4960,7 +4561,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
 
				 	perf_event_header__init_id(&throttle_event.header, &sample, event);
			
 
				 
			
 
				 	ret = perf_output_begin(&handle, event,
			
 
				-				throttle_event.header.size, 1, 0);
			
 
				+				throttle_event.header.size);
			
 
				 	if (ret)
			
 
				 		return;
			
 
				 
			
@@ -4973,7 +4574,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
 
				  * Generic event overflow handling, sampling.
			
 
				  */
			
 
				 
			
 
				-static int __perf_event_overflow(struct perf_event *event, int nmi,
			
 
				+static int __perf_event_overflow(struct perf_event *event,
			
 
				 				   int throttle, struct perf_sample_data *data,
			
 
				 				   struct pt_regs *regs)
			
 
				 {
			
@@ -5016,34 +4617,28 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 
				 	if (events && atomic_dec_and_test(&event->event_limit)) {
			
 
				 		ret = 1;
			
 
				 		event->pending_kill = POLL_HUP;
			
 
				-		if (nmi) {
			
 
				-			event->pending_disable = 1;
			
 
				-			irq_work_queue(&event->pending);
			
 
				-		} else
			
 
				-			perf_event_disable(event);
			
 
				+		event->pending_disable = 1;
			
 
				+		irq_work_queue(&event->pending);
			
 
				 	}
			
 
				 
			
 
				 	if (event->overflow_handler)
			
 
				-		event->overflow_handler(event, nmi, data, regs);
			
 
				+		event->overflow_handler(event, data, regs);
			
 
				 	else
			
 
				-		perf_event_output(event, nmi, data, regs);
			
 
				+		perf_event_output(event, data, regs);
			
 
				 
			
 
				 	if (event->fasync && event->pending_kill) {
			
 
				-		if (nmi) {
			
 
				-			event->pending_wakeup = 1;
			
 
				-			irq_work_queue(&event->pending);
			
 
				-		} else
			
 
				-			perf_event_wakeup(event);
			
 
				+		event->pending_wakeup = 1;
			
 
				+		irq_work_queue(&event->pending);
			
 
				 	}
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int perf_event_overflow(struct perf_event *event, int nmi,
			
 
				+int perf_event_overflow(struct perf_event *event,
			
 
				 			  struct perf_sample_data *data,
			
 
				 			  struct pt_regs *regs)
			
 
				 {
			
 
				-	return __perf_event_overflow(event, nmi, 1, data, regs);
			
 
				+	return __perf_event_overflow(event, 1, data, regs);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -5092,7 +4687,7 @@ again:
 
				 }
			
 
				 
			
 
				 static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
			
 
				-				    int nmi, struct perf_sample_data *data,
			
 
				+				    struct perf_sample_data *data,
			
 
				 				    struct pt_regs *regs)
			
 
				 {
			
 
				 	struct hw_perf_event *hwc = &event->hw;
			
@@ -5106,7 +4701,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
 
				 		return;
			
 
				 
			
 
				 	for (; overflow; overflow--) {
			
 
				-		if (__perf_event_overflow(event, nmi, throttle,
			
 
				+		if (__perf_event_overflow(event, throttle,
			
 
				 					    data, regs)) {
			
 
				 			/*
			
 
				 			 * We inhibit the overflow from happening when
			
@@ -5119,7 +4714,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
 
				 }
			
 
				 
			
 
				 static void perf_swevent_event(struct perf_event *event, u64 nr,
			
 
				-			       int nmi, struct perf_sample_data *data,
			
 
				+			       struct perf_sample_data *data,
			
 
				 			       struct pt_regs *regs)
			
 
				 {
			
 
				 	struct hw_perf_event *hwc = &event->hw;
			
@@ -5133,12 +4728,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
 
				 		return;
			
 
				 
			
 
				 	if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
			
 
				-		return perf_swevent_overflow(event, 1, nmi, data, regs);
			
 
				+		return perf_swevent_overflow(event, 1, data, regs);
			
 
				 
			
 
				 	if (local64_add_negative(nr, &hwc->period_left))
			
 
				 		return;
			
 
				 
			
 
				-	perf_swevent_overflow(event, 0, nmi, data, regs);
			
 
				+	perf_swevent_overflow(event, 0, data, regs);
			
 
				 }
			
 
				 
			
 
				 static int perf_exclude_event(struct perf_event *event,
			
@@ -5226,7 +4821,7 @@ find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
 
				 }
			
 
				 
			
 
				 static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
			
 
				-				    u64 nr, int nmi,
			
 
				+				    u64 nr,
			
 
				 				    struct perf_sample_data *data,
			
 
				 				    struct pt_regs *regs)
			
 
				 {
			
@@ -5242,7 +4837,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 
				 
			
 
				 	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
			
 
				 		if (perf_swevent_match(event, type, event_id, data, regs))
			
 
				-			perf_swevent_event(event, nr, nmi, data, regs);
			
 
				+			perf_swevent_event(event, nr, data, regs);
			
 
				 	}
			
 
				 end:
			
 
				 	rcu_read_unlock();
			
@@ -5263,8 +4858,7 @@ inline void perf_swevent_put_recursion_context(int rctx)
 
				 	put_recursion_context(swhash->recursion, rctx);
			
 
				 }
			
 
				 
			
 
				-void __perf_sw_event(u32 event_id, u64 nr, int nmi,
			
 
				-			    struct pt_regs *regs, u64 addr)
			
 
				+void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
			
 
				 {
			
 
				 	struct perf_sample_data data;
			
 
				 	int rctx;
			
@@ -5276,7 +4870,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 
				 
			
 
				 	perf_sample_data_init(&data, addr);
			
 
				 
			
 
				-	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
			
 
				+	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
			
 
				 
			
 
				 	perf_swevent_put_recursion_context(rctx);
			
 
				 	preempt_enable_notrace();
			
@@ -5524,7 +5118,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
 
				 
			
 
				 	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
			
 
				 		if (perf_tp_event_match(event, &data, regs))
			
 
				-			perf_swevent_event(event, count, 1, &data, regs);
			
 
				+			perf_swevent_event(event, count, &data, regs);
			
 
				 	}
			
 
				 
			
 
				 	perf_swevent_put_recursion_context(rctx);
			
@@ -5617,7 +5211,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
 
				 	perf_sample_data_init(&sample, bp->attr.bp_addr);
			
 
				 
			
 
				 	if (!bp->hw.state && !perf_exclude_event(bp, regs))
			
 
				-		perf_swevent_event(bp, 1, 1, &sample, regs);
			
 
				+		perf_swevent_event(bp, 1, &sample, regs);
			
 
				 }
			
 
				 #endif
			
 
				 
			
@@ -5646,7 +5240,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 
				 
			
 
				 	if (regs && !perf_exclude_event(event, regs)) {
			
 
				 		if (!(event->attr.exclude_idle && current->pid == 0))
			
 
				-			if (perf_event_overflow(event, 0, &data, regs))
			
 
				+			if (perf_event_overflow(event, &data, regs))
			
 
				 				ret = HRTIMER_NORESTART;
			
 
				 	}
			
 
				 
			
@@ -5986,6 +5580,7 @@ free_dev:
 
				 }
			
 
				 
			
 
				 static struct lock_class_key cpuctx_mutex;
			
 
				+static struct lock_class_key cpuctx_lock;
			
 
				 
			
 
				 int perf_pmu_register(struct pmu *pmu, char *name, int type)
			
 
				 {
			
@@ -6036,6 +5631,7 @@ skip_type:
 
				 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
			
 
				 		__perf_event_init_context(&cpuctx->ctx);
			
 
				 		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
			
 
				+		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
			
 
				 		cpuctx->ctx.type = cpu_context;
			
 
				 		cpuctx->ctx.pmu = pmu;
			
 
				 		cpuctx->jiffies_interval = 1;
			
@@ -6150,7 +5746,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
				 		 struct task_struct *task,
			
 
				 		 struct perf_event *group_leader,
			
 
				 		 struct perf_event *parent_event,
			
 
				-		 perf_overflow_handler_t overflow_handler)
			
 
				+		 perf_overflow_handler_t overflow_handler,
			
 
				+		 void *context)
			
 
				 {
			
 
				 	struct pmu *pmu;
			
 
				 	struct perf_event *event;
			
@@ -6208,10 +5805,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
				 #endif
			
 
				 	}
			
 
				 
			
 
				-	if (!overflow_handler && parent_event)
			
 
				+	if (!overflow_handler && parent_event) {
			
 
				 		overflow_handler = parent_event->overflow_handler;
			
 
				+		context = parent_event->overflow_handler_context;
			
 
				+	}
			
 
				 
			
 
				 	event->overflow_handler	= overflow_handler;
			
 
				+	event->overflow_handler_context = context;
			
 
				 
			
 
				 	if (attr->disabled)
			
 
				 		event->state = PERF_EVENT_STATE_OFF;
			
@@ -6326,13 +5926,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 
				 	if (ret)
			
 
				 		return -EFAULT;
			
 
				 
			
 
				-	/*
			
 
				-	 * If the type exists, the corresponding creation will verify
			
 
				-	 * the attr->config.
			
 
				-	 */
			
 
				-	if (attr->type >= PERF_TYPE_MAX)
			
 
				-		return -EINVAL;
			
 
				-
			
 
				 	if (attr->__reserved_1)
			
 
				 		return -EINVAL;
			
 
				 
			
@@ -6354,7 +5947,7 @@ err_size:
 
				 static int
			
 
				 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
			
 
				 {
			
 
				-	struct perf_buffer *buffer = NULL, *old_buffer = NULL;
			
 
				+	struct ring_buffer *rb = NULL, *old_rb = NULL;
			
 
				 	int ret = -EINVAL;
			
 
				 
			
 
				 	if (!output_event)
			
@@ -6371,7 +5964,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 
				 		goto out;
			
 
				 
			
 
				 	/*
			
 
				-	 * If its not a per-cpu buffer, it must be the same task.
			
 
				+	 * If it's not a per-cpu rb, it must be the same task.
			
 
				 	 */
			
 
				 	if (output_event->cpu == -1 && output_event->ctx != event->ctx)
			
 
				 		goto out;
			
@@ -6383,20 +5976,20 @@ set:
 
				 		goto unlock;
			
 
				 
			
 
				 	if (output_event) {
			
 
				-		/* get the buffer we want to redirect to */
			
 
				-		buffer = perf_buffer_get(output_event);
			
 
				-		if (!buffer)
			
 
				+		/* get the rb we want to redirect to */
			
 
				+		rb = ring_buffer_get(output_event);
			
 
				+		if (!rb)
			
 
				 			goto unlock;
			
 
				 	}
			
 
				 
			
 
				-	old_buffer = event->buffer;
			
 
				-	rcu_assign_pointer(event->buffer, buffer);
			
 
				+	old_rb = event->rb;
			
 
				+	rcu_assign_pointer(event->rb, rb);
			
 
				 	ret = 0;
			
 
				 unlock:
			
 
				 	mutex_unlock(&event->mmap_mutex);
			
 
				 
			
 
				-	if (old_buffer)
			
 
				-		perf_buffer_put(old_buffer);
			
 
				+	if (old_rb)
			
 
				+		ring_buffer_put(old_rb);
			
 
				 out:
			
 
				 	return ret;
			
 
				 }
			
@@ -6478,7 +6071,8 @@ SYSCALL_DEFINE5(perf_event_open,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL);
			
 
				+	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
			
 
				+				 NULL, NULL);
			
 
				 	if (IS_ERR(event)) {
			
 
				 		err = PTR_ERR(event);
			
 
				 		goto err_task;
			
@@ -6663,7 +6257,8 @@ err_fd:
 
				 struct perf_event *
			
 
				 perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
			
 
				 				 struct task_struct *task,
			
 
				-				 perf_overflow_handler_t overflow_handler)
			
 
				+				 perf_overflow_handler_t overflow_handler,
			
 
				+				 void *context)
			
 
				 {
			
 
				 	struct perf_event_context *ctx;
			
 
				 	struct perf_event *event;
			
@@ -6673,7 +6268,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
				 	 * Get the target context (task or percpu):
			
 
				 	 */
			
 
				 
			
 
				-	event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler);
			
 
				+	event = perf_event_alloc(attr, cpu, task, NULL, NULL,
			
 
				+				 overflow_handler, context);
			
 
				 	if (IS_ERR(event)) {
			
 
				 		err = PTR_ERR(event);
			
 
				 		goto err;
			
@@ -6780,7 +6376,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 
				 	 * our context.
			
 
				 	 */
			
 
				 	child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
			
 
				-	task_ctx_sched_out(child_ctx, EVENT_ALL);
			
 
				 
			
 
				 	/*
			
 
				 	 * Take the context lock here so that if find_get_context is
			
@@ -6788,6 +6383,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 
				 	 * incremented the context's refcount before we do put_ctx below.
			
 
				 	 */
			
 
				 	raw_spin_lock(&child_ctx->lock);
			
 
				+	task_ctx_sched_out(child_ctx);
			
 
				 	child->perf_event_ctxp[ctxn] = NULL;
			
 
				 	/*
			
 
				 	 * If this context is a clone; unclone it so it can't get
			
@@ -6957,7 +6553,7 @@ inherit_event(struct perf_event *parent_event,
 
				 					   parent_event->cpu,
			
 
				 					   child,
			
 
				 					   group_leader, parent_event,
			
 
				-					   NULL);
			
 
				+				           NULL, NULL);
			
 
				 	if (IS_ERR(child_event))
			
 
				 		return child_event;
			
 
				 	get_ctx(child_ctx);
			
@@ -6984,6 +6580,8 @@ inherit_event(struct perf_event *parent_event,
 
				 
			
 
				 	child_event->ctx = child_ctx;
			
 
				 	child_event->overflow_handler = parent_event->overflow_handler;
			
 
				+	child_event->overflow_handler_context
			
 
				+		= parent_event->overflow_handler_context;
			
 
				 
			
 
				 	/*
			
 
				 	 * Precalculate sample_data sizes
			
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -431,9 +431,11 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 
				 struct perf_event *
			
 
				 register_user_hw_breakpoint(struct perf_event_attr *attr,
			
 
				 			    perf_overflow_handler_t triggered,
			
 
				+			    void *context,
			
 
				 			    struct task_struct *tsk)
			
 
				 {
			
 
				-	return perf_event_create_kernel_counter(attr, -1, tsk, triggered);
			
 
				+	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
			
 
				+						context);
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
			
 
				 
			
@@ -502,7 +504,8 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 
				  */
			
 
				 struct perf_event * __percpu *
			
 
				 register_wide_hw_breakpoint(struct perf_event_attr *attr,
			
 
				-			    perf_overflow_handler_t triggered)
			
 
				+			    perf_overflow_handler_t triggered,
			
 
				+			    void *context)
			
 
				 {
			
 
				 	struct perf_event * __percpu *cpu_events, **pevent, *bp;
			
 
				 	long err;
			
@@ -515,7 +518,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
 
				 	get_online_cpus();
			
 
				 	for_each_online_cpu(cpu) {
			
 
				 		pevent = per_cpu_ptr(cpu_events, cpu);
			
 
				-		bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered);
			
 
				+		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
			
 
				+						      triggered, context);
			
 
				 
			
 
				 		*pevent = bp;
			
 
				 
			
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -0,0 +1,96 @@
 
				+#ifndef _KERNEL_EVENTS_INTERNAL_H
			
 
				+#define _KERNEL_EVENTS_INTERNAL_H
			
 
				+
			
 
				+#define RING_BUFFER_WRITABLE		0x01
			
 
				+
			
 
				+struct ring_buffer {
			
 
				+	atomic_t			refcount;
			
 
				+	struct rcu_head			rcu_head;
			
 
				+#ifdef CONFIG_PERF_USE_VMALLOC
			
 
				+	struct work_struct		work;
			
 
				+	int				page_order;	/* allocation order  */
			
 
				+#endif
			
 
				+	int				nr_pages;	/* nr of data pages  */
			
 
				+	int				writable;	/* are we writable   */
			
 
				+
			
 
				+	atomic_t			poll;		/* POLL_ for wakeups */
			
 
				+
			
 
				+	local_t				head;		/* write position    */
			
 
				+	local_t				nest;		/* nested writers    */
			
 
				+	local_t				events;		/* event limit       */
			
 
				+	local_t				wakeup;		/* wakeup stamp      */
			
 
				+	local_t				lost;		/* nr records lost   */
			
 
				+
			
 
				+	long				watermark;	/* wakeup watermark  */
			
 
				+
			
 
				+	struct perf_event_mmap_page	*user_page;
			
 
				+	void				*data_pages[0];
			
 
				+};
			
 
				+
			
 
				+extern void rb_free(struct ring_buffer *rb);
			
 
				+extern struct ring_buffer *
			
 
				+rb_alloc(int nr_pages, long watermark, int cpu, int flags);
			
 
				+extern void perf_event_wakeup(struct perf_event *event);
			
 
				+
			
 
				+extern void
			
 
				+perf_event_header__init_id(struct perf_event_header *header,
			
 
				+			   struct perf_sample_data *data,
			
 
				+			   struct perf_event *event);
			
 
				+extern void
			
 
				+perf_event__output_id_sample(struct perf_event *event,
			
 
				+			     struct perf_output_handle *handle,
			
 
				+			     struct perf_sample_data *sample);
			
 
				+
			
 
				+extern struct page *
			
 
				+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
			
 
				+
			
 
				+#ifdef CONFIG_PERF_USE_VMALLOC
			
 
				+/*
			
 
				+ * Back perf_mmap() with vmalloc memory.
			
 
				+ *
			
 
				+ * Required for architectures that have d-cache aliasing issues.
			
 
				+ */
			
 
				+
			
 
				+static inline int page_order(struct ring_buffer *rb)
			
 
				+{
			
 
				+	return rb->page_order;
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+static inline int page_order(struct ring_buffer *rb)
			
 
				+{
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+static unsigned long perf_data_size(struct ring_buffer *rb)
			
 
				+{
			
 
				+	return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
			
 
				+}
			
 
				+
			
 
				+static inline void
			
 
				+__output_copy(struct perf_output_handle *handle,
			
 
				+		   const void *buf, unsigned int len)
			
 
				+{
			
 
				+	do {
			
 
				+		unsigned long size = min_t(unsigned long, handle->size, len);
			
 
				+
			
 
				+		memcpy(handle->addr, buf, size);
			
 
				+
			
 
				+		len -= size;
			
 
				+		handle->addr += size;
			
 
				+		buf += size;
			
 
				+		handle->size -= size;
			
 
				+		if (!handle->size) {
			
 
				+			struct ring_buffer *rb = handle->rb;
			
 
				+
			
 
				+			handle->page++;
			
 
				+			handle->page &= rb->nr_pages - 1;
			
 
				+			handle->addr = rb->data_pages[handle->page];
			
 
				+			handle->size = PAGE_SIZE << page_order(rb);
			
 
				+		}
			
 
				+	} while (len);
			
 
				+}
			
 
				+
			
 
				+#endif /* _KERNEL_EVENTS_INTERNAL_H */
			
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -0,0 +1,380 @@
 
				+/*
			
 
				+ * Performance events ring-buffer code:
			
 
				+ *
			
 
				+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
			
 
				+ *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
			
 
				+ *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
			
 
				+ *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
			
 
				+ *
			
 
				+ * For licensing details see kernel-base/COPYING
			
 
				+ */
			
 
				+
			
 
				+#include <linux/perf_event.h>
			
 
				+#include <linux/vmalloc.h>
			
 
				+#include <linux/slab.h>
			
 
				+
			
 
				+#include "internal.h"
			
 
				+
			
 
				+static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
			
 
				+			      unsigned long offset, unsigned long head)
			
 
				+{
			
 
				+	unsigned long mask;
			
 
				+
			
 
				+	if (!rb->writable)
			
 
				+		return true;
			
 
				+
			
 
				+	mask = perf_data_size(rb) - 1;
			
 
				+
			
 
				+	offset = (offset - tail) & mask;
			
 
				+	head   = (head   - tail) & mask;
			
 
				+
			
 
				+	if ((int)(head - offset) < 0)
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static void perf_output_wakeup(struct perf_output_handle *handle)
			
 
				+{
			
 
				+	atomic_set(&handle->rb->poll, POLL_IN);
			
 
				+
			
 
				+	handle->event->pending_wakeup = 1;
			
 
				+	irq_work_queue(&handle->event->pending);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * We need to ensure a later event_id doesn't publish a head when a former
			
 
				+ * event isn't done writing. However since we need to deal with NMIs we
			
 
				+ * cannot fully serialize things.
			
 
				+ *
			
 
				+ * We only publish the head (and generate a wakeup) when the outer-most
			
 
				+ * event completes.
			
 
				+ */
			
 
				+static void perf_output_get_handle(struct perf_output_handle *handle)
			
 
				+{
			
 
				+	struct ring_buffer *rb = handle->rb;
			
 
				+
			
 
				+	preempt_disable();
			
 
				+	local_inc(&rb->nest);
			
 
				+	handle->wakeup = local_read(&rb->wakeup);
			
 
				+}
			
 
				+
			
 
				+static void perf_output_put_handle(struct perf_output_handle *handle)
			
 
				+{
			
 
				+	struct ring_buffer *rb = handle->rb;
			
 
				+	unsigned long head;
			
 
				+
			
 
				+again:
			
 
				+	head = local_read(&rb->head);
			
 
				+
			
 
				+	/*
			
 
				+	 * IRQ/NMI can happen here, which means we can miss a head update.
			
 
				+	 */
			
 
				+
			
 
				+	if (!local_dec_and_test(&rb->nest))
			
 
				+		goto out;
			
 
				+
			
 
				+	/*
			
 
				+	 * Publish the known good head. Rely on the full barrier implied
			
 
				+	 * by atomic_dec_and_test() to order the rb->head read and this
			
 
				+	 * write.
			
 
				+	 */
			
 
				+	rb->user_page->data_head = head;
			
 
				+
			
 
				+	/*
			
 
				+	 * Now check if we missed an update, rely on the (compiler)
			
 
				+	 * barrier in atomic_dec_and_test() to re-read rb->head.
			
 
				+	 */
			
 
				+	if (unlikely(head != local_read(&rb->head))) {
			
 
				+		local_inc(&rb->nest);
			
 
				+		goto again;
			
 
				+	}
			
 
				+
			
 
				+	if (handle->wakeup != local_read(&rb->wakeup))
			
 
				+		perf_output_wakeup(handle);
			
 
				+
			
 
				+out:
			
 
				+	preempt_enable();
			
 
				+}
			
 
				+
			
 
				+int perf_output_begin(struct perf_output_handle *handle,
			
 
				+		      struct perf_event *event, unsigned int size)
			
 
				+{
			
 
				+	struct ring_buffer *rb;
			
 
				+	unsigned long tail, offset, head;
			
 
				+	int have_lost;
			
 
				+	struct perf_sample_data sample_data;
			
 
				+	struct {
			
 
				+		struct perf_event_header header;
			
 
				+		u64			 id;
			
 
				+		u64			 lost;
			
 
				+	} lost_event;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	/*
			
 
				+	 * For inherited events we send all the output towards the parent.
			
 
				+	 */
			
 
				+	if (event->parent)
			
 
				+		event = event->parent;
			
 
				+
			
 
				+	rb = rcu_dereference(event->rb);
			
 
				+	if (!rb)
			
 
				+		goto out;
			
 
				+
			
 
				+	handle->rb	= rb;
			
 
				+	handle->event	= event;
			
 
				+
			
 
				+	if (!rb->nr_pages)
			
 
				+		goto out;
			
 
				+
			
 
				+	have_lost = local_read(&rb->lost);
			
 
				+	if (have_lost) {
			
 
				+		lost_event.header.size = sizeof(lost_event);
			
 
				+		perf_event_header__init_id(&lost_event.header, &sample_data,
			
 
				+					   event);
			
 
				+		size += lost_event.header.size;
			
 
				+	}
			
 
				+
			
 
				+	perf_output_get_handle(handle);
			
 
				+
			
 
				+	do {
			
 
				+		/*
			
 
				+		 * Userspace could choose to issue a mb() before updating the
			
 
				+		 * tail pointer. So that all reads will be completed before the
			
 
				+		 * write is issued.
			
 
				+		 */
			
 
				+		tail = ACCESS_ONCE(rb->user_page->data_tail);
			
 
				+		smp_rmb();
			
 
				+		offset = head = local_read(&rb->head);
			
 
				+		head += size;
			
 
				+		if (unlikely(!perf_output_space(rb, tail, offset, head)))
			
 
				+			goto fail;
			
 
				+	} while (local_cmpxchg(&rb->head, offset, head) != offset);
			
 
				+
			
 
				+	if (head - local_read(&rb->wakeup) > rb->watermark)
			
 
				+		local_add(rb->watermark, &rb->wakeup);
			
 
				+
			
 
				+	handle->page = offset >> (PAGE_SHIFT + page_order(rb));
			
 
				+	handle->page &= rb->nr_pages - 1;
			
 
				+	handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
			
 
				+	handle->addr = rb->data_pages[handle->page];
			
 
				+	handle->addr += handle->size;
			
 
				+	handle->size = (PAGE_SIZE << page_order(rb)) - handle->size;
			
 
				+
			
 
				+	if (have_lost) {
			
 
				+		lost_event.header.type = PERF_RECORD_LOST;
			
 
				+		lost_event.header.misc = 0;
			
 
				+		lost_event.id          = event->id;
			
 
				+		lost_event.lost        = local_xchg(&rb->lost, 0);
			
 
				+
			
 
				+		perf_output_put(handle, lost_event);
			
 
				+		perf_event__output_id_sample(event, handle, &sample_data);
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+
			
 
				+fail:
			
 
				+	local_inc(&rb->lost);
			
 
				+	perf_output_put_handle(handle);
			
 
				+out:
			
 
				+	rcu_read_unlock();
			
 
				+
			
 
				+	return -ENOSPC;
			
 
				+}
			
 
				+
			
 
				+void perf_output_copy(struct perf_output_handle *handle,
			
 
				+		      const void *buf, unsigned int len)
			
 
				+{
			
 
				+	__output_copy(handle, buf, len);
			
 
				+}
			
 
				+
			
 
				+void perf_output_end(struct perf_output_handle *handle)
			
 
				+{
			
 
				+	perf_output_put_handle(handle);
			
 
				+	rcu_read_unlock();
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
			
 
				+{
			
 
				+	long max_size = perf_data_size(rb);
			
 
				+
			
 
				+	if (watermark)
			
 
				+		rb->watermark = min(max_size, watermark);
			
 
				+
			
 
				+	if (!rb->watermark)
			
 
				+		rb->watermark = max_size / 2;
			
 
				+
			
 
				+	if (flags & RING_BUFFER_WRITABLE)
			
 
				+		rb->writable = 1;
			
 
				+
			
 
				+	atomic_set(&rb->refcount, 1);
			
 
				+}
			
 
				+
			
 
				+#ifndef CONFIG_PERF_USE_VMALLOC
			
 
				+
			
 
				+/*
			
 
				+ * Back perf_mmap() with regular order-0 GFP_KERNEL pages.
			
 
				+ */
			
 
				+
			
 
				+struct page *
			
 
				+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
			
 
				+{
			
 
				+	if (pgoff > rb->nr_pages)
			
 
				+		return NULL;
			
 
				+
			
 
				+	if (pgoff == 0)
			
 
				+		return virt_to_page(rb->user_page);
			
 
				+
			
 
				+	return virt_to_page(rb->data_pages[pgoff - 1]);
			
 
				+}
			
 
				+
			
 
				+static void *perf_mmap_alloc_page(int cpu)
			
 
				+{
			
 
				+	struct page *page;
			
 
				+	int node;
			
 
				+
			
 
				+	node = (cpu == -1) ? cpu : cpu_to_node(cpu);
			
 
				+	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
			
 
				+	if (!page)
			
 
				+		return NULL;
			
 
				+
			
 
				+	return page_address(page);
			
 
				+}
			
 
				+
			
 
				+struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
			
 
				+{
			
 
				+	struct ring_buffer *rb;
			
 
				+	unsigned long size;
			
 
				+	int i;
			
 
				+
			
 
				+	size = sizeof(struct ring_buffer);
			
 
				+	size += nr_pages * sizeof(void *);
			
 
				+
			
 
				+	rb = kzalloc(size, GFP_KERNEL);
			
 
				+	if (!rb)
			
 
				+		goto fail;
			
 
				+
			
 
				+	rb->user_page = perf_mmap_alloc_page(cpu);
			
 
				+	if (!rb->user_page)
			
 
				+		goto fail_user_page;
			
 
				+
			
 
				+	for (i = 0; i < nr_pages; i++) {
			
 
				+		rb->data_pages[i] = perf_mmap_alloc_page(cpu);
			
 
				+		if (!rb->data_pages[i])
			
 
				+			goto fail_data_pages;
			
 
				+	}
			
 
				+
			
 
				+	rb->nr_pages = nr_pages;
			
 
				+
			
 
				+	ring_buffer_init(rb, watermark, flags);
			
 
				+
			
 
				+	return rb;
			
 
				+
			
 
				+fail_data_pages:
			
 
				+	for (i--; i >= 0; i--)
			
 
				+		free_page((unsigned long)rb->data_pages[i]);
			
 
				+
			
 
				+	free_page((unsigned long)rb->user_page);
			
 
				+
			
 
				+fail_user_page:
			
 
				+	kfree(rb);
			
 
				+
			
 
				+fail:
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+static void perf_mmap_free_page(unsigned long addr)
			
 
				+{
			
 
				+	struct page *page = virt_to_page((void *)addr);
			
 
				+
			
 
				+	page->mapping = NULL;
			
 
				+	__free_page(page);
			
 
				+}
			
 
				+
			
 
				+void rb_free(struct ring_buffer *rb)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	perf_mmap_free_page((unsigned long)rb->user_page);
			
 
				+	for (i = 0; i < rb->nr_pages; i++)
			
 
				+		perf_mmap_free_page((unsigned long)rb->data_pages[i]);
			
 
				+	kfree(rb);
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+struct page *
			
 
				+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
			
 
				+{
			
 
				+	if (pgoff > (1UL << page_order(rb)))
			
 
				+		return NULL;
			
 
				+
			
 
				+	return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
			
 
				+}
			
 
				+
			
 
				+static void perf_mmap_unmark_page(void *addr)
			
 
				+{
			
 
				+	struct page *page = vmalloc_to_page(addr);
			
 
				+
			
 
				+	page->mapping = NULL;
			
 
				+}
			
 
				+
			
 
				+static void rb_free_work(struct work_struct *work)
			
 
				+{
			
 
				+	struct ring_buffer *rb;
			
 
				+	void *base;
			
 
				+	int i, nr;
			
 
				+
			
 
				+	rb = container_of(work, struct ring_buffer, work);
			
 
				+	nr = 1 << page_order(rb);
			
 
				+
			
 
				+	base = rb->user_page;
			
 
				+	for (i = 0; i < nr + 1; i++)
			
 
				+		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
			
 
				+
			
 
				+	vfree(base);
			
 
				+	kfree(rb);
			
 
				+}
			
 
				+
			
 
				+void rb_free(struct ring_buffer *rb)
			
 
				+{
			
 
				+	schedule_work(&rb->work);
			
 
				+}
			
 
				+
			
 
				+struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
			
 
				+{
			
 
				+	struct ring_buffer *rb;
			
 
				+	unsigned long size;
			
 
				+	void *all_buf;
			
 
				+
			
 
				+	size = sizeof(struct ring_buffer);
			
 
				+	size += sizeof(void *);
			
 
				+
			
 
				+	rb = kzalloc(size, GFP_KERNEL);
			
 
				+	if (!rb)
			
 
				+		goto fail;
			
 
				+
			
 
				+	INIT_WORK(&rb->work, rb_free_work);
			
 
				+
			
 
				+	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
			
 
				+	if (!all_buf)
			
 
				+		goto fail_all_buf;
			
 
				+
			
 
				+	rb->user_page = all_buf;
			
 
				+	rb->data_pages[0] = all_buf + PAGE_SIZE;
			
 
				+	rb->page_order = ilog2(nr_pages);
			
 
				+	rb->nr_pages = 1;
			
 
				+
			
 
				+	ring_buffer_init(rb, watermark, flags);
			
 
				+
			
 
				+	return rb;
			
 
				+
			
 
				+fail_all_buf:
			
 
				+	kfree(rb);
			
 
				+
			
 
				+fail:
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+#endif
			
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1255,19 +1255,29 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
 
				 /*
			
 
				  * If we have a symbol_name argument, look it up and add the offset field
			
 
				  * to it. This way, we can specify a relative address to a symbol.
			
 
				+ * This returns an encoded error if the symbol lookup fails or for an invalid
			
 
				+ * combination of parameters.
			
 
				  */
			
 
				 static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
			
 
				 {
			
 
				 	kprobe_opcode_t *addr = p->addr;
			
 
				+
			
 
				+	if ((p->symbol_name && p->addr) ||
			
 
				+	    (!p->symbol_name && !p->addr))
			
 
				+		goto invalid;
			
 
				+
			
 
				 	if (p->symbol_name) {
			
 
				-		if (addr)
			
 
				-			return NULL;
			
 
				 		kprobe_lookup_name(p->symbol_name, addr);
			
 
				+		if (!addr)
			
 
				+			return ERR_PTR(-ENOENT);
			
 
				 	}
			
 
				 
			
 
				-	if (!addr)
			
 
				-		return NULL;
			
 
				-	return (kprobe_opcode_t *)(((char *)addr) + p->offset);
			
 
				+	addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
			
 
				+	if (addr)
			
 
				+		return addr;
			
 
				+
			
 
				+invalid:
			
 
				+	return ERR_PTR(-EINVAL);
			
 
				 }
			
 
				 
			
 
				 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
			
@@ -1311,8 +1321,8 @@ int __kprobes register_kprobe(struct kprobe *p)
 
				 	kprobe_opcode_t *addr;
			
 
				 
			
 
				 	addr = kprobe_addr(p);
			
 
				-	if (!addr)
			
 
				-		return -EINVAL;
			
 
				+	if (IS_ERR(addr))
			
 
				+		return PTR_ERR(addr);
			
 
				 	p->addr = addr;
			
 
				 
			
 
				 	ret = check_kprobe_rereg(p);
			
@@ -1335,6 +1345,8 @@ int __kprobes register_kprobe(struct kprobe *p)
 
				 	 */
			
 
				 	probed_mod = __module_text_address((unsigned long) p->addr);
			
 
				 	if (probed_mod) {
			
 
				+		/* Return -ENOENT if the module checks below fail. */
			
 
				+		ret = -ENOENT;
			
 
				 		/*
			
 
				 		 * We must hold a refcount of the probed module while updating
			
 
				 		 * its code to prohibit unexpected unloading.
			
@@ -1351,6 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p)
 
				 			module_put(probed_mod);
			
 
				 			goto fail_with_jump_label;
			
 
				 		}
			
 
				+		/* ret will be updated by following code */
			
 
				 	}
			
 
				 	preempt_enable();
			
 
				 	jump_label_unlock();
			
@@ -1399,7 +1412,7 @@ out:
 
				 fail_with_jump_label:
			
 
				 	preempt_enable();
			
 
				 	jump_label_unlock();
			
 
				-	return -EINVAL;
			
 
				+	return ret;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(register_kprobe);
			
 
				 
			
@@ -1686,8 +1699,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 
				 
			
 
				 	if (kretprobe_blacklist_size) {
			
 
				 		addr = kprobe_addr(&rp->kp);
			
 
				-		if (!addr)
			
 
				-			return -EINVAL;
			
 
				+		if (IS_ERR(addr))
			
 
				+			return PTR_ERR(addr);
			
 
				 
			
 
				 		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
			
 
				 			if (kretprobe_blacklist[i].addr == addr)
			
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2220,7 +2220,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
				 
			
 
				 	if (task_cpu(p) != new_cpu) {
			
 
				 		p->se.nr_migrations++;
			
 
				-		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
			
 
				+		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
			
 
				 	}
			
 
				 
			
 
				 	__set_task_cpu(p, new_cpu);
			
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -26,12 +26,18 @@ void print_stack_trace(struct stack_trace *trace, int spaces)
 
				 EXPORT_SYMBOL_GPL(print_stack_trace);
			
 
				 
			
 
				 /*
			
 
				- * Architectures that do not implement save_stack_trace_tsk get this
			
 
				- * weak alias and a once-per-bootup warning (whenever this facility
			
 
				- * is utilized - for example by procfs):
			
 
				+ * Architectures that do not implement save_stack_trace_tsk or
			
 
				+ * save_stack_trace_regs get this weak alias and a once-per-bootup warning
			
 
				+ * (whenever this facility is utilized - for example by procfs):
			
 
				  */
			
 
				 __weak void
			
 
				 save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
			
 
				 {
			
 
				 	WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n");
			
 
				 }
			
 
				+
			
 
				+__weak void
			
 
				+save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
			
 
				+{
			
 
				+	WARN_ONCE(1, KERN_INFO "save_stack_trace_regs() not implemented yet.\n");
			
 
				+}
			
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -32,7 +32,6 @@
 
				 
			
 
				 #include <trace/events/sched.h>
			
 
				 
			
 
				-#include <asm/ftrace.h>
			
 
				 #include <asm/setup.h>
			
 
				 
			
 
				 #include "trace_output.h"
			
@@ -82,14 +81,14 @@ static int ftrace_disabled __read_mostly;
 
				 
			
 
				 static DEFINE_MUTEX(ftrace_lock);
			
 
				 
			
 
				-static struct ftrace_ops ftrace_list_end __read_mostly =
			
 
				-{
			
 
				+static struct ftrace_ops ftrace_list_end __read_mostly = {
			
 
				 	.func		= ftrace_stub,
			
 
				 };
			
 
				 
			
 
				 static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
			
 
				 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
			
 
				 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
			
 
				+static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
			
 
				 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
			
 
				 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
			
 
				 static struct ftrace_ops global_ops;
			
@@ -148,9 +147,11 @@ void clear_ftrace_function(void)
 
				 {
			
 
				 	ftrace_trace_function = ftrace_stub;
			
 
				 	__ftrace_trace_function = ftrace_stub;
			
 
				+	__ftrace_trace_function_delay = ftrace_stub;
			
 
				 	ftrace_pid_function = ftrace_stub;
			
 
				 }
			
 
				 
			
 
				+#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
			
 
				 #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
			
 
				 /*
			
 
				  * For those archs that do not test ftrace_trace_stop in their
			
@@ -209,8 +210,13 @@ static void update_ftrace_function(void)
 
				 
			
 
				 #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
			
 
				 	ftrace_trace_function = func;
			
 
				+#else
			
 
				+#ifdef CONFIG_DYNAMIC_FTRACE
			
 
				+	/* do not update till all functions have been modified */
			
 
				+	__ftrace_trace_function_delay = func;
			
 
				 #else
			
 
				 	__ftrace_trace_function = func;
			
 
				+#endif
			
 
				 	ftrace_trace_function = ftrace_test_stop_func;
			
 
				 #endif
			
 
				 }
			
@@ -785,8 +791,7 @@ static void unregister_ftrace_profiler(void)
 
				 	unregister_ftrace_graph();
			
 
				 }
			
 
				 #else
			
 
				-static struct ftrace_ops ftrace_profile_ops __read_mostly =
			
 
				-{
			
 
				+static struct ftrace_ops ftrace_profile_ops __read_mostly = {
			
 
				 	.func		= function_profile_call,
			
 
				 };
			
 
				 
			
@@ -806,19 +811,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 
				 		     size_t cnt, loff_t *ppos)
			
 
				 {
			
 
				 	unsigned long val;
			
 
				-	char buf[64];		/* big enough to hold a number */
			
 
				 	int ret;
			
 
				 
			
 
				-	if (cnt >= sizeof(buf))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				-
			
 
				-	ret = strict_strtoul(buf, 10, &val);
			
 
				-	if (ret < 0)
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	val = !!val;
			
@@ -1182,8 +1178,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				+static void
			
 
				+ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
			
 
				+static void
			
 
				+ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
			
 
				+
			
 
				 static int
			
 
				-ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
			
 
				+ftrace_hash_move(struct ftrace_ops *ops, int enable,
			
 
				+		 struct ftrace_hash **dst, struct ftrace_hash *src)
			
 
				 {
			
 
				 	struct ftrace_func_entry *entry;
			
 
				 	struct hlist_node *tp, *tn;
			
@@ -1193,8 +1195,15 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
 
				 	unsigned long key;
			
 
				 	int size = src->count;
			
 
				 	int bits = 0;
			
 
				+	int ret;
			
 
				 	int i;
			
 
				 
			
 
				+	/*
			
 
				+	 * Remove the current set, update the hash and add
			
 
				+	 * them back.
			
 
				+	 */
			
 
				+	ftrace_hash_rec_disable(ops, enable);
			
 
				+
			
 
				 	/*
			
 
				 	 * If the new source is empty, just free dst and assign it
			
 
				 	 * the empty_hash.
			
@@ -1215,9 +1224,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
 
				 	if (bits > FTRACE_HASH_MAX_BITS)
			
 
				 		bits = FTRACE_HASH_MAX_BITS;
			
 
				 
			
 
				+	ret = -ENOMEM;
			
 
				 	new_hash = alloc_ftrace_hash(bits);
			
 
				 	if (!new_hash)
			
 
				-		return -ENOMEM;
			
 
				+		goto out;
			
 
				 
			
 
				 	size = 1 << src->size_bits;
			
 
				 	for (i = 0; i < size; i++) {
			
@@ -1236,7 +1246,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
 
				 	rcu_assign_pointer(*dst, new_hash);
			
 
				 	free_ftrace_hash_rcu(old_hash);
			
 
				 
			
 
				-	return 0;
			
 
				+	ret = 0;
			
 
				+ out:
			
 
				+	/*
			
 
				+	 * Enable regardless of ret:
			
 
				+	 *  On success, we enable the new hash.
			
 
				+	 *  On failure, we re-enable the original hash.
			
 
				+	 */
			
 
				+	ftrace_hash_rec_enable(ops, enable);
			
 
				+
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1596,6 +1615,12 @@ static int __ftrace_modify_code(void *data)
 
				 {
			
 
				 	int *command = data;
			
 
				 
			
 
				+	/*
			
 
				+	 * Do not call function tracer while we update the code.
			
 
				+	 * We are in stop machine, so races are not a concern.
			
 
				+	 */
			
 
				+	function_trace_stop++;
			
 
				+
			
 
				 	if (*command & FTRACE_ENABLE_CALLS)
			
 
				 		ftrace_replace_code(1);
			
 
				 	else if (*command & FTRACE_DISABLE_CALLS)
			
@@ -1609,6 +1634,18 @@ static int __ftrace_modify_code(void *data)
 
				 	else if (*command & FTRACE_STOP_FUNC_RET)
			
 
				 		ftrace_disable_ftrace_graph_caller();
			
 
				 
			
 
				+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
			
 
				+	/*
			
 
				+	 * For archs that call ftrace_test_stop_func(), we must
			
 
				+	 * wait till after we update all the function callers
			
 
				+	 * before we update the callback. This keeps different
			
 
				+	 * ops that record different functions from corrupting
			
 
				+	 * each other.
			
 
				+	 */
			
 
				+	__ftrace_trace_function = __ftrace_trace_function_delay;
			
 
				+#endif
			
 
				+	function_trace_stop--;
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -1744,10 +1781,36 @@ static cycle_t		ftrace_update_time;
 
				 static unsigned long	ftrace_update_cnt;
			
 
				 unsigned long		ftrace_update_tot_cnt;
			
 
				 
			
 
				+static int ops_traces_mod(struct ftrace_ops *ops)
			
 
				+{
			
 
				+	struct ftrace_hash *hash;
			
 
				+
			
 
				+	hash = ops->filter_hash;
			
 
				+	return !!(!hash || !hash->count);
			
 
				+}
			
 
				+
			
 
				 static int ftrace_update_code(struct module *mod)
			
 
				 {
			
 
				 	struct dyn_ftrace *p;
			
 
				 	cycle_t start, stop;
			
 
				+	unsigned long ref = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * When adding a module, we need to check if tracers are
			
 
				+	 * currently enabled and if they are set to trace all functions.
			
 
				+	 * If they are, we need to enable the module functions as well
			
 
				+	 * as update the reference counts for those function records.
			
 
				+	 */
			
 
				+	if (mod) {
			
 
				+		struct ftrace_ops *ops;
			
 
				+
			
 
				+		for (ops = ftrace_ops_list;
			
 
				+		     ops != &ftrace_list_end; ops = ops->next) {
			
 
				+			if (ops->flags & FTRACE_OPS_FL_ENABLED &&
			
 
				+			    ops_traces_mod(ops))
			
 
				+				ref++;
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	start = ftrace_now(raw_smp_processor_id());
			
 
				 	ftrace_update_cnt = 0;
			
@@ -1760,7 +1823,7 @@ static int ftrace_update_code(struct module *mod)
 
				 
			
 
				 		p = ftrace_new_addrs;
			
 
				 		ftrace_new_addrs = p->newlist;
			
 
				-		p->flags = 0L;
			
 
				+		p->flags = ref;
			
 
				 
			
 
				 		/*
			
 
				 		 * Do the initial record conversion from mcount jump
			
@@ -1783,7 +1846,7 @@ static int ftrace_update_code(struct module *mod)
 
				 		 * conversion puts the module to the correct state, thus
			
 
				 		 * passing the ftrace_make_call check.
			
 
				 		 */
			
 
				-		if (ftrace_start_up) {
			
 
				+		if (ftrace_start_up && ref) {
			
 
				 			int failed = __ftrace_replace_code(p, 1);
			
 
				 			if (failed) {
			
 
				 				ftrace_bug(failed, p->ip);
			
@@ -2407,10 +2470,9 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod)
 
				  */
			
 
				 
			
 
				 static int
			
 
				-ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
			
 
				+ftrace_mod_callback(struct ftrace_hash *hash,
			
 
				+		    char *func, char *cmd, char *param, int enable)
			
 
				 {
			
 
				-	struct ftrace_ops *ops = &global_ops;
			
 
				-	struct ftrace_hash *hash;
			
 
				 	char *mod;
			
 
				 	int ret = -EINVAL;
			
 
				 
			
@@ -2430,11 +2492,6 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
 
				 	if (!strlen(mod))
			
 
				 		return ret;
			
 
				 
			
 
				-	if (enable)
			
 
				-		hash = ops->filter_hash;
			
 
				-	else
			
 
				-		hash = ops->notrace_hash;
			
 
				-
			
 
				 	ret = ftrace_match_module_records(hash, func, mod);
			
 
				 	if (!ret)
			
 
				 		ret = -EINVAL;
			
@@ -2760,7 +2817,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash,
 
				 	mutex_lock(&ftrace_cmd_mutex);
			
 
				 	list_for_each_entry(p, &ftrace_commands, list) {
			
 
				 		if (strcmp(p->name, command) == 0) {
			
 
				-			ret = p->func(func, command, next, enable);
			
 
				+			ret = p->func(hash, func, command, next, enable);
			
 
				 			goto out_unlock;
			
 
				 		}
			
 
				 	}
			
@@ -2857,7 +2914,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
 
				 		ftrace_match_records(hash, buf, len);
			
 
				 
			
 
				 	mutex_lock(&ftrace_lock);
			
 
				-	ret = ftrace_hash_move(orig_hash, hash);
			
 
				+	ret = ftrace_hash_move(ops, enable, orig_hash, hash);
			
 
				+	if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
			
 
				+	    && ftrace_enabled)
			
 
				+		ftrace_run_update_code(FTRACE_ENABLE_CALLS);
			
 
				+
			
 
				 	mutex_unlock(&ftrace_lock);
			
 
				 
			
 
				 	mutex_unlock(&ftrace_regex_lock);
			
@@ -3040,18 +3101,12 @@ ftrace_regex_release(struct inode *inode, struct file *file)
 
				 			orig_hash = &iter->ops->notrace_hash;
			
 
				 
			
 
				 		mutex_lock(&ftrace_lock);
			
 
				-		/*
			
 
				-		 * Remove the current set, update the hash and add
			
 
				-		 * them back.
			
 
				-		 */
			
 
				-		ftrace_hash_rec_disable(iter->ops, filter_hash);
			
 
				-		ret = ftrace_hash_move(orig_hash, iter->hash);
			
 
				-		if (!ret) {
			
 
				-			ftrace_hash_rec_enable(iter->ops, filter_hash);
			
 
				-			if (iter->ops->flags & FTRACE_OPS_FL_ENABLED
			
 
				-			    && ftrace_enabled)
			
 
				-				ftrace_run_update_code(FTRACE_ENABLE_CALLS);
			
 
				-		}
			
 
				+		ret = ftrace_hash_move(iter->ops, filter_hash,
			
 
				+				       orig_hash, iter->hash);
			
 
				+		if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
			
 
				+		    && ftrace_enabled)
			
 
				+			ftrace_run_update_code(FTRACE_ENABLE_CALLS);
			
 
				+
			
 
				 		mutex_unlock(&ftrace_lock);
			
 
				 	}
			
 
				 	free_ftrace_hash(iter->hash);
			
@@ -3330,7 +3385,7 @@ static int ftrace_process_locs(struct module *mod,
 
				 {
			
 
				 	unsigned long *p;
			
 
				 	unsigned long addr;
			
 
				-	unsigned long flags;
			
 
				+	unsigned long flags = 0; /* Shut up gcc */
			
 
				 
			
 
				 	mutex_lock(&ftrace_lock);
			
 
				 	p = start;
			
@@ -3348,12 +3403,18 @@ static int ftrace_process_locs(struct module *mod,
 
				 	}
			
 
				 
			
 
				 	/*
			
 
				-	 * Disable interrupts to prevent interrupts from executing
			
 
				-	 * code that is being modified.
			
 
				+	 * We only need to disable interrupts on start up
			
 
				+	 * because we are modifying code that an interrupt
			
 
				+	 * may execute, and the modification is not atomic.
			
 
				+	 * But for modules, nothing runs the code we modify
			
 
				+	 * until we are finished with it, and there's no
			
 
				+	 * reason to cause large interrupt latencies while we do it.
			
 
				 	 */
			
 
				-	local_irq_save(flags);
			
 
				+	if (!mod)
			
 
				+		local_irq_save(flags);
			
 
				 	ftrace_update_code(mod);
			
 
				-	local_irq_restore(flags);
			
 
				+	if (!mod)
			
 
				+		local_irq_restore(flags);
			
 
				 	mutex_unlock(&ftrace_lock);
			
 
				 
			
 
				 	return 0;
			
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -997,15 +997,21 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
				 			     unsigned nr_pages)
			
 
				 {
			
 
				 	struct buffer_page *bpage, *tmp;
			
 
				-	unsigned long addr;
			
 
				 	LIST_HEAD(pages);
			
 
				 	unsigned i;
			
 
				 
			
 
				 	WARN_ON(!nr_pages);
			
 
				 
			
 
				 	for (i = 0; i < nr_pages; i++) {
			
 
				+		struct page *page;
			
 
				+		/*
			
 
				+		 * __GFP_NORETRY flag makes sure that the allocation fails
			
 
				+		 * gracefully without invoking oom-killer and the system is
			
 
				+		 * not destabilized.
			
 
				+		 */
			
 
				 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
			
 
				-				    GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
			
 
				+				    GFP_KERNEL | __GFP_NORETRY,
			
 
				+				    cpu_to_node(cpu_buffer->cpu));
			
 
				 		if (!bpage)
			
 
				 			goto free_pages;
			
 
				 
			
@@ -1013,10 +1019,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
				 
			
 
				 		list_add(&bpage->list, &pages);
			
 
				 
			
 
				-		addr = __get_free_page(GFP_KERNEL);
			
 
				-		if (!addr)
			
 
				+		page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
			
 
				+					GFP_KERNEL | __GFP_NORETRY, 0);
			
 
				+		if (!page)
			
 
				 			goto free_pages;
			
 
				-		bpage->page = (void *)addr;
			
 
				+		bpage->page = page_address(page);
			
 
				 		rb_init_page(bpage->page);
			
 
				 	}
			
 
				 
			
@@ -1045,7 +1052,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
				 {
			
 
				 	struct ring_buffer_per_cpu *cpu_buffer;
			
 
				 	struct buffer_page *bpage;
			
 
				-	unsigned long addr;
			
 
				+	struct page *page;
			
 
				 	int ret;
			
 
				 
			
 
				 	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
			
@@ -1067,10 +1074,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
				 	rb_check_bpage(cpu_buffer, bpage);
			
 
				 
			
 
				 	cpu_buffer->reader_page = bpage;
			
 
				-	addr = __get_free_page(GFP_KERNEL);
			
 
				-	if (!addr)
			
 
				+	page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
			
 
				+	if (!page)
			
 
				 		goto fail_free_reader;
			
 
				-	bpage->page = (void *)addr;
			
 
				+	bpage->page = page_address(page);
			
 
				 	rb_init_page(bpage->page);
			
 
				 
			
 
				 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
			
@@ -1314,7 +1321,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
				 	unsigned nr_pages, rm_pages, new_pages;
			
 
				 	struct buffer_page *bpage, *tmp;
			
 
				 	unsigned long buffer_size;
			
 
				-	unsigned long addr;
			
 
				 	LIST_HEAD(pages);
			
 
				 	int i, cpu;
			
 
				 
			
@@ -1375,16 +1381,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
				 
			
 
				 	for_each_buffer_cpu(buffer, cpu) {
			
 
				 		for (i = 0; i < new_pages; i++) {
			
 
				+			struct page *page;
			
 
				+			/*
			
 
				+			 * __GFP_NORETRY flag makes sure that the allocation
			
 
				+			 * fails gracefully without invoking oom-killer and
			
 
				+			 * the system is not destabilized.
			
 
				+			 */
			
 
				 			bpage = kzalloc_node(ALIGN(sizeof(*bpage),
			
 
				 						  cache_line_size()),
			
 
				-					    GFP_KERNEL, cpu_to_node(cpu));
			
 
				+					    GFP_KERNEL | __GFP_NORETRY,
			
 
				+					    cpu_to_node(cpu));
			
 
				 			if (!bpage)
			
 
				 				goto free_pages;
			
 
				 			list_add(&bpage->list, &pages);
			
 
				-			addr = __get_free_page(GFP_KERNEL);
			
 
				-			if (!addr)
			
 
				+			page = alloc_pages_node(cpu_to_node(cpu),
			
 
				+						GFP_KERNEL | __GFP_NORETRY, 0);
			
 
				+			if (!page)
			
 
				 				goto free_pages;
			
 
				-			bpage->page = (void *)addr;
			
 
				+			bpage->page = page_address(page);
			
 
				 			rb_init_page(bpage->page);
			
 
				 		}
			
 
				 	}
			
@@ -3730,16 +3744,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 
				  * Returns:
			
 
				  *  The page allocated, or NULL on error.
			
 
				  */
			
 
				-void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
			
 
				+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
			
 
				 {
			
 
				 	struct buffer_data_page *bpage;
			
 
				-	unsigned long addr;
			
 
				+	struct page *page;
			
 
				 
			
 
				-	addr = __get_free_page(GFP_KERNEL);
			
 
				-	if (!addr)
			
 
				+	page = alloc_pages_node(cpu_to_node(cpu),
			
 
				+				GFP_KERNEL | __GFP_NORETRY, 0);
			
 
				+	if (!page)
			
 
				 		return NULL;
			
 
				 
			
 
				-	bpage = (void *)addr;
			
 
				+	bpage = page_address(page);
			
 
				 
			
 
				 	rb_init_page(bpage);
			
 
				 
			
@@ -3978,20 +3993,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
 
				 		size_t cnt, loff_t *ppos)
			
 
				 {
			
 
				 	unsigned long *p = filp->private_data;
			
 
				-	char buf[64];
			
 
				 	unsigned long val;
			
 
				 	int ret;
			
 
				 
			
 
				-	if (cnt >= sizeof(buf))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				-
			
 
				-	ret = strict_strtoul(buf, 10, &val);
			
 
				-	if (ret < 0)
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	if (val)
			
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -106,7 +106,7 @@ static enum event_status read_page(int cpu)
 
				 	int inc;
			
 
				 	int i;
			
 
				 
			
 
				-	bpage = ring_buffer_alloc_read_page(buffer);
			
 
				+	bpage = ring_buffer_alloc_read_page(buffer, cpu);
			
 
				 	if (!bpage)
			
 
				 		return EVENT_DROPPED;
			
 
				 
			
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -343,26 +343,27 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 
				 static int trace_stop_count;
			
 
				 static DEFINE_SPINLOCK(tracing_start_lock);
			
 
				 
			
 
				+static void wakeup_work_handler(struct work_struct *work)
			
 
				+{
			
 
				+	wake_up(&trace_wait);
			
 
				+}
			
 
				+
			
 
				+static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
			
 
				+
			
 
				 /**
			
 
				  * trace_wake_up - wake up tasks waiting for trace input
			
 
				  *
			
 
				- * Simply wakes up any task that is blocked on the trace_wait
			
 
				- * queue. These is used with trace_poll for tasks polling the trace.
			
 
				+ * Schedules a delayed work to wake up any task that is blocked on the
			
 
				+ * trace_wait queue. These is used with trace_poll for tasks polling the
			
 
				+ * trace.
			
 
				  */
			
 
				 void trace_wake_up(void)
			
 
				 {
			
 
				-	int cpu;
			
 
				+	const unsigned long delay = msecs_to_jiffies(2);
			
 
				 
			
 
				 	if (trace_flags & TRACE_ITER_BLOCK)
			
 
				 		return;
			
 
				-	/*
			
 
				-	 * The runqueue_is_locked() can fail, but this is the best we
			
 
				-	 * have for now:
			
 
				-	 */
			
 
				-	cpu = get_cpu();
			
 
				-	if (!runqueue_is_locked(cpu))
			
 
				-		wake_up(&trace_wait);
			
 
				-	put_cpu();
			
 
				+	schedule_delayed_work(&wakeup_work, delay);
			
 
				 }
			
 
				 
			
 
				 static int __init set_buf_size(char *str)
			
@@ -424,6 +425,7 @@ static const char *trace_options[] = {
 
				 	"graph-time",
			
 
				 	"record-cmd",
			
 
				 	"overwrite",
			
 
				+	"disable_on_free",
			
 
				 	NULL
			
 
				 };
			
 
				 
			
@@ -1191,6 +1193,18 @@ void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
			
 
				 
			
 
				+void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
			
 
				+					    struct ring_buffer_event *event,
			
 
				+					    unsigned long flags, int pc,
			
 
				+					    struct pt_regs *regs)
			
 
				+{
			
 
				+	ring_buffer_unlock_commit(buffer, event);
			
 
				+
			
 
				+	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
			
 
				+	ftrace_trace_userstack(buffer, flags, pc);
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
			
 
				+
			
 
				 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
			
 
				 					 struct ring_buffer_event *event)
			
 
				 {
			
@@ -1234,30 +1248,103 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_STACKTRACE
			
 
				+
			
 
				+#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
			
 
				+struct ftrace_stack {
			
 
				+	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
			
 
				+};
			
 
				+
			
 
				+static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
			
 
				+static DEFINE_PER_CPU(int, ftrace_stack_reserve);
			
 
				+
			
 
				 static void __ftrace_trace_stack(struct ring_buffer *buffer,
			
 
				 				 unsigned long flags,
			
 
				-				 int skip, int pc)
			
 
				+				 int skip, int pc, struct pt_regs *regs)
			
 
				 {
			
 
				 	struct ftrace_event_call *call = &event_kernel_stack;
			
 
				 	struct ring_buffer_event *event;
			
 
				 	struct stack_entry *entry;
			
 
				 	struct stack_trace trace;
			
 
				+	int use_stack;
			
 
				+	int size = FTRACE_STACK_ENTRIES;
			
 
				+
			
 
				+	trace.nr_entries	= 0;
			
 
				+	trace.skip		= skip;
			
 
				+
			
 
				+	/*
			
 
				+	 * Since events can happen in NMIs there's no safe way to
			
 
				+	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
			
 
				+	 * or NMI comes in, it will just have to use the default
			
 
				+	 * FTRACE_STACK_ENTRIES.
			
 
				+	 */
			
 
				+	preempt_disable_notrace();
			
 
				+
			
 
				+	use_stack = ++__get_cpu_var(ftrace_stack_reserve);
			
 
				+	/*
			
 
				+	 * We don't need any atomic variables, just a barrier.
			
 
				+	 * If an interrupt comes in, we don't care, because it would
			
 
				+	 * have exited and put the counter back to what we want.
			
 
				+	 * We just need a barrier to keep gcc from moving things
			
 
				+	 * around.
			
 
				+	 */
			
 
				+	barrier();
			
 
				+	if (use_stack == 1) {
			
 
				+		trace.entries		= &__get_cpu_var(ftrace_stack).calls[0];
			
 
				+		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
			
 
				+
			
 
				+		if (regs)
			
 
				+			save_stack_trace_regs(regs, &trace);
			
 
				+		else
			
 
				+			save_stack_trace(&trace);
			
 
				+
			
 
				+		if (trace.nr_entries > size)
			
 
				+			size = trace.nr_entries;
			
 
				+	} else
			
 
				+		/* From now on, use_stack is a boolean */
			
 
				+		use_stack = 0;
			
 
				+
			
 
				+	size *= sizeof(unsigned long);
			
 
				 
			
 
				 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
			
 
				-					  sizeof(*entry), flags, pc);
			
 
				+					  sizeof(*entry) + size, flags, pc);
			
 
				 	if (!event)
			
 
				-		return;
			
 
				-	entry	= ring_buffer_event_data(event);
			
 
				-	memset(&entry->caller, 0, sizeof(entry->caller));
			
 
				+		goto out;
			
 
				+	entry = ring_buffer_event_data(event);
			
 
				 
			
 
				-	trace.nr_entries	= 0;
			
 
				-	trace.max_entries	= FTRACE_STACK_ENTRIES;
			
 
				-	trace.skip		= skip;
			
 
				-	trace.entries		= entry->caller;
			
 
				+	memset(&entry->caller, 0, size);
			
 
				+
			
 
				+	if (use_stack)
			
 
				+		memcpy(&entry->caller, trace.entries,
			
 
				+		       trace.nr_entries * sizeof(unsigned long));
			
 
				+	else {
			
 
				+		trace.max_entries	= FTRACE_STACK_ENTRIES;
			
 
				+		trace.entries		= entry->caller;
			
 
				+		if (regs)
			
 
				+			save_stack_trace_regs(regs, &trace);
			
 
				+		else
			
 
				+			save_stack_trace(&trace);
			
 
				+	}
			
 
				+
			
 
				+	entry->size = trace.nr_entries;
			
 
				 
			
 
				-	save_stack_trace(&trace);
			
 
				 	if (!filter_check_discard(call, entry, buffer, event))
			
 
				 		ring_buffer_unlock_commit(buffer, event);
			
 
				+
			
 
				+ out:
			
 
				+	/* Again, don't let gcc optimize things here */
			
 
				+	barrier();
			
 
				+	__get_cpu_var(ftrace_stack_reserve)--;
			
 
				+	preempt_enable_notrace();
			
 
				+
			
 
				+}
			
 
				+
			
 
				+void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
			
 
				+			     int skip, int pc, struct pt_regs *regs)
			
 
				+{
			
 
				+	if (!(trace_flags & TRACE_ITER_STACKTRACE))
			
 
				+		return;
			
 
				+
			
 
				+	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
			
 
				 }
			
 
				 
			
 
				 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
			
@@ -1266,13 +1353,13 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
 
				 	if (!(trace_flags & TRACE_ITER_STACKTRACE))
			
 
				 		return;
			
 
				 
			
 
				-	__ftrace_trace_stack(buffer, flags, skip, pc);
			
 
				+	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
			
 
				 }
			
 
				 
			
 
				 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
			
 
				 		   int pc)
			
 
				 {
			
 
				-	__ftrace_trace_stack(tr->buffer, flags, skip, pc);
			
 
				+	__ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -1288,7 +1375,7 @@ void trace_dump_stack(void)
 
				 	local_save_flags(flags);
			
 
				 
			
 
				 	/* skipping 3 traces, seems to get us at the caller of this function */
			
 
				-	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
			
 
				+	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
			
 
				 }
			
 
				 
			
 
				 static DEFINE_PER_CPU(int, user_stack_count);
			
@@ -1536,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
 
				 
			
 
				 	ftrace_enable_cpu();
			
 
				 
			
 
				-	return event ? ring_buffer_event_data(event) : NULL;
			
 
				+	if (event) {
			
 
				+		iter->ent_size = ring_buffer_event_length(event);
			
 
				+		return ring_buffer_event_data(event);
			
 
				+	}
			
 
				+	iter->ent_size = 0;
			
 
				+	return NULL;
			
 
				 }
			
 
				 
			
 
				 static struct trace_entry *
			
@@ -2051,6 +2143,9 @@ void trace_default_header(struct seq_file *m)
 
				 {
			
 
				 	struct trace_iterator *iter = m->private;
			
 
				 
			
 
				+	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
			
 
				+		return;
			
 
				+
			
 
				 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
			
 
				 		/* print nothing if the buffers are empty */
			
 
				 		if (trace_empty(iter))
			
@@ -2701,20 +2796,11 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
 
				 		   size_t cnt, loff_t *ppos)
			
 
				 {
			
 
				 	struct trace_array *tr = filp->private_data;
			
 
				-	char buf[64];
			
 
				 	unsigned long val;
			
 
				 	int ret;
			
 
				 
			
 
				-	if (cnt >= sizeof(buf))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				-
			
 
				-	ret = strict_strtoul(buf, 10, &val);
			
 
				-	if (ret < 0)
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	val = !!val;
			
@@ -2767,7 +2853,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
 
				 	return t->init(tr);
			
 
				 }
			
 
				 
			
 
				-static int tracing_resize_ring_buffer(unsigned long size)
			
 
				+static int __tracing_resize_ring_buffer(unsigned long size)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
@@ -2819,6 +2905,41 @@ static int tracing_resize_ring_buffer(unsigned long size)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static ssize_t tracing_resize_ring_buffer(unsigned long size)
			
 
				+{
			
 
				+	int cpu, ret = size;
			
 
				+
			
 
				+	mutex_lock(&trace_types_lock);
			
 
				+
			
 
				+	tracing_stop();
			
 
				+
			
 
				+	/* disable all cpu buffers */
			
 
				+	for_each_tracing_cpu(cpu) {
			
 
				+		if (global_trace.data[cpu])
			
 
				+			atomic_inc(&global_trace.data[cpu]->disabled);
			
 
				+		if (max_tr.data[cpu])
			
 
				+			atomic_inc(&max_tr.data[cpu]->disabled);
			
 
				+	}
			
 
				+
			
 
				+	if (size != global_trace.entries)
			
 
				+		ret = __tracing_resize_ring_buffer(size);
			
 
				+
			
 
				+	if (ret < 0)
			
 
				+		ret = -ENOMEM;
			
 
				+
			
 
				+	for_each_tracing_cpu(cpu) {
			
 
				+		if (global_trace.data[cpu])
			
 
				+			atomic_dec(&global_trace.data[cpu]->disabled);
			
 
				+		if (max_tr.data[cpu])
			
 
				+			atomic_dec(&max_tr.data[cpu]->disabled);
			
 
				+	}
			
 
				+
			
 
				+	tracing_start();
			
 
				+	mutex_unlock(&trace_types_lock);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 
			
 
				 /**
			
 
				  * tracing_update_buffers - used by tracing facility to expand ring buffers
			
@@ -2836,7 +2957,7 @@ int tracing_update_buffers(void)
 
				 
			
 
				 	mutex_lock(&trace_types_lock);
			
 
				 	if (!ring_buffer_expanded)
			
 
				-		ret = tracing_resize_ring_buffer(trace_buf_size);
			
 
				+		ret = __tracing_resize_ring_buffer(trace_buf_size);
			
 
				 	mutex_unlock(&trace_types_lock);
			
 
				 
			
 
				 	return ret;
			
@@ -2860,7 +2981,7 @@ static int tracing_set_tracer(const char *buf)
 
				 	mutex_lock(&trace_types_lock);
			
 
				 
			
 
				 	if (!ring_buffer_expanded) {
			
 
				-		ret = tracing_resize_ring_buffer(trace_buf_size);
			
 
				+		ret = __tracing_resize_ring_buffer(trace_buf_size);
			
 
				 		if (ret < 0)
			
 
				 			goto out;
			
 
				 		ret = 0;
			
@@ -2966,20 +3087,11 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
 
				 		      size_t cnt, loff_t *ppos)
			
 
				 {
			
 
				 	unsigned long *ptr = filp->private_data;
			
 
				-	char buf[64];
			
 
				 	unsigned long val;
			
 
				 	int ret;
			
 
				 
			
 
				-	if (cnt >= sizeof(buf))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				-
			
 
				-	ret = strict_strtoul(buf, 10, &val);
			
 
				-	if (ret < 0)
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	*ptr = val * 1000;
			
@@ -3434,67 +3546,54 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 
				 		      size_t cnt, loff_t *ppos)
			
 
				 {
			
 
				 	unsigned long val;
			
 
				-	char buf[64];
			
 
				-	int ret, cpu;
			
 
				-
			
 
				-	if (cnt >= sizeof(buf))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				+	int ret;
			
 
				 
			
 
				-	ret = strict_strtoul(buf, 10, &val);
			
 
				-	if (ret < 0)
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	/* must have at least 1 entry */
			
 
				 	if (!val)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	mutex_lock(&trace_types_lock);
			
 
				-
			
 
				-	tracing_stop();
			
 
				-
			
 
				-	/* disable all cpu buffers */
			
 
				-	for_each_tracing_cpu(cpu) {
			
 
				-		if (global_trace.data[cpu])
			
 
				-			atomic_inc(&global_trace.data[cpu]->disabled);
			
 
				-		if (max_tr.data[cpu])
			
 
				-			atomic_inc(&max_tr.data[cpu]->disabled);
			
 
				-	}
			
 
				-
			
 
				 	/* value is in KB */
			
 
				 	val <<= 10;
			
 
				 
			
 
				-	if (val != global_trace.entries) {
			
 
				-		ret = tracing_resize_ring_buffer(val);
			
 
				-		if (ret < 0) {
			
 
				-			cnt = ret;
			
 
				-			goto out;
			
 
				-		}
			
 
				-	}
			
 
				+	ret = tracing_resize_ring_buffer(val);
			
 
				+	if (ret < 0)
			
 
				+		return ret;
			
 
				 
			
 
				 	*ppos += cnt;
			
 
				 
			
 
				-	/* If check pages failed, return ENOMEM */
			
 
				-	if (tracing_disabled)
			
 
				-		cnt = -ENOMEM;
			
 
				- out:
			
 
				-	for_each_tracing_cpu(cpu) {
			
 
				-		if (global_trace.data[cpu])
			
 
				-			atomic_dec(&global_trace.data[cpu]->disabled);
			
 
				-		if (max_tr.data[cpu])
			
 
				-			atomic_dec(&max_tr.data[cpu]->disabled);
			
 
				-	}
			
 
				+	return cnt;
			
 
				+}
			
 
				 
			
 
				-	tracing_start();
			
 
				-	mutex_unlock(&trace_types_lock);
			
 
				+static ssize_t
			
 
				+tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
			
 
				+			  size_t cnt, loff_t *ppos)
			
 
				+{
			
 
				+	/*
			
 
				+	 * There is no need to read what the user has written, this function
			
 
				+	 * is just to make sure that there is no error when "echo" is used
			
 
				+	 */
			
 
				+
			
 
				+	*ppos += cnt;
			
 
				 
			
 
				 	return cnt;
			
 
				 }
			
 
				 
			
 
				+static int
			
 
				+tracing_free_buffer_release(struct inode *inode, struct file *filp)
			
 
				+{
			
 
				+	/* disable tracing ? */
			
 
				+	if (trace_flags & TRACE_ITER_STOP_ON_FREE)
			
 
				+		tracing_off();
			
 
				+	/* resize the ring buffer to 0 */
			
 
				+	tracing_resize_ring_buffer(0);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int mark_printk(const char *fmt, ...)
			
 
				 {
			
 
				 	int ret;
			
@@ -3640,6 +3739,11 @@ static const struct file_operations tracing_entries_fops = {
 
				 	.llseek		= generic_file_llseek,
			
 
				 };
			
 
				 
			
 
				+static const struct file_operations tracing_free_buffer_fops = {
			
 
				+	.write		= tracing_free_buffer_write,
			
 
				+	.release	= tracing_free_buffer_release,
			
 
				+};
			
 
				+
			
 
				 static const struct file_operations tracing_mark_fops = {
			
 
				 	.open		= tracing_open_generic,
			
 
				 	.write		= tracing_mark_write,
			
@@ -3696,7 +3800,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 
				 		return 0;
			
 
				 
			
 
				 	if (!info->spare)
			
 
				-		info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
			
 
				+		info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
			
 
				 	if (!info->spare)
			
 
				 		return -ENOMEM;
			
 
				 
			
@@ -3853,7 +3957,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 
				 
			
 
				 		ref->ref = 1;
			
 
				 		ref->buffer = info->tr->buffer;
			
 
				-		ref->page = ring_buffer_alloc_read_page(ref->buffer);
			
 
				+		ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
			
 
				 		if (!ref->page) {
			
 
				 			kfree(ref);
			
 
				 			break;
			
@@ -3862,8 +3966,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 
				 		r = ring_buffer_read_page(ref->buffer, &ref->page,
			
 
				 					  len, info->cpu, 1);
			
 
				 		if (r < 0) {
			
 
				-			ring_buffer_free_read_page(ref->buffer,
			
 
				-						   ref->page);
			
 
				+			ring_buffer_free_read_page(ref->buffer, ref->page);
			
 
				 			kfree(ref);
			
 
				 			break;
			
 
				 		}
			
@@ -4099,19 +4202,10 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
				 {
			
 
				 	struct trace_option_dentry *topt = filp->private_data;
			
 
				 	unsigned long val;
			
 
				-	char buf[64];
			
 
				 	int ret;
			
 
				 
			
 
				-	if (cnt >= sizeof(buf))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				-
			
 
				-	ret = strict_strtoul(buf, 10, &val);
			
 
				-	if (ret < 0)
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	if (val != 0 && val != 1)
			
@@ -4159,20 +4253,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
				 			 loff_t *ppos)
			
 
				 {
			
 
				 	long index = (long)filp->private_data;
			
 
				-	char buf[64];
			
 
				 	unsigned long val;
			
 
				 	int ret;
			
 
				 
			
 
				-	if (cnt >= sizeof(buf))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				-
			
 
				-	ret = strict_strtoul(buf, 10, &val);
			
 
				-	if (ret < 0)
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	if (val != 0 && val != 1)
			
@@ -4365,6 +4450,9 @@ static __init int tracer_init_debugfs(void)
 
				 	trace_create_file("buffer_size_kb", 0644, d_tracer,
			
 
				 			&global_trace, &tracing_entries_fops);
			
 
				 
			
 
				+	trace_create_file("free_buffer", 0644, d_tracer,
			
 
				+			&global_trace, &tracing_free_buffer_fops);
			
 
				+
			
 
				 	trace_create_file("trace_marker", 0220, d_tracer,
			
 
				 			NULL, &tracing_mark_fops);
			
 
				 
			
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -278,6 +278,29 @@ struct tracer {
 
				 };
			
 
				 
			
 
				 
			
 
				+/* Only current can touch trace_recursion */
			
 
				+#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
			
 
				+#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
			
 
				+
			
 
				+/* Ring buffer has the 10 LSB bits to count */
			
 
				+#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
			
 
				+
			
 
				+/* for function tracing recursion */
			
 
				+#define TRACE_INTERNAL_BIT		(1<<11)
			
 
				+#define TRACE_GLOBAL_BIT		(1<<12)
			
 
				+/*
			
 
				+ * Abuse of the trace_recursion.
			
 
				+ * We need a way to maintain state when we are tracing the function
			
 
				+ * graph in irq context because we want to trace a particular function
			
 
				+ * that was called from an irq handler while irq tracing is off. Since
			
 
				+ * this can only be modified by current, we can reuse trace_recursion.
			
 
				+ */
			
 
				+#define TRACE_IRQ_BIT			(1<<13)
			
 
				+
			
 
				+#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0)
			
 
				+#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0)
			
 
				+#define trace_recursion_test(bit)	((current)->trace_recursion & (bit))
			
 
				+
			
 
				 #define TRACE_PIPE_ALL_CPU	-1
			
 
				 
			
 
				 int tracer_init(struct tracer *t, struct trace_array *tr);
			
@@ -389,6 +412,9 @@ void update_max_tr_single(struct trace_array *tr,
 
				 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
			
 
				 			int skip, int pc);
			
 
				 
			
 
				+void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
			
 
				+			     int skip, int pc, struct pt_regs *regs);
			
 
				+
			
 
				 void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
			
 
				 			    int pc);
			
 
				 
			
@@ -400,6 +426,12 @@ static inline void ftrace_trace_stack(struct ring_buffer *buffer,
 
				 {
			
 
				 }
			
 
				 
			
 
				+static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer,
			
 
				+					   unsigned long flags, int skip,
			
 
				+					   int pc, struct pt_regs *regs)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				 static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
			
 
				 					  unsigned long flags, int pc)
			
 
				 {
			
@@ -507,8 +539,18 @@ static inline int ftrace_graph_addr(unsigned long addr)
 
				 		return 1;
			
 
				 
			
 
				 	for (i = 0; i < ftrace_graph_count; i++) {
			
 
				-		if (addr == ftrace_graph_funcs[i])
			
 
				+		if (addr == ftrace_graph_funcs[i]) {
			
 
				+			/*
			
 
				+			 * If no irqs are to be traced, but a set_graph_function
			
 
				+			 * is set, and called by an interrupt handler, we still
			
 
				+			 * want to trace it.
			
 
				+			 */
			
 
				+			if (in_irq())
			
 
				+				trace_recursion_set(TRACE_IRQ_BIT);
			
 
				+			else
			
 
				+				trace_recursion_clear(TRACE_IRQ_BIT);
			
 
				 			return 1;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	return 0;
			
@@ -609,6 +651,7 @@ enum trace_iterator_flags {
 
				 	TRACE_ITER_GRAPH_TIME		= 0x80000,
			
 
				 	TRACE_ITER_RECORD_CMD		= 0x100000,
			
 
				 	TRACE_ITER_OVERWRITE		= 0x200000,
			
 
				+	TRACE_ITER_STOP_ON_FREE		= 0x400000,
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -677,6 +720,7 @@ struct event_subsystem {
 
				 	struct dentry		*entry;
			
 
				 	struct event_filter	*filter;
			
 
				 	int			nr_events;
			
 
				+	int			ref_count;
			
 
				 };
			
 
				 
			
 
				 #define FILTER_PRED_INVALID	((unsigned short)-1)
			
@@ -784,19 +828,4 @@ extern const char *__stop___trace_bprintk_fmt[];
 
				 	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
			
 
				 #include "trace_entries.h"
			
 
				 
			
 
				-/* Only current can touch trace_recursion */
			
 
				-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
			
 
				-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
			
 
				-
			
 
				-/* Ring buffer has the 10 LSB bits to count */
			
 
				-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
			
 
				-
			
 
				-/* for function tracing recursion */
			
 
				-#define TRACE_INTERNAL_BIT		(1<<11)
			
 
				-#define TRACE_GLOBAL_BIT		(1<<12)
			
 
				-
			
 
				-#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0)
			
 
				-#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0)
			
 
				-#define trace_recursion_test(bit)	((current)->trace_recursion & (bit))
			
 
				-
			
 
				 #endif /* _LINUX_KERNEL_TRACE_H */
			
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
 
				 	TRACE_STACK,
			
 
				 
			
 
				 	F_STRUCT(
			
 
				-		__array(	unsigned long,	caller, FTRACE_STACK_ENTRIES	)
			
 
				+		__field(	int,		size	)
			
 
				+		__dynamic_array(unsigned long,	caller	)
			
 
				 	),
			
 
				 
			
 
				 	F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
			
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -244,6 +244,35 @@ static void ftrace_clear_events(void)
 
				 	mutex_unlock(&event_mutex);
			
 
				 }
			
 
				 
			
 
				+static void __put_system(struct event_subsystem *system)
			
 
				+{
			
 
				+	struct event_filter *filter = system->filter;
			
 
				+
			
 
				+	WARN_ON_ONCE(system->ref_count == 0);
			
 
				+	if (--system->ref_count)
			
 
				+		return;
			
 
				+
			
 
				+	if (filter) {
			
 
				+		kfree(filter->filter_string);
			
 
				+		kfree(filter);
			
 
				+	}
			
 
				+	kfree(system->name);
			
 
				+	kfree(system);
			
 
				+}
			
 
				+
			
 
				+static void __get_system(struct event_subsystem *system)
			
 
				+{
			
 
				+	WARN_ON_ONCE(system->ref_count == 0);
			
 
				+	system->ref_count++;
			
 
				+}
			
 
				+
			
 
				+static void put_system(struct event_subsystem *system)
			
 
				+{
			
 
				+	mutex_lock(&event_mutex);
			
 
				+	__put_system(system);
			
 
				+	mutex_unlock(&event_mutex);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
			
 
				  */
			
@@ -486,20 +515,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
				 		   loff_t *ppos)
			
 
				 {
			
 
				 	struct ftrace_event_call *call = filp->private_data;
			
 
				-	char buf[64];
			
 
				 	unsigned long val;
			
 
				 	int ret;
			
 
				 
			
 
				-	if (cnt >= sizeof(buf))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				-
			
 
				-	ret = strict_strtoul(buf, 10, &val);
			
 
				-	if (ret < 0)
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	ret = tracing_update_buffers();
			
@@ -528,7 +548,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 
				 		   loff_t *ppos)
			
 
				 {
			
 
				 	const char set_to_char[4] = { '?', '0', '1', 'X' };
			
 
				-	const char *system = filp->private_data;
			
 
				+	struct event_subsystem *system = filp->private_data;
			
 
				 	struct ftrace_event_call *call;
			
 
				 	char buf[2];
			
 
				 	int set = 0;
			
@@ -539,7 +559,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 
				 		if (!call->name || !call->class || !call->class->reg)
			
 
				 			continue;
			
 
				 
			
 
				-		if (system && strcmp(call->class->system, system) != 0)
			
 
				+		if (system && strcmp(call->class->system, system->name) != 0)
			
 
				 			continue;
			
 
				 
			
 
				 		/*
			
@@ -569,21 +589,13 @@ static ssize_t
 
				 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
			
 
				 		    loff_t *ppos)
			
 
				 {
			
 
				-	const char *system = filp->private_data;
			
 
				+	struct event_subsystem *system = filp->private_data;
			
 
				+	const char *name = NULL;
			
 
				 	unsigned long val;
			
 
				-	char buf[64];
			
 
				 	ssize_t ret;
			
 
				 
			
 
				-	if (cnt >= sizeof(buf))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				-
			
 
				-	ret = strict_strtoul(buf, 10, &val);
			
 
				-	if (ret < 0)
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	ret = tracing_update_buffers();
			
@@ -593,7 +605,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
				 	if (val != 0 && val != 1)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
			
 
				+	/*
			
 
				+	 * Opening of "enable" adds a ref count to system,
			
 
				+	 * so the name is safe to use.
			
 
				+	 */
			
 
				+	if (system)
			
 
				+		name = system->name;
			
 
				+
			
 
				+	ret = __ftrace_set_clr_event(NULL, name, NULL, val);
			
 
				 	if (ret)
			
 
				 		goto out;
			
 
				 
			
@@ -826,6 +845,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
				 	return cnt;
			
 
				 }
			
 
				 
			
 
				+static LIST_HEAD(event_subsystems);
			
 
				+
			
 
				+static int subsystem_open(struct inode *inode, struct file *filp)
			
 
				+{
			
 
				+	struct event_subsystem *system = NULL;
			
 
				+	int ret;
			
 
				+
			
 
				+	if (!inode->i_private)
			
 
				+		goto skip_search;
			
 
				+
			
 
				+	/* Make sure the system still exists */
			
 
				+	mutex_lock(&event_mutex);
			
 
				+	list_for_each_entry(system, &event_subsystems, list) {
			
 
				+		if (system == inode->i_private) {
			
 
				+			/* Don't open systems with no events */
			
 
				+			if (!system->nr_events) {
			
 
				+				system = NULL;
			
 
				+				break;
			
 
				+			}
			
 
				+			__get_system(system);
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+	mutex_unlock(&event_mutex);
			
 
				+
			
 
				+	if (system != inode->i_private)
			
 
				+		return -ENODEV;
			
 
				+
			
 
				+ skip_search:
			
 
				+	ret = tracing_open_generic(inode, filp);
			
 
				+	if (ret < 0 && system)
			
 
				+		put_system(system);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static int subsystem_release(struct inode *inode, struct file *file)
			
 
				+{
			
 
				+	struct event_subsystem *system = inode->i_private;
			
 
				+
			
 
				+	if (system)
			
 
				+		put_system(system);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static ssize_t
			
 
				 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
			
 
				 		      loff_t *ppos)
			
@@ -963,17 +1028,19 @@ static const struct file_operations ftrace_event_filter_fops = {
 
				 };
			
 
				 
			
 
				 static const struct file_operations ftrace_subsystem_filter_fops = {
			
 
				-	.open = tracing_open_generic,
			
 
				+	.open = subsystem_open,
			
 
				 	.read = subsystem_filter_read,
			
 
				 	.write = subsystem_filter_write,
			
 
				 	.llseek = default_llseek,
			
 
				+	.release = subsystem_release,
			
 
				 };
			
 
				 
			
 
				 static const struct file_operations ftrace_system_enable_fops = {
			
 
				-	.open = tracing_open_generic,
			
 
				+	.open = subsystem_open,
			
 
				 	.read = system_enable_read,
			
 
				 	.write = system_enable_write,
			
 
				 	.llseek = default_llseek,
			
 
				+	.release = subsystem_release,
			
 
				 };
			
 
				 
			
 
				 static const struct file_operations ftrace_show_header_fops = {
			
@@ -1002,8 +1069,6 @@ static struct dentry *event_trace_events_dir(void)
 
				 	return d_events;
			
 
				 }
			
 
				 
			
 
				-static LIST_HEAD(event_subsystems);
			
 
				-
			
 
				 static struct dentry *
			
 
				 event_subsystem_dir(const char *name, struct dentry *d_events)
			
 
				 {
			
@@ -1013,6 +1078,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 
				 	/* First see if we did not already create this dir */
			
 
				 	list_for_each_entry(system, &event_subsystems, list) {
			
 
				 		if (strcmp(system->name, name) == 0) {
			
 
				+			__get_system(system);
			
 
				 			system->nr_events++;
			
 
				 			return system->entry;
			
 
				 		}
			
@@ -1035,6 +1101,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 
				 	}
			
 
				 
			
 
				 	system->nr_events = 1;
			
 
				+	system->ref_count = 1;
			
 
				 	system->name = kstrdup(name, GFP_KERNEL);
			
 
				 	if (!system->name) {
			
 
				 		debugfs_remove(system->entry);
			
@@ -1062,8 +1129,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 
				 			   "'%s/filter' entry\n", name);
			
 
				 	}
			
 
				 
			
 
				-	trace_create_file("enable", 0644, system->entry,
			
 
				-			  (void *)system->name,
			
 
				+	trace_create_file("enable", 0644, system->entry, system,
			
 
				 			  &ftrace_system_enable_fops);
			
 
				 
			
 
				 	return system->entry;
			
@@ -1184,16 +1250,9 @@ static void remove_subsystem_dir(const char *name)
 
				 	list_for_each_entry(system, &event_subsystems, list) {
			
 
				 		if (strcmp(system->name, name) == 0) {
			
 
				 			if (!--system->nr_events) {
			
 
				-				struct event_filter *filter = system->filter;
			
 
				-
			
 
				 				debugfs_remove_recursive(system->entry);
			
 
				 				list_del(&system->list);
			
 
				-				if (filter) {
			
 
				-					kfree(filter->filter_string);
			
 
				-					kfree(filter);
			
 
				-				}
			
 
				-				kfree(system->name);
			
 
				-				kfree(system);
			
 
				+				__put_system(system);
			
 
				 			}
			
 
				 			break;
			
 
				 		}
			
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1886,6 +1886,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
 
				 
			
 
				 	mutex_lock(&event_mutex);
			
 
				 
			
 
				+	/* Make sure the system still has events */
			
 
				+	if (!system->nr_events) {
			
 
				+		err = -ENODEV;
			
 
				+		goto out_unlock;
			
 
				+	}
			
 
				+
			
 
				 	if (!strcmp(strstrip(filter_string), "0")) {
			
 
				 		filter_free_subsystem_preds(system);
			
 
				 		remove_filter_string(system->filter);
			
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -324,7 +324,8 @@ ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
 
				 }
			
 
				 
			
 
				 static int
			
 
				-ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
			
 
				+ftrace_trace_onoff_callback(struct ftrace_hash *hash,
			
 
				+			    char *glob, char *cmd, char *param, int enable)
			
 
				 {
			
 
				 	struct ftrace_probe_ops *ops;
			
 
				 	void *count = (void *)-1;
			
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -74,6 +74,20 @@ static struct tracer_flags tracer_flags = {
 
				 
			
 
				 static struct trace_array *graph_array;
			
 
				 
			
 
				+/*
			
 
				+ * The DURATION column is also used to display IRQ signs.
			
 
				+ * The following values are used by print_graph_irq and others
			
 
				+ * to fill space in the DURATION column.
			
 
				+ */
			
 
				+enum {
			
 
				+	DURATION_FILL_FULL  = -1,
			
 
				+	DURATION_FILL_START = -2,
			
 
				+	DURATION_FILL_END   = -3,
			
 
				+};
			
 
				+
			
 
				+static enum print_line_t
			
 
				+print_graph_duration(unsigned long long duration, struct trace_seq *s,
			
 
				+		     u32 flags);
			
 
				 
			
 
				 /* Add a function return address to the trace stack on thread info.*/
			
 
				 int
			
@@ -213,7 +227,7 @@ int __trace_graph_entry(struct trace_array *tr,
 
				 
			
 
				 static inline int ftrace_graph_ignore_irqs(void)
			
 
				 {
			
 
				-	if (!ftrace_graph_skip_irqs)
			
 
				+	if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
			
 
				 		return 0;
			
 
				 
			
 
				 	return in_irq();
			
@@ -577,32 +591,6 @@ get_return_for_leaf(struct trace_iterator *iter,
 
				 	return next;
			
 
				 }
			
 
				 
			
 
				-/* Signal a overhead of time execution to the output */
			
 
				-static int
			
 
				-print_graph_overhead(unsigned long long duration, struct trace_seq *s,
			
 
				-		     u32 flags)
			
 
				-{
			
 
				-	/* If duration disappear, we don't need anything */
			
 
				-	if (!(flags & TRACE_GRAPH_PRINT_DURATION))
			
 
				-		return 1;
			
 
				-
			
 
				-	/* Non nested entry or return */
			
 
				-	if (duration == -1)
			
 
				-		return trace_seq_printf(s, "  ");
			
 
				-
			
 
				-	if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
			
 
				-		/* Duration exceeded 100 msecs */
			
 
				-		if (duration > 100000ULL)
			
 
				-			return trace_seq_printf(s, "! ");
			
 
				-
			
 
				-		/* Duration exceeded 10 msecs */
			
 
				-		if (duration > 10000ULL)
			
 
				-			return trace_seq_printf(s, "+ ");
			
 
				-	}
			
 
				-
			
 
				-	return trace_seq_printf(s, "  ");
			
 
				-}
			
 
				-
			
 
				 static int print_graph_abs_time(u64 t, struct trace_seq *s)
			
 
				 {
			
 
				 	unsigned long usecs_rem;
			
@@ -625,34 +613,36 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
 
				 		addr >= (unsigned long)__irqentry_text_end)
			
 
				 		return TRACE_TYPE_UNHANDLED;
			
 
				 
			
 
				-	/* Absolute time */
			
 
				-	if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
			
 
				-		ret = print_graph_abs_time(iter->ts, s);
			
 
				-		if (!ret)
			
 
				-			return TRACE_TYPE_PARTIAL_LINE;
			
 
				-	}
			
 
				+	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
			
 
				+		/* Absolute time */
			
 
				+		if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
			
 
				+			ret = print_graph_abs_time(iter->ts, s);
			
 
				+			if (!ret)
			
 
				+				return TRACE_TYPE_PARTIAL_LINE;
			
 
				+		}
			
 
				 
			
 
				-	/* Cpu */
			
 
				-	if (flags & TRACE_GRAPH_PRINT_CPU) {
			
 
				-		ret = print_graph_cpu(s, cpu);
			
 
				-		if (ret == TRACE_TYPE_PARTIAL_LINE)
			
 
				-			return TRACE_TYPE_PARTIAL_LINE;
			
 
				-	}
			
 
				+		/* Cpu */
			
 
				+		if (flags & TRACE_GRAPH_PRINT_CPU) {
			
 
				+			ret = print_graph_cpu(s, cpu);
			
 
				+			if (ret == TRACE_TYPE_PARTIAL_LINE)
			
 
				+				return TRACE_TYPE_PARTIAL_LINE;
			
 
				+		}
			
 
				 
			
 
				-	/* Proc */
			
 
				-	if (flags & TRACE_GRAPH_PRINT_PROC) {
			
 
				-		ret = print_graph_proc(s, pid);
			
 
				-		if (ret == TRACE_TYPE_PARTIAL_LINE)
			
 
				-			return TRACE_TYPE_PARTIAL_LINE;
			
 
				-		ret = trace_seq_printf(s, " | ");
			
 
				-		if (!ret)
			
 
				-			return TRACE_TYPE_PARTIAL_LINE;
			
 
				+		/* Proc */
			
 
				+		if (flags & TRACE_GRAPH_PRINT_PROC) {
			
 
				+			ret = print_graph_proc(s, pid);
			
 
				+			if (ret == TRACE_TYPE_PARTIAL_LINE)
			
 
				+				return TRACE_TYPE_PARTIAL_LINE;
			
 
				+			ret = trace_seq_printf(s, " | ");
			
 
				+			if (!ret)
			
 
				+				return TRACE_TYPE_PARTIAL_LINE;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	/* No overhead */
			
 
				-	ret = print_graph_overhead(-1, s, flags);
			
 
				-	if (!ret)
			
 
				-		return TRACE_TYPE_PARTIAL_LINE;
			
 
				+	ret = print_graph_duration(DURATION_FILL_START, s, flags);
			
 
				+	if (ret != TRACE_TYPE_HANDLED)
			
 
				+		return ret;
			
 
				 
			
 
				 	if (type == TRACE_GRAPH_ENT)
			
 
				 		ret = trace_seq_printf(s, "==========>");
			
@@ -662,9 +652,10 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
 
				 	if (!ret)
			
 
				 		return TRACE_TYPE_PARTIAL_LINE;
			
 
				 
			
 
				-	/* Don't close the duration column if haven't one */
			
 
				-	if (flags & TRACE_GRAPH_PRINT_DURATION)
			
 
				-		trace_seq_printf(s, " |");
			
 
				+	ret = print_graph_duration(DURATION_FILL_END, s, flags);
			
 
				+	if (ret != TRACE_TYPE_HANDLED)
			
 
				+		return ret;
			
 
				+
			
 
				 	ret = trace_seq_printf(s, "\n");
			
 
				 
			
 
				 	if (!ret)
			
@@ -716,9 +707,49 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
 
				 }
			
 
				 
			
 
				 static enum print_line_t
			
 
				-print_graph_duration(unsigned long long duration, struct trace_seq *s)
			
 
				+print_graph_duration(unsigned long long duration, struct trace_seq *s,
			
 
				+		     u32 flags)
			
 
				 {
			
 
				-	int ret;
			
 
				+	int ret = -1;
			
 
				+
			
 
				+	if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
			
 
				+	    !(trace_flags & TRACE_ITER_CONTEXT_INFO))
			
 
				+			return TRACE_TYPE_HANDLED;
			
 
				+
			
 
				+	/* No real data, just filling the column with spaces */
			
 
				+	switch (duration) {
			
 
				+	case DURATION_FILL_FULL:
			
 
				+		ret = trace_seq_printf(s, "              |  ");
			
 
				+		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
			
 
				+	case DURATION_FILL_START:
			
 
				+		ret = trace_seq_printf(s, "  ");
			
 
				+		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
			
 
				+	case DURATION_FILL_END:
			
 
				+		ret = trace_seq_printf(s, " |");
			
 
				+		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
			
 
				+	}
			
 
				+
			
 
				+	/* Signal an execution-time overhead in the output */
			
 
				+	if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
			
 
				+		/* Duration exceeded 100 msecs */
			
 
				+		if (duration > 100000ULL)
			
 
				+			ret = trace_seq_printf(s, "! ");
			
 
				+		/* Duration exceeded 10 msecs */
			
 
				+		else if (duration > 10000ULL)
			
 
				+			ret = trace_seq_printf(s, "+ ");
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * The -1 means we either did not exceed the duration thresholds
			
 
				+	 * or we don't want to print out the overhead. Either way we need
			
 
				+	 * to fill out the space.
			
 
				+	 */
			
 
				+	if (ret == -1)
			
 
				+		ret = trace_seq_printf(s, "  ");
			
 
				+
			
 
				+	/* Catch any failure that happened above */
			
 
				+	if (!ret)
			
 
				+		return TRACE_TYPE_PARTIAL_LINE;
			
 
				 
			
 
				 	ret = trace_print_graph_duration(duration, s);
			
 
				 	if (ret != TRACE_TYPE_HANDLED)
			
@@ -767,18 +798,11 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
				 			cpu_data->enter_funcs[call->depth] = 0;
			
 
				 	}
			
 
				 
			
 
				-	/* Overhead */
			
 
				-	ret = print_graph_overhead(duration, s, flags);
			
 
				-	if (!ret)
			
 
				+	/* Overhead and duration */
			
 
				+	ret = print_graph_duration(duration, s, flags);
			
 
				+	if (ret == TRACE_TYPE_PARTIAL_LINE)
			
 
				 		return TRACE_TYPE_PARTIAL_LINE;
			
 
				 
			
 
				-	/* Duration */
			
 
				-	if (flags & TRACE_GRAPH_PRINT_DURATION) {
			
 
				-		ret = print_graph_duration(duration, s);
			
 
				-		if (ret == TRACE_TYPE_PARTIAL_LINE)
			
 
				-			return TRACE_TYPE_PARTIAL_LINE;
			
 
				-	}
			
 
				-
			
 
				 	/* Function */
			
 
				 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
			
 
				 		ret = trace_seq_printf(s, " ");
			
@@ -815,17 +839,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
 
				 			cpu_data->enter_funcs[call->depth] = call->func;
			
 
				 	}
			
 
				 
			
 
				-	/* No overhead */
			
 
				-	ret = print_graph_overhead(-1, s, flags);
			
 
				-	if (!ret)
			
 
				-		return TRACE_TYPE_PARTIAL_LINE;
			
 
				-
			
 
				 	/* No time */
			
 
				-	if (flags & TRACE_GRAPH_PRINT_DURATION) {
			
 
				-		ret = trace_seq_printf(s, "            |  ");
			
 
				-		if (!ret)
			
 
				-			return TRACE_TYPE_PARTIAL_LINE;
			
 
				-	}
			
 
				+	ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
			
 
				+	if (ret != TRACE_TYPE_HANDLED)
			
 
				+		return ret;
			
 
				 
			
 
				 	/* Function */
			
 
				 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
			
@@ -865,6 +882,9 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
 
				 			return TRACE_TYPE_PARTIAL_LINE;
			
 
				 	}
			
 
				 
			
 
				+	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
			
 
				+		return 0;
			
 
				+
			
 
				 	/* Absolute time */
			
 
				 	if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
			
 
				 		ret = print_graph_abs_time(iter->ts, s);
			
@@ -1078,18 +1098,11 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 
				 	if (print_graph_prologue(iter, s, 0, 0, flags))
			
 
				 		return TRACE_TYPE_PARTIAL_LINE;
			
 
				 
			
 
				-	/* Overhead */
			
 
				-	ret = print_graph_overhead(duration, s, flags);
			
 
				-	if (!ret)
			
 
				+	/* Overhead and duration */
			
 
				+	ret = print_graph_duration(duration, s, flags);
			
 
				+	if (ret == TRACE_TYPE_PARTIAL_LINE)
			
 
				 		return TRACE_TYPE_PARTIAL_LINE;
			
 
				 
			
 
				-	/* Duration */
			
 
				-	if (flags & TRACE_GRAPH_PRINT_DURATION) {
			
 
				-		ret = print_graph_duration(duration, s);
			
 
				-		if (ret == TRACE_TYPE_PARTIAL_LINE)
			
 
				-			return TRACE_TYPE_PARTIAL_LINE;
			
 
				-	}
			
 
				-
			
 
				 	/* Closing brace */
			
 
				 	for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
			
 
				 		ret = trace_seq_printf(s, " ");
			
@@ -1146,17 +1159,10 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
 
				 	if (print_graph_prologue(iter, s, 0, 0, flags))
			
 
				 		return TRACE_TYPE_PARTIAL_LINE;
			
 
				 
			
 
				-	/* No overhead */
			
 
				-	ret = print_graph_overhead(-1, s, flags);
			
 
				-	if (!ret)
			
 
				-		return TRACE_TYPE_PARTIAL_LINE;
			
 
				-
			
 
				 	/* No time */
			
 
				-	if (flags & TRACE_GRAPH_PRINT_DURATION) {
			
 
				-		ret = trace_seq_printf(s, "            |  ");
			
 
				-		if (!ret)
			
 
				-			return TRACE_TYPE_PARTIAL_LINE;
			
 
				-	}
			
 
				+	ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
			
 
				+	if (ret != TRACE_TYPE_HANDLED)
			
 
				+		return ret;
			
 
				 
			
 
				 	/* Indentation */
			
 
				 	if (depth > 0)
			
@@ -1207,7 +1213,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
 
				 
			
 
				 
			
 
				 enum print_line_t
			
 
				-__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
			
 
				+print_graph_function_flags(struct trace_iterator *iter, u32 flags)
			
 
				 {
			
 
				 	struct ftrace_graph_ent_entry *field;
			
 
				 	struct fgraph_data *data = iter->private;
			
@@ -1270,18 +1276,7 @@ __print_graph_function_flags(struct trace_iterator *iter, u32 flags)
 
				 static enum print_line_t
			
 
				 print_graph_function(struct trace_iterator *iter)
			
 
				 {
			
 
				-	return __print_graph_function_flags(iter, tracer_flags.val);
			
 
				-}
			
 
				-
			
 
				-enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
			
 
				-					     u32 flags)
			
 
				-{
			
 
				-	if (trace_flags & TRACE_ITER_LATENCY_FMT)
			
 
				-		flags |= TRACE_GRAPH_PRINT_DURATION;
			
 
				-	else
			
 
				-		flags |= TRACE_GRAPH_PRINT_ABS_TIME;
			
 
				-
			
 
				-	return __print_graph_function_flags(iter, flags);
			
 
				+	return print_graph_function_flags(iter, tracer_flags.val);
			
 
				 }
			
 
				 
			
 
				 static enum print_line_t
			
@@ -1309,8 +1304,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
 
				 	seq_printf(s, "#%.*s / _----=> need-resched    \n", size, spaces);
			
 
				 	seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);
			
 
				 	seq_printf(s, "#%.*s|| / _--=> preempt-depth   \n", size, spaces);
			
 
				-	seq_printf(s, "#%.*s||| / _-=> lock-depth      \n", size, spaces);
			
 
				-	seq_printf(s, "#%.*s|||| /                     \n", size, spaces);
			
 
				+	seq_printf(s, "#%.*s||| /                      \n", size, spaces);
			
 
				 }
			
 
				 
			
 
				 static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
			
@@ -1329,7 +1323,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
 
				 	if (flags & TRACE_GRAPH_PRINT_PROC)
			
 
				 		seq_printf(s, "  TASK/PID       ");
			
 
				 	if (lat)
			
 
				-		seq_printf(s, "|||||");
			
 
				+		seq_printf(s, "||||");
			
 
				 	if (flags & TRACE_GRAPH_PRINT_DURATION)
			
 
				 		seq_printf(s, "  DURATION   ");
			
 
				 	seq_printf(s, "               FUNCTION CALLS\n");
			
@@ -1343,7 +1337,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
 
				 	if (flags & TRACE_GRAPH_PRINT_PROC)
			
 
				 		seq_printf(s, "   |    |        ");
			
 
				 	if (lat)
			
 
				-		seq_printf(s, "|||||");
			
 
				+		seq_printf(s, "||||");
			
 
				 	if (flags & TRACE_GRAPH_PRINT_DURATION)
			
 
				 		seq_printf(s, "   |   |      ");
			
 
				 	seq_printf(s, "               |   |   |   |\n");
			
@@ -1358,15 +1352,16 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
 
				 {
			
 
				 	struct trace_iterator *iter = s->private;
			
 
				 
			
 
				+	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
			
 
				+		return;
			
 
				+
			
 
				 	if (trace_flags & TRACE_ITER_LATENCY_FMT) {
			
 
				 		/* print nothing if the buffers are empty */
			
 
				 		if (trace_empty(iter))
			
 
				 			return;
			
 
				 
			
 
				 		print_trace_header(s, iter);
			
 
				-		flags |= TRACE_GRAPH_PRINT_DURATION;
			
 
				-	} else
			
 
				-		flags |= TRACE_GRAPH_PRINT_ABS_TIME;
			
 
				+	}
			
 
				 
			
 
				 	__print_graph_headers_flags(s, flags);
			
 
				 }
			
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -226,7 +226,9 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
 
				 }
			
 
				 
			
 
				 #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
			
 
				-			    TRACE_GRAPH_PRINT_PROC)
			
 
				+			    TRACE_GRAPH_PRINT_PROC | \
			
 
				+			    TRACE_GRAPH_PRINT_ABS_TIME | \
			
 
				+			    TRACE_GRAPH_PRINT_DURATION)
			
 
				 
			
 
				 static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
			
 
				 {