9 years ago · 36db171cc7
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -60,6 +60,7 @@ show up in /proc/sys/kernel:
 
				 - panic_on_warn
			
 
				 - perf_cpu_time_max_percent
			
 
				 - perf_event_paranoid
			
 
				+- perf_event_max_stack
			
 
				 - pid_max
			
 
				 - powersave-nap               [ PPC only ]
			
 
				 - printk
			
@@ -654,6 +655,19 @@ users (without CAP_SYS_ADMIN).  The default value is 2.
 
				 
			
 
				 ==============================================================
			
 
				 
			
 
				+perf_event_max_stack:
			
 
				+
			
 
				+Controls the maximum number of stack frames to copy for (attr.sample_type &
			
 
				+PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
			
 
				+'perf record -g' or 'perf trace --call-graph fp'.
			
 
				+
			
 
				+The value can only be changed when no events are in use that have callchains
			
 
				+enabled; otherwise, writing to this file will return -EBUSY.
			
 
				+
			
 
				+The default value is 127.
			
 
				+
			
 
				+==============================================================
			
 
				+
			
 
				 pid_max:
			
 
				 
			
 
				 PID allocation wrap value.  When the kernel's next PID value
			
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -631,7 +631,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 
				 	info->address &= ~alignment_mask;
			
 
				 	info->ctrl.len <<= offset;
			
 
				 
			
 
				-	if (!bp->overflow_handler) {
			
 
				+	if (is_default_overflow_handler(bp)) {
			
 
				 		/*
			
 
				 		 * Mismatch breakpoints are required for single-stepping
			
 
				 		 * breakpoints.
			
@@ -754,7 +754,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 
				 		 * mismatch breakpoint so we can single-step over the
			
 
				 		 * watchpoint trigger.
			
 
				 		 */
			
 
				-		if (!wp->overflow_handler)
			
 
				+		if (is_default_overflow_handler(wp))
			
 
				 			enable_single_step(wp, instruction_pointer(regs));
			
 
				 
			
 
				 unlock:
			
--- a/arch/arm/kernel/perf_callchain.c
+++ b/arch/arm/kernel/perf_callchain.c
@@ -75,7 +75,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
				 
			
 
				 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
			
 
				 
			
 
				-	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
			
 
				+	while ((entry->nr < sysctl_perf_event_max_stack) &&
			
 
				 	       tail && !((unsigned long)tail & 0x3))
			
 
				 		tail = user_backtrace(tail, entry);
			
 
				 }
			
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -616,7 +616,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr,
 
				 		perf_bp_event(bp, regs);
			
 
				 
			
 
				 		/* Do we need to handle the stepping? */
			
 
				-		if (!bp->overflow_handler)
			
 
				+		if (is_default_overflow_handler(bp))
			
 
				 			step = 1;
			
 
				 unlock:
			
 
				 		rcu_read_unlock();
			
@@ -712,7 +712,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
 
				 		perf_bp_event(wp, regs);
			
 
				 
			
 
				 		/* Do we need to handle the stepping? */
			
 
				-		if (!wp->overflow_handler)
			
 
				+		if (is_default_overflow_handler(wp))
			
 
				 			step = 1;
			
 
				 
			
 
				 unlock:
			
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -122,7 +122,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 
				 
			
 
				 		tail = (struct frame_tail __user *)regs->regs[29];
			
 
				 
			
 
				-		while (entry->nr < PERF_MAX_STACK_DEPTH &&
			
 
				+		while (entry->nr < sysctl_perf_event_max_stack &&
			
 
				 		       tail && !((unsigned long)tail & 0xf))
			
 
				 			tail = user_backtrace(tail, entry);
			
 
				 	} else {
			
@@ -132,7 +132,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 
				 
			
 
				 		tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
			
 
				 
			
 
				-		while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
			
 
				+		while ((entry->nr < sysctl_perf_event_max_stack) &&
			
 
				 			tail && !((unsigned long)tail & 0x3))
			
 
				 			tail = compat_user_backtrace(tail, entry);
			
 
				 #endif
			
--- a/arch/metag/kernel/perf_callchain.c
+++ b/arch/metag/kernel/perf_callchain.c
@@ -65,7 +65,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
				 
			
 
				 	--frame;
			
 
				 
			
 
				-	while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
			
 
				+	while ((entry->nr < sysctl_perf_event_max_stack) && frame)
			
 
				 		frame = user_backtrace(frame, entry);
			
 
				 }
			
 
				 
			
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -35,7 +35,7 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
 
				 		addr = *sp++;
			
 
				 		if (__kernel_text_address(addr)) {
			
 
				 			perf_callchain_store(entry, addr);
			
 
				-			if (entry->nr >= PERF_MAX_STACK_DEPTH)
			
 
				+			if (entry->nr >= sysctl_perf_event_max_stack)
			
 
				 				break;
			
 
				 		}
			
 
				 	}
			
@@ -59,7 +59,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 
				 	}
			
 
				 	do {
			
 
				 		perf_callchain_store(entry, pc);
			
 
				-		if (entry->nr >= PERF_MAX_STACK_DEPTH)
			
 
				+		if (entry->nr >= sysctl_perf_event_max_stack)
			
 
				 			break;
			
 
				 		pc = unwind_stack(current, &sp, pc, &ra);
			
 
				 	} while (pc);
			
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -247,7 +247,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 
				 	sp = regs->gpr[1];
			
 
				 	perf_callchain_store(entry, next_ip);
			
 
				 
			
 
				-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
			
 
				+	while (entry->nr < sysctl_perf_event_max_stack) {
			
 
				 		fp = (unsigned long __user *) sp;
			
 
				 		if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
			
 
				 			return;
			
@@ -453,7 +453,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
 
				 	sp = regs->gpr[1];
			
 
				 	perf_callchain_store(entry, next_ip);
			
 
				 
			
 
				-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
			
 
				+	while (entry->nr < sysctl_perf_event_max_stack) {
			
 
				 		fp = (unsigned int __user *) (unsigned long) sp;
			
 
				 		if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
			
 
				 			return;
			
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1756,7 +1756,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 
				 			}
			
 
				 		}
			
 
				 #endif
			
 
				-	} while (entry->nr < PERF_MAX_STACK_DEPTH);
			
 
				+	} while (entry->nr < sysctl_perf_event_max_stack);
			
 
				 }
			
 
				 
			
 
				 static inline int
			
@@ -1790,7 +1790,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 
				 		pc = sf.callers_pc;
			
 
				 		ufp = (unsigned long)sf.fp + STACK_BIAS;
			
 
				 		perf_callchain_store(entry, pc);
			
 
				-	} while (entry->nr < PERF_MAX_STACK_DEPTH);
			
 
				+	} while (entry->nr < sysctl_perf_event_max_stack);
			
 
				 }
			
 
				 
			
 
				 static void perf_callchain_user_32(struct perf_callchain_entry *entry,
			
@@ -1822,7 +1822,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
 
				 			ufp = (unsigned long)sf.fp;
			
 
				 		}
			
 
				 		perf_callchain_store(entry, pc);
			
 
				-	} while (entry->nr < PERF_MAX_STACK_DEPTH);
			
 
				+	} while (entry->nr < sysctl_perf_event_max_stack);
			
 
				 }
			
 
				 
			
 
				 void
			
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -164,10 +164,6 @@ config INSTRUCTION_DECODER
 
				 	def_bool y
			
 
				 	depends on KPROBES || PERF_EVENTS || UPROBES
			
 
				 
			
 
				-config PERF_EVENTS_INTEL_UNCORE
			
 
				-	def_bool y
			
 
				-	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
			
 
				-
			
 
				 config OUTPUT_FORMAT
			
 
				 	string
			
 
				 	default "elf32-i386" if X86_32
			
@@ -1046,6 +1042,8 @@ config X86_THERMAL_VECTOR
 
				 	def_bool y
			
 
				 	depends on X86_MCE_INTEL
			
 
				 
			
 
				+source "arch/x86/events/Kconfig"
			
 
				+
			
 
				 config X86_LEGACY_VM86
			
 
				 	bool "Legacy VM86 support"
			
 
				 	default n
			
@@ -1210,15 +1208,6 @@ config MICROCODE_OLD_INTERFACE
 
				 	def_bool y
			
 
				 	depends on MICROCODE
			
 
				 
			
 
				-config PERF_EVENTS_AMD_POWER
			
 
				-	depends on PERF_EVENTS && CPU_SUP_AMD
			
 
				-	tristate "AMD Processor Power Reporting Mechanism"
			
 
				-	---help---
			
 
				-	  Provide power reporting mechanism support for AMD processors.
			
 
				-	  Currently, it leverages X86_FEATURE_ACC_POWER
			
 
				-	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
			
 
				-	  average power consumption on Family 15h processors.
			
 
				-
			
 
				 config X86_MSR
			
 
				 	tristate "/dev/cpu/*/msr - Model-specific register support"
			
 
				 	---help---
			
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -0,0 +1,36 @@
 
				+menu "Performance monitoring"
			
 
				+
			
 
				+config PERF_EVENTS_INTEL_UNCORE
			
 
				+	tristate "Intel uncore performance events"
			
 
				+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
			
 
				+	default y
			
 
				+	---help---
			
 
				+	Include support for Intel uncore performance events. These are
			
 
				+	available on NehalemEX and more modern processors.
			
 
				+
			
 
				+config PERF_EVENTS_INTEL_RAPL
			
 
				+	tristate "Intel rapl performance events"
			
 
				+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
			
 
				+	default y
			
 
				+	---help---
			
 
				+	Include support for Intel rapl performance events for power
			
 
				+	monitoring on modern processors.
			
 
				+
			
 
				+config PERF_EVENTS_INTEL_CSTATE
			
 
				+	tristate "Intel cstate performance events"
			
 
				+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
			
 
				+	default y
			
 
				+	---help---
			
 
				+	Include support for Intel cstate performance events for power
			
 
				+	monitoring on modern processors.
			
 
				+
			
 
				+config PERF_EVENTS_AMD_POWER
			
 
				+	depends on PERF_EVENTS && CPU_SUP_AMD
			
 
				+	tristate "AMD Processor Power Reporting Mechanism"
			
 
				+	---help---
			
 
				+	  Provide power reporting mechanism support for AMD processors.
			
 
				+	  Currently, it leverages X86_FEATURE_ACC_POWER
			
 
				+	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
			
 
				+	  average power consumption on Family 15h processors.
			
 
				+
			
 
				+endmenu
			
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -6,9 +6,6 @@ obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
 
				 ifdef CONFIG_AMD_IOMMU
			
 
				 obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o
			
 
				 endif
			
 
				-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/core.o intel/bts.o intel/cqm.o
			
 
				-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/cstate.o intel/ds.o intel/knc.o 
			
 
				-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
			
 
				-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/rapl.o msr.o
			
 
				-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore.o intel/uncore_nhmex.o
			
 
				-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore_snb.o intel/uncore_snbep.o
			
 
				+
			
 
				+obj-$(CONFIG_CPU_SUP_INTEL)		+= msr.o
			
 
				+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/
			
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -263,6 +263,7 @@ static const struct attribute_group *amd_uncore_attr_groups[] = {
 
				 };
			
 
				 
			
 
				 static struct pmu amd_nb_pmu = {
			
 
				+	.task_ctx_nr	= perf_invalid_context,
			
 
				 	.attr_groups	= amd_uncore_attr_groups,
			
 
				 	.name		= "amd_nb",
			
 
				 	.event_init	= amd_uncore_event_init,
			
@@ -274,6 +275,7 @@ static struct pmu amd_nb_pmu = {
 
				 };
			
 
				 
			
 
				 static struct pmu amd_l2_pmu = {
			
 
				+	.task_ctx_nr	= perf_invalid_context,
			
 
				 	.attr_groups	= amd_uncore_attr_groups,
			
 
				 	.name		= "amd_l2",
			
 
				 	.event_init	= amd_uncore_event_init,
			
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -360,6 +360,9 @@ int x86_add_exclusive(unsigned int what)
 
				 {
			
 
				 	int i;
			
 
				 
			
 
				+	if (x86_pmu.lbr_pt_coexist)
			
 
				+		return 0;
			
 
				+
			
 
				 	if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
			
 
				 		mutex_lock(&pmc_reserve_mutex);
			
 
				 		for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
			
@@ -380,6 +383,9 @@ int x86_add_exclusive(unsigned int what)
 
				 
			
 
				 void x86_del_exclusive(unsigned int what)
			
 
				 {
			
 
				+	if (x86_pmu.lbr_pt_coexist)
			
 
				+		return;
			
 
				+
			
 
				 	atomic_dec(&x86_pmu.lbr_exclusive[what]);
			
 
				 	atomic_dec(&active_events);
			
 
				 }
			
@@ -2277,7 +2283,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 
				 
			
 
				 	fp = compat_ptr(ss_base + regs->bp);
			
 
				 	pagefault_disable();
			
 
				-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
			
 
				+	while (entry->nr < sysctl_perf_event_max_stack) {
			
 
				 		unsigned long bytes;
			
 
				 		frame.next_frame     = 0;
			
 
				 		frame.return_address = 0;
			
@@ -2337,7 +2343,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
				 		return;
			
 
				 
			
 
				 	pagefault_disable();
			
 
				-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
			
 
				+	while (entry->nr < sysctl_perf_event_max_stack) {
			
 
				 		unsigned long bytes;
			
 
				 		frame.next_frame	     = NULL;
			
 
				 		frame.return_address = 0;
			
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -0,0 +1,9 @@
 
				+obj-$(CONFIG_CPU_SUP_INTEL)		+= core.o bts.o cqm.o
			
 
				+obj-$(CONFIG_CPU_SUP_INTEL)		+= ds.o knc.o
			
 
				+obj-$(CONFIG_CPU_SUP_INTEL)		+= lbr.o p4.o p6.o pt.o
			
 
				+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)	+= intel-rapl.o
			
 
				+intel-rapl-objs				:= rapl.o
			
 
				+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel-uncore.o
			
 
				+intel-uncore-objs			:= uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
			
 
				+obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE)	+= intel-cstate.o
			
 
				+intel-cstate-objs			:= cstate.o
			
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -171,18 +171,6 @@ static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
 
				 	memset(page_address(phys->page) + index, 0, phys->size - index);
			
 
				 }
			
 
				 
			
 
				-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
			
 
				-{
			
 
				-	if (buf->snapshot)
			
 
				-		return false;
			
 
				-
			
 
				-	if (local_read(&buf->data_size) >= bts->handle.size ||
			
 
				-	    bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
			
 
				-		return true;
			
 
				-
			
 
				-	return false;
			
 
				-}
			
 
				-
			
 
				 static void bts_update(struct bts_ctx *bts)
			
 
				 {
			
 
				 	int cpu = raw_smp_processor_id();
			
@@ -213,18 +201,15 @@ static void bts_update(struct bts_ctx *bts)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static int
			
 
				+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
			
 
				+
			
 
				 static void __bts_event_start(struct perf_event *event)
			
 
				 {
			
 
				 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
			
 
				 	struct bts_buffer *buf = perf_get_aux(&bts->handle);
			
 
				 	u64 config = 0;
			
 
				 
			
 
				-	if (!buf || bts_buffer_is_full(buf, bts))
			
 
				-		return;
			
 
				-
			
 
				-	event->hw.itrace_started = 1;
			
 
				-	event->hw.state = 0;
			
 
				-
			
 
				 	if (!buf->snapshot)
			
 
				 		config |= ARCH_PERFMON_EVENTSEL_INT;
			
 
				 	if (!event->attr.exclude_kernel)
			
@@ -241,16 +226,41 @@ static void __bts_event_start(struct perf_event *event)
 
				 	wmb();
			
 
				 
			
 
				 	intel_pmu_enable_bts(config);
			
 
				+
			
 
				 }
			
 
				 
			
 
				 static void bts_event_start(struct perf_event *event, int flags)
			
 
				 {
			
 
				+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
			
 
				 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
			
 
				+	struct bts_buffer *buf;
			
 
				+
			
 
				+	buf = perf_aux_output_begin(&bts->handle, event);
			
 
				+	if (!buf)
			
 
				+		goto fail_stop;
			
 
				+
			
 
				+	if (bts_buffer_reset(buf, &bts->handle))
			
 
				+		goto fail_end_stop;
			
 
				+
			
 
				+	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
			
 
				+	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
			
 
				+	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
			
 
				+
			
 
				+	event->hw.itrace_started = 1;
			
 
				+	event->hw.state = 0;
			
 
				 
			
 
				 	__bts_event_start(event);
			
 
				 
			
 
				 	/* PMI handler: this counter is running and likely generating PMIs */
			
 
				 	ACCESS_ONCE(bts->started) = 1;
			
 
				+
			
 
				+	return;
			
 
				+
			
 
				+fail_end_stop:
			
 
				+	perf_aux_output_end(&bts->handle, 0, false);
			
 
				+
			
 
				+fail_stop:
			
 
				+	event->hw.state = PERF_HES_STOPPED;
			
 
				 }
			
 
				 
			
 
				 static void __bts_event_stop(struct perf_event *event)
			
@@ -269,15 +279,32 @@ static void __bts_event_stop(struct perf_event *event)
 
				 
			
 
				 static void bts_event_stop(struct perf_event *event, int flags)
			
 
				 {
			
 
				+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
			
 
				 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
			
 
				+	struct bts_buffer *buf = perf_get_aux(&bts->handle);
			
 
				 
			
 
				 	/* PMI handler: don't restart this counter */
			
 
				 	ACCESS_ONCE(bts->started) = 0;
			
 
				 
			
 
				 	__bts_event_stop(event);
			
 
				 
			
 
				-	if (flags & PERF_EF_UPDATE)
			
 
				+	if (flags & PERF_EF_UPDATE) {
			
 
				 		bts_update(bts);
			
 
				+
			
 
				+		if (buf) {
			
 
				+			if (buf->snapshot)
			
 
				+				bts->handle.head =
			
 
				+					local_xchg(&buf->data_size,
			
 
				+						   buf->nr_pages << PAGE_SHIFT);
			
 
				+			perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
			
 
				+					    !!local_xchg(&buf->lost, 0));
			
 
				+		}
			
 
				+
			
 
				+		cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
			
 
				+		cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
			
 
				+		cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
			
 
				+		cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 void intel_bts_enable_local(void)
			
@@ -417,34 +444,14 @@ int intel_bts_interrupt(void)
 
				 
			
 
				 static void bts_event_del(struct perf_event *event, int mode)
			
 
				 {
			
 
				-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
			
 
				-	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
			
 
				-	struct bts_buffer *buf = perf_get_aux(&bts->handle);
			
 
				-
			
 
				 	bts_event_stop(event, PERF_EF_UPDATE);
			
 
				-
			
 
				-	if (buf) {
			
 
				-		if (buf->snapshot)
			
 
				-			bts->handle.head =
			
 
				-				local_xchg(&buf->data_size,
			
 
				-					   buf->nr_pages << PAGE_SHIFT);
			
 
				-		perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
			
 
				-				    !!local_xchg(&buf->lost, 0));
			
 
				-	}
			
 
				-
			
 
				-	cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
			
 
				-	cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
			
 
				-	cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
			
 
				-	cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
			
 
				 }
			
 
				 
			
 
				 static int bts_event_add(struct perf_event *event, int mode)
			
 
				 {
			
 
				-	struct bts_buffer *buf;
			
 
				 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
			
 
				 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
			
 
				 	struct hw_perf_event *hwc = &event->hw;
			
 
				-	int ret = -EBUSY;
			
 
				 
			
 
				 	event->hw.state = PERF_HES_STOPPED;
			
 
				 
			
@@ -454,26 +461,10 @@ static int bts_event_add(struct perf_event *event, int mode)
 
				 	if (bts->handle.event)
			
 
				 		return -EBUSY;
			
 
				 
			
 
				-	buf = perf_aux_output_begin(&bts->handle, event);
			
 
				-	if (!buf)
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	ret = bts_buffer_reset(buf, &bts->handle);
			
 
				-	if (ret) {
			
 
				-		perf_aux_output_end(&bts->handle, 0, false);
			
 
				-		return ret;
			
 
				-	}
			
 
				-
			
 
				-	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
			
 
				-	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
			
 
				-	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
			
 
				-
			
 
				 	if (mode & PERF_EF_START) {
			
 
				 		bts_event_start(event, 0);
			
 
				-		if (hwc->state & PERF_HES_STOPPED) {
			
 
				-			bts_event_del(event, 0);
			
 
				-			return -EBUSY;
			
 
				-		}
			
 
				+		if (hwc->state & PERF_HES_STOPPED)
			
 
				+			return -EINVAL;
			
 
				 	}
			
 
				 
			
 
				 	return 0;
			
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1465,6 +1465,140 @@ static __initconst const u64 slm_hw_cache_event_ids
 
				  },
			
 
				 };
			
 
				 
			
 
				+static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
			
 
				+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
			
 
				+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
			
 
				+	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
			
 
				+	EVENT_EXTRA_END
			
 
				+};
			
 
				+
			
 
				+#define GLM_DEMAND_DATA_RD		BIT_ULL(0)
			
 
				+#define GLM_DEMAND_RFO			BIT_ULL(1)
			
 
				+#define GLM_ANY_RESPONSE		BIT_ULL(16)
			
 
				+#define GLM_SNP_NONE_OR_MISS		BIT_ULL(33)
			
 
				+#define GLM_DEMAND_READ			GLM_DEMAND_DATA_RD
			
 
				+#define GLM_DEMAND_WRITE		GLM_DEMAND_RFO
			
 
				+#define GLM_DEMAND_PREFETCH		(SNB_PF_DATA_RD|SNB_PF_RFO)
			
 
				+#define GLM_LLC_ACCESS			GLM_ANY_RESPONSE
			
 
				+#define GLM_SNP_ANY			(GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
			
 
				+#define GLM_LLC_MISS			(GLM_SNP_ANY|SNB_NON_DRAM)
			
 
				+
			
 
				+static __initconst const u64 glm_hw_cache_event_ids
			
 
				+				[PERF_COUNT_HW_CACHE_MAX]
			
 
				+				[PERF_COUNT_HW_CACHE_OP_MAX]
			
 
				+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
			
 
				+	[C(L1D)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
			
 
				+			[C(RESULT_MISS)]	= 0x0,
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
			
 
				+			[C(RESULT_MISS)]	= 0x0,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x0,
			
 
				+			[C(RESULT_MISS)]	= 0x0,
			
 
				+		},
			
 
				+	},
			
 
				+	[C(L1I)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x0380,	/* ICACHE.ACCESSES */
			
 
				+			[C(RESULT_MISS)]	= 0x0280,	/* ICACHE.MISSES */
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= -1,
			
 
				+			[C(RESULT_MISS)]	= -1,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x0,
			
 
				+			[C(RESULT_MISS)]	= 0x0,
			
 
				+		},
			
 
				+	},
			
 
				+	[C(LL)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
			
 
				+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
			
 
				+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
			
 
				+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
			
 
				+		},
			
 
				+	},
			
 
				+	[C(DTLB)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
			
 
				+			[C(RESULT_MISS)]	= 0x0,
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
			
 
				+			[C(RESULT_MISS)]	= 0x0,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x0,
			
 
				+			[C(RESULT_MISS)]	= 0x0,
			
 
				+		},
			
 
				+	},
			
 
				+	[C(ITLB)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x00c0,	/* INST_RETIRED.ANY_P */
			
 
				+			[C(RESULT_MISS)]	= 0x0481,	/* ITLB.MISS */
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= -1,
			
 
				+			[C(RESULT_MISS)]	= -1,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= -1,
			
 
				+			[C(RESULT_MISS)]	= -1,
			
 
				+		},
			
 
				+	},
			
 
				+	[C(BPU)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
			
 
				+			[C(RESULT_MISS)]	= 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= -1,
			
 
				+			[C(RESULT_MISS)]	= -1,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= -1,
			
 
				+			[C(RESULT_MISS)]	= -1,
			
 
				+		},
			
 
				+	},
			
 
				+};
			
 
				+
			
 
				+static __initconst const u64 glm_hw_cache_extra_regs
			
 
				+				[PERF_COUNT_HW_CACHE_MAX]
			
 
				+				[PERF_COUNT_HW_CACHE_OP_MAX]
			
 
				+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
			
 
				+	[C(LL)] = {
			
 
				+		[C(OP_READ)] = {
			
 
				+			[C(RESULT_ACCESS)]	= GLM_DEMAND_READ|
			
 
				+						  GLM_LLC_ACCESS,
			
 
				+			[C(RESULT_MISS)]	= GLM_DEMAND_READ|
			
 
				+						  GLM_LLC_MISS,
			
 
				+		},
			
 
				+		[C(OP_WRITE)] = {
			
 
				+			[C(RESULT_ACCESS)]	= GLM_DEMAND_WRITE|
			
 
				+						  GLM_LLC_ACCESS,
			
 
				+			[C(RESULT_MISS)]	= GLM_DEMAND_WRITE|
			
 
				+						  GLM_LLC_MISS,
			
 
				+		},
			
 
				+		[C(OP_PREFETCH)] = {
			
 
				+			[C(RESULT_ACCESS)]	= GLM_DEMAND_PREFETCH|
			
 
				+						  GLM_LLC_ACCESS,
			
 
				+			[C(RESULT_MISS)]	= GLM_DEMAND_PREFETCH|
			
 
				+						  GLM_LLC_MISS,
			
 
				+		},
			
 
				+	},
			
 
				+};
			
 
				+
			
 
				 #define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
			
 
				 #define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
			
 
				 #define KNL_MCDRAM_LOCAL	BIT_ULL(21)
			
@@ -3447,7 +3581,7 @@ __init int intel_pmu_init(void)
 
				 		memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
			
 
				 		       sizeof(hw_cache_extra_regs));
			
 
				 
			
 
				-		intel_pmu_lbr_init_atom();
			
 
				+		intel_pmu_lbr_init_slm();
			
 
				 
			
 
				 		x86_pmu.event_constraints = intel_slm_event_constraints;
			
 
				 		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
			
@@ -3456,6 +3590,30 @@ __init int intel_pmu_init(void)
 
				 		pr_cont("Silvermont events, ");
			
 
				 		break;
			
 
				 
			
 
				+	case 92: /* 14nm Atom "Goldmont" */
			
 
				+	case 95: /* 14nm Atom "Goldmont Denverton" */
			
 
				+		memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
			
 
				+		       sizeof(hw_cache_event_ids));
			
 
				+		memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
			
 
				+		       sizeof(hw_cache_extra_regs));
			
 
				+
			
 
				+		intel_pmu_lbr_init_skl();
			
 
				+
			
 
				+		x86_pmu.event_constraints = intel_slm_event_constraints;
			
 
				+		x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
			
 
				+		x86_pmu.extra_regs = intel_glm_extra_regs;
			
 
				+		/*
			
 
				+		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
			
 
				+		 * for precise cycles.
			
 
				+		 * :pp is identical to :ppp
			
 
				+		 */
			
 
				+		x86_pmu.pebs_aliases = NULL;
			
 
				+		x86_pmu.pebs_prec_dist = true;
			
 
				+		x86_pmu.lbr_pt_coexist = true;
			
 
				+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
			
 
				+		pr_cont("Goldmont events, ");
			
 
				+		break;
			
 
				+
			
 
				 	case 37: /* 32nm Westmere    */
			
 
				 	case 44: /* 32nm Westmere-EP */
			
 
				 	case 47: /* 32nm Westmere-EX */
			
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -91,6 +91,8 @@
 
				 #include <asm/cpu_device_id.h>
			
 
				 #include "../perf_event.h"
			
 
				 
			
 
				+MODULE_LICENSE("GPL");
			
 
				+
			
 
				 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
			
 
				 static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\
			
 
				 				struct kobj_attribute *attr,	\
			
@@ -106,22 +108,27 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
 
				 				       struct device_attribute *attr,
			
 
				 				       char *buf);
			
 
				 
			
 
				+/* Model -> events mapping */
			
 
				+struct cstate_model {
			
 
				+	unsigned long		core_events;
			
 
				+	unsigned long		pkg_events;
			
 
				+	unsigned long		quirks;
			
 
				+};
			
 
				+
			
 
				+/* Quirk flags */
			
 
				+#define SLM_PKG_C6_USE_C7_MSR	(1UL << 0)
			
 
				+
			
 
				 struct perf_cstate_msr {
			
 
				 	u64	msr;
			
 
				 	struct	perf_pmu_events_attr *attr;
			
 
				-	bool	(*test)(int idx);
			
 
				 };
			
 
				 
			
 
				 
			
 
				 /* cstate_core PMU */
			
 
				-
			
 
				 static struct pmu cstate_core_pmu;
			
 
				 static bool has_cstate_core;
			
 
				 
			
 
				-enum perf_cstate_core_id {
			
 
				-	/*
			
 
				-	 * cstate_core events
			
 
				-	 */
			
 
				+enum perf_cstate_core_events {
			
 
				 	PERF_CSTATE_CORE_C1_RES = 0,
			
 
				 	PERF_CSTATE_CORE_C3_RES,
			
 
				 	PERF_CSTATE_CORE_C6_RES,
			
@@ -130,69 +137,16 @@ enum perf_cstate_core_id {
 
				 	PERF_CSTATE_CORE_EVENT_MAX,
			
 
				 };
			
 
				 
			
 
				-bool test_core(int idx)
			
 
				-{
			
 
				-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
			
 
				-	    boot_cpu_data.x86 != 6)
			
 
				-		return false;
			
 
				-
			
 
				-	switch (boot_cpu_data.x86_model) {
			
 
				-	case 30: /* 45nm Nehalem    */
			
 
				-	case 26: /* 45nm Nehalem-EP */
			
 
				-	case 46: /* 45nm Nehalem-EX */
			
 
				-
			
 
				-	case 37: /* 32nm Westmere    */
			
 
				-	case 44: /* 32nm Westmere-EP */
			
 
				-	case 47: /* 32nm Westmere-EX */
			
 
				-		if (idx == PERF_CSTATE_CORE_C3_RES ||
			
 
				-		    idx == PERF_CSTATE_CORE_C6_RES)
			
 
				-			return true;
			
 
				-		break;
			
 
				-	case 42: /* 32nm SandyBridge         */
			
 
				-	case 45: /* 32nm SandyBridge-E/EN/EP */
			
 
				-
			
 
				-	case 58: /* 22nm IvyBridge       */
			
 
				-	case 62: /* 22nm IvyBridge-EP/EX */
			
 
				-
			
 
				-	case 60: /* 22nm Haswell Core */
			
 
				-	case 63: /* 22nm Haswell Server */
			
 
				-	case 69: /* 22nm Haswell ULT */
			
 
				-	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
			
 
				-
			
 
				-	case 61: /* 14nm Broadwell Core-M */
			
 
				-	case 86: /* 14nm Broadwell Xeon D */
			
 
				-	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
			
 
				-	case 79: /* 14nm Broadwell Server */
			
 
				-
			
 
				-	case 78: /* 14nm Skylake Mobile */
			
 
				-	case 94: /* 14nm Skylake Desktop */
			
 
				-		if (idx == PERF_CSTATE_CORE_C3_RES ||
			
 
				-		    idx == PERF_CSTATE_CORE_C6_RES ||
			
 
				-		    idx == PERF_CSTATE_CORE_C7_RES)
			
 
				-			return true;
			
 
				-		break;
			
 
				-	case 55: /* 22nm Atom "Silvermont"                */
			
 
				-	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
			
 
				-	case 76: /* 14nm Atom "Airmont"                   */
			
 
				-		if (idx == PERF_CSTATE_CORE_C1_RES ||
			
 
				-		    idx == PERF_CSTATE_CORE_C6_RES)
			
 
				-			return true;
			
 
				-		break;
			
 
				-	}
			
 
				-
			
 
				-	return false;
			
 
				-}
			
 
				-
			
 
				 PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
			
 
				 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
			
 
				 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
			
 
				 PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
			
 
				 
			
 
				 static struct perf_cstate_msr core_msr[] = {
			
 
				-	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&evattr_cstate_core_c1,	test_core, },
			
 
				-	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&evattr_cstate_core_c3, test_core, },
			
 
				-	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&evattr_cstate_core_c6, test_core, },
			
 
				-	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&evattr_cstate_core_c7,	test_core, },
			
 
				+	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&evattr_cstate_core_c1 },
			
 
				+	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&evattr_cstate_core_c3 },
			
 
				+	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&evattr_cstate_core_c6 },
			
 
				+	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&evattr_cstate_core_c7 },
			
 
				 };
			
 
				 
			
 
				 static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
			
@@ -234,18 +188,11 @@ static const struct attribute_group *core_attr_groups[] = {
 
				 	NULL,
			
 
				 };
			
 
				 
			
 
				-/* cstate_core PMU end */
			
 
				-
			
 
				-
			
 
				 /* cstate_pkg PMU */
			
 
				-
			
 
				 static struct pmu cstate_pkg_pmu;
			
 
				 static bool has_cstate_pkg;
			
 
				 
			
 
				-enum perf_cstate_pkg_id {
			
 
				-	/*
			
 
				-	 * cstate_pkg events
			
 
				-	 */
			
 
				+enum perf_cstate_pkg_events {
			
 
				 	PERF_CSTATE_PKG_C2_RES = 0,
			
 
				 	PERF_CSTATE_PKG_C3_RES,
			
 
				 	PERF_CSTATE_PKG_C6_RES,
			
@@ -257,69 +204,6 @@ enum perf_cstate_pkg_id {
 
				 	PERF_CSTATE_PKG_EVENT_MAX,
			
 
				 };
			
 
				 
			
 
				-bool test_pkg(int idx)
			
 
				-{
			
 
				-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
			
 
				-	    boot_cpu_data.x86 != 6)
			
 
				-		return false;
			
 
				-
			
 
				-	switch (boot_cpu_data.x86_model) {
			
 
				-	case 30: /* 45nm Nehalem    */
			
 
				-	case 26: /* 45nm Nehalem-EP */
			
 
				-	case 46: /* 45nm Nehalem-EX */
			
 
				-
			
 
				-	case 37: /* 32nm Westmere    */
			
 
				-	case 44: /* 32nm Westmere-EP */
			
 
				-	case 47: /* 32nm Westmere-EX */
			
 
				-		if (idx == PERF_CSTATE_CORE_C3_RES ||
			
 
				-		    idx == PERF_CSTATE_CORE_C6_RES ||
			
 
				-		    idx == PERF_CSTATE_CORE_C7_RES)
			
 
				-			return true;
			
 
				-		break;
			
 
				-	case 42: /* 32nm SandyBridge         */
			
 
				-	case 45: /* 32nm SandyBridge-E/EN/EP */
			
 
				-
			
 
				-	case 58: /* 22nm IvyBridge       */
			
 
				-	case 62: /* 22nm IvyBridge-EP/EX */
			
 
				-
			
 
				-	case 60: /* 22nm Haswell Core */
			
 
				-	case 63: /* 22nm Haswell Server */
			
 
				-	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
			
 
				-
			
 
				-	case 61: /* 14nm Broadwell Core-M */
			
 
				-	case 86: /* 14nm Broadwell Xeon D */
			
 
				-	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
			
 
				-	case 79: /* 14nm Broadwell Server */
			
 
				-
			
 
				-	case 78: /* 14nm Skylake Mobile */
			
 
				-	case 94: /* 14nm Skylake Desktop */
			
 
				-		if (idx == PERF_CSTATE_PKG_C2_RES ||
			
 
				-		    idx == PERF_CSTATE_PKG_C3_RES ||
			
 
				-		    idx == PERF_CSTATE_PKG_C6_RES ||
			
 
				-		    idx == PERF_CSTATE_PKG_C7_RES)
			
 
				-			return true;
			
 
				-		break;
			
 
				-	case 55: /* 22nm Atom "Silvermont"                */
			
 
				-	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
			
 
				-	case 76: /* 14nm Atom "Airmont"                   */
			
 
				-		if (idx == PERF_CSTATE_CORE_C6_RES)
			
 
				-			return true;
			
 
				-		break;
			
 
				-	case 69: /* 22nm Haswell ULT */
			
 
				-		if (idx == PERF_CSTATE_PKG_C2_RES ||
			
 
				-		    idx == PERF_CSTATE_PKG_C3_RES ||
			
 
				-		    idx == PERF_CSTATE_PKG_C6_RES ||
			
 
				-		    idx == PERF_CSTATE_PKG_C7_RES ||
			
 
				-		    idx == PERF_CSTATE_PKG_C8_RES ||
			
 
				-		    idx == PERF_CSTATE_PKG_C9_RES ||
			
 
				-		    idx == PERF_CSTATE_PKG_C10_RES)
			
 
				-			return true;
			
 
				-		break;
			
 
				-	}
			
 
				-
			
 
				-	return false;
			
 
				-}
			
 
				-
			
 
				 PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
			
 
				 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
			
 
				 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
			
@@ -329,13 +213,13 @@ PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
 
				 PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
			
 
				 
			
 
				 static struct perf_cstate_msr pkg_msr[] = {
			
 
				-	[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,	&evattr_cstate_pkg_c2,	test_pkg, },
			
 
				-	[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,	&evattr_cstate_pkg_c3,	test_pkg, },
			
 
				-	[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,	&evattr_cstate_pkg_c6,	test_pkg, },
			
 
				-	[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,	&evattr_cstate_pkg_c7,	test_pkg, },
			
 
				-	[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,	&evattr_cstate_pkg_c8,	test_pkg, },
			
 
				-	[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,	&evattr_cstate_pkg_c9,	test_pkg, },
			
 
				-	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&evattr_cstate_pkg_c10,	test_pkg, },
			
 
				+	[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,	&evattr_cstate_pkg_c2 },
			
 
				+	[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,	&evattr_cstate_pkg_c3 },
			
 
				+	[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,	&evattr_cstate_pkg_c6 },
			
 
				+	[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,	&evattr_cstate_pkg_c7 },
			
 
				+	[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,	&evattr_cstate_pkg_c8 },
			
 
				+	[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,	&evattr_cstate_pkg_c9 },
			
 
				+	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&evattr_cstate_pkg_c10 },
			
 
				 };
			
 
				 
			
 
				 static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
			
@@ -366,8 +250,6 @@ static const struct attribute_group *pkg_attr_groups[] = {
 
				 	NULL,
			
 
				 };
			
 
				 
			
 
				-/* cstate_pkg PMU end*/
			
 
				-
			
 
				 static ssize_t cstate_get_attr_cpumask(struct device *dev,
			
 
				 				       struct device_attribute *attr,
			
 
				 				       char *buf)
			
@@ -385,7 +267,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
 
				 static int cstate_pmu_event_init(struct perf_event *event)
			
 
				 {
			
 
				 	u64 cfg = event->attr.config;
			
 
				-	int ret = 0;
			
 
				+	int cpu;
			
 
				 
			
 
				 	if (event->attr.type != event->pmu->type)
			
 
				 		return -ENOENT;
			
@@ -400,26 +282,36 @@ static int cstate_pmu_event_init(struct perf_event *event)
 
				 	    event->attr.sample_period) /* no sampling */
			
 
				 		return -EINVAL;
			
 
				 
			
 
				+	if (event->cpu < 0)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				 	if (event->pmu == &cstate_core_pmu) {
			
 
				 		if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
			
 
				 			return -EINVAL;
			
 
				 		if (!core_msr[cfg].attr)
			
 
				 			return -EINVAL;
			
 
				 		event->hw.event_base = core_msr[cfg].msr;
			
 
				+		cpu = cpumask_any_and(&cstate_core_cpu_mask,
			
 
				+				      topology_sibling_cpumask(event->cpu));
			
 
				 	} else if (event->pmu == &cstate_pkg_pmu) {
			
 
				 		if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
			
 
				 			return -EINVAL;
			
 
				 		if (!pkg_msr[cfg].attr)
			
 
				 			return -EINVAL;
			
 
				 		event->hw.event_base = pkg_msr[cfg].msr;
			
 
				-	} else
			
 
				+		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
			
 
				+				      topology_core_cpumask(event->cpu));
			
 
				+	} else {
			
 
				 		return -ENOENT;
			
 
				+	}
			
 
				+
			
 
				+	if (cpu >= nr_cpu_ids)
			
 
				+		return -ENODEV;
			
 
				 
			
 
				-	/* must be done before validate_group */
			
 
				+	event->cpu = cpu;
			
 
				 	event->hw.config = cfg;
			
 
				 	event->hw.idx = -1;
			
 
				-
			
 
				-	return ret;
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 static inline u64 cstate_pmu_read_counter(struct perf_event *event)
			
@@ -469,172 +361,91 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Check if exiting cpu is the designated reader. If so migrate the
			
 
				+ * events when there is a valid target available
			
 
				+ */
			
 
				 static void cstate_cpu_exit(int cpu)
			
 
				 {
			
 
				-	int i, id, target;
			
 
				+	unsigned int target;
			
 
				 
			
 
				-	/* cpu exit for cstate core */
			
 
				-	if (has_cstate_core) {
			
 
				-		id = topology_core_id(cpu);
			
 
				-		target = -1;
			
 
				-
			
 
				-		for_each_online_cpu(i) {
			
 
				-			if (i == cpu)
			
 
				-				continue;
			
 
				-			if (id == topology_core_id(i)) {
			
 
				-				target = i;
			
 
				-				break;
			
 
				-			}
			
 
				-		}
			
 
				-		if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
			
 
				+	if (has_cstate_core &&
			
 
				+	    cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
			
 
				+
			
 
				+		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
			
 
				+		/* Migrate events if there is a valid target */
			
 
				+		if (target < nr_cpu_ids) {
			
 
				 			cpumask_set_cpu(target, &cstate_core_cpu_mask);
			
 
				-		WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
			
 
				-		if (target >= 0)
			
 
				 			perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				-	/* cpu exit for cstate pkg */
			
 
				-	if (has_cstate_pkg) {
			
 
				-		id = topology_physical_package_id(cpu);
			
 
				-		target = -1;
			
 
				-
			
 
				-		for_each_online_cpu(i) {
			
 
				-			if (i == cpu)
			
 
				-				continue;
			
 
				-			if (id == topology_physical_package_id(i)) {
			
 
				-				target = i;
			
 
				-				break;
			
 
				-			}
			
 
				-		}
			
 
				-		if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
			
 
				+	if (has_cstate_pkg &&
			
 
				+	    cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
			
 
				+
			
 
				+		target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
			
 
				+		/* Migrate events if there is a valid target */
			
 
				+		if (target < nr_cpu_ids) {
			
 
				 			cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
			
 
				-		WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
			
 
				-		if (target >= 0)
			
 
				 			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
			
 
				+		}
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				 static void cstate_cpu_init(int cpu)
			
 
				 {
			
 
				-	int i, id;
			
 
				+	unsigned int target;
			
 
				 
			
 
				-	/* cpu init for cstate core */
			
 
				-	if (has_cstate_core) {
			
 
				-		id = topology_core_id(cpu);
			
 
				-		for_each_cpu(i, &cstate_core_cpu_mask) {
			
 
				-			if (id == topology_core_id(i))
			
 
				-				break;
			
 
				-		}
			
 
				-		if (i >= nr_cpu_ids)
			
 
				-			cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
			
 
				-	}
			
 
				+	/*
			
 
				+	 * If this is the first online thread of that core, set it in
			
 
				+	 * the core cpu mask as the designated reader.
			
 
				+	 */
			
 
				+	target = cpumask_any_and(&cstate_core_cpu_mask,
			
 
				+				 topology_sibling_cpumask(cpu));
			
 
				 
			
 
				-	/* cpu init for cstate pkg */
			
 
				-	if (has_cstate_pkg) {
			
 
				-		id = topology_physical_package_id(cpu);
			
 
				-		for_each_cpu(i, &cstate_pkg_cpu_mask) {
			
 
				-			if (id == topology_physical_package_id(i))
			
 
				-				break;
			
 
				-		}
			
 
				-		if (i >= nr_cpu_ids)
			
 
				-			cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
			
 
				-	}
			
 
				+	if (has_cstate_core && target >= nr_cpu_ids)
			
 
				+		cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
			
 
				+
			
 
				+	/*
			
 
				+	 * If this is the first online thread of that package, set it
			
 
				+	 * in the package cpu mask as the designated reader.
			
 
				+	 */
			
 
				+	target = cpumask_any_and(&cstate_pkg_cpu_mask,
			
 
				+				 topology_core_cpumask(cpu));
			
 
				+	if (has_cstate_pkg && target >= nr_cpu_ids)
			
 
				+		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
			
 
				 }
			
 
				 
			
 
				 static int cstate_cpu_notifier(struct notifier_block *self,
			
 
				-				  unsigned long action, void *hcpu)
			
 
				+			       unsigned long action, void *hcpu)
			
 
				 {
			
 
				 	unsigned int cpu = (long)hcpu;
			
 
				 
			
 
				 	switch (action & ~CPU_TASKS_FROZEN) {
			
 
				-	case CPU_UP_PREPARE:
			
 
				-		break;
			
 
				 	case CPU_STARTING:
			
 
				 		cstate_cpu_init(cpu);
			
 
				 		break;
			
 
				-	case CPU_UP_CANCELED:
			
 
				-	case CPU_DYING:
			
 
				-		break;
			
 
				-	case CPU_ONLINE:
			
 
				-	case CPU_DEAD:
			
 
				-		break;
			
 
				 	case CPU_DOWN_PREPARE:
			
 
				 		cstate_cpu_exit(cpu);
			
 
				 		break;
			
 
				 	default:
			
 
				 		break;
			
 
				 	}
			
 
				-
			
 
				 	return NOTIFY_OK;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Probe the cstate events and insert the available one into sysfs attrs
			
 
				- * Return false if there is no available events.
			
 
				- */
			
 
				-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
			
 
				-			     struct attribute	**events_attrs,
			
 
				-			     int max_event_nr)
			
 
				-{
			
 
				-	int i, j = 0;
			
 
				-	u64 val;
			
 
				-
			
 
				-	/* Probe the cstate events. */
			
 
				-	for (i = 0; i < max_event_nr; i++) {
			
 
				-		if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
			
 
				-			msr[i].attr = NULL;
			
 
				-	}
			
 
				-
			
 
				-	/* List remaining events in the sysfs attrs. */
			
 
				-	for (i = 0; i < max_event_nr; i++) {
			
 
				-		if (msr[i].attr)
			
 
				-			events_attrs[j++] = &msr[i].attr->attr.attr;
			
 
				-	}
			
 
				-	events_attrs[j] = NULL;
			
 
				-
			
 
				-	return (j > 0) ? true : false;
			
 
				-}
			
 
				-
			
 
				-static int __init cstate_init(void)
			
 
				-{
			
 
				-	/* SLM has different MSR for PKG C6 */
			
 
				-	switch (boot_cpu_data.x86_model) {
			
 
				-	case 55:
			
 
				-	case 76:
			
 
				-	case 77:
			
 
				-		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
			
 
				-	}
			
 
				-
			
 
				-	if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
			
 
				-		has_cstate_core = true;
			
 
				-
			
 
				-	if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
			
 
				-		has_cstate_pkg = true;
			
 
				-
			
 
				-	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
			
 
				-}
			
 
				-
			
 
				-static void __init cstate_cpumask_init(void)
			
 
				-{
			
 
				-	int cpu;
			
 
				-
			
 
				-	cpu_notifier_register_begin();
			
 
				-
			
 
				-	for_each_online_cpu(cpu)
			
 
				-		cstate_cpu_init(cpu);
			
 
				-
			
 
				-	__perf_cpu_notifier(cstate_cpu_notifier);
			
 
				-
			
 
				-	cpu_notifier_register_done();
			
 
				-}
			
 
				+static struct notifier_block cstate_cpu_nb = {
			
 
				+	.notifier_call	= cstate_cpu_notifier,
			
 
				+	.priority       = CPU_PRI_PERF + 1,
			
 
				+};
			
 
				 
			
 
				 static struct pmu cstate_core_pmu = {
			
 
				 	.attr_groups	= core_attr_groups,
			
 
				 	.name		= "cstate_core",
			
 
				 	.task_ctx_nr	= perf_invalid_context,
			
 
				 	.event_init	= cstate_pmu_event_init,
			
 
				-	.add		= cstate_pmu_event_add, /* must have */
			
 
				-	.del		= cstate_pmu_event_del, /* must have */
			
 
				+	.add		= cstate_pmu_event_add,
			
 
				+	.del		= cstate_pmu_event_del,
			
 
				 	.start		= cstate_pmu_event_start,
			
 
				 	.stop		= cstate_pmu_event_stop,
			
 
				 	.read		= cstate_pmu_event_update,
			
@@ -646,49 +457,203 @@ static struct pmu cstate_pkg_pmu = {
 
				 	.name		= "cstate_pkg",
			
 
				 	.task_ctx_nr	= perf_invalid_context,
			
 
				 	.event_init	= cstate_pmu_event_init,
			
 
				-	.add		= cstate_pmu_event_add, /* must have */
			
 
				-	.del		= cstate_pmu_event_del, /* must have */
			
 
				+	.add		= cstate_pmu_event_add,
			
 
				+	.del		= cstate_pmu_event_del,
			
 
				 	.start		= cstate_pmu_event_start,
			
 
				 	.stop		= cstate_pmu_event_stop,
			
 
				 	.read		= cstate_pmu_event_update,
			
 
				 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
			
 
				 };
			
 
				 
			
 
				-static void __init cstate_pmus_register(void)
			
 
				+static const struct cstate_model nhm_cstates __initconst = {
			
 
				+	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
			
 
				+				  BIT(PERF_CSTATE_CORE_C6_RES),
			
 
				+
			
 
				+	.pkg_events		= BIT(PERF_CSTATE_PKG_C3_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C6_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C7_RES),
			
 
				+};
			
 
				+
			
 
				+static const struct cstate_model snb_cstates __initconst = {
			
 
				+	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
			
 
				+				  BIT(PERF_CSTATE_CORE_C6_RES) |
			
 
				+				  BIT(PERF_CSTATE_CORE_C7_RES),
			
 
				+
			
 
				+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C3_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C6_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C7_RES),
			
 
				+};
			
 
				+
			
 
				+static const struct cstate_model hswult_cstates __initconst = {
			
 
				+	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
			
 
				+				  BIT(PERF_CSTATE_CORE_C6_RES) |
			
 
				+				  BIT(PERF_CSTATE_CORE_C7_RES),
			
 
				+
			
 
				+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C3_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C6_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C7_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C8_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C9_RES) |
			
 
				+				  BIT(PERF_CSTATE_PKG_C10_RES),
			
 
				+};
			
 
				+
			
 
				+static const struct cstate_model slm_cstates __initconst = {
			
 
				+	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
			
 
				+				  BIT(PERF_CSTATE_CORE_C6_RES),
			
 
				+
			
 
				+	.pkg_events		= BIT(PERF_CSTATE_PKG_C6_RES),
			
 
				+	.quirks			= SLM_PKG_C6_USE_C7_MSR,
			
 
				+};
			
 
				+
			
 
				+#define X86_CSTATES_MODEL(model, states)				\
			
 
				+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
			
 
				+
			
 
				+static const struct x86_cpu_id intel_cstates_match[] __initconst = {
			
 
				+	X86_CSTATES_MODEL(30, nhm_cstates),    /* 45nm Nehalem              */
			
 
				+	X86_CSTATES_MODEL(26, nhm_cstates),    /* 45nm Nehalem-EP           */
			
 
				+	X86_CSTATES_MODEL(46, nhm_cstates),    /* 45nm Nehalem-EX           */
			
 
				+
			
 
				+	X86_CSTATES_MODEL(37, nhm_cstates),    /* 32nm Westmere             */
			
 
				+	X86_CSTATES_MODEL(44, nhm_cstates),    /* 32nm Westmere-EP          */
			
 
				+	X86_CSTATES_MODEL(47, nhm_cstates),    /* 32nm Westmere-EX          */
			
 
				+
			
 
				+	X86_CSTATES_MODEL(42, snb_cstates),    /* 32nm SandyBridge          */
			
 
				+	X86_CSTATES_MODEL(45, snb_cstates),    /* 32nm SandyBridge-E/EN/EP  */
			
 
				+
			
 
				+	X86_CSTATES_MODEL(58, snb_cstates),    /* 22nm IvyBridge            */
			
 
				+	X86_CSTATES_MODEL(62, snb_cstates),    /* 22nm IvyBridge-EP/EX      */
			
 
				+
			
 
				+	X86_CSTATES_MODEL(60, snb_cstates),    /* 22nm Haswell Core         */
			
 
				+	X86_CSTATES_MODEL(63, snb_cstates),    /* 22nm Haswell Server       */
			
 
				+	X86_CSTATES_MODEL(70, snb_cstates),    /* 22nm Haswell + GT3e       */
			
 
				+
			
 
				+	X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT          */
			
 
				+
			
 
				+	X86_CSTATES_MODEL(55, slm_cstates),    /* 22nm Atom Silvermont      */
			
 
				+	X86_CSTATES_MODEL(77, slm_cstates),    /* 22nm Atom Avoton/Rangely  */
			
 
				+	X86_CSTATES_MODEL(76, slm_cstates),    /* 14nm Atom Airmont         */
			
 
				+
			
 
				+	X86_CSTATES_MODEL(61, snb_cstates),    /* 14nm Broadwell Core-M     */
			
 
				+	X86_CSTATES_MODEL(86, snb_cstates),    /* 14nm Broadwell Xeon D     */
			
 
				+	X86_CSTATES_MODEL(71, snb_cstates),    /* 14nm Broadwell + GT3e     */
			
 
				+	X86_CSTATES_MODEL(79, snb_cstates),    /* 14nm Broadwell Server     */
			
 
				+
			
 
				+	X86_CSTATES_MODEL(78, snb_cstates),    /* 14nm Skylake Mobile       */
			
 
				+	X86_CSTATES_MODEL(94, snb_cstates),    /* 14nm Skylake Desktop      */
			
 
				+	{ },
			
 
				+};
			
 
				+MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
			
 
				+
			
 
				+/*
			
 
				+ * Probe the cstate events and insert the available ones into sysfs attrs
			
 
				+ * Return false if there are no available events.
			
 
				+ */
			
 
				+static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
			
 
				+                                   struct perf_cstate_msr *msr,
			
 
				+                                   struct attribute **attrs)
			
 
				 {
			
 
				-	int err;
			
 
				+	bool found = false;
			
 
				+	unsigned int bit;
			
 
				+	u64 val;
			
 
				+
			
 
				+	for (bit = 0; bit < max; bit++) {
			
 
				+		if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
			
 
				+			*attrs++ = &msr[bit].attr->attr.attr;
			
 
				+			found = true;
			
 
				+		} else {
			
 
				+			msr[bit].attr = NULL;
			
 
				+		}
			
 
				+	}
			
 
				+	*attrs = NULL;
			
 
				+
			
 
				+	return found;
			
 
				+}
			
 
				+
			
 
				+static int __init cstate_probe(const struct cstate_model *cm)
			
 
				+{
			
 
				+	/* SLM has different MSR for PKG C6 */
			
 
				+	if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
			
 
				+		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
			
 
				+
			
 
				+	has_cstate_core = cstate_probe_msr(cm->core_events,
			
 
				+					   PERF_CSTATE_CORE_EVENT_MAX,
			
 
				+					   core_msr, core_events_attrs);
			
 
				+
			
 
				+	has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
			
 
				+					  PERF_CSTATE_PKG_EVENT_MAX,
			
 
				+					  pkg_msr, pkg_events_attrs);
			
 
				+
			
 
				+	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
			
 
				+}
			
 
				+
			
 
				+static inline void cstate_cleanup(void)
			
 
				+{
			
 
				+	if (has_cstate_core)
			
 
				+		perf_pmu_unregister(&cstate_core_pmu);
			
 
				+
			
 
				+	if (has_cstate_pkg)
			
 
				+		perf_pmu_unregister(&cstate_pkg_pmu);
			
 
				+}
			
 
				+
			
 
				+static int __init cstate_init(void)
			
 
				+{
			
 
				+	int cpu, err;
			
 
				+
			
 
				+	cpu_notifier_register_begin();
			
 
				+	for_each_online_cpu(cpu)
			
 
				+		cstate_cpu_init(cpu);
			
 
				 
			
 
				 	if (has_cstate_core) {
			
 
				 		err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
			
 
				-		if (WARN_ON(err))
			
 
				-			pr_info("Failed to register PMU %s error %d\n",
			
 
				-				cstate_core_pmu.name, err);
			
 
				+		if (err) {
			
 
				+			has_cstate_core = false;
			
 
				+			pr_info("Failed to register cstate core pmu\n");
			
 
				+			goto out;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	if (has_cstate_pkg) {
			
 
				 		err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
			
 
				-		if (WARN_ON(err))
			
 
				-			pr_info("Failed to register PMU %s error %d\n",
			
 
				-				cstate_pkg_pmu.name, err);
			
 
				+		if (err) {
			
 
				+			has_cstate_pkg = false;
			
 
				+			pr_info("Failed to register cstate pkg pmu\n");
			
 
				+			cstate_cleanup();
			
 
				+			goto out;
			
 
				+		}
			
 
				 	}
			
 
				+	__register_cpu_notifier(&cstate_cpu_nb);
			
 
				+out:
			
 
				+	cpu_notifier_register_done();
			
 
				+	return err;
			
 
				 }
			
 
				 
			
 
				 static int __init cstate_pmu_init(void)
			
 
				 {
			
 
				+	const struct x86_cpu_id *id;
			
 
				 	int err;
			
 
				 
			
 
				-	if (cpu_has_hypervisor)
			
 
				+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
			
 
				+		return -ENODEV;
			
 
				+
			
 
				+	id = x86_match_cpu(intel_cstates_match);
			
 
				+	if (!id)
			
 
				 		return -ENODEV;
			
 
				 
			
 
				-	err = cstate_init();
			
 
				+	err = cstate_probe((const struct cstate_model *) id->driver_data);
			
 
				 	if (err)
			
 
				 		return err;
			
 
				 
			
 
				-	cstate_cpumask_init();
			
 
				-
			
 
				-	cstate_pmus_register();
			
 
				-
			
 
				-	return 0;
			
 
				+	return cstate_init();
			
 
				 }
			
 
				+module_init(cstate_pmu_init);
			
 
				 
			
 
				-device_initcall(cstate_pmu_init);
			
 
				+static void __exit cstate_pmu_exit(void)
			
 
				+{
			
 
				+	cpu_notifier_register_begin();
			
 
				+	__unregister_cpu_notifier(&cstate_cpu_nb);
			
 
				+	cstate_cleanup();
			
 
				+	cpu_notifier_register_done();
			
 
				+}
			
 
				+module_exit(cstate_pmu_exit);
			
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -645,6 +645,12 @@ struct event_constraint intel_slm_pebs_event_constraints[] = {
 
				 	EVENT_CONSTRAINT_END
			
 
				 };
			
 
				 
			
 
				+struct event_constraint intel_glm_pebs_event_constraints[] = {
			
 
				+	/* Allow all events as PEBS with no flags */
			
 
				+	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
			
 
				+	EVENT_CONSTRAINT_END
			
 
				+};
			
 
				+
			
 
				 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
			
 
				 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
			
 
				 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
			
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -14,7 +14,8 @@ enum {
 
				 	LBR_FORMAT_EIP_FLAGS	= 0x03,
			
 
				 	LBR_FORMAT_EIP_FLAGS2	= 0x04,
			
 
				 	LBR_FORMAT_INFO		= 0x05,
			
 
				-	LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
			
 
				+	LBR_FORMAT_TIME		= 0x06,
			
 
				+	LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_TIME,
			
 
				 };
			
 
				 
			
 
				 static enum {
			
@@ -464,6 +465,16 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 
				 			abort = !!(info & LBR_INFO_ABORT);
			
 
				 			cycles = (info & LBR_INFO_CYCLES);
			
 
				 		}
			
 
				+
			
 
				+		if (lbr_format == LBR_FORMAT_TIME) {
			
 
				+			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			
 
				+			pred = !mis;
			
 
				+			skip = 1;
			
 
				+			cycles = ((to >> 48) & LBR_INFO_CYCLES);
			
 
				+
			
 
				+			to = (u64)((((s64)to) << 16) >> 16);
			
 
				+		}
			
 
				+
			
 
				 		if (lbr_flags & LBR_EIP_FLAGS) {
			
 
				 			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			
 
				 			pred = !mis;
			
@@ -1049,6 +1060,24 @@ void __init intel_pmu_lbr_init_atom(void)
 
				 	pr_cont("8-deep LBR, ");
			
 
				 }
			
 
				 
			
 
				+/* slm */
			
 
				+void __init intel_pmu_lbr_init_slm(void)
			
 
				+{
			
 
				+	x86_pmu.lbr_nr	   = 8;
			
 
				+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
			
 
				+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
			
 
				+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
			
 
				+
			
 
				+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
			
 
				+	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
			
 
				+
			
 
				+	/*
			
 
				+	 * SW branch filter usage:
			
 
				+	 * - compensate for lack of HW filter
			
 
				+	 */
			
 
				+	pr_cont("8-deep LBR, ");
			
 
				+}
			
 
				+
			
 
				 /* Knights Landing */
			
 
				 void intel_pmu_lbr_init_knl(void)
			
 
				 {
			
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -67,11 +67,13 @@ static struct pt_cap_desc {
 
				 	PT_CAP(max_subleaf,		0, CR_EAX, 0xffffffff),
			
 
				 	PT_CAP(cr3_filtering,		0, CR_EBX, BIT(0)),
			
 
				 	PT_CAP(psb_cyc,			0, CR_EBX, BIT(1)),
			
 
				+	PT_CAP(ip_filtering,		0, CR_EBX, BIT(2)),
			
 
				 	PT_CAP(mtc,			0, CR_EBX, BIT(3)),
			
 
				 	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
			
 
				 	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
			
 
				 	PT_CAP(single_range_output,	0, CR_ECX, BIT(2)),
			
 
				 	PT_CAP(payloads_lip,		0, CR_ECX, BIT(31)),
			
 
				+	PT_CAP(num_address_ranges,	1, CR_EAX, 0x3),
			
 
				 	PT_CAP(mtc_periods,		1, CR_EAX, 0xffff0000),
			
 
				 	PT_CAP(cycle_thresholds,	1, CR_EBX, 0xffff),
			
 
				 	PT_CAP(psb_periods,		1, CR_EBX, 0xffff0000),
			
@@ -125,9 +127,46 @@ static struct attribute_group pt_format_group = {
 
				 	.attrs	= pt_formats_attr,
			
 
				 };
			
 
				 
			
 
				+static ssize_t
			
 
				+pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
			
 
				+		    char *page)
			
 
				+{
			
 
				+	struct perf_pmu_events_attr *pmu_attr =
			
 
				+		container_of(attr, struct perf_pmu_events_attr, attr);
			
 
				+
			
 
				+	switch (pmu_attr->id) {
			
 
				+	case 0:
			
 
				+		return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio);
			
 
				+	case 1:
			
 
				+		return sprintf(page, "%u:%u\n",
			
 
				+			       pt_pmu.tsc_art_num,
			
 
				+			       pt_pmu.tsc_art_den);
			
 
				+	default:
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	return -EINVAL;
			
 
				+}
			
 
				+
			
 
				+PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
			
 
				+	       pt_timing_attr_show);
			
 
				+PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
			
 
				+	       pt_timing_attr_show);
			
 
				+
			
 
				+static struct attribute *pt_timing_attr[] = {
			
 
				+	&timing_attr_max_nonturbo_ratio.attr.attr,
			
 
				+	&timing_attr_tsc_art_ratio.attr.attr,
			
 
				+	NULL,
			
 
				+};
			
 
				+
			
 
				+static struct attribute_group pt_timing_group = {
			
 
				+	.attrs	= pt_timing_attr,
			
 
				+};
			
 
				+
			
 
				 static const struct attribute_group *pt_attr_groups[] = {
			
 
				 	&pt_cap_group,
			
 
				 	&pt_format_group,
			
 
				+	&pt_timing_group,
			
 
				 	NULL,
			
 
				 };
			
 
				 
			
@@ -140,6 +179,23 @@ static int __init pt_pmu_hw_init(void)
 
				 	int ret;
			
 
				 	long i;
			
 
				 
			
 
				+	rdmsrl(MSR_PLATFORM_INFO, reg);
			
 
				+	pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
			
 
				+
			
 
				+	/*
			
 
				+	 * if available, read in TSC to core crystal clock ratio,
			
 
				+	 * otherwise, zero for numerator stands for "not enumerated"
			
 
				+	 * as per SDM
			
 
				+	 */
			
 
				+	if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
			
 
				+		u32 eax, ebx, ecx, edx;
			
 
				+
			
 
				+		cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
			
 
				+
			
 
				+		pt_pmu.tsc_art_num = ebx;
			
 
				+		pt_pmu.tsc_art_den = eax;
			
 
				+	}
			
 
				+
			
 
				 	if (boot_cpu_has(X86_FEATURE_VMX)) {
			
 
				 		/*
			
 
				 		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
			
@@ -263,6 +319,75 @@ static bool pt_event_valid(struct perf_event *event)
 
				  * These all are cpu affine and operate on a local PT
			
 
				  */
			
 
				 
			
 
				+/* Address ranges and their corresponding msr configuration registers */
			
 
				+static const struct pt_address_range {
			
 
				+	unsigned long	msr_a;
			
 
				+	unsigned long	msr_b;
			
 
				+	unsigned int	reg_off;
			
 
				+} pt_address_ranges[] = {
			
 
				+	{
			
 
				+		.msr_a	 = MSR_IA32_RTIT_ADDR0_A,
			
 
				+		.msr_b	 = MSR_IA32_RTIT_ADDR0_B,
			
 
				+		.reg_off = RTIT_CTL_ADDR0_OFFSET,
			
 
				+	},
			
 
				+	{
			
 
				+		.msr_a	 = MSR_IA32_RTIT_ADDR1_A,
			
 
				+		.msr_b	 = MSR_IA32_RTIT_ADDR1_B,
			
 
				+		.reg_off = RTIT_CTL_ADDR1_OFFSET,
			
 
				+	},
			
 
				+	{
			
 
				+		.msr_a	 = MSR_IA32_RTIT_ADDR2_A,
			
 
				+		.msr_b	 = MSR_IA32_RTIT_ADDR2_B,
			
 
				+		.reg_off = RTIT_CTL_ADDR2_OFFSET,
			
 
				+	},
			
 
				+	{
			
 
				+		.msr_a	 = MSR_IA32_RTIT_ADDR3_A,
			
 
				+		.msr_b	 = MSR_IA32_RTIT_ADDR3_B,
			
 
				+		.reg_off = RTIT_CTL_ADDR3_OFFSET,
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+static u64 pt_config_filters(struct perf_event *event)
			
 
				+{
			
 
				+	struct pt_filters *filters = event->hw.addr_filters;
			
 
				+	struct pt *pt = this_cpu_ptr(&pt_ctx);
			
 
				+	unsigned int range = 0;
			
 
				+	u64 rtit_ctl = 0;
			
 
				+
			
 
				+	if (!filters)
			
 
				+		return 0;
			
 
				+
			
 
				+	perf_event_addr_filters_sync(event);
			
 
				+
			
 
				+	for (range = 0; range < filters->nr_filters; range++) {
			
 
				+		struct pt_filter *filter = &filters->filter[range];
			
 
				+
			
 
				+		/*
			
 
				+		 * Note, if the range has zero start/end addresses due
			
 
				+		 * to its dynamic object not being loaded yet, we just
			
 
				+		 * go ahead and program zeroed range, which will simply
			
 
				+		 * produce no data. Note^2: if executable code at 0x0
			
 
				+		 * is a concern, we can set up an "invalid" configuration
			
 
				+		 * such as msr_b < msr_a.
			
 
				+		 */
			
 
				+
			
 
				+		/* avoid redundant msr writes */
			
 
				+		if (pt->filters.filter[range].msr_a != filter->msr_a) {
			
 
				+			wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
			
 
				+			pt->filters.filter[range].msr_a = filter->msr_a;
			
 
				+		}
			
 
				+
			
 
				+		if (pt->filters.filter[range].msr_b != filter->msr_b) {
			
 
				+			wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
			
 
				+			pt->filters.filter[range].msr_b = filter->msr_b;
			
 
				+		}
			
 
				+
			
 
				+		rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
			
 
				+	}
			
 
				+
			
 
				+	return rtit_ctl;
			
 
				+}
			
 
				+
			
 
				 static void pt_config(struct perf_event *event)
			
 
				 {
			
 
				 	u64 reg;
			
@@ -272,7 +397,8 @@ static void pt_config(struct perf_event *event)
 
				 		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
			
 
				 	}
			
 
				 
			
 
				-	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
			
 
				+	reg = pt_config_filters(event);
			
 
				+	reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
			
 
				 
			
 
				 	if (!event->attr.exclude_kernel)
			
 
				 		reg |= RTIT_CTL_OS;
			
@@ -921,24 +1047,80 @@ static void pt_buffer_free_aux(void *data)
 
				 	kfree(buf);
			
 
				 }
			
 
				 
			
 
				-/**
			
 
				- * pt_buffer_is_full() - check if the buffer is full
			
 
				- * @buf:	PT buffer.
			
 
				- * @pt:		Per-cpu pt handle.
			
 
				- *
			
 
				- * If the user hasn't read data from the output region that aux_head
			
 
				- * points to, the buffer is considered full: the user needs to read at
			
 
				- * least this region and update aux_tail to point past it.
			
 
				- */
			
 
				-static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
			
 
				+static int pt_addr_filters_init(struct perf_event *event)	/* 0 on success (also when nothing is allocated), -ENOMEM on allocation failure */
			
 
				 {
			
 
				-	if (buf->snapshot)
			
 
				-		return false;
			
 
				+	struct pt_filters *filters;
			
 
				+	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);	/* NOTE(review): -1 presumably means "no node preference" for events not bound to a CPU - confirm */
			
 
				+
			
 
				+	if (!pt_cap_get(PT_CAP_num_address_ranges))
			
 
				+		return 0;	/* no address ranges reported: leave hw.addr_filters untouched */
			
 
				+
			
 
				+	filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
			
 
				+	if (!filters)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	if (event->parent)
			
 
				+		memcpy(filters, event->parent->hw.addr_filters,
			
 
				+		       sizeof(*filters));	/* an event with a parent starts from a copy of the parent's filters */
			
 
				+
			
 
				+	event->hw.addr_filters = filters;	/* released by pt_addr_filters_fini() */
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+/*
			
 
				+ * Free the filter context that pt_addr_filters_init() stored in
			
 
				+ * @event->hw.addr_filters and clear the pointer so it cannot be reused.
			
 
				+ *
			
 
				+ * NOTE(review): the pointer may never have been set when
			
 
				+ * pt_addr_filters_init() returned early; this relies on kfree()
			
 
				+ * accepting NULL - confirm.
			
 
				+ */
			
 
				+static void pt_addr_filters_fini(struct perf_event *event)
			
 
				+{
			
 
				+	kfree(event->hw.addr_filters);
			
 
				+	event->hw.addr_filters = NULL;
			
 
				+}
			
 
				+/*
			
 
				+ * Check a list of perf_addr_filter entries against what PT can program.
			
 
				+ * @filters:	list head; entries are linked through their 'entry' member
			
 
				+ *
			
 
				+ * Returns -EOPNOTSUPP for an entry that is not a range, or once the list
			
 
				+ * holds more entries than pt_cap_get(PT_CAP_num_address_ranges).
			
 
				+ * Returns -EINVAL for an entry without an inode whose offset is not a
			
 
				+ * kernel address according to kernel_ip(). Returns 0 if every entry
			
 
				+ * passes (including for an empty list).
			
 
				+ */
			
 
				+static int pt_event_addr_filters_validate(struct list_head *filters)
			
 
				+{
			
 
				+	struct perf_addr_filter *filter;
			
 
				+	int range = 0;
			
 
				+
			
 
				+	list_for_each_entry(filter, filters, entry) {
			
 
				+		/* PT doesn't support single address triggers */
			
 
				+		if (!filter->range)
			
 
				+			return -EOPNOTSUPP;
			
 
				+
			
 
				+		if (!filter->inode && !kernel_ip(filter->offset))
			
 
				+			return -EINVAL;
			
 
				+
			
 
				+		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
			
 
				+			return -EOPNOTSUPP;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+/*
			
 
				+ * Recompute the MSR values for every address filter of @event.
			
 
				+ *
			
 
				+ * Walks the event's filter list in order; the Nth entry is stored in
			
 
				+ * filters->filter[N]. An entry that has an inode but a zero offs[N] is
			
 
				+ * recorded as an all-zero range. Otherwise the start is the filter
			
 
				+ * offset plus offs[N] and the end is the start plus the filter size.
			
 
				+ * config is set to 1 when filter->filter is non-zero and to 2 otherwise
			
 
				+ * (NOTE(review): presumably the RTIT_CTL ADDRn_CFG encodings - confirm
			
 
				+ * against the SDM). Finally nr_filters is set to the number of entries
			
 
				+ * walked. Does nothing when the event has no filter context.
			
 
				+ */
			
 
				+static void pt_event_addr_filters_sync(struct perf_event *event)
			
 
				+{
			
 
				+	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
			
 
				+	unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
			
 
				+	struct pt_filters *filters = event->hw.addr_filters;
			
 
				+	struct perf_addr_filter *filter;
			
 
				+	int range = 0;
			
 
				+
			
 
				+	if (!filters)
			
 
				+		return;
			
 
				 
			
 
				-	if (local_read(&buf->data_size) >= pt->handle.size)
			
 
				-		return true;
			
 
				+	list_for_each_entry(filter, &head->list, entry) {
			
 
				+		if (filter->inode && !offs[range]) {
			
 
				+			msr_a = msr_b = 0;
			
 
				+		} else {
			
 
				+			/* apply the offset */
			
 
				+			msr_a = filter->offset + offs[range];
			
 
				+			msr_b = filter->size + msr_a;
			
 
				+		}
			
 
				+
			
 
				+		filters->filter[range].msr_a  = msr_a;
			
 
				+		filters->filter[range].msr_b  = msr_b;
			
 
				+		filters->filter[range].config = filter->filter ? 1 : 2;
			
 
				+		range++;
			
 
				+	}
			
 
				 
			
 
				-	return false;
			
 
				+	filters->nr_filters = range;
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -955,7 +1137,7 @@ void intel_pt_interrupt(void)
 
				 	 * after PT has been disabled by pt_event_stop(). Make sure we don't
			
 
				 	 * do anything (particularly, re-enable) for this event here.
			
 
				 	 */
			
 
				-	if (!ACCESS_ONCE(pt->handle_nmi))
			
 
				+	if (!READ_ONCE(pt->handle_nmi))
			
 
				 		return;
			
 
				 
			
 
				 	/*
			
@@ -1040,23 +1222,36 @@ EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
 
				 
			
 
				 static void pt_event_start(struct perf_event *event, int mode)
			
 
				 {
			
 
				+	struct hw_perf_event *hwc = &event->hw;
			
 
				 	struct pt *pt = this_cpu_ptr(&pt_ctx);
			
 
				-	struct pt_buffer *buf = perf_get_aux(&pt->handle);
			
 
				+	struct pt_buffer *buf;
			
 
				 
			
 
				 	if (READ_ONCE(pt->vmx_on))
			
 
				 		return;
			
 
				 
			
 
				-	if (!buf || pt_buffer_is_full(buf, pt)) {
			
 
				-		event->hw.state = PERF_HES_STOPPED;
			
 
				-		return;
			
 
				+	buf = perf_aux_output_begin(&pt->handle, event);
			
 
				+	if (!buf)
			
 
				+		goto fail_stop;
			
 
				+
			
 
				+	pt_buffer_reset_offsets(buf, pt->handle.head);
			
 
				+	if (!buf->snapshot) {
			
 
				+		if (pt_buffer_reset_markers(buf, &pt->handle))
			
 
				+			goto fail_end_stop;
			
 
				 	}
			
 
				 
			
 
				-	ACCESS_ONCE(pt->handle_nmi) = 1;
			
 
				-	event->hw.state = 0;
			
 
				+	WRITE_ONCE(pt->handle_nmi, 1);
			
 
				+	hwc->state = 0;
			
 
				 
			
 
				 	pt_config_buffer(buf->cur->table, buf->cur_idx,
			
 
				 			 buf->output_off);
			
 
				 	pt_config(event);
			
 
				+
			
 
				+	return;
			
 
				+
			
 
				+fail_end_stop:
			
 
				+	perf_aux_output_end(&pt->handle, 0, true);
			
 
				+fail_stop:
			
 
				+	hwc->state = PERF_HES_STOPPED;
			
 
				 }
			
 
				 
			
 
				 static void pt_event_stop(struct perf_event *event, int mode)
			
@@ -1067,7 +1262,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
 
				 	 * Protect against the PMI racing with disabling wrmsr,
			
 
				 	 * see comment in intel_pt_interrupt().
			
 
				 	 */
			
 
				-	ACCESS_ONCE(pt->handle_nmi) = 0;
			
 
				+	WRITE_ONCE(pt->handle_nmi, 0);
			
 
				 
			
 
				 	pt_config_stop(event);
			
 
				 
			
@@ -1090,19 +1285,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
 
				 		pt_handle_status(pt);
			
 
				 
			
 
				 		pt_update_head(pt);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void pt_event_del(struct perf_event *event, int mode)
			
 
				-{
			
 
				-	struct pt *pt = this_cpu_ptr(&pt_ctx);
			
 
				-	struct pt_buffer *buf;
			
 
				 
			
 
				-	pt_event_stop(event, PERF_EF_UPDATE);
			
 
				-
			
 
				-	buf = perf_get_aux(&pt->handle);
			
 
				-
			
 
				-	if (buf) {
			
 
				 		if (buf->snapshot)
			
 
				 			pt->handle.head =
			
 
				 				local_xchg(&buf->data_size,
			
@@ -1112,9 +1295,13 @@ static void pt_event_del(struct perf_event *event, int mode)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static void pt_event_del(struct perf_event *event, int mode)
			
 
				+{
			
 
				+	pt_event_stop(event, PERF_EF_UPDATE);	/* @mode is not consulted: deletion always stops with PERF_EF_UPDATE */
			
 
				+}
			
 
				+
			
 
				 static int pt_event_add(struct perf_event *event, int mode)
			
 
				 {
			
 
				-	struct pt_buffer *buf;
			
 
				 	struct pt *pt = this_cpu_ptr(&pt_ctx);
			
 
				 	struct hw_perf_event *hwc = &event->hw;
			
 
				 	int ret = -EBUSY;
			
@@ -1122,34 +1309,18 @@ static int pt_event_add(struct perf_event *event, int mode)
 
				 	if (pt->handle.event)
			
 
				 		goto fail;
			
 
				 
			
 
				-	buf = perf_aux_output_begin(&pt->handle, event);
			
 
				-	ret = -EINVAL;
			
 
				-	if (!buf)
			
 
				-		goto fail_stop;
			
 
				-
			
 
				-	pt_buffer_reset_offsets(buf, pt->handle.head);
			
 
				-	if (!buf->snapshot) {
			
 
				-		ret = pt_buffer_reset_markers(buf, &pt->handle);
			
 
				-		if (ret)
			
 
				-			goto fail_end_stop;
			
 
				-	}
			
 
				-
			
 
				 	if (mode & PERF_EF_START) {
			
 
				 		pt_event_start(event, 0);
			
 
				-		ret = -EBUSY;
			
 
				+		ret = -EINVAL;
			
 
				 		if (hwc->state == PERF_HES_STOPPED)
			
 
				-			goto fail_end_stop;
			
 
				+			goto fail;
			
 
				 	} else {
			
 
				 		hwc->state = PERF_HES_STOPPED;
			
 
				 	}
			
 
				 
			
 
				-	return 0;
			
 
				-
			
 
				-fail_end_stop:
			
 
				-	perf_aux_output_end(&pt->handle, 0, true);
			
 
				-fail_stop:
			
 
				-	hwc->state = PERF_HES_STOPPED;
			
 
				+	ret = 0;
			
 
				 fail:
			
 
				+
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -1159,6 +1330,7 @@ static void pt_event_read(struct perf_event *event)
 
				 
			
 
				 static void pt_event_destroy(struct perf_event *event)
			
 
				 {
			
 
				+	pt_addr_filters_fini(event);
			
 
				 	x86_del_exclusive(x86_lbr_exclusive_pt);
			
 
				 }
			
 
				 
			
@@ -1173,6 +1345,11 @@ static int pt_event_init(struct perf_event *event)
 
				 	if (x86_add_exclusive(x86_lbr_exclusive_pt))
			
 
				 		return -EBUSY;
			
 
				 
			
 
				+	if (pt_addr_filters_init(event)) {
			
 
				+		x86_del_exclusive(x86_lbr_exclusive_pt);
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				 	event->destroy = pt_event_destroy;
			
 
				 
			
 
				 	return 0;
			
@@ -1192,7 +1369,7 @@ static __init int pt_init(void)
 
				 
			
 
				 	BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
			
 
				 
			
 
				-	if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
			
 
				+	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
			
 
				 		return -ENODEV;
			
 
				 
			
 
				 	get_online_cpus();
			
@@ -1226,16 +1403,21 @@ static __init int pt_init(void)
 
				 			PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
			
 
				 
			
 
				 	pt_pmu.pmu.capabilities	|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
			
 
				-	pt_pmu.pmu.attr_groups	= pt_attr_groups;
			
 
				-	pt_pmu.pmu.task_ctx_nr	= perf_sw_context;
			
 
				-	pt_pmu.pmu.event_init	= pt_event_init;
			
 
				-	pt_pmu.pmu.add		= pt_event_add;
			
 
				-	pt_pmu.pmu.del		= pt_event_del;
			
 
				-	pt_pmu.pmu.start	= pt_event_start;
			
 
				-	pt_pmu.pmu.stop		= pt_event_stop;
			
 
				-	pt_pmu.pmu.read		= pt_event_read;
			
 
				-	pt_pmu.pmu.setup_aux	= pt_buffer_setup_aux;
			
 
				-	pt_pmu.pmu.free_aux	= pt_buffer_free_aux;
			
 
				+	pt_pmu.pmu.attr_groups		 = pt_attr_groups;
			
 
				+	pt_pmu.pmu.task_ctx_nr		 = perf_sw_context;
			
 
				+	pt_pmu.pmu.event_init		 = pt_event_init;
			
 
				+	pt_pmu.pmu.add			 = pt_event_add;
			
 
				+	pt_pmu.pmu.del			 = pt_event_del;
			
 
				+	pt_pmu.pmu.start		 = pt_event_start;
			
 
				+	pt_pmu.pmu.stop			 = pt_event_stop;
			
 
				+	pt_pmu.pmu.read			 = pt_event_read;
			
 
				+	pt_pmu.pmu.setup_aux		 = pt_buffer_setup_aux;
			
 
				+	pt_pmu.pmu.free_aux		 = pt_buffer_free_aux;
			
 
				+	pt_pmu.pmu.addr_filters_sync     = pt_event_addr_filters_sync;
			
 
				+	pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
			
 
				+	pt_pmu.pmu.nr_addr_filters       =
			
 
				+		pt_cap_get(PT_CAP_num_address_ranges);
			
 
				+
			
 
				 	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
			
 
				 
			
 
				 	return ret;
			
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -19,6 +19,40 @@
 
				 #ifndef __INTEL_PT_H__
			
 
				 #define __INTEL_PT_H__
			
 
				 
			
 
				+/*
			
 
				+ * PT MSR bit definitions
			
 
				+ */
			
 
				+#define RTIT_CTL_TRACEEN		BIT(0)
			
 
				+#define RTIT_CTL_CYCLEACC		BIT(1)
			
 
				+#define RTIT_CTL_OS			BIT(2)
			
 
				+#define RTIT_CTL_USR			BIT(3)
			
 
				+#define RTIT_CTL_CR3EN			BIT(7)
			
 
				+#define RTIT_CTL_TOPA			BIT(8)
			
 
				+#define RTIT_CTL_MTC_EN			BIT(9)
			
 
				+#define RTIT_CTL_TSC_EN			BIT(10)
			
 
				+#define RTIT_CTL_DISRETC		BIT(11)
			
 
				+#define RTIT_CTL_BRANCH_EN		BIT(13)
			
 
				+#define RTIT_CTL_MTC_RANGE_OFFSET	14
			
 
				+#define RTIT_CTL_MTC_RANGE		(0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
			
 
				+#define RTIT_CTL_CYC_THRESH_OFFSET	19
			
 
				+#define RTIT_CTL_CYC_THRESH		(0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
			
 
				+#define RTIT_CTL_PSB_FREQ_OFFSET	24
			
 
				+#define RTIT_CTL_PSB_FREQ      		(0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
			
 
				+#define RTIT_CTL_ADDR0_OFFSET		32
			
 
				+#define RTIT_CTL_ADDR0      		(0x0full << RTIT_CTL_ADDR0_OFFSET)
			
 
				+#define RTIT_CTL_ADDR1_OFFSET		36
			
 
				+#define RTIT_CTL_ADDR1      		(0x0full << RTIT_CTL_ADDR1_OFFSET)
			
 
				+#define RTIT_CTL_ADDR2_OFFSET		40
			
 
				+#define RTIT_CTL_ADDR2      		(0x0full << RTIT_CTL_ADDR2_OFFSET)
			
 
				+#define RTIT_CTL_ADDR3_OFFSET		44
			
 
				+#define RTIT_CTL_ADDR3      		(0x0full << RTIT_CTL_ADDR3_OFFSET)
			
 
				+#define RTIT_STATUS_FILTEREN		BIT(0)
			
 
				+#define RTIT_STATUS_CONTEXTEN		BIT(1)
			
 
				+#define RTIT_STATUS_TRIGGEREN		BIT(2)
			
 
				+#define RTIT_STATUS_BUFFOVF		BIT(3)
			
 
				+#define RTIT_STATUS_ERROR		BIT(4)
			
 
				+#define RTIT_STATUS_STOPPED		BIT(5)
			
 
				+
			
 
				 /*
			
 
				  * Single-entry ToPA: when this close to region boundary, switch
			
 
				  * buffers to avoid losing data.
			
@@ -48,15 +82,20 @@ struct topa_entry {
 
				 #define PT_CPUID_LEAVES		2
			
 
				 #define PT_CPUID_REGS_NUM	4 /* number of regsters (eax, ebx, ecx, edx) */
			
 
				 
			
 
				+/* TSC to Core Crystal Clock Ratio */
			
 
				+#define CPUID_TSC_LEAF		0x15
			
 
				+
			
 
				 enum pt_capabilities {
			
 
				 	PT_CAP_max_subleaf = 0,
			
 
				 	PT_CAP_cr3_filtering,
			
 
				 	PT_CAP_psb_cyc,
			
 
				+	PT_CAP_ip_filtering,
			
 
				 	PT_CAP_mtc,
			
 
				 	PT_CAP_topa_output,
			
 
				 	PT_CAP_topa_multiple_entries,
			
 
				 	PT_CAP_single_range_output,
			
 
				 	PT_CAP_payloads_lip,
			
 
				+	PT_CAP_num_address_ranges,
			
 
				 	PT_CAP_mtc_periods,
			
 
				 	PT_CAP_cycle_thresholds,
			
 
				 	PT_CAP_psb_periods,
			
@@ -66,6 +105,9 @@ struct pt_pmu {
 
				 	struct pmu		pmu;
			
 
				 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
			
 
				 	bool			vmx;
			
 
				+	unsigned long		max_nonturbo_ratio;
			
 
				+	unsigned int		tsc_art_num;
			
 
				+	unsigned int		tsc_art_den;
			
 
				 };
			
 
				 
			
 
				 /**
			
@@ -104,14 +146,40 @@ struct pt_buffer {
 
				 	struct topa_entry	*topa_index[0];
			
 
				 };
			
 
				 
			
 
				+#define PT_FILTERS_NUM	4
			
 
				+
			
 
				+/**
			
 
				+ * struct pt_filter - IP range filter configuration
			
 
				+ * @msr_a:	range start, goes to RTIT_ADDRn_A
			
 
				+ * @msr_b:	range end, goes to RTIT_ADDRn_B
			
 
				+ * @config:	4-bit field in RTIT_CTL
			
 
				+ */
			
 
				+struct pt_filter {
			
 
				+	unsigned long	msr_a;
			
 
				+	unsigned long	msr_b;
			
 
				+	unsigned long	config;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+ * struct pt_filters - IP range filtering context
			
 
				+ * @filter:	filters defined for this context
			
 
				+ * @nr_filters:	number of defined filters in the @filter array
			
 
				+ */
			
 
				+struct pt_filters {
			
 
				+	struct pt_filter	filter[PT_FILTERS_NUM];
			
 
				+	unsigned int		nr_filters;
			
 
				+};
			
 
				+
			
 
				 /**
			
 
				  * struct pt - per-cpu pt context
			
 
				  * @handle:	perf output handle
			
 
				+ * @filters:		last configured filters
			
 
				  * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
			
 
				  * @vmx_on:	1 if VMX is ON on this cpu
			
 
				  */
			
 
				 struct pt {
			
 
				 	struct perf_output_handle handle;
			
 
				+	struct pt_filters	filters;
			
 
				 	int			handle_nmi;
			
 
				 	int			vmx_on;
			
 
				 };
			
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -27,10 +27,14 @@
 
				  *	  event: rapl_energy_dram
			
 
				  *    perf code: 0x3
			
 
				  *
			
 
				- * dram counter: consumption of the builtin-gpu domain (client only)
			
 
				+ * gpu counter: consumption of the builtin-gpu domain (client only)
			
 
				  *	  event: rapl_energy_gpu
			
 
				  *    perf code: 0x4
			
 
				  *
			
 
				+ *  psys counter: consumption of the builtin-psys domain (client only)
			
 
				+ *	  event: rapl_energy_psys
			
 
				+ *    perf code: 0x5
			
 
				+ *
			
 
				  * We manage those counters as free running (read-only). They may be
			
 
				  * use simultaneously by other tools, such as turbostat.
			
 
				  *
			
@@ -53,6 +57,8 @@
 
				 #include <asm/cpu_device_id.h>
			
 
				 #include "../perf_event.h"
			
 
				 
			
 
				+MODULE_LICENSE("GPL");
			
 
				+
			
 
				 /*
			
 
				  * RAPL energy status counters
			
 
				  */
			
@@ -64,13 +70,16 @@
 
				 #define INTEL_RAPL_RAM		0x3	/* pseudo-encoding */
			
 
				 #define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
			
 
				 #define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */
			
 
				+#define RAPL_IDX_PSYS_NRG_STAT	4	/* psys */
			
 
				+#define INTEL_RAPL_PSYS		0x5	/* pseudo-encoding */
			
 
				 
			
 
				-#define NR_RAPL_DOMAINS         0x4
			
 
				+#define NR_RAPL_DOMAINS         0x5
			
 
				 static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
			
 
				 	"pp0-core",
			
 
				 	"package",
			
 
				 	"dram",
			
 
				 	"pp1-gpu",
			
 
				+	"psys",
			
 
				 };
			
 
				 
			
 
				 /* Clients have PP0, PKG */
			
@@ -89,6 +98,13 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
 
				 			 1<<RAPL_IDX_RAM_NRG_STAT|\
			
 
				 			 1<<RAPL_IDX_PP1_NRG_STAT)
			
 
				 
			
 
				+/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
			
 
				+#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
			
 
				+			  1<<RAPL_IDX_PKG_NRG_STAT|\
			
 
				+			  1<<RAPL_IDX_RAM_NRG_STAT|\
			
 
				+			  1<<RAPL_IDX_PP1_NRG_STAT|\
			
 
				+			  1<<RAPL_IDX_PSYS_NRG_STAT)
			
 
				+
			
 
				 /* Knights Landing has PKG, RAM */
			
 
				 #define RAPL_IDX_KNL	(1<<RAPL_IDX_PKG_NRG_STAT|\
			
 
				 			 1<<RAPL_IDX_RAM_NRG_STAT)
			
@@ -360,6 +376,10 @@ static int rapl_pmu_event_init(struct perf_event *event)
 
				 		bit = RAPL_IDX_PP1_NRG_STAT;
			
 
				 		msr = MSR_PP1_ENERGY_STATUS;
			
 
				 		break;
			
 
				+	case INTEL_RAPL_PSYS:
			
 
				+		bit = RAPL_IDX_PSYS_NRG_STAT;
			
 
				+		msr = MSR_PLATFORM_ENERGY_STATUS;
			
 
				+		break;
			
 
				 	default:
			
 
				 		return -EINVAL;
			
 
				 	}
			
@@ -414,11 +434,13 @@ RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
 
				 RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
			
 
				 RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
			
 
				 RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
			
 
				+RAPL_EVENT_ATTR_STR(energy-psys,   rapl_psys, "event=0x05");
			
 
				 
			
 
				 RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
			
 
				 RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
			
 
				 RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
			
 
				 RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
			
 
				+RAPL_EVENT_ATTR_STR(energy-psys.unit,   rapl_psys_unit, "Joules");
			
 
				 
			
 
				 /*
			
 
				  * we compute in 0.23 nJ increments regardless of MSR
			
@@ -427,6 +449,7 @@ RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890
 
				 RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
			
 
				 RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
			
 
				 RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
			
 
				+RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, "2.3283064365386962890625e-10");
			
 
				 
			
 
				 static struct attribute *rapl_events_srv_attr[] = {
			
 
				 	EVENT_PTR(rapl_cores),
			
@@ -476,6 +499,27 @@ static struct attribute *rapl_events_hsw_attr[] = {
 
				 	NULL,
			
 
				 };
			
 
				 
			
 
				+static struct attribute *rapl_events_skl_attr[] = {
			
 
				+	EVENT_PTR(rapl_cores),
			
 
				+	EVENT_PTR(rapl_pkg),
			
 
				+	EVENT_PTR(rapl_gpu),
			
 
				+	EVENT_PTR(rapl_ram),
			
 
				+	EVENT_PTR(rapl_psys),
			
 
				+
			
 
				+	EVENT_PTR(rapl_cores_unit),
			
 
				+	EVENT_PTR(rapl_pkg_unit),
			
 
				+	EVENT_PTR(rapl_gpu_unit),
			
 
				+	EVENT_PTR(rapl_ram_unit),
			
 
				+	EVENT_PTR(rapl_psys_unit),
			
 
				+
			
 
				+	EVENT_PTR(rapl_cores_scale),
			
 
				+	EVENT_PTR(rapl_pkg_scale),
			
 
				+	EVENT_PTR(rapl_gpu_scale),
			
 
				+	EVENT_PTR(rapl_ram_scale),
			
 
				+	EVENT_PTR(rapl_psys_scale),
			
 
				+	NULL,
			
 
				+};
			
 
				+
			
 
				 static struct attribute *rapl_events_knl_attr[] = {
			
 
				 	EVENT_PTR(rapl_pkg),
			
 
				 	EVENT_PTR(rapl_ram),
			
@@ -592,6 +636,11 @@ static int rapl_cpu_notifier(struct notifier_block *self,
 
				 	return NOTIFY_OK;
			
 
				 }
			
 
				 
			
 
				+static struct notifier_block rapl_cpu_nb = {
			
 
				+	.notifier_call	= rapl_cpu_notifier,
			
 
				+	.priority       = CPU_PRI_PERF + 1,	/* NOTE(review): presumably chosen so this runs ahead of the CPU_PRI_PERF notifier - confirm */
			
 
				+};
			
 
				+
			
 
				 static int rapl_check_hw_unit(bool apply_quirk)
			
 
				 {
			
 
				 	u64 msr_rapl_power_unit_bits;
			
@@ -660,7 +709,7 @@ static int __init rapl_prepare_cpus(void)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static void __init cleanup_rapl_pmus(void)
			
 
				+static void cleanup_rapl_pmus(void)
			
 
				 {
			
 
				 	int i;
			
 
				 
			
@@ -691,52 +740,92 @@ static int __init init_rapl_pmus(void)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+#define X86_RAPL_MODEL_MATCH(model, init)	\
			
 
				+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
			
 
				+
			
 
				+struct intel_rapl_init_fun {
			
 
				+	bool apply_quirk;		/* handed to rapl_check_hw_unit() by rapl_pmu_init() */
			
 
				+	int cntr_mask;			/* copied into rapl_cntr_mask */
			
 
				+	struct attribute **attrs;	/* installed as rapl_pmu_events_group.attrs */
			
 
				+};
			
 
				+
			
 
				+static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
			
 
				+	.apply_quirk = false,
			
 
				+	.cntr_mask = RAPL_IDX_CLN,
			
 
				+	.attrs = rapl_events_cln_attr,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
			
 
				+	.apply_quirk = true,
			
 
				+	.cntr_mask = RAPL_IDX_SRV,
			
 
				+	.attrs = rapl_events_srv_attr,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
			
 
				+	.apply_quirk = false,
			
 
				+	.cntr_mask = RAPL_IDX_HSW,
			
 
				+	.attrs = rapl_events_hsw_attr,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
			
 
				+	.apply_quirk = false,
			
 
				+	.cntr_mask = RAPL_IDX_SRV,
			
 
				+	.attrs = rapl_events_srv_attr,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
			
 
				+	.apply_quirk = true,
			
 
				+	.cntr_mask = RAPL_IDX_KNL,
			
 
				+	.attrs = rapl_events_knl_attr,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
			
 
				+	.apply_quirk = false,
			
 
				+	.cntr_mask = RAPL_IDX_SKL_CLN,
			
 
				+	.attrs = rapl_events_skl_attr,
			
 
				+};
			
 
				+
			
 
				 static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
			
 
				-	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
			
 
				-	[1] = {},
			
 
				+	X86_RAPL_MODEL_MATCH(42, snb_rapl_init),	/* Sandy Bridge */
			
 
				+	X86_RAPL_MODEL_MATCH(45, snbep_rapl_init),	/* Sandy Bridge-EP */
			
 
				+
			
 
				+	X86_RAPL_MODEL_MATCH(58, snb_rapl_init),	/* Ivy Bridge */
			
 
				+	X86_RAPL_MODEL_MATCH(62, snbep_rapl_init),	/* IvyTown */
			
 
				+
			
 
				+	X86_RAPL_MODEL_MATCH(60, hsw_rapl_init),	/* Haswell */
			
 
				+	X86_RAPL_MODEL_MATCH(63, hsx_rapl_init),	/* Haswell-Server */
			
 
				+	X86_RAPL_MODEL_MATCH(69, hsw_rapl_init),	/* Haswell-Celeron */
			
 
				+	X86_RAPL_MODEL_MATCH(70, hsw_rapl_init),	/* Haswell GT3e */
			
 
				+
			
 
				+	X86_RAPL_MODEL_MATCH(61, hsw_rapl_init),	/* Broadwell */
			
 
				+	X86_RAPL_MODEL_MATCH(71, hsw_rapl_init),	/* Broadwell-H */
			
 
				+	X86_RAPL_MODEL_MATCH(79, hsx_rapl_init),	/* Broadwell-Server */
			
 
				+	X86_RAPL_MODEL_MATCH(86, hsx_rapl_init),	/* Broadwell Xeon D */
			
 
				+
			
 
				+	X86_RAPL_MODEL_MATCH(87, knl_rapl_init),	/* Knights Landing */
			
 
				+
			
 
				+	X86_RAPL_MODEL_MATCH(78, skl_rapl_init),	/* Skylake */
			
 
				+	X86_RAPL_MODEL_MATCH(94, skl_rapl_init),	/* Skylake H/S */
			
 
				+	{},
			
 
				 };
			
 
				 
			
 
				+MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
			
 
				+
			
 
				 static int __init rapl_pmu_init(void)
			
 
				 {
			
 
				-	bool apply_quirk = false;
			
 
				+	const struct x86_cpu_id *id;
			
 
				+	struct intel_rapl_init_fun *rapl_init;
			
 
				+	bool apply_quirk;
			
 
				 	int ret;
			
 
				 
			
 
				-	if (!x86_match_cpu(rapl_cpu_match))
			
 
				+	id = x86_match_cpu(rapl_cpu_match);
			
 
				+	if (!id)
			
 
				 		return -ENODEV;
			
 
				 
			
 
				-	switch (boot_cpu_data.x86_model) {
			
 
				-	case 42: /* Sandy Bridge */
			
 
				-	case 58: /* Ivy Bridge */
			
 
				-		rapl_cntr_mask = RAPL_IDX_CLN;
			
 
				-		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
			
 
				-		break;
			
 
				-	case 63: /* Haswell-Server */
			
 
				-	case 79: /* Broadwell-Server */
			
 
				-		apply_quirk = true;
			
 
				-		rapl_cntr_mask = RAPL_IDX_SRV;
			
 
				-		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
			
 
				-		break;
			
 
				-	case 60: /* Haswell */
			
 
				-	case 69: /* Haswell-Celeron */
			
 
				-	case 70: /* Haswell GT3e */
			
 
				-	case 61: /* Broadwell */
			
 
				-	case 71: /* Broadwell-H */
			
 
				-		rapl_cntr_mask = RAPL_IDX_HSW;
			
 
				-		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
			
 
				-		break;
			
 
				-	case 45: /* Sandy Bridge-EP */
			
 
				-	case 62: /* IvyTown */
			
 
				-		rapl_cntr_mask = RAPL_IDX_SRV;
			
 
				-		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
			
 
				-		break;
			
 
				-	case 87: /* Knights Landing */
			
 
				-		apply_quirk = true;
			
 
				-		rapl_cntr_mask = RAPL_IDX_KNL;
			
 
				-		rapl_pmu_events_group.attrs = rapl_events_knl_attr;
			
 
				-		break;
			
 
				-	default:
			
 
				-		return -ENODEV;
			
 
				-	}
			
 
				+	rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
			
 
				+	apply_quirk = rapl_init->apply_quirk;
			
 
				+	rapl_cntr_mask = rapl_init->cntr_mask;
			
 
				+	rapl_pmu_events_group.attrs = rapl_init->attrs;
			
 
				 
			
 
				 	ret = rapl_check_hw_unit(apply_quirk);
			
 
				 	if (ret)
			
@@ -756,7 +845,7 @@ static int __init rapl_pmu_init(void)
 
				 	if (ret)
			
 
				 		goto out;
			
 
				 
			
 
				-	__perf_cpu_notifier(rapl_cpu_notifier);
			
 
				+	__register_cpu_notifier(&rapl_cpu_nb);
			
 
				 	cpu_notifier_register_done();
			
 
				 	rapl_advertise();
			
 
				 	return 0;
			
@@ -767,4 +856,14 @@ static int __init rapl_pmu_init(void)
 
				 	cpu_notifier_register_done();
			
 
				 	return ret;
			
 
				 }
			
 
				-device_initcall(rapl_pmu_init);
			
 
				+module_init(rapl_pmu_init);
			
 
				+/*
			
 
				+ * Module exit handler, registered through module_exit() below.
			
 
				+ *
			
 
				+ * Between cpu_notifier_register_begin() and cpu_notifier_register_done()
			
 
				+ * it removes the rapl_cpu_nb notifier, unregisters rapl_pmus->pmu from
			
 
				+ * perf, and then calls cleanup_rapl_pmus().
			
 
				+ */
			
 
				+static void __exit intel_rapl_exit(void)
			
 
				+{
			
 
				+	cpu_notifier_register_begin();
			
 
				+	__unregister_cpu_notifier(&rapl_cpu_nb);
			
 
				+	perf_pmu_unregister(&rapl_pmus->pmu);
			
 
				+	cleanup_rapl_pmus();
			
 
				+	cpu_notifier_register_done();
			
 
				+}
			
 
				+module_exit(intel_rapl_exit);
			
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,3 +1,4 @@
 
				+#include <asm/cpu_device_id.h>
			
 
				 #include "uncore.h"
			
 
				 
			
 
				 static struct intel_uncore_type *empty_uncore[] = { NULL, };
			
@@ -21,6 +22,8 @@ static struct event_constraint uncore_constraint_fixed =
 
				 struct event_constraint uncore_constraint_empty =
			
 
				 	EVENT_CONSTRAINT(0, 0, 0);
			
 
				 
			
 
				+MODULE_LICENSE("GPL");
			
 
				+
			
 
				 static int uncore_pcibus_to_physid(struct pci_bus *bus)
			
 
				 {
			
 
				 	struct pci2phy_map *map;
			
@@ -754,7 +757,7 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
 
				 	pmu->registered = false;
			
 
				 }
			
 
				 
			
 
				-static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
			
 
				+static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
			
 
				 {
			
 
				 	struct intel_uncore_pmu *pmu = type->pmus;
			
 
				 	struct intel_uncore_box *box;
			
@@ -770,7 +773,7 @@ static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void __init uncore_exit_boxes(void *dummy)
			
 
				+static void uncore_exit_boxes(void *dummy)
			
 
				 {
			
 
				 	struct intel_uncore_type **types;
			
 
				 
			
@@ -787,7 +790,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
 
				 	kfree(pmu->boxes);
			
 
				 }
			
 
				 
			
 
				-static void __init uncore_type_exit(struct intel_uncore_type *type)
			
 
				+static void uncore_type_exit(struct intel_uncore_type *type)
			
 
				 {
			
 
				 	struct intel_uncore_pmu *pmu = type->pmus;
			
 
				 	int i;
			
@@ -804,7 +807,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
 
				 	type->events_group = NULL;
			
 
				 }
			
 
				 
			
 
				-static void __init uncore_types_exit(struct intel_uncore_type **types)
			
 
				+static void uncore_types_exit(struct intel_uncore_type **types)
			
 
				 {
			
 
				 	for (; *types; types++)
			
 
				 		uncore_type_exit(*types);
			
@@ -989,46 +992,6 @@ static int __init uncore_pci_init(void)
 
				 	size_t size;
			
 
				 	int ret;
			
 
				 
			
 
				-	switch (boot_cpu_data.x86_model) {
			
 
				-	case 45: /* Sandy Bridge-EP */
			
 
				-		ret = snbep_uncore_pci_init();
			
 
				-		break;
			
 
				-	case 62: /* Ivy Bridge-EP */
			
 
				-		ret = ivbep_uncore_pci_init();
			
 
				-		break;
			
 
				-	case 63: /* Haswell-EP */
			
 
				-		ret = hswep_uncore_pci_init();
			
 
				-		break;
			
 
				-	case 79: /* BDX-EP */
			
 
				-	case 86: /* BDX-DE */
			
 
				-		ret = bdx_uncore_pci_init();
			
 
				-		break;
			
 
				-	case 42: /* Sandy Bridge */
			
 
				-		ret = snb_uncore_pci_init();
			
 
				-		break;
			
 
				-	case 58: /* Ivy Bridge */
			
 
				-		ret = ivb_uncore_pci_init();
			
 
				-		break;
			
 
				-	case 60: /* Haswell */
			
 
				-	case 69: /* Haswell Celeron */
			
 
				-		ret = hsw_uncore_pci_init();
			
 
				-		break;
			
 
				-	case 61: /* Broadwell */
			
 
				-		ret = bdw_uncore_pci_init();
			
 
				-		break;
			
 
				-	case 87: /* Knights Landing */
			
 
				-		ret = knl_uncore_pci_init();
			
 
				-		break;
			
 
				-	case 94: /* SkyLake */
			
 
				-		ret = skl_uncore_pci_init();
			
 
				-		break;
			
 
				-	default:
			
 
				-		return -ENODEV;
			
 
				-	}
			
 
				-
			
 
				-	if (ret)
			
 
				-		return ret;
			
 
				-
			
 
				 	size = max_packages * sizeof(struct pci_extra_dev);
			
 
				 	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
			
 
				 	if (!uncore_extra_pci_dev) {
			
@@ -1060,7 +1023,7 @@ static int __init uncore_pci_init(void)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static void __init uncore_pci_exit(void)
			
 
				+static void uncore_pci_exit(void)
			
 
				 {
			
 
				 	if (pcidrv_registered) {
			
 
				 		pcidrv_registered = false;
			
@@ -1287,46 +1250,6 @@ static int __init uncore_cpu_init(void)
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	switch (boot_cpu_data.x86_model) {
			
 
				-	case 26: /* Nehalem */
			
 
				-	case 30:
			
 
				-	case 37: /* Westmere */
			
 
				-	case 44:
			
 
				-		nhm_uncore_cpu_init();
			
 
				-		break;
			
 
				-	case 42: /* Sandy Bridge */
			
 
				-	case 58: /* Ivy Bridge */
			
 
				-	case 60: /* Haswell */
			
 
				-	case 69: /* Haswell */
			
 
				-	case 70: /* Haswell */
			
 
				-	case 61: /* Broadwell */
			
 
				-	case 71: /* Broadwell */
			
 
				-		snb_uncore_cpu_init();
			
 
				-		break;
			
 
				-	case 45: /* Sandy Bridge-EP */
			
 
				-		snbep_uncore_cpu_init();
			
 
				-		break;
			
 
				-	case 46: /* Nehalem-EX */
			
 
				-	case 47: /* Westmere-EX aka. Xeon E7 */
			
 
				-		nhmex_uncore_cpu_init();
			
 
				-		break;
			
 
				-	case 62: /* Ivy Bridge-EP */
			
 
				-		ivbep_uncore_cpu_init();
			
 
				-		break;
			
 
				-	case 63: /* Haswell-EP */
			
 
				-		hswep_uncore_cpu_init();
			
 
				-		break;
			
 
				-	case 79: /* BDX-EP */
			
 
				-	case 86: /* BDX-DE */
			
 
				-		bdx_uncore_cpu_init();
			
 
				-		break;
			
 
				-	case 87: /* Knights Landing */
			
 
				-		knl_uncore_cpu_init();
			
 
				-		break;
			
 
				-	default:
			
 
				-		return -ENODEV;
			
 
				-	}
			
 
				-
			
 
				 	ret = uncore_types_init(uncore_msr_uncores, true);
			
 
				 	if (ret)
			
 
				 		goto err;
			
@@ -1376,11 +1299,105 @@ static int __init uncore_cpumask_init(bool msr)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+#define X86_UNCORE_MODEL_MATCH(model, init)	\
			
 
				+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
			
 
				+
			
 
				+struct intel_uncore_init_fun {
			
 
				+	void	(*cpu_init)(void);
			
 
				+	int	(*pci_init)(void);
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
			
 
				+	.cpu_init = nhm_uncore_cpu_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
			
 
				+	.cpu_init = snb_uncore_cpu_init,
			
 
				+	.pci_init = snb_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
			
 
				+	.cpu_init = snb_uncore_cpu_init,
			
 
				+	.pci_init = ivb_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
			
 
				+	.cpu_init = snb_uncore_cpu_init,
			
 
				+	.pci_init = hsw_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
			
 
				+	.cpu_init = snb_uncore_cpu_init,
			
 
				+	.pci_init = bdw_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
			
 
				+	.cpu_init = snbep_uncore_cpu_init,
			
 
				+	.pci_init = snbep_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
			
 
				+	.cpu_init = nhmex_uncore_cpu_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
			
 
				+	.cpu_init = ivbep_uncore_cpu_init,
			
 
				+	.pci_init = ivbep_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
			
 
				+	.cpu_init = hswep_uncore_cpu_init,
			
 
				+	.pci_init = hswep_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
			
 
				+	.cpu_init = bdx_uncore_cpu_init,
			
 
				+	.pci_init = bdx_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
			
 
				+	.cpu_init = knl_uncore_cpu_init,
			
 
				+	.pci_init = knl_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
			
 
				+	.pci_init = skl_uncore_pci_init,
			
 
				+};
			
 
				+
			
 
				+static const struct x86_cpu_id intel_uncore_match[] __initconst = {
			
 
				+	X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init),	/* Nehalem */
			
 
				+	X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
			
 
				+	X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init),	/* Westmere */
			
 
				+	X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
			
 
				+	X86_UNCORE_MODEL_MATCH(42, snb_uncore_init),	/* Sandy Bridge */
			
 
				+	X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init),	/* Ivy Bridge */
			
 
				+	X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init),	/* Haswell */
			
 
				+	X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init),	/* Haswell Celeron */
			
 
				+	X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init),	/* Haswell */
			
 
				+	X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init),	/* Broadwell */
			
 
				+	X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init),	/* Broadwell */
			
 
				+	X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init),	/* Sandy Bridge-EP */
			
 
				+	X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init),	/* Nehalem-EX */
			
 
				+	X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init),	/* Westmere-EX aka. Xeon E7 */
			
 
				+	X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init),	/* Ivy Bridge-EP */
			
 
				+	X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init),	/* Haswell-EP */
			
 
				+	X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init),	/* BDX-EP */
			
 
				+	X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init),	/* BDX-DE */
			
 
				+	X86_UNCORE_MODEL_MATCH(87, knl_uncore_init),	/* Knights Landing */
			
 
				+	X86_UNCORE_MODEL_MATCH(94, skl_uncore_init),	/* SkyLake */
			
 
				+	{},
			
 
				+};
			
 
				+
			
 
				+MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
			
 
				+
			
 
				 static int __init intel_uncore_init(void)
			
 
				 {
			
 
				-	int pret, cret, ret;
			
 
				+	const struct x86_cpu_id *id;
			
 
				+	struct intel_uncore_init_fun *uncore_init;
			
 
				+	int pret = 0, cret = 0, ret;
			
 
				 
			
 
				-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
			
 
				+	id = x86_match_cpu(intel_uncore_match);
			
 
				+	if (!id)
			
 
				 		return -ENODEV;
			
 
				 
			
 
				 	if (cpu_has_hypervisor)
			
@@ -1388,8 +1405,17 @@ static int __init intel_uncore_init(void)
 
				 
			
 
				 	max_packages = topology_max_packages();
			
 
				 
			
 
				-	pret = uncore_pci_init();
			
 
				-	cret = uncore_cpu_init();
			
 
				+	uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
			
 
				+	if (uncore_init->pci_init) {
			
 
				+		pret = uncore_init->pci_init();
			
 
				+		if (!pret)
			
 
				+			pret = uncore_pci_init();
			
 
				+	}
			
 
				+
			
 
				+	if (uncore_init->cpu_init) {
			
 
				+		uncore_init->cpu_init();
			
 
				+		cret = uncore_cpu_init();
			
 
				+	}
			
 
				 
			
 
				 	if (cret && pret)
			
 
				 		return -ENODEV;
			
@@ -1409,4 +1435,14 @@ static int __init intel_uncore_init(void)
 
				 	cpu_notifier_register_done();
			
 
				 	return ret;
			
 
				 }
			
 
				-device_initcall(intel_uncore_init);
			
 
				+module_init(intel_uncore_init);
			
 
				+
			
 
				+static void __exit intel_uncore_exit(void)
			
 
				+{
			
 
				+	cpu_notifier_register_begin();
			
 
				+	__unregister_cpu_notifier(&uncore_cpu_nb);
			
 
				+	uncore_types_exit(uncore_msr_uncores);
			
 
				+	uncore_pci_exit();
			
 
				+	cpu_notifier_register_done();
			
 
				+}
			
 
				+module_exit(intel_uncore_exit);
			
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -6,6 +6,8 @@ enum perf_msr_id {
 
				 	PERF_MSR_MPERF			= 2,
			
 
				 	PERF_MSR_PPERF			= 3,
			
 
				 	PERF_MSR_SMI			= 4,
			
 
				+	PERF_MSR_PTSC			= 5,
			
 
				+	PERF_MSR_IRPERF			= 6,
			
 
				 
			
 
				 	PERF_MSR_EVENT_MAX,
			
 
				 };
			
@@ -15,6 +17,16 @@ static bool test_aperfmperf(int idx)
 
				 	return boot_cpu_has(X86_FEATURE_APERFMPERF);
			
 
				 }
			
 
				 
			
 
				+static bool test_ptsc(int idx)
			
 
				+{
			
 
				+	return boot_cpu_has(X86_FEATURE_PTSC);
			
 
				+}
			
 
				+
			
 
				+static bool test_irperf(int idx)
			
 
				+{
			
 
				+	return boot_cpu_has(X86_FEATURE_IRPERF);
			
 
				+}
			
 
				+
			
 
				 static bool test_intel(int idx)
			
 
				 {
			
 
				 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
			
@@ -69,18 +81,22 @@ struct perf_msr {
 
				 	bool	(*test)(int idx);
			
 
				 };
			
 
				 
			
 
				-PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
			
 
				-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
			
 
				-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
			
 
				-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
			
 
				-PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
			
 
				+PMU_EVENT_ATTR_STRING(tsc,    evattr_tsc,    "event=0x00");
			
 
				+PMU_EVENT_ATTR_STRING(aperf,  evattr_aperf,  "event=0x01");
			
 
				+PMU_EVENT_ATTR_STRING(mperf,  evattr_mperf,  "event=0x02");
			
 
				+PMU_EVENT_ATTR_STRING(pperf,  evattr_pperf,  "event=0x03");
			
 
				+PMU_EVENT_ATTR_STRING(smi,    evattr_smi,    "event=0x04");
			
 
				+PMU_EVENT_ATTR_STRING(ptsc,   evattr_ptsc,   "event=0x05");
			
 
				+PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06");
			
 
				 
			
 
				 static struct perf_msr msr[] = {
			
 
				-	[PERF_MSR_TSC]   = { 0,			&evattr_tsc,	NULL,		 },
			
 
				-	[PERF_MSR_APERF] = { MSR_IA32_APERF,	&evattr_aperf,	test_aperfmperf, },
			
 
				-	[PERF_MSR_MPERF] = { MSR_IA32_MPERF,	&evattr_mperf,	test_aperfmperf, },
			
 
				-	[PERF_MSR_PPERF] = { MSR_PPERF,		&evattr_pperf,	test_intel,	 },
			
 
				-	[PERF_MSR_SMI]   = { MSR_SMI_COUNT,	&evattr_smi,	test_intel,	 },
			
 
				+	[PERF_MSR_TSC]    = { 0,		&evattr_tsc,	NULL,		 },
			
 
				+	[PERF_MSR_APERF]  = { MSR_IA32_APERF,	&evattr_aperf,	test_aperfmperf, },
			
 
				+	[PERF_MSR_MPERF]  = { MSR_IA32_MPERF,	&evattr_mperf,	test_aperfmperf, },
			
 
				+	[PERF_MSR_PPERF]  = { MSR_PPERF,	&evattr_pperf,	test_intel,	 },
			
 
				+	[PERF_MSR_SMI]    = { MSR_SMI_COUNT,	&evattr_smi,	test_intel,	 },
			
 
				+	[PERF_MSR_PTSC]   = { MSR_F15H_PTSC,	&evattr_ptsc,	test_ptsc,	 },
			
 
				+	[PERF_MSR_IRPERF] = { MSR_F17H_IRPERF,	&evattr_irperf,	test_irperf,	 },
			
 
				 };
			
 
				 
			
 
				 static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
			
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -601,6 +601,7 @@ struct x86_pmu {
 
				 	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
			
 
				 	const int	*lbr_sel_map;		   /* lbr_select mappings */
			
 
				 	bool		lbr_double_abort;	   /* duplicated lbr aborts */
			
 
				+	bool		lbr_pt_coexist;		   /* LBR may coexist with PT */
			
 
				 
			
 
				 	/*
			
 
				 	 * Intel PT/LBR/BTS are exclusive
			
@@ -859,6 +860,8 @@ extern struct event_constraint intel_atom_pebs_event_constraints[];
 
				 
			
 
				 extern struct event_constraint intel_slm_pebs_event_constraints[];
			
 
				 
			
 
				+extern struct event_constraint intel_glm_pebs_event_constraints[];
			
 
				+
			
 
				 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
			
 
				 
			
 
				 extern struct event_constraint intel_westmere_pebs_event_constraints[];
			
@@ -907,6 +910,8 @@ void intel_pmu_lbr_init_nhm(void);
 
				 
			
 
				 void intel_pmu_lbr_init_atom(void);
			
 
				 
			
 
				+void intel_pmu_lbr_init_slm(void);
			
 
				+
			
 
				 void intel_pmu_lbr_init_snb(void);
			
 
				 
			
 
				 void intel_pmu_lbr_init_hsw(void);
			
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -177,6 +177,7 @@
 
				 #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
			
 
				 #define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
			
 
				 #define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
			
 
				+#define X86_FEATURE_PTSC	( 6*32+27) /* performance time-stamp counter */
			
 
				 #define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
			
 
				 #define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
			
 
				 
			
@@ -250,6 +251,7 @@
 
				 
			
 
				 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
			
 
				 #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
			
 
				+#define X86_FEATURE_IRPERF	(13*32+1) /* Instructions Retired Count */
			
 
				 
			
 
				 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
			
 
				 #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
			
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -89,27 +89,16 @@
 
				 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
			
 
				 
			
 
				 #define MSR_IA32_RTIT_CTL		0x00000570
			
 
				-#define RTIT_CTL_TRACEEN		BIT(0)
			
 
				-#define RTIT_CTL_CYCLEACC		BIT(1)
			
 
				-#define RTIT_CTL_OS			BIT(2)
			
 
				-#define RTIT_CTL_USR			BIT(3)
			
 
				-#define RTIT_CTL_CR3EN			BIT(7)
			
 
				-#define RTIT_CTL_TOPA			BIT(8)
			
 
				-#define RTIT_CTL_MTC_EN			BIT(9)
			
 
				-#define RTIT_CTL_TSC_EN			BIT(10)
			
 
				-#define RTIT_CTL_DISRETC		BIT(11)
			
 
				-#define RTIT_CTL_BRANCH_EN		BIT(13)
			
 
				-#define RTIT_CTL_MTC_RANGE_OFFSET	14
			
 
				-#define RTIT_CTL_MTC_RANGE		(0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
			
 
				-#define RTIT_CTL_CYC_THRESH_OFFSET	19
			
 
				-#define RTIT_CTL_CYC_THRESH		(0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
			
 
				-#define RTIT_CTL_PSB_FREQ_OFFSET	24
			
 
				-#define RTIT_CTL_PSB_FREQ      		(0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
			
 
				 #define MSR_IA32_RTIT_STATUS		0x00000571
			
 
				-#define RTIT_STATUS_CONTEXTEN		BIT(1)
			
 
				-#define RTIT_STATUS_TRIGGEREN		BIT(2)
			
 
				-#define RTIT_STATUS_ERROR		BIT(4)
			
 
				-#define RTIT_STATUS_STOPPED		BIT(5)
			
 
				+#define MSR_IA32_RTIT_STATUS		0x00000571
			
 
				+#define MSR_IA32_RTIT_ADDR0_A		0x00000580
			
 
				+#define MSR_IA32_RTIT_ADDR0_B		0x00000581
			
 
				+#define MSR_IA32_RTIT_ADDR1_A		0x00000582
			
 
				+#define MSR_IA32_RTIT_ADDR1_B		0x00000583
			
 
				+#define MSR_IA32_RTIT_ADDR2_A		0x00000584
			
 
				+#define MSR_IA32_RTIT_ADDR2_B		0x00000585
			
 
				+#define MSR_IA32_RTIT_ADDR3_A		0x00000586
			
 
				+#define MSR_IA32_RTIT_ADDR3_B		0x00000587
			
 
				 #define MSR_IA32_RTIT_CR3_MATCH		0x00000572
			
 
				 #define MSR_IA32_RTIT_OUTPUT_BASE	0x00000560
			
 
				 #define MSR_IA32_RTIT_OUTPUT_MASK	0x00000561
			
@@ -205,6 +194,8 @@
 
				 #define MSR_CONFIG_TDP_CONTROL		0x0000064B
			
 
				 #define MSR_TURBO_ACTIVATION_RATIO	0x0000064C
			
 
				 
			
 
				+#define MSR_PLATFORM_ENERGY_STATUS	0x0000064D
			
 
				+
			
 
				 #define MSR_PKG_WEIGHTED_CORE_C0_RES	0x00000658
			
 
				 #define MSR_PKG_ANY_CORE_C0_RES		0x00000659
			
 
				 #define MSR_PKG_ANY_GFXE_C0_RES		0x0000065A
			
@@ -315,6 +306,9 @@
 
				 #define MSR_AMD64_IBSOPDATA4		0xc001103d
			
 
				 #define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
			
 
				 
			
 
				+/* Fam 17h MSRs */
			
 
				+#define MSR_F17H_IRPERF			0xc00000e9
			
 
				+
			
 
				 /* Fam 16h MSRs */
			
 
				 #define MSR_F16H_L2I_PERF_CTL		0xc0010230
			
 
				 #define MSR_F16H_L2I_PERF_CTR		0xc0010231
			
@@ -328,6 +322,7 @@
 
				 #define MSR_F15H_PERF_CTR		0xc0010201
			
 
				 #define MSR_F15H_NB_PERF_CTL		0xc0010240
			
 
				 #define MSR_F15H_NB_PERF_CTR		0xc0010241
			
 
				+#define MSR_F15H_PTSC			0xc0010280
			
 
				 #define MSR_F15H_IC_CFG			0xc0011021
			
 
				 
			
 
				 /* Fam 10h MSRs */
			
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -578,7 +578,7 @@ static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
				 	riprel_post_xol(auprobe, regs);
			
 
				 }
			
 
				 
			
 
				-static struct uprobe_xol_ops default_xol_ops = {
			
 
				+static const struct uprobe_xol_ops default_xol_ops = {
			
 
				 	.pre_xol  = default_pre_xol_op,
			
 
				 	.post_xol = default_post_xol_op,
			
 
				 	.abort	  = default_abort_op,
			
@@ -695,7 +695,7 @@ static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
 
				 		0, insn->immediate.nbytes);
			
 
				 }
			
 
				 
			
 
				-static struct uprobe_xol_ops branch_xol_ops = {
			
 
				+static const struct uprobe_xol_ops branch_xol_ops = {
			
 
				 	.emulate  = branch_emulate_op,
			
 
				 	.post_xol = branch_post_xol_op,
			
 
				 };
			
--- a/arch/xtensa/kernel/perf_event.c
+++ b/arch/xtensa/kernel/perf_event.c
@@ -332,14 +332,14 @@ static int callchain_trace(struct stackframe *frame, void *data)
 
				 void perf_callchain_kernel(struct perf_callchain_entry *entry,
			
 
				 			   struct pt_regs *regs)
			
 
				 {
			
 
				-	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
			
 
				+	xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack,
			
 
				 				callchain_trace, NULL, entry);
			
 
				 }
			
 
				 
			
 
				 void perf_callchain_user(struct perf_callchain_entry *entry,
			
 
				 			 struct pt_regs *regs)
			
 
				 {
			
 
				-	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
			
 
				+	xtensa_backtrace_user(regs, sysctl_perf_event_max_stack,
			
 
				 			      callchain_trace, entry);
			
 
				 }
			
 
				 
			
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -847,6 +847,14 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 
				 	if (!platform_get_irq(cpu_pmu->plat_device, 0))
			
 
				 		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
			
 
				 
			
 
				+	/*
			
 
				+	 * This is a CPU PMU potentially in a heterogeneous configuration (e.g.
			
 
				+	 * big.LITTLE). This is not an uncore PMU, and we have taken ctx
			
 
				+	 * sharing into account (e.g. with our pmu::filter_match callback and
			
 
				+	 * pmu::event_init group validation).
			
 
				+	 */
			
 
				+	cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
			
 
				+
			
 
				 	return 0;
			
 
				 
			
 
				 out_unregister:
			
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -34,6 +34,9 @@
 
				 #include <asm/processor.h>
			
 
				 #include <asm/cpu_device_id.h>
			
 
				 
			
 
				+/* Local defines */
			
 
				+#define MSR_PLATFORM_POWER_LIMIT	0x0000065C
			
 
				+
			
 
				 /* bitmasks for RAPL MSRs, used by primitive access functions */
			
 
				 #define ENERGY_STATUS_MASK      0xffffffff
			
 
				 
			
@@ -86,6 +89,7 @@ enum rapl_domain_type {
 
				 	RAPL_DOMAIN_PP0, /* core power plane */
			
 
				 	RAPL_DOMAIN_PP1, /* graphics uncore */
			
 
				 	RAPL_DOMAIN_DRAM,/* DRAM control_type */
			
 
				+	RAPL_DOMAIN_PLATFORM, /* PSys control_type */
			
 
				 	RAPL_DOMAIN_MAX,
			
 
				 };
			
 
				 
			
@@ -251,9 +255,11 @@ static const char * const rapl_domain_names[] = {
 
				 	"core",
			
 
				 	"uncore",
			
 
				 	"dram",
			
 
				+	"psys",
			
 
				 };
			
 
				 
			
 
				 static struct powercap_control_type *control_type; /* PowerCap Controller */
			
 
				+static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */
			
 
				 
			
 
				 /* caller to ensure CPU hotplug lock is held */
			
 
				 static struct rapl_package *find_package_by_id(int id)
			
@@ -409,6 +415,14 @@ static const struct powercap_zone_ops zone_ops[] = {
 
				 		.set_enable = set_domain_enable,
			
 
				 		.get_enable = get_domain_enable,
			
 
				 	},
			
 
				+	/* RAPL_DOMAIN_PLATFORM */
			
 
				+	{
			
 
				+		.get_energy_uj = get_energy_counter,
			
 
				+		.get_max_energy_range_uj = get_max_energy_counter,
			
 
				+		.release = release_zone,
			
 
				+		.set_enable = set_domain_enable,
			
 
				+		.get_enable = get_domain_enable,
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 static int set_power_limit(struct powercap_zone *power_zone, int id,
			
@@ -1160,6 +1174,13 @@ static int rapl_unregister_powercap(void)
 
				 			powercap_unregister_zone(control_type,
			
 
				 						&rd_package->power_zone);
			
 
				 	}
			
 
				+
			
 
				+	if (platform_rapl_domain) {
			
 
				+		powercap_unregister_zone(control_type,
			
 
				+					 &platform_rapl_domain->power_zone);
			
 
				+		kfree(platform_rapl_domain);
			
 
				+	}
			
 
				+
			
 
				 	powercap_unregister_control_type(control_type);
			
 
				 
			
 
				 	return 0;
			
@@ -1239,6 +1260,47 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static int rapl_register_psys(void)
			
 
				+{
			
 
				+	struct rapl_domain *rd;
			
 
				+	struct powercap_zone *power_zone;
			
 
				+	u64 val;
			
 
				+
			
 
				+	if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_ENERGY_STATUS, &val) || !val)
			
 
				+		return -ENODEV;
			
 
				+
			
 
				+	if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_POWER_LIMIT, &val) || !val)
			
 
				+		return -ENODEV;
			
 
				+
			
 
				+	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
			
 
				+	if (!rd)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM];
			
 
				+	rd->id = RAPL_DOMAIN_PLATFORM;
			
 
				+	rd->msrs[0] = MSR_PLATFORM_POWER_LIMIT;
			
 
				+	rd->msrs[1] = MSR_PLATFORM_ENERGY_STATUS;
			
 
				+	rd->rpl[0].prim_id = PL1_ENABLE;
			
 
				+	rd->rpl[0].name = pl1_name;
			
 
				+	rd->rpl[1].prim_id = PL2_ENABLE;
			
 
				+	rd->rpl[1].name = pl2_name;
			
 
				+	rd->rp = find_package_by_id(0);
			
 
				+
			
 
				+	power_zone = powercap_register_zone(&rd->power_zone, control_type,
			
 
				+					    "psys", NULL,
			
 
				+					    &zone_ops[RAPL_DOMAIN_PLATFORM],
			
 
				+					    2, &constraint_ops);
			
 
				+
			
 
				+	if (IS_ERR(power_zone)) {
			
 
				+		kfree(rd);
			
 
				+		return PTR_ERR(power_zone);
			
 
				+	}
			
 
				+
			
 
				+	platform_rapl_domain = rd;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int rapl_register_powercap(void)
			
 
				 {
			
 
				 	struct rapl_domain *rd;
			
@@ -1255,6 +1317,10 @@ static int rapl_register_powercap(void)
 
				 	list_for_each_entry(rp, &rapl_packages, plist)
			
 
				 		if (rapl_package_register_powercap(rp))
			
 
				 			goto err_cleanup_package;
			
 
				+
			
 
				+	/* Don't bail out if PSys is not supported */
			
 
				+	rapl_register_psys();
			
 
				+
			
 
				 	return ret;
			
 
				 
			
 
				 err_cleanup_package:
			
@@ -1289,6 +1355,9 @@ static int rapl_check_domain(int cpu, int domain)
 
				 	case RAPL_DOMAIN_DRAM:
			
 
				 		msr = MSR_DRAM_ENERGY_STATUS;
			
 
				 		break;
			
 
				+	case RAPL_DOMAIN_PLATFORM:
			
 
				+		/* PSYS(PLATFORM) is not a CPU domain, so avoid printing error */
			
 
				+		return -EINVAL;
			
 
				 	default:
			
 
				 		pr_err("invalid domain id %d\n", domain);
			
 
				 		return -EINVAL;
			
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -58,7 +58,7 @@ struct perf_guest_info_callbacks {
 
				 
			
 
				 struct perf_callchain_entry {
			
 
				 	__u64				nr;
			
 
				-	__u64				ip[PERF_MAX_STACK_DEPTH];
			
 
				+	__u64				ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
			
 
				 };
			
 
				 
			
 
				 struct perf_raw_record {
			
@@ -151,6 +151,15 @@ struct hw_perf_event {
 
				 	 */
			
 
				 	struct task_struct		*target;
			
 
				 
			
 
				+	/*
			
 
				+	 * PMU would store hardware filter configuration
			
 
				+	 * here.
			
 
				+	 */
			
 
				+	void				*addr_filters;
			
 
				+
			
 
				+	/* Last sync'ed generation of filters */
			
 
				+	unsigned long			addr_filters_gen;
			
 
				+
			
 
				 /*
			
 
				  * hw_perf_event::state flags; used to track the PERF_EF_* state.
			
 
				  */
			
@@ -216,6 +225,7 @@ struct perf_event;
 
				 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
			
 
				 #define PERF_PMU_CAP_EXCLUSIVE			0x10
			
 
				 #define PERF_PMU_CAP_ITRACE			0x20
			
 
				+#define PERF_PMU_CAP_HETEROGENEOUS_CPUS		0x40
			
 
				 
			
 
				 /**
			
 
				  * struct pmu - generic performance monitoring unit
			
@@ -240,6 +250,9 @@ struct pmu {
 
				 	int				task_ctx_nr;
			
 
				 	int				hrtimer_interval_ms;
			
 
				 
			
 
				+	/* number of address filters this PMU can do */
			
 
				+	unsigned int			nr_addr_filters;
			
 
				+
			
 
				 	/*
			
 
				 	 * Fully disable/enable this PMU, can be used to protect from the PMI
			
 
				 	 * as well as for lazy/batch writing of the MSRs.
			
@@ -392,12 +405,71 @@ struct pmu {
 
				 	 */
			
 
				 	void (*free_aux)		(void *aux); /* optional */
			
 
				 
			
 
				+	/*
			
 
				+	 * Validate address range filters: make sure the HW supports the
			
 
				+	 * requested configuration and number of filters; return 0 if the
			
 
				+	 * supplied filters are valid, -errno otherwise.
			
 
				+	 *
			
 
				+	 * Runs in the context of the ioctl()ing process and is not serialized
			
 
				+	 * with the rest of the PMU callbacks.
			
 
				+	 */
			
 
				+	int (*addr_filters_validate)	(struct list_head *filters);
			
 
				+					/* optional */
			
 
				+
			
 
				+	/*
			
 
				+	 * Synchronize address range filter configuration:
			
 
				+	 * translate hw-agnostic filters into hardware configuration in
			
 
				+	 * event::hw::addr_filters.
			
 
				+	 *
			
 
				+	 * Runs as a part of filter sync sequence that is done in ->start()
			
 
				+	 * callback by calling perf_event_addr_filters_sync().
			
 
				+	 *
			
 
				+	 * May (and should) traverse event::addr_filters::list, for which its
			
 
				+	 * caller provides necessary serialization.
			
 
				+	 */
			
 
				+	void (*addr_filters_sync)	(struct perf_event *event);
			
 
				+					/* optional */
			
 
				+
			
 
				 	/*
			
 
				 	 * Filter events for PMU-specific reasons.
			
 
				 	 */
			
 
				 	int (*filter_match)		(struct perf_event *event); /* optional */
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+ * struct perf_addr_filter - address range filter definition
			
 
				+ * @entry:	event's filter list linkage
			
 
				+ * @inode:	object file's inode for file-based filters
			
 
				+ * @offset:	filter range offset
			
 
				+ * @size:	filter range size
			
 
				+ * @range:	1: range, 0: address
			
 
				+ * @filter:	1: filter/start, 0: stop
			
 
				+ *
			
 
				+ * This is a hardware-agnostic filter configuration as specified by the user.
			
 
				+ */
			
 
				+struct perf_addr_filter {
			
 
				+	struct list_head	entry;
			
 
				+	struct inode		*inode;
			
 
				+	unsigned long		offset;
			
 
				+	unsigned long		size;
			
 
				+	unsigned int		range	: 1,
			
 
				+				filter	: 1;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+ * struct perf_addr_filters_head - container for address range filters
			
 
				+ * @list:	list of filters for this event
			
 
				+ * @lock:	spinlock that serializes accesses to the @list and event's
			
 
				+ *		(and its children's) filter generations.
			
 
				+ *
			
 
				+ * A child event will use parent's @list (and therefore @lock), so they are
			
 
				+ * bundled together; see perf_event_addr_filters().
			
 
				+ */
			
 
				+struct perf_addr_filters_head {
			
 
				+	struct list_head	list;
			
 
				+	raw_spinlock_t		lock;
			
 
				+};
			
 
				+
			
 
				 /**
			
 
				  * enum perf_event_active_state - the states of a event
			
 
				  */
			
@@ -566,6 +638,12 @@ struct perf_event {
 
				 
			
 
				 	atomic_t			event_limit;
			
 
				 
			
 
				+	/* address range filters */
			
 
				+	struct perf_addr_filters_head	addr_filters;
			
 
				+	/* vma address array for file-based filters */
			
 
				+	unsigned long			*addr_filters_offs;
			
 
				+	unsigned long			addr_filters_gen;
			
 
				+
			
 
				 	void (*destroy)(struct perf_event *);
			
 
				 	struct rcu_head			rcu_head;
			
 
				 
			
@@ -834,9 +912,25 @@ extern int perf_event_overflow(struct perf_event *event,
 
				 				 struct perf_sample_data *data,
			
 
				 				 struct pt_regs *regs);
			
 
				 
			
 
				+extern void perf_event_output_forward(struct perf_event *event,
			
 
				+				     struct perf_sample_data *data,
			
 
				+				     struct pt_regs *regs);
			
 
				+extern void perf_event_output_backward(struct perf_event *event,
			
 
				+				       struct perf_sample_data *data,
			
 
				+				       struct pt_regs *regs);
			
 
				 extern void perf_event_output(struct perf_event *event,
			
 
				-				struct perf_sample_data *data,
			
 
				-				struct pt_regs *regs);
			
 
				+			      struct perf_sample_data *data,
			
 
				+			      struct pt_regs *regs);
			
 
				+
			
 
				+static inline bool
			
 
				+is_default_overflow_handler(struct perf_event *event)
			
 
				+{
			
 
				+	if (likely(event->overflow_handler == perf_event_output_forward))
			
 
				+		return true;
			
 
				+	if (unlikely(event->overflow_handler == perf_event_output_backward))
			
 
				+		return true;
			
 
				+	return false;
			
 
				+}
			
 
				 
			
 
				 extern void
			
 
				 perf_event_header__init_id(struct perf_event_header *header,
			
@@ -977,9 +1071,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 
				 extern int get_callchain_buffers(void);
			
 
				 extern void put_callchain_buffers(void);
			
 
				 
			
 
				+extern int sysctl_perf_event_max_stack;
			
 
				+
			
 
				 static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
			
 
				 {
			
 
				-	if (entry->nr < PERF_MAX_STACK_DEPTH) {
			
 
				+	if (entry->nr < sysctl_perf_event_max_stack) {
			
 
				 		entry->ip[entry->nr++] = ip;
			
 
				 		return 0;
			
 
				 	} else {
			
@@ -1001,6 +1097,8 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 
				 		void __user *buffer, size_t *lenp,
			
 
				 		loff_t *ppos);
			
 
				 
			
 
				+int perf_event_max_stack_handler(struct ctl_table *table, int write,
			
 
				+				 void __user *buffer, size_t *lenp, loff_t *ppos);
			
 
				 
			
 
				 static inline bool perf_paranoid_tracepoint_raw(void)
			
 
				 {
			
@@ -1045,8 +1143,41 @@ static inline bool has_aux(struct perf_event *event)
 
				 	return event->pmu->setup_aux;
			
 
				 }
			
 
				 
			
 
				+static inline bool is_write_backward(struct perf_event *event)
			
 
				+{
			
 
				+	return !!event->attr.write_backward;
			
 
				+}
			
 
				+
			
 
				+static inline bool has_addr_filter(struct perf_event *event)
			
 
				+{
			
 
				+	return event->pmu->nr_addr_filters;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * An inherited event uses parent's filters
			
 
				+ */
			
 
				+static inline struct perf_addr_filters_head *
			
 
				+perf_event_addr_filters(struct perf_event *event)
			
 
				+{
			
 
				+	struct perf_addr_filters_head *ifh = &event->addr_filters;
			
 
				+
			
 
				+	if (event->parent)
			
 
				+		ifh = &event->parent->addr_filters;
			
 
				+
			
 
				+	return ifh;
			
 
				+}
			
 
				+
			
 
				+extern void perf_event_addr_filters_sync(struct perf_event *event);
			
 
				+
			
 
				 extern int perf_output_begin(struct perf_output_handle *handle,
			
 
				 			     struct perf_event *event, unsigned int size);
			
 
				+extern int perf_output_begin_forward(struct perf_output_handle *handle,
			
 
				+				    struct perf_event *event,
			
 
				+				    unsigned int size);
			
 
				+extern int perf_output_begin_backward(struct perf_output_handle *handle,
			
 
				+				      struct perf_event *event,
			
 
				+				      unsigned int size);
			
 
				+
			
 
				 extern void perf_output_end(struct perf_output_handle *handle);
			
 
				 extern unsigned int perf_output_copy(struct perf_output_handle *handle,
			
 
				 			     const void *buf, unsigned int len);
			
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -340,7 +340,8 @@ struct perf_event_attr {
 
				 				comm_exec      :  1, /* flag comm events that are due to an exec */
			
 
				 				use_clockid    :  1, /* use @clockid for time fields */
			
 
				 				context_switch :  1, /* context switch data */
			
 
				-				__reserved_1   : 37;
			
 
				+				write_backward :  1, /* Write ring buffer from end to beginning */
			
 
				+				__reserved_1   : 36;
			
 
				 
			
 
				 	union {
			
 
				 		__u32		wakeup_events;	  /* wakeup every n events */
			
@@ -401,6 +402,7 @@ struct perf_event_attr {
 
				 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
			
 
				 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
			
 
				 #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
			
 
				+#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)
			
 
				 
			
 
				 enum perf_event_ioc_flags {
			
 
				 	PERF_IOC_FLAG_GROUP		= 1U << 0,
			
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -66,7 +66,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 
				 	/* check sanity of attributes */
			
 
				 	if (attr->max_entries == 0 || attr->key_size != 4 ||
			
 
				 	    value_size < 8 || value_size % 8 ||
			
 
				-	    value_size / 8 > PERF_MAX_STACK_DEPTH)
			
 
				+	    value_size / 8 > sysctl_perf_event_max_stack)
			
 
				 		return ERR_PTR(-EINVAL);
			
 
				 
			
 
				 	/* hash table size must be power of 2 */
			
@@ -124,8 +124,8 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
 
				 	struct perf_callchain_entry *trace;
			
 
				 	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
			
 
				 	u32 max_depth = map->value_size / 8;
			
 
				-	/* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */
			
 
				-	u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth;
			
 
				+	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
			
 
				+	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
			
 
				 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
			
 
				 	u32 hash, id, trace_nr, trace_len;
			
 
				 	bool user = flags & BPF_F_USER_STACK;
			
@@ -143,7 +143,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
 
				 		return -EFAULT;
			
 
				 
			
 
				 	/* get_perf_callchain() guarantees that trace->nr >= init_nr
			
 
				-	 * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth
			
 
				+	 * and trace->nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
			
 
				 	 */
			
 
				 	trace_nr = trace->nr - init_nr;
			
 
				 
			
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -18,6 +18,14 @@ struct callchain_cpus_entries {
 
				 	struct perf_callchain_entry	*cpu_entries[0];
			
 
				 };
			
 
				 
			
 
				+int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
			
 
				+
			
 
				+static inline size_t perf_callchain_entry__sizeof(void)
			
 
				+{
			
 
				+	return (sizeof(struct perf_callchain_entry) +
			
 
				+		sizeof(__u64) * sysctl_perf_event_max_stack);
			
 
				+}
			
 
				+
			
 
				 static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
			
 
				 static atomic_t nr_callchain_events;
			
 
				 static DEFINE_MUTEX(callchain_mutex);
			
@@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void)
 
				 	if (!entries)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				-	size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
			
 
				+	size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;
			
 
				 
			
 
				 	for_each_possible_cpu(cpu) {
			
 
				 		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
			
@@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
 
				 
			
 
				 	cpu = smp_processor_id();
			
 
				 
			
 
				-	return &entries->cpu_entries[cpu][*rctx];
			
 
				+	return (((void *)entries->cpu_entries[cpu]) +
			
 
				+		(*rctx * perf_callchain_entry__sizeof()));
			
 
				 }
			
 
				 
			
 
				 static void
			
@@ -215,3 +224,25 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 
				 
			
 
				 	return entry;
			
 
				 }
			
 
				+
			
 
				+int perf_event_max_stack_handler(struct ctl_table *table, int write,
			
 
				+				 void __user *buffer, size_t *lenp, loff_t *ppos)
			
 
				+{
			
 
				+	int new_value = sysctl_perf_event_max_stack, ret;
			
 
				+	struct ctl_table new_table = *table;
			
 
				+
			
 
				+	new_table.data = &new_value;
			
 
				+	ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
			
 
				+	if (ret || !write)
			
 
				+		return ret;
			
 
				+
			
 
				+	mutex_lock(&callchain_mutex);
			
 
				+	if (atomic_read(&nr_callchain_events))
			
 
				+		ret = -EBUSY;
			
 
				+	else
			
 
				+		sysctl_perf_event_max_stack = new_value;
			
 
				+
			
 
				+	mutex_unlock(&callchain_mutex);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -44,6 +44,8 @@
 
				 #include <linux/compat.h>
			
 
				 #include <linux/bpf.h>
			
 
				 #include <linux/filter.h>
			
 
				+#include <linux/namei.h>
			
 
				+#include <linux/parser.h>
			
 
				 
			
 
				 #include "internal.h"
			
 
				 
			
@@ -1927,8 +1929,13 @@ event_sched_in(struct perf_event *event,
 
				 	if (event->state <= PERF_EVENT_STATE_OFF)
			
 
				 		return 0;
			
 
				 
			
 
				-	event->state = PERF_EVENT_STATE_ACTIVE;
			
 
				-	event->oncpu = smp_processor_id();
			
 
				+	WRITE_ONCE(event->oncpu, smp_processor_id());
			
 
				+	/*
			
 
				+	 * Order event::oncpu write to happen before the ACTIVE state
			
 
				+	 * is visible.
			
 
				+	 */
			
 
				+	smp_wmb();
			
 
				+	WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
			
 
				 
			
 
				 	/*
			
 
				 	 * Unthrottle events, since we scheduled we might have missed several
			
@@ -2360,6 +2367,112 @@ void perf_event_enable(struct perf_event *event)
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(perf_event_enable);
			
 
				 
			
 
				+struct stop_event_data {
			
 
				+	struct perf_event	*event;
			
 
				+	unsigned int		restart;
			
 
				+};
			
 
				+
			
 
				+static int __perf_event_stop(void *info)
			
 
				+{
			
 
				+	struct stop_event_data *sd = info;
			
 
				+	struct perf_event *event = sd->event;
			
 
				+
			
 
				+	/* if it's already INACTIVE, do nothing */
			
 
				+	if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
			
 
				+		return 0;
			
 
				+
			
 
				+	/* matches smp_wmb() in event_sched_in() */
			
 
				+	smp_rmb();
			
 
				+
			
 
				+	/*
			
 
				+	 * There is a window with interrupts enabled before we get here,
			
 
				+	 * so we need to check again lest we try to stop another CPU's event.
			
 
				+	 */
			
 
				+	if (READ_ONCE(event->oncpu) != smp_processor_id())
			
 
				+		return -EAGAIN;
			
 
				+
			
 
				+	event->pmu->stop(event, PERF_EF_UPDATE);
			
 
				+
			
 
				+	/*
			
 
				+	 * May race with the actual stop (through perf_pmu_output_stop()),
			
 
				+	 * but it is only used for events with AUX ring buffer, and such
			
 
				+	 * events will refuse to restart because of rb::aux_mmap_count==0,
			
 
				+	 * see comments in perf_aux_output_begin().
			
 
				+	 *
			
 
				+	 * Since this is happening on an event-local CPU, no trace is lost
			
 
				+	 * while restarting.
			
 
				+	 */
			
 
				+	if (sd->restart)
			
 
				+		event->pmu->start(event, PERF_EF_START);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int perf_event_restart(struct perf_event *event)
			
 
				+{
			
 
				+	struct stop_event_data sd = {
			
 
				+		.event		= event,
			
 
				+		.restart	= 1,
			
 
				+	};
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	do {
			
 
				+		if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
			
 
				+			return 0;
			
 
				+
			
 
				+		/* matches smp_wmb() in event_sched_in() */
			
 
				+		smp_rmb();
			
 
				+
			
 
				+		/*
			
 
				+		 * We only want to restart ACTIVE events, so if the event goes
			
 
				+		 * inactive here (event->oncpu==-1), there's nothing more to do;
			
 
				+		 * fall through with ret==-ENXIO.
			
 
				+		 */
			
 
				+		ret = cpu_function_call(READ_ONCE(event->oncpu),
			
 
				+					__perf_event_stop, &sd);
			
 
				+	} while (ret == -EAGAIN);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * In order to contain the raciness and trickiness of the address filter
			
 
				+ * configuration management, filter updates are a two-part process:
			
 
				+ *
			
 
				+ * (p1) when userspace mappings change as a result of (1) or (2) or (3) below,
			
 
				+ *      we update the addresses of corresponding vmas in
			
 
				+ *	event::addr_filters_offs array and bump the event::addr_filters_gen;
			
 
				+ * (p2) when an event is scheduled in (pmu::add), it calls
			
 
				+ *      perf_event_addr_filters_sync() which calls pmu::addr_filters_sync()
			
 
				+ *      if the generation has changed since the previous call.
			
 
				+ *
			
 
				+ * If (p1) happens while the event is active, we restart it to force (p2).
			
 
				+ *
			
 
				+ * (1) perf_addr_filters_apply(): adjusting filters' offsets based on
			
 
				+ *     pre-existing mappings, called once when new filters arrive via SET_FILTER
			
 
				+ *     ioctl;
			
 
				+ * (2) perf_addr_filters_adjust(): adjusting filters' offsets based on newly
			
 
				+ *     registered mapping, called for every new mmap(), with mm::mmap_sem down
			
 
				+ *     for reading;
			
 
				+ * (3) perf_event_addr_filters_exec(): clearing filters' offsets in the process
			
 
				+ *     of exec.
			
 
				+ */
			
 
				+void perf_event_addr_filters_sync(struct perf_event *event)
			
 
				+{
			
 
				+	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
			
 
				+
			
 
				+	if (!has_addr_filter(event))
			
 
				+		return;
			
 
				+
			
 
				+	raw_spin_lock(&ifh->lock);
			
 
				+	if (event->addr_filters_gen != event->hw.addr_filters_gen) {
			
 
				+		event->pmu->addr_filters_sync(event);
			
 
				+		event->hw.addr_filters_gen = event->addr_filters_gen;
			
 
				+	}
			
 
				+	raw_spin_unlock(&ifh->lock);
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
			
 
				+
			
 
				 static int _perf_event_refresh(struct perf_event *event, int refresh)
			
 
				 {
			
 
				 	/*
			
@@ -3209,16 +3322,6 @@ static void perf_event_enable_on_exec(int ctxn)
 
				 		put_ctx(clone_ctx);
			
 
				 }
			
 
				 
			
 
				-void perf_event_exec(void)
			
 
				-{
			
 
				-	int ctxn;
			
 
				-
			
 
				-	rcu_read_lock();
			
 
				-	for_each_task_context_nr(ctxn)
			
 
				-		perf_event_enable_on_exec(ctxn);
			
 
				-	rcu_read_unlock();
			
 
				-}
			
 
				-
			
 
				 struct perf_read_data {
			
 
				 	struct perf_event *event;
			
 
				 	bool group;
			
@@ -3720,6 +3823,9 @@ static bool exclusive_event_installable(struct perf_event *event,
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				+static void perf_addr_filters_splice(struct perf_event *event,
			
 
				+				       struct list_head *head);
			
 
				+
			
 
				 static void _free_event(struct perf_event *event)
			
 
				 {
			
 
				 	irq_work_sync(&event->pending);
			
@@ -3747,6 +3853,8 @@ static void _free_event(struct perf_event *event)
 
				 	}
			
 
				 
			
 
				 	perf_event_free_bpf_prog(event);
			
 
				+	perf_addr_filters_splice(event, NULL);
			
 
				+	kfree(event->addr_filters_offs);
			
 
				 
			
 
				 	if (event->destroy)
			
 
				 		event->destroy(event);
			
@@ -4343,6 +4451,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 
				 	case PERF_EVENT_IOC_SET_BPF:
			
 
				 		return perf_event_set_bpf_prog(event, arg);
			
 
				 
			
 
				+	case PERF_EVENT_IOC_PAUSE_OUTPUT: {
			
 
				+		struct ring_buffer *rb;
			
 
				+
			
 
				+		rcu_read_lock();
			
 
				+		rb = rcu_dereference(event->rb);
			
 
				+		if (!rb || !rb->nr_pages) {
			
 
				+			rcu_read_unlock();
			
 
				+			return -EINVAL;
			
 
				+		}
			
 
				+		rb_toggle_paused(rb, !!arg);
			
 
				+		rcu_read_unlock();
			
 
				+		return 0;
			
 
				+	}
			
 
				 	default:
			
 
				 		return -ENOTTY;
			
 
				 	}
			
@@ -4659,6 +4780,8 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 
				 		event->pmu->event_mapped(event);
			
 
				 }
			
 
				 
			
 
				+static void perf_pmu_output_stop(struct perf_event *event);
			
 
				+
			
 
				 /*
			
 
				  * A buffer can be mmap()ed multiple times; either directly through the same
			
 
				  * event, or through other events by use of perf_event_set_output().
			
@@ -4686,10 +4809,22 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
				 	 */
			
 
				 	if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
			
 
				 	    atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
			
 
				+		/*
			
 
				+		 * Stop all AUX events that are writing to this buffer,
			
 
				+		 * so that we can free its AUX pages and corresponding PMU
			
 
				+		 * data. Note that after rb::aux_mmap_count dropped to zero,
			
 
				+		 * they won't start any more (see perf_aux_output_begin()).
			
 
				+		 */
			
 
				+		perf_pmu_output_stop(event);
			
 
				+
			
 
				+		/* now it's safe to free the pages */
			
 
				 		atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
			
 
				 		vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
			
 
				 
			
 
				+		/* this has to be the last one */
			
 
				 		rb_free_aux(rb);
			
 
				+		WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
			
 
				+
			
 
				 		mutex_unlock(&event->mmap_mutex);
			
 
				 	}
			
 
				 
			
@@ -5630,9 +5765,13 @@ void perf_prepare_sample(struct perf_event_header *header,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void perf_event_output(struct perf_event *event,
			
 
				-			struct perf_sample_data *data,
			
 
				-			struct pt_regs *regs)
			
 
				+static void __always_inline
			
 
				+__perf_event_output(struct perf_event *event,
			
 
				+		    struct perf_sample_data *data,
			
 
				+		    struct pt_regs *regs,
			
 
				+		    int (*output_begin)(struct perf_output_handle *,
			
 
				+					struct perf_event *,
			
 
				+					unsigned int))
			
 
				 {
			
 
				 	struct perf_output_handle handle;
			
 
				 	struct perf_event_header header;
			
@@ -5642,7 +5781,7 @@ void perf_event_output(struct perf_event *event,
 
				 
			
 
				 	perf_prepare_sample(&header, data, event, regs);
			
 
				 
			
 
				-	if (perf_output_begin(&handle, event, header.size))
			
 
				+	if (output_begin(&handle, event, header.size))
			
 
				 		goto exit;
			
 
				 
			
 
				 	perf_output_sample(&handle, &header, data, event);
			
@@ -5653,6 +5792,30 @@ void perf_event_output(struct perf_event *event,
 
				 	rcu_read_unlock();
			
 
				 }
			
 
				 
			
 
				+void
			
 
				+perf_event_output_forward(struct perf_event *event,
			
 
				+			 struct perf_sample_data *data,
			
 
				+			 struct pt_regs *regs)
			
 
				+{
			
 
				+	__perf_event_output(event, data, regs, perf_output_begin_forward);
			
 
				+}
			
 
				+
			
 
				+void
			
 
				+perf_event_output_backward(struct perf_event *event,
			
 
				+			   struct perf_sample_data *data,
			
 
				+			   struct pt_regs *regs)
			
 
				+{
			
 
				+	__perf_event_output(event, data, regs, perf_output_begin_backward);
			
 
				+}
			
 
				+
			
 
				+void
			
 
				+perf_event_output(struct perf_event *event,
			
 
				+		  struct perf_sample_data *data,
			
 
				+		  struct pt_regs *regs)
			
 
				+{
			
 
				+	__perf_event_output(event, data, regs, perf_output_begin);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * read event_id
			
 
				  */
			
@@ -5698,15 +5861,18 @@ typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
 
				 static void
			
 
				 perf_event_aux_ctx(struct perf_event_context *ctx,
			
 
				 		   perf_event_aux_output_cb output,
			
 
				-		   void *data)
			
 
				+		   void *data, bool all)
			
 
				 {
			
 
				 	struct perf_event *event;
			
 
				 
			
 
				 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
			
 
				-		if (event->state < PERF_EVENT_STATE_INACTIVE)
			
 
				-			continue;
			
 
				-		if (!event_filter_match(event))
			
 
				-			continue;
			
 
				+		if (!all) {
			
 
				+			if (event->state < PERF_EVENT_STATE_INACTIVE)
			
 
				+				continue;
			
 
				+			if (!event_filter_match(event))
			
 
				+				continue;
			
 
				+		}
			
 
				+
			
 
				 		output(event, data);
			
 
				 	}
			
 
				 }
			
@@ -5717,7 +5883,7 @@ perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
 
				 {
			
 
				 	rcu_read_lock();
			
 
				 	preempt_disable();
			
 
				-	perf_event_aux_ctx(task_ctx, output, data);
			
 
				+	perf_event_aux_ctx(task_ctx, output, data, false);
			
 
				 	preempt_enable();
			
 
				 	rcu_read_unlock();
			
 
				 }
			
@@ -5747,19 +5913,147 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
 
				 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
			
 
				 		if (cpuctx->unique_pmu != pmu)
			
 
				 			goto next;
			
 
				-		perf_event_aux_ctx(&cpuctx->ctx, output, data);
			
 
				+		perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
			
 
				 		ctxn = pmu->task_ctx_nr;
			
 
				 		if (ctxn < 0)
			
 
				 			goto next;
			
 
				 		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
			
 
				 		if (ctx)
			
 
				-			perf_event_aux_ctx(ctx, output, data);
			
 
				+			perf_event_aux_ctx(ctx, output, data, false);
			
 
				 next:
			
 
				 		put_cpu_ptr(pmu->pmu_cpu_context);
			
 
				 	}
			
 
				 	rcu_read_unlock();
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Clear all file-based filters at exec, they'll have to be
			
 
				+ * re-instated when/if these objects are mmapped again.
			
 
				+ */
			
 
				+static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
			
 
				+{
			
 
				+	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
			
 
				+	struct perf_addr_filter *filter;
			
 
				+	unsigned int restart = 0, count = 0;
			
 
				+	unsigned long flags;
			
 
				+
			
 
				+	if (!has_addr_filter(event))
			
 
				+		return;
			
 
				+
			
 
				+	raw_spin_lock_irqsave(&ifh->lock, flags);
			
 
				+	list_for_each_entry(filter, &ifh->list, entry) {
			
 
				+		if (filter->inode) {
			
 
				+			event->addr_filters_offs[count] = 0;
			
 
				+			restart++;
			
 
				+		}
			
 
				+
			
 
				+		count++;
			
 
				+	}
			
 
				+
			
 
				+	if (restart)
			
 
				+		event->addr_filters_gen++;
			
 
				+	raw_spin_unlock_irqrestore(&ifh->lock, flags);
			
 
				+
			
 
				+	if (restart)
			
 
				+		perf_event_restart(event);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Exec-time hook for the current task.  For every task context that
			
 
				+ * exists, run perf_event_enable_on_exec() and then invoke
			
 
				+ * perf_event_addr_filters_exec() on each event of that context.
			
 
				+ */
			
 
				+void perf_event_exec(void)
			
 
				+{
			
 
				+	struct perf_event_context *ctx;
			
 
				+	int ctxn;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	for_each_task_context_nr(ctxn) {
			
 
				+		/*
			
 
				+		 * Read the context pointer with rcu_dereference(), like the
			
 
				+		 * other readers of current->perf_event_ctxp[] in this file.
			
 
				+		 */
			
 
				+		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
			
 
				+		if (!ctx)
			
 
				+			continue;
			
 
				+
			
 
				+		perf_event_enable_on_exec(ctxn);
			
 
				+
			
 
				+		/*
			
 
				+		 * all == true: visit every event in the context, skipping
			
 
				+		 * the state and filter-match checks in perf_event_aux_ctx().
			
 
				+		 */
			
 
				+		perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
			
 
				+				   true);
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+}
				+}
			
 
				+
			
 
				+struct remote_output {
			
 
				+	struct ring_buffer	*rb;
			
 
				+	int			err;
			
 
				+};
			
 
				+
			
 
				+static void __perf_event_output_stop(struct perf_event *event, void *data)
			
 
				+{
			
 
				+	struct perf_event *parent = event->parent;
			
 
				+	struct remote_output *ro = data;
			
 
				+	struct ring_buffer *rb = ro->rb;
			
 
				+	struct stop_event_data sd = {
			
 
				+		.event	= event,
			
 
				+	};
			
 
				+
			
 
				+	if (!has_aux(event))
			
 
				+		return;
			
 
				+
			
 
				+	if (!parent)
			
 
				+		parent = event;
			
 
				+
			
 
				+	/*
			
 
				+	 * In case of inheritance, it will be the parent that links to the
			
 
				+	 * ring-buffer, but it will be the child that's actually using it:
			
 
				+	 */
			
 
				+	if (rcu_dereference(parent->rb) == rb)
			
 
				+		ro->err = __perf_event_stop(&sd);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Cross-call target of perf_pmu_output_stop(): walk this CPU's context
			
 
				+ * and, if present, its current task context, and stop the AUX events
			
 
				+ * that output to the ring buffer of @info.
			
 
				+ *
			
 
				+ * @info: the perf_event whose ring buffer is being torn down.
			
 
				+ *
			
 
				+ * Returns 0 if no matching event was stopped, otherwise the result of
			
 
				+ * the most recent __perf_event_stop() call; -EAGAIN makes the caller
			
 
				+ * restart its iteration.
			
 
				+ */
			
 
				+static int __perf_pmu_output_stop(void *info)
			
 
				+{
			
 
				+	struct perf_event *event = info;
			
 
				+	struct pmu *pmu = event->pmu;
			
 
				+	/*
			
 
				+	 * this_cpu_ptr(), not get_cpu_ptr(): the latter disables preemption
			
 
				+	 * and this function has no matching put_cpu_ptr(), which would
			
 
				+	 * leak a preempt count on every call.
			
 
				+	 */
			
 
				+	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
			
 
				+	struct remote_output ro = {
			
 
				+		.rb	= event->rb,
			
 
				+	};
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
			
 
				+	if (cpuctx->task_ctx)
			
 
				+		perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
			
 
				+				   &ro, false);
			
 
				+	rcu_read_unlock();
			
 
				+
			
 
				+	return ro.err;
			
 
				+}
				+}
			
 
				+
			
 
				+static void perf_pmu_output_stop(struct perf_event *event)
			
 
				+{
			
 
				+	struct perf_event *iter;
			
 
				+	int err, cpu;
			
 
				+
			
 
				+restart:
			
 
				+	rcu_read_lock();
			
 
				+	list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
			
 
				+		/*
			
 
				+		 * For per-CPU events, we need to make sure that neither they
			
 
				+		 * nor their children are running; for cpu==-1 events it's
			
 
				+		 * sufficient to stop the event itself if it's active, since
			
 
				+		 * it can't have children.
			
 
				+		 */
			
 
				+		cpu = iter->cpu;
			
 
				+		if (cpu == -1)
			
 
				+			cpu = READ_ONCE(iter->oncpu);
			
 
				+
			
 
				+		if (cpu == -1)
			
 
				+			continue;
			
 
				+
			
 
				+		err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
			
 
				+		if (err == -EAGAIN) {
			
 
				+			rcu_read_unlock();
			
 
				+			goto restart;
			
 
				+		}
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * task tracking -- fork/exit
			
 
				  *
			
@@ -6169,6 +6463,87 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 
				 	kfree(buf);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Whether this @filter depends on a dynamic object which is not loaded
			
 
				+ * yet or its load addresses are not known.
			
 
				+ */
			
 
				+static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
			
 
				+{
			
 
				+	return filter->filter && filter->inode;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Check whether inode and address range match filter criteria.
			
 
				+ */
			
 
				+static bool perf_addr_filter_match(struct perf_addr_filter *filter,
			
 
				+				     struct file *file, unsigned long offset,
			
 
				+				     unsigned long size)
			
 
				+{
			
 
				+	if (filter->inode != file->f_inode)
			
 
				+		return false;
			
 
				+
			
 
				+	if (filter->offset > offset + size)
			
 
				+		return false;
			
 
				+
			
 
				+	if (filter->offset + filter->size < offset)
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
			
 
				+{
			
 
				+	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
			
 
				+	struct vm_area_struct *vma = data;
			
 
				+	unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags;
			
 
				+	struct file *file = vma->vm_file;
			
 
				+	struct perf_addr_filter *filter;
			
 
				+	unsigned int restart = 0, count = 0;
			
 
				+
			
 
				+	if (!has_addr_filter(event))
			
 
				+		return;
			
 
				+
			
 
				+	if (!file)
			
 
				+		return;
			
 
				+
			
 
				+	raw_spin_lock_irqsave(&ifh->lock, flags);
			
 
				+	list_for_each_entry(filter, &ifh->list, entry) {
			
 
				+		if (perf_addr_filter_match(filter, file, off,
			
 
				+					     vma->vm_end - vma->vm_start)) {
			
 
				+			event->addr_filters_offs[count] = vma->vm_start;
			
 
				+			restart++;
			
 
				+		}
			
 
				+
			
 
				+		count++;
			
 
				+	}
			
 
				+
			
 
				+	if (restart)
			
 
				+		event->addr_filters_gen++;
			
 
				+	raw_spin_unlock_irqrestore(&ifh->lock, flags);
			
 
				+
			
 
				+	if (restart)
			
 
				+		perf_event_restart(event);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Adjust all task's events' filters to the new vma
			
 
				+ */
			
 
				+static void perf_addr_filters_adjust(struct vm_area_struct *vma)
			
 
				+{
			
 
				+	struct perf_event_context *ctx;
			
 
				+	int ctxn;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	for_each_task_context_nr(ctxn) {
			
 
				+		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
			
 
				+		if (!ctx)
			
 
				+			continue;
			
 
				+
			
 
				+		perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+}
			
 
				+
			
 
				 void perf_event_mmap(struct vm_area_struct *vma)
			
 
				 {
			
 
				 	struct perf_mmap_event mmap_event;
			
@@ -6200,6 +6575,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
 
				 		/* .flags (attr_mmap2 only) */
			
 
				 	};
			
 
				 
			
 
				+	perf_addr_filters_adjust(vma);
			
 
				 	perf_event_mmap_event(&mmap_event);
			
 
				 }
			
 
				 
			
@@ -6491,10 +6867,7 @@ static int __perf_event_overflow(struct perf_event *event,
 
				 		irq_work_queue(&event->pending);
			
 
				 	}
			
 
				 
			
 
				-	if (event->overflow_handler)
			
 
				-		event->overflow_handler(event, data, regs);
			
 
				-	else
			
 
				-		perf_event_output(event, data, regs);
			
 
				+	event->overflow_handler(event, data, regs);
			
 
				 
			
 
				 	if (*perf_event_fasync(event) && event->pending_kill) {
			
 
				 		event->pending_wakeup = 1;
			
@@ -7081,24 +7454,6 @@ static inline void perf_tp_register(void)
 
				 	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
			
 
				 }
			
 
				 
			
 
				-static int perf_event_set_filter(struct perf_event *event, void __user *arg)
			
 
				-{
			
 
				-	char *filter_str;
			
 
				-	int ret;
			
 
				-
			
 
				-	if (event->attr.type != PERF_TYPE_TRACEPOINT)
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	filter_str = strndup_user(arg, PAGE_SIZE);
			
 
				-	if (IS_ERR(filter_str))
			
 
				-		return PTR_ERR(filter_str);
			
 
				-
			
 
				-	ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
			
 
				-
			
 
				-	kfree(filter_str);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				 static void perf_event_free_filter(struct perf_event *event)
			
 
				 {
			
 
				 	ftrace_profile_free_filter(event);
			
@@ -7153,11 +7508,6 @@ static inline void perf_tp_register(void)
 
				 {
			
 
				 }
			
 
				 
			
 
				-static int perf_event_set_filter(struct perf_event *event, void __user *arg)
			
 
				-{
			
 
				-	return -ENOENT;
			
 
				-}
			
 
				-
			
 
				 static void perf_event_free_filter(struct perf_event *event)
			
 
				 {
			
 
				 }
			
@@ -7185,6 +7535,387 @@ void perf_bp_event(struct perf_event *bp, void *data)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/*
				+ * Allocate a zeroed address filter on the memory node of the event's CPU
				+ * (the node of CPU 0 when event->cpu is -1) and append it to @filters.
				+ *
				+ * Returns the new filter, or NULL if the allocation fails.
				+ */
				+static struct perf_addr_filter *
				+perf_addr_filter_new(struct perf_event *event, struct list_head *filters)
				+{
				+	struct perf_addr_filter *new_filter;
				+	int cpu = event->cpu;
				+	int nid;
				+
				+	/* events not bound to a CPU allocate from CPU 0's node */
				+	if (cpu == -1)
				+		cpu = 0;
				+	nid = cpu_to_node(cpu);
				+
				+	new_filter = kzalloc_node(sizeof(*new_filter), GFP_KERNEL, nid);
				+	if (new_filter) {
				+		INIT_LIST_HEAD(&new_filter->entry);
				+		list_add_tail(&new_filter->entry, filters);
				+	}
				+
				+	return new_filter;
				+}
			
 
				+
			
 
				+/*
				+ * Tear down every filter on @filters: drop the inode reference when one
				+ * is held, unlink the filter from the list and free it.
				+ */
				+static void free_filters_list(struct list_head *filters)
				+{
				+	struct perf_addr_filter *cur, *next;
				+
				+	list_for_each_entry_safe(cur, next, filters, entry) {
				+		struct inode *inode = cur->inode;
				+
				+		if (inode)
				+			iput(inode);
				+
				+		list_del(&cur->entry);
				+		kfree(cur);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Free existing address filters and optionally install new ones
				+ *
				+ * Does nothing for events without address filter support and for child
				+ * events.  Otherwise, with event->addr_filters.lock held, the event's
				+ * current filter list is moved onto a local list and, if @head is non-NULL,
				+ * the filters on @head are spliced in as the new list.  The old filters
				+ * are freed only after the lock has been released.
				+ */
				+static void perf_addr_filters_splice(struct perf_event *event,
				+				     struct list_head *head)
				+{
				+	unsigned long flags;
				+	LIST_HEAD(list);	/* collects the filters being replaced */
				+
				+	if (!has_addr_filter(event))
				+		return;
				+
				+	/* don't bother with children, they don't have their own filters */
				+	if (event->parent)
				+		return;
				+
				+	raw_spin_lock_irqsave(&event->addr_filters.lock, flags);
				+
				+	list_splice_init(&event->addr_filters.list, &list);
				+	if (head)
				+		list_splice(head, &event->addr_filters.list);
				+
				+	raw_spin_unlock_irqrestore(&event->addr_filters.lock, flags);
				+
				+	free_filters_list(&list);	/* outside the lock */
				+}
			
 
				+
			
 
				+/*
				+ * Walk the vma list of @mm looking for a file-backed mapping that
				+ * matches @filter.  Returns the start address of the first matching
				+ * vma, or 0 when no mapping matches.
				+ * Called with mm::mmap_sem down for reading.
				+ */
				+static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter,
				+					    struct mm_struct *mm)
				+{
				+	struct vm_area_struct *vma = mm->mmap;
				+
				+	while (vma) {
				+		struct file *file = vma->vm_file;
				+
				+		/* anonymous mappings can never match a file filter */
				+		if (file) {
				+			unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
				+			unsigned long len = vma->vm_end - vma->vm_start;
				+
				+			if (perf_addr_filter_match(filter, file, off, len))
				+				return vma->vm_start;
				+		}
				+
				+		vma = vma->vm_next;
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Update event's address range filters based on the
				+ * task's existing mappings, if any.
				+ *
				+ * For every filter on the event's filter list the matching
				+ * addr_filters_offs[] slot is reset to 0 and, for filters that need an
				+ * mmap lookup, set to the start address of the first matching vma in the
				+ * task's mm.  addr_filters_gen is then incremented and the event is
				+ * restarted.  When the task has no mm only the restart is performed.
				+ */
				+static void perf_event_addr_filters_apply(struct perf_event *event)
				+{
				+	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
				+	struct task_struct *task = READ_ONCE(event->ctx->task);
				+	struct perf_addr_filter *filter;
				+	struct mm_struct *mm = NULL;
				+	unsigned int count = 0;
				+	unsigned long flags;
				+
				+	/*
				+	 * We may observe TASK_TOMBSTONE, which means that the event tear-down
				+	 * will stop on the parent's child_mutex that our caller is also holding
				+	 */
				+	if (task == TASK_TOMBSTONE)
				+		return;
				+
				+	/*
				+	 * Use the snapshot that was just checked: re-reading
				+	 * event->ctx->task here could observe TASK_TOMBSTONE after the
				+	 * check above and pass an invalid pointer to get_task_mm().
				+	 */
				+	mm = get_task_mm(task);
				+	if (!mm)
				+		goto restart;
				+
				+	down_read(&mm->mmap_sem);
				+
				+	raw_spin_lock_irqsave(&ifh->lock, flags);
				+	list_for_each_entry(filter, &ifh->list, entry) {
				+		event->addr_filters_offs[count] = 0;
				+
				+		if (perf_addr_filter_needs_mmap(filter))
				+			event->addr_filters_offs[count] =
				+				perf_addr_filter_apply(filter, mm);
				+
				+		count++;
				+	}
				+
				+	event->addr_filters_gen++;
				+	raw_spin_unlock_irqrestore(&ifh->lock, flags);
				+
				+	up_read(&mm->mmap_sem);
				+
				+	mmput(mm);
				+
				+restart:
				+	perf_event_restart(event);
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Address range filtering: limiting the data to certain
			
 
				+ * instruction address ranges. Filters are ioctl()ed to us from
			
 
				+ * userspace as ascii strings.
			
 
				+ *
			
 
				+ * Filter string format:
			
 
				+ *
			
 
				+ * ACTION RANGE_SPEC
			
 
				+ * where ACTION is one of the
			
 
				+ *  * "filter": limit the trace to this region
			
 
				+ *  * "start": start tracing from this address
			
 
				+ *  * "stop": stop tracing at this address/region;
			
 
				+ * RANGE_SPEC is
			
 
				+ *  * for kernel addresses: <start address>[/<size>]
			
 
				+ *  * for object files:     <start address>[/<size>]@</path/to/object/file>
			
 
				+ *
			
 
				+ * if <size> is not specified, the range is treated as a single address.
			
 
				+ */
			
 
				+/* Tokens recognised by the address filter parser. */
				+enum {
				+	IF_ACT_NONE = -1,	/* table terminator, never a valid token */
				+	IF_ACT_FILTER,
				+	IF_ACT_START,
				+	IF_ACT_STOP,
				+	IF_SRC_FILE,
				+	IF_SRC_KERNEL,
				+	IF_SRC_FILEADDR,
				+	IF_SRC_KERNELADDR,
				+};
				+
				+/* Parser states: expecting an ACTION, expecting a RANGE_SPEC, definition complete. */
				+enum {
				+	IF_STATE_ACTION = 0,
				+	IF_STATE_SOURCE,
				+	IF_STATE_END,
				+};
				+
				+/*
				+ * match_token() walks the table until it reaches an entry whose pattern
				+ * is NULL, so the table must end with such an entry; without it an
				+ * unmatched string makes the lookup read past the end of the array.
				+ */
				+static const match_table_t if_tokens = {
				+	{ IF_ACT_FILTER,	"filter" },
				+	{ IF_ACT_START,		"start" },
				+	{ IF_ACT_STOP,		"stop" },
				+	{ IF_SRC_FILE,		"%u/%u@%s" },
				+	{ IF_SRC_KERNEL,	"%u/%u" },
				+	{ IF_SRC_FILEADDR,	"%u@%s" },
				+	{ IF_SRC_KERNELADDR,	"%u" },
				+	{ IF_ACT_NONE,		NULL },
				+};
			
 
				+
			
 
				+/*
				+ * Address filter string parser
				+ *
				+ * Tokenises a private copy of @fstr (the caller's string is not modified)
				+ * and appends one perf_addr_filter per parsed definition to @filters.
				+ *
				+ * Returns 0 on success.  On failure every filter on @filters is freed and
				+ * a negative errno is returned: -ENOMEM when the string or file name copy
				+ * cannot be allocated, the error from kstrtoul() or kern_path(), or
				+ * -EINVAL for a malformed or contradictory definition.
				+ */
				+static int
				+perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
				+			     struct list_head *filters)
				+{
				+	struct perf_addr_filter *filter = NULL;
				+	char *start, *orig, *filename = NULL;
				+	struct path path;
				+	substring_t args[MAX_OPT_ARGS];
				+	int state = IF_STATE_ACTION, token;
				+	unsigned int kernel = 0;
				+	int ret = -EINVAL;
				+
				+	orig = fstr = kstrdup(fstr, GFP_KERNEL);
				+	if (!fstr)
				+		return -ENOMEM;
				+
				+	while ((start = strsep(&fstr, " ,\n")) != NULL) {
				+		ret = -EINVAL;
				+
				+		if (!*start)
				+			continue;
				+
				+		/* filter definition begins */
				+		if (state == IF_STATE_ACTION) {
				+			filter = perf_addr_filter_new(event, filters);
				+			if (!filter)
				+				goto fail;
				+		}
				+
				+		token = match_token(start, if_tokens, args);
				+		switch (token) {
				+		case IF_ACT_FILTER:
				+		case IF_ACT_START:
				+			filter->filter = 1;
				+			/* fall through */
				+
				+		case IF_ACT_STOP:
				+			if (state != IF_STATE_ACTION)
				+				goto fail;
				+
				+			state = IF_STATE_SOURCE;
				+			break;
				+
				+		case IF_SRC_KERNELADDR:
				+		case IF_SRC_KERNEL:
				+			kernel = 1;
				+			/* fall through */
				+
				+		case IF_SRC_FILEADDR:
				+		case IF_SRC_FILE:
				+			if (state != IF_STATE_SOURCE)
				+				goto fail;
				+
				+			if (token == IF_SRC_FILE || token == IF_SRC_KERNEL)
				+				filter->range = 1;
				+
				+			*args[0].to = 0;
				+			ret = kstrtoul(args[0].from, 0, &filter->offset);
				+			if (ret)
				+				goto fail;
				+
				+			if (filter->range) {
				+				*args[1].to = 0;
				+				ret = kstrtoul(args[1].from, 0, &filter->size);
				+				if (ret)
				+					goto fail;
				+			}
				+
				+			if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
				+				/*
				+				 * The path is the third argument of
				+				 * "<addr>/<size>@<file>" but the second
				+				 * one of "<addr>@<file>".
				+				 */
				+				int fpos = filter->range ? 2 : 1;
				+
				+				filename = match_strdup(&args[fpos]);
				+				if (!filename) {
				+					ret = -ENOMEM;
				+					goto fail;
				+				}
				+			}
				+
				+			state = IF_STATE_END;
				+			break;
				+
				+		default:
				+			goto fail;
				+		}
				+
				+		/*
				+		 * Filter definition is fully parsed, validate and install it.
				+		 * Make sure that it doesn't contradict itself or the event's
				+		 * attribute.
				+		 */
				+		if (state == IF_STATE_END) {
				+			/*
				+			 * kstrtoul() left ret at 0; a validation failure
				+			 * below must not be reported as success.
				+			 */
				+			ret = -EINVAL;
				+			if (kernel && event->attr.exclude_kernel)
				+				goto fail;
				+
				+			if (!kernel) {
				+				if (!filename)
				+					goto fail;
				+
				+				/* look up the path and grab its inode */
				+				ret = kern_path(filename, LOOKUP_FOLLOW, &path);
				+				if (ret)
				+					goto fail;
				+
				+				filter->inode = igrab(d_inode(path.dentry));
				+				path_put(&path);
				+
				+				ret = -EINVAL;
				+				if (!filter->inode ||
				+				    !S_ISREG(filter->inode->i_mode))
				+					/* free_filters_list() will iput() */
				+					goto fail;
				+			}
				+
				+			/* ready to consume more filters */
				+			kfree(filename);
				+			filename = NULL;
				+			state = IF_STATE_ACTION;
				+			filter = NULL;
				+			/* the next definition decides kernel vs. file anew */
				+			kernel = 0;
				+		}
				+	}
				+
				+	if (state != IF_STATE_ACTION)
				+		goto fail;
				+
				+	kfree(orig);
				+
				+	return 0;
				+
				+fail:
				+	/* filename is NULL unless a copy is still pending; kfree(NULL) is a no-op */
				+	kfree(filename);
				+	free_filters_list(filters);
				+	kfree(orig);
				+
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Parse @filter_str, have the PMU validate the result and, on success,
				+ * replace the event's address filters with the new set and apply them
				+ * to the event and its children.  Returns 0 or a negative errno.
				+ */
				+static int
				+perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
				+{
				+	LIST_HEAD(parsed);
				+	int err;
				+
				+	/*
				+	 * We are reached through perf_ioctl(), which already took
				+	 * ctx::mutex.
				+	 */
				+	lockdep_assert_held(&event->ctx->mutex);
				+
				+	if (WARN_ON_ONCE(event->parent))
				+		return -EINVAL;
				+
				+	/*
				+	 * Only per-task events are supported for now: the same object
				+	 * code is mapped at different virtual addresses in different
				+	 * processes, so CPU-wide events would need additional context
				+	 * switching trickery.
				+	 */
				+	if (!event->ctx->task)
				+		return -EOPNOTSUPP;
				+
				+	err = perf_event_parse_addr_filter(event, filter_str, &parsed);
				+	if (err)
				+		return err;
				+
				+	err = event->pmu->addr_filters_validate(&parsed);
				+	if (err) {
				+		free_filters_list(&parsed);
				+		return err;
				+	}
				+
				+	/* swap out the old filters, if any, for the new set */
				+	perf_addr_filters_splice(event, &parsed);
				+
				+	/* and apply the new set */
				+	perf_event_for_each_child(event, perf_event_addr_filters_apply);
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Copy a filter string from user space and hand it to the tracepoint
				+ * filter code (tracepoint events, when event tracing is built in) or to
				+ * the address filter code (events with address filter support).
				+ * Returns -EINVAL when the event supports neither kind of filter.
				+ */
				+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
				+{
				+	const bool tp_event = event->attr.type == PERF_TYPE_TRACEPOINT;
				+	char *filter_str;
				+	int ret = -EINVAL;
				+
				+	if (!(IS_ENABLED(CONFIG_EVENT_TRACING) && tp_event) &&
				+	    !has_addr_filter(event))
				+		return -EINVAL;
				+
				+	filter_str = strndup_user(arg, PAGE_SIZE);
				+	if (IS_ERR(filter_str))
				+		return PTR_ERR(filter_str);
				+
				+	if (IS_ENABLED(CONFIG_EVENT_TRACING) && tp_event) {
				+		ret = ftrace_profile_set_filter(event, event->attr.config,
				+						filter_str);
				+	} else if (has_addr_filter(event)) {
				+		ret = perf_event_set_addr_filter(event, filter_str);
				+	}
				+
				+	kfree(filter_str);
				+	return ret;
				+}
			
 
				+
			
 
				 /*
			
 
				  * hrtimer based swevent callback
			
 
				  */
			
@@ -7542,6 +8273,20 @@ static void free_pmu_context(struct pmu *pmu)
 
				 out:
			
 
				 	mutex_unlock(&pmus_lock);
			
 
				 }
			
 
				+
			
 
				+/*
				+ * sysfs "nr_addr_filters" attribute: prints the PMU's nr_addr_filters
				+ * value so that userspace can tell the PMU supports address range
				+ * filtering.
				+ */
				+static ssize_t nr_addr_filters_show(struct device *dev,
				+				    struct device_attribute *attr,
				+				    char *page)
				+{
				+	struct pmu *owner = dev_get_drvdata(dev);
				+	ssize_t len;
				+
				+	len = snprintf(page, PAGE_SIZE - 1, "%d\n", owner->nr_addr_filters);
				+
				+	return len;
				+}
				+DEVICE_ATTR_RO(nr_addr_filters);
			
 
				+
			
 
				 static struct idr pmu_idr;
			
 
				 
			
 
				 static ssize_t
			
@@ -7643,9 +8388,19 @@ static int pmu_dev_alloc(struct pmu *pmu)
 
				 	if (ret)
			
 
				 		goto free_dev;
			
 
				 
			
 
				+	/* For PMUs with address filters, throw in an extra attribute: */
			
 
				+	if (pmu->nr_addr_filters)
			
 
				+		ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
			
 
				+
			
 
				+	if (ret)
			
 
				+		goto del_dev;
			
 
				+
			
 
				 out:
			
 
				 	return ret;
			
 
				 
			
 
				+del_dev:
			
 
				+	device_del(pmu->dev);
			
 
				+
			
 
				 free_dev:
			
 
				 	put_device(pmu->dev);
			
 
				 	goto out;
			
@@ -7685,6 +8440,21 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 
				 	}
			
 
				 
			
 
				 skip_type:
			
 
				+	if (pmu->task_ctx_nr == perf_hw_context) {
			
 
				+		static int hw_context_taken = 0;
			
 
				+
			
 
				+		/*
			
 
				+		 * Other than systems with heterogeneous CPUs, it never makes
			
 
				+		 * sense for two PMUs to share perf_hw_context. PMUs which are
			
 
				+		 * uncore must use perf_invalid_context.
			
 
				+		 */
			
 
				+		if (WARN_ON_ONCE(hw_context_taken &&
			
 
				+		    !(pmu->capabilities & PERF_PMU_CAP_HETEROGENEOUS_CPUS)))
			
 
				+			pmu->task_ctx_nr = perf_invalid_context;
			
 
				+
			
 
				+		hw_context_taken = 1;
			
 
				+	}
			
 
				+
			
 
				 	pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
			
 
				 	if (pmu->pmu_cpu_context)
			
 
				 		goto got_cpu_context;
			
@@ -7772,6 +8542,8 @@ void perf_pmu_unregister(struct pmu *pmu)
 
				 	free_percpu(pmu->pmu_disable_count);
			
 
				 	if (pmu->type >= PERF_TYPE_MAX)
			
 
				 		idr_remove(&pmu_idr, pmu->type);
			
 
				+	if (pmu->nr_addr_filters)
			
 
				+		device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
			
 
				 	device_del(pmu->dev);
			
 
				 	put_device(pmu->dev);
			
 
				 	free_pmu_context(pmu);
			
@@ -7965,6 +8737,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
				 	INIT_LIST_HEAD(&event->sibling_list);
			
 
				 	INIT_LIST_HEAD(&event->rb_entry);
			
 
				 	INIT_LIST_HEAD(&event->active_entry);
			
 
				+	INIT_LIST_HEAD(&event->addr_filters.list);
			
 
				 	INIT_HLIST_NODE(&event->hlist_entry);
			
 
				 
			
 
				 
			
@@ -7972,6 +8745,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
				 	init_irq_work(&event->pending, perf_pending_event);
			
 
				 
			
 
				 	mutex_init(&event->mmap_mutex);
			
 
				+	raw_spin_lock_init(&event->addr_filters.lock);
			
 
				 
			
 
				 	atomic_long_set(&event->refcount, 1);
			
 
				 	event->cpu		= cpu;
			
@@ -8006,8 +8780,16 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
				 		context = parent_event->overflow_handler_context;
			
 
				 	}
			
 
				 
			
 
				-	event->overflow_handler	= overflow_handler;
			
 
				-	event->overflow_handler_context = context;
			
 
				+	if (overflow_handler) {
			
 
				+		event->overflow_handler	= overflow_handler;
			
 
				+		event->overflow_handler_context = context;
			
 
				+	} else if (is_write_backward(event)){
			
 
				+		event->overflow_handler = perf_event_output_backward;
			
 
				+		event->overflow_handler_context = NULL;
			
 
				+	} else {
			
 
				+		event->overflow_handler = perf_event_output_forward;
			
 
				+		event->overflow_handler_context = NULL;
			
 
				+	}
			
 
				 
			
 
				 	perf_event__state_init(event);
			
 
				 
			
@@ -8048,11 +8830,22 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
				 	if (err)
			
 
				 		goto err_pmu;
			
 
				 
			
 
				+	if (has_addr_filter(event)) {
			
 
				+		event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
			
 
				+						   sizeof(unsigned long),
			
 
				+						   GFP_KERNEL);
			
 
				+		if (!event->addr_filters_offs)
			
 
				+			goto err_per_task;
			
 
				+
			
 
				+		/* force hw sync on the address filters */
			
 
				+		event->addr_filters_gen = 1;
			
 
				+	}
			
 
				+
			
 
				 	if (!event->parent) {
			
 
				 		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
			
 
				 			err = get_callchain_buffers();
			
 
				 			if (err)
			
 
				-				goto err_per_task;
			
 
				+				goto err_addr_filters;
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -8061,6 +8854,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
				 
			
 
				 	return event;
			
 
				 
			
 
				+err_addr_filters:
			
 
				+	kfree(event->addr_filters_offs);
			
 
				+
			
 
				 err_per_task:
			
 
				 	exclusive_event_destroy(event);
			
 
				 
			
@@ -8239,6 +9035,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 
				 	if (output_event->clock != event->clock)
			
 
				 		goto out;
			
 
				 
			
 
				+	/*
			
 
				+	 * Either writing ring buffer from beginning or from end.
			
 
				+	 * Mixing is not allowed.
			
 
				+	 */
			
 
				+	if (is_write_backward(output_event) != is_write_backward(event))
			
 
				+		goto out;
			
 
				+
			
 
				 	/*
			
 
				 	 * If both events generate aux data, they must be on the same PMU
			
 
				 	 */
			
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -11,13 +11,13 @@
 
				 struct ring_buffer {
			
 
				 	atomic_t			refcount;
			
 
				 	struct rcu_head			rcu_head;
			
 
				-	struct irq_work			irq_work;
			
 
				 #ifdef CONFIG_PERF_USE_VMALLOC
			
 
				 	struct work_struct		work;
			
 
				 	int				page_order;	/* allocation order  */
			
 
				 #endif
			
 
				 	int				nr_pages;	/* nr of data pages  */
			
 
				 	int				overwrite;	/* can overwrite itself */
			
 
				+	int				paused;		/* can write into ring buffer */
			
 
				 
			
 
				 	atomic_t			poll;		/* POLL_ for wakeups */
			
 
				 
			
@@ -65,6 +65,14 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 
				 	rb_free(rb);
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Set rb->paused.  The buffer is only unpaused when @pause is false and
				+ * the buffer has data pages; in every other case it is marked paused.
				+ */
				+static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
				+{
				+	rb->paused = (pause || !rb->nr_pages) ? 1 : 0;
				+}
			
 
				+
			
 
				 extern struct ring_buffer *
			
 
				 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
			
 
				 extern void perf_event_wakeup(struct perf_event *event);
			
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -102,8 +102,21 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
 
				 	preempt_enable();
			
 
				 }
			
 
				 
			
 
				-int perf_output_begin(struct perf_output_handle *handle,
			
 
				-		      struct perf_event *event, unsigned int size)
			
 
				+/*
				+ * Report whether at least @size bytes are free in a buffer of
				+ * @data_size bytes.  For backward writing the roles of @head and @tail
				+ * are swapped in the CIRC_SPACE() computation.
				+ */
				+static bool __always_inline
				+ring_buffer_has_space(unsigned long head, unsigned long tail,
				+		      unsigned long data_size, unsigned int size,
				+		      bool backward)
				+{
				+	unsigned long space;
				+
				+	if (backward)
				+		space = CIRC_SPACE(tail, head, data_size);
				+	else
				+		space = CIRC_SPACE(head, tail, data_size);
				+
				+	return space >= size;
				+}
			
 
				+
			
 
				+static int __always_inline
			
 
				+__perf_output_begin(struct perf_output_handle *handle,
			
 
				+		    struct perf_event *event, unsigned int size,
			
 
				+		    bool backward)
			
 
				 {
			
 
				 	struct ring_buffer *rb;
			
 
				 	unsigned long tail, offset, head;
			
@@ -125,8 +138,11 @@ int perf_output_begin(struct perf_output_handle *handle,
 
				 	if (unlikely(!rb))
			
 
				 		goto out;
			
 
				 
			
 
				-	if (unlikely(!rb->nr_pages))
			
 
				+	if (unlikely(rb->paused)) {
			
 
				+		if (rb->nr_pages)
			
 
				+			local_inc(&rb->lost);
			
 
				 		goto out;
			
 
				+	}
			
 
				 
			
 
				 	handle->rb    = rb;
			
 
				 	handle->event = event;
			
@@ -143,9 +159,12 @@ int perf_output_begin(struct perf_output_handle *handle,
 
				 	do {
			
 
				 		tail = READ_ONCE(rb->user_page->data_tail);
			
 
				 		offset = head = local_read(&rb->head);
			
 
				-		if (!rb->overwrite &&
			
 
				-		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
			
 
				-			goto fail;
			
 
				+		if (!rb->overwrite) {
			
 
				+			if (unlikely(!ring_buffer_has_space(head, tail,
			
 
				+							    perf_data_size(rb),
			
 
				+							    size, backward)))
			
 
				+				goto fail;
			
 
				+		}
			
 
				 
			
 
				 		/*
			
 
				 		 * The above forms a control dependency barrier separating the
			
@@ -159,9 +178,17 @@ int perf_output_begin(struct perf_output_handle *handle,
 
				 		 * See perf_output_put_handle().
			
 
				 		 */
			
 
				 
			
 
				-		head += size;
			
 
				+		if (!backward)
			
 
				+			head += size;
			
 
				+		else
			
 
				+			head -= size;
			
 
				 	} while (local_cmpxchg(&rb->head, offset, head) != offset);
			
 
				 
			
 
				+	if (backward) {
			
 
				+		offset = head;
			
 
				+		head = (u64)(-head);
			
 
				+	}
			
 
				+
			
 
				 	/*
			
 
				 	 * We rely on the implied barrier() by local_cmpxchg() to ensure
			
 
				 	 * none of the data stores below can be lifted up by the compiler.
			
@@ -203,6 +230,26 @@ int perf_output_begin(struct perf_output_handle *handle,
 
				 	return -ENOSPC;
			
 
				 }
			
 
				 
			
 
				+/* Begin an output transaction that writes the buffer in forward direction. */
				+int perf_output_begin_forward(struct perf_output_handle *handle,
				+			     struct perf_event *event, unsigned int size)
				+{
				+	const bool backward = false;
				+
				+	return __perf_output_begin(handle, event, size, backward);
				+}
			
 
				+
			
 
				+/* Begin an output transaction that writes the buffer in backward direction. */
				+int perf_output_begin_backward(struct perf_output_handle *handle,
				+			       struct perf_event *event, unsigned int size)
				+{
				+	const bool backward = true;
				+
				+	return __perf_output_begin(handle, event, size, backward);
				+}
			
 
				+
			
 
				+/*
				+ * Begin an output transaction, taking the write direction from the
				+ * event itself via is_write_backward().
				+ */
				+int perf_output_begin(struct perf_output_handle *handle,
				+		      struct perf_event *event, unsigned int size)
				+{
				+	const bool backward = unlikely(is_write_backward(event));
				+
				+	return __perf_output_begin(handle, event, size, backward);
				+}
			
 
				+
			
 
				 unsigned int perf_output_copy(struct perf_output_handle *handle,
			
 
				 		      const void *buf, unsigned int len)
			
 
				 {
			
@@ -221,8 +268,6 @@ void perf_output_end(struct perf_output_handle *handle)
 
				 	rcu_read_unlock();
			
 
				 }
			
 
				 
			
 
				-static void rb_irq_work(struct irq_work *work);
			
 
				-
			
 
				 static void
			
 
				 ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
			
 
				 {
			
@@ -243,16 +288,13 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 
				 
			
 
				 	INIT_LIST_HEAD(&rb->event_list);
			
 
				 	spin_lock_init(&rb->event_lock);
			
 
				-	init_irq_work(&rb->irq_work, rb_irq_work);
			
 
				-}
			
 
				 
			
 
				-static void ring_buffer_put_async(struct ring_buffer *rb)
			
 
				-{
			
 
				-	if (!atomic_dec_and_test(&rb->refcount))
			
 
				-		return;
			
 
				-
			
 
				-	rb->rcu_head.next = (void *)rb;
			
 
				-	irq_work_queue(&rb->irq_work);
			
 
				+	/*
			
 
				+	 * perf_output_begin() only checks rb->paused, therefore
			
 
				+	 * rb->paused must be true if we have no pages for output.
			
 
				+	 */
			
 
				+	if (!rb->nr_pages)
			
 
				+		rb->paused = 1;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -264,6 +306,10 @@ static void ring_buffer_put_async(struct ring_buffer *rb)
 
				  * The ordering is similar to that of perf_output_{begin,end}, with
			
 
				  * the exception of (B), which should be taken care of by the pmu
			
 
				  * driver, since ordering rules will differ depending on hardware.
			
 
				+ *
			
 
				+ * Call this from pmu::start(); see the comment in perf_aux_output_end()
			
 
				+ * about its use in pmu callbacks. Both can also be called from the PMI
			
 
				+ * handler if needed.
			
 
				  */
			
 
				 void *perf_aux_output_begin(struct perf_output_handle *handle,
			
 
				 			    struct perf_event *event)
			
@@ -287,6 +333,13 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 
				 	if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
			
 
				 		goto err;
			
 
				 
			
 
				+	/*
			
 
				+	 * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
			
 
				+	 * the aux buffer is in perf_mmap_close(), about to get freed.
			
 
				+	 */
			
 
				+	if (!atomic_read(&rb->aux_mmap_count))
			
 
				+		goto err_put;
			
 
				+
			
 
				 	/*
			
 
				 	 * Nesting is not supported for AUX area, make sure nested
			
 
				 	 * writers are caught early
			
@@ -328,10 +381,11 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 
				 	return handle->rb->aux_priv;
			
 
				 
			
 
				 err_put:
			
 
				+	/* can't be last */
			
 
				 	rb_free_aux(rb);
			
 
				 
			
 
				 err:
			
 
				-	ring_buffer_put_async(rb);
			
 
				+	ring_buffer_put(rb);
			
 
				 	handle->event = NULL;
			
 
				 
			
 
				 	return NULL;
			
@@ -342,6 +396,10 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 
				  * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
			
 
				  * pmu driver's responsibility to observe ordering rules of the hardware,
			
 
				  * so that all the data is externally visible before this is called.
			
 
				+ *
			
 
				+ * Note: this has to be called from pmu::stop() callback, as the assumption
			
 
				+ * of the AUX buffer management code is that after pmu::stop(), the AUX
			
 
				+ * transaction must be stopped and therefore drop the AUX reference count.
			
 
				  */
			
 
				 void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
			
 
				 			 bool truncated)
			
@@ -389,8 +447,9 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 
				 	handle->event = NULL;
			
 
				 
			
 
				 	local_set(&rb->aux_nest, 0);
			
 
				+	/* can't be last */
			
 
				 	rb_free_aux(rb);
			
 
				-	ring_buffer_put_async(rb);
			
 
				+	ring_buffer_put(rb);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -471,6 +530,14 @@ static void __rb_free_aux(struct ring_buffer *rb)
 
				 {
			
 
				 	int pg;
			
 
				 
			
 
				+	/*
			
 
				+	 * Should never happen, the last reference should be dropped from
			
 
				+	 * perf_mmap_close() path, which first stops aux transactions (which
			
 
				+	 * in turn are the atomic holders of aux_refcount) and then does the
			
 
				+	 * last rb_free_aux().
			
 
				+	 */
			
 
				+	WARN_ON_ONCE(in_atomic());
			
 
				+
			
 
				 	if (rb->aux_priv) {
			
 
				 		rb->free_aux(rb->aux_priv);
			
 
				 		rb->free_aux = NULL;
			
@@ -582,18 +649,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 
				 void rb_free_aux(struct ring_buffer *rb)
			
 
				 {
			
 
				 	if (atomic_dec_and_test(&rb->aux_refcount))
			
 
				-		irq_work_queue(&rb->irq_work);
			
 
				-}
			
 
				-
			
 
				-static void rb_irq_work(struct irq_work *work)
			
 
				-{
			
 
				-	struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
			
 
				-
			
 
				-	if (!atomic_read(&rb->aux_refcount))
			
 
				 		__rb_free_aux(rb);
			
 
				-
			
 
				-	if (rb->rcu_head.next == (void *)rb)
			
 
				-		call_rcu(&rb->rcu_head, rb_free_rcu);
			
 
				 }
			
 
				 
			
 
				 #ifndef CONFIG_PERF_USE_VMALLOC
			
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -130,6 +130,9 @@ static int one_thousand = 1000;
 
				 #ifdef CONFIG_PRINTK
			
 
				 static int ten_thousand = 10000;
			
 
				 #endif
			
 
				+#ifdef CONFIG_PERF_EVENTS
			
 
				+static int six_hundred_forty_kb = 640 * 1024;
			
 
				+#endif
			
 
				 
			
 
				 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
			
 
				 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
			
@@ -1144,6 +1147,15 @@ static struct ctl_table kern_table[] = {
 
				 		.extra1		= &zero,
			
 
				 		.extra2		= &one_hundred,
			
 
				 	},
			
 
				+	{
			
 
				+		.procname	= "perf_event_max_stack",
			
 
				+		.data		= NULL, /* filled in by handler */
			
 
				+		.maxlen		= sizeof(sysctl_perf_event_max_stack),
			
 
				+		.mode		= 0644,
			
 
				+		.proc_handler	= perf_event_max_stack_handler,
			
 
				+		.extra1		= &zero,
			
 
				+		.extra2		= &six_hundred_forty_kb,
			
 
				+	},
			
 
				 #endif
			
 
				 #ifdef CONFIG_KMEMCHECK
			
 
				 	{
			
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -47,6 +47,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
 
				 		if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
			
 
				 			return -EPERM;
			
 
				 
			
 
				+		if (!is_sampling_event(p_event))
			
 
				+			return 0;
			
 
				+
			
 
				 		/*
			
 
				 		 * We don't allow user space callchains for  function trace
			
 
				 		 * event, due to issues with page faults while tracing page
			
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -137,7 +137,8 @@ libsubcmd_clean:
 
				 	$(call descend,lib/subcmd,clean)
			
 
				 
			
 
				 perf_clean:
			
 
				-	$(call descend,$(@:_clean=),clean)
			
 
				+	$(Q)mkdir -p $(PERF_O) .
			
 
				+	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean
			
 
				 
			
 
				 selftests_clean:
			
 
				 	$(call descend,testing/$(@:_clean=),clean)
			
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -49,6 +49,10 @@ FEATURE_TESTS_BASIC :=			\
 
				 	libslang			\
			
 
				 	libcrypto			\
			
 
				 	libunwind			\
			
 
				+	libunwind-x86			\
			
 
				+	libunwind-x86_64		\
			
 
				+	libunwind-arm			\
			
 
				+	libunwind-aarch64		\
			
 
				 	pthread-attr-setaffinity-np	\
			
 
				 	stackprotector-all		\
			
 
				 	timerfd				\
			
@@ -69,7 +73,9 @@ FEATURE_TESTS_EXTRA :=			\
 
				 	libbabeltrace			\
			
 
				 	liberty				\
			
 
				 	liberty-z			\
			
 
				-	libunwind-debug-frame
			
 
				+	libunwind-debug-frame		\
			
 
				+	libunwind-debug-frame-arm	\
			
 
				+	libunwind-debug-frame-aarch64
			
 
				 
			
 
				 FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
			
 
				 
			
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -27,6 +27,12 @@ FILES=					\
 
				 	test-libcrypto.bin		\
			
 
				 	test-libunwind.bin		\
			
 
				 	test-libunwind-debug-frame.bin	\
			
 
				+	test-libunwind-x86.bin		\
			
 
				+	test-libunwind-x86_64.bin	\
			
 
				+	test-libunwind-arm.bin		\
			
 
				+	test-libunwind-aarch64.bin	\
			
 
				+	test-libunwind-debug-frame-arm.bin	\
			
 
				+	test-libunwind-debug-frame-aarch64.bin	\
			
 
				 	test-pthread-attr-setaffinity-np.bin	\
			
 
				 	test-stackprotector-all.bin	\
			
 
				 	test-timerfd.bin		\
			
@@ -103,6 +109,23 @@ $(OUTPUT)test-libunwind.bin:
 
				 
			
 
				 $(OUTPUT)test-libunwind-debug-frame.bin:
			
 
				 	$(BUILD) -lelf
			
 
				+$(OUTPUT)test-libunwind-x86.bin:
			
 
				+	$(BUILD) -lelf -lunwind-x86
			
 
				+
			
 
				+$(OUTPUT)test-libunwind-x86_64.bin:
			
 
				+	$(BUILD) -lelf -lunwind-x86_64
			
 
				+
			
 
				+$(OUTPUT)test-libunwind-arm.bin:
			
 
				+	$(BUILD) -lelf -lunwind-arm
			
 
				+
			
 
				+$(OUTPUT)test-libunwind-aarch64.bin:
			
 
				+	$(BUILD) -lelf -lunwind-aarch64
			
 
				+
			
 
				+$(OUTPUT)test-libunwind-debug-frame-arm.bin:
			
 
				+	$(BUILD) -lelf -lunwind-arm
			
 
				+
			
 
				+$(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
			
 
				+	$(BUILD) -lelf -lunwind-aarch64
			
 
				 
			
 
				 $(OUTPUT)test-libaudit.bin:
			
 
				 	$(BUILD) -laudit
			
--- a/tools/build/feature/test-bpf.c
+++ b/tools/build/feature/test-bpf.c
@@ -27,10 +27,9 @@ int main(void)
 
				 	attr.log_level = 0;
			
 
				 	attr.kern_version = 0;
			
 
				 
			
 
				-	attr = attr;
			
 
				 	/*
			
 
				 	 * Test existence of __NR_bpf and BPF_PROG_LOAD.
			
 
				 	 * This call should fail if we run the testcase.
			
 
				 	 */
			
 
				-	return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(attr));
			
 
				+	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
			
 
				 }
			
--- a/tools/build/feature/test-libunwind-aarch64.c
+++ b/tools/build/feature/test-libunwind-aarch64.c
@@ -0,0 +1,26 @@
 
				+#include <libunwind-aarch64.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
			
 
				+					       unw_word_t ip,
			
 
				+					       unw_dyn_info_t *di,
			
 
				+					       unw_proc_info_t *pi,
			
 
				+					       int need_unwind_info, void *arg);
			
 
				+
			
 
				+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
			
 
				+
			
 
				+static unw_accessors_t accessors;
			
 
				+
			
 
				+int main(void)
			
 
				+{
			
 
				+	unw_addr_space_t addr_space;
			
 
				+
			
 
				+	addr_space = unw_create_addr_space(&accessors, 0);
			
 
				+	if (addr_space)
			
 
				+		return 0;
			
 
				+
			
 
				+	unw_init_remote(NULL, addr_space, NULL);
			
 
				+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/build/feature/test-libunwind-arm.c
+++ b/tools/build/feature/test-libunwind-arm.c
@@ -0,0 +1,27 @@
 
				+#include <libunwind-arm.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
			
 
				+					       unw_word_t ip,
			
 
				+					       unw_dyn_info_t *di,
			
 
				+					       unw_proc_info_t *pi,
			
 
				+					       int need_unwind_info, void *arg);
			
 
				+
			
 
				+
			
 
				+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
			
 
				+
			
 
				+static unw_accessors_t accessors;
			
 
				+
			
 
				+int main(void)
			
 
				+{
			
 
				+	unw_addr_space_t addr_space;
			
 
				+
			
 
				+	addr_space = unw_create_addr_space(&accessors, 0);
			
 
				+	if (addr_space)
			
 
				+		return 0;
			
 
				+
			
 
				+	unw_init_remote(NULL, addr_space, NULL);
			
 
				+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/build/feature/test-libunwind-debug-frame-aarch64.c
+++ b/tools/build/feature/test-libunwind-debug-frame-aarch64.c
@@ -0,0 +1,16 @@
 
				+#include <libunwind-aarch64.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+extern int
			
 
				+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
			
 
				+				 unw_word_t ip, unw_word_t segbase,
			
 
				+				 const char *obj_name, unw_word_t start,
			
 
				+				 unw_word_t end);
			
 
				+
			
 
				+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
			
 
				+
			
 
				+int main(void)
			
 
				+{
			
 
				+	dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/build/feature/test-libunwind-debug-frame-arm.c
+++ b/tools/build/feature/test-libunwind-debug-frame-arm.c
@@ -0,0 +1,16 @@
 
				+#include <libunwind-arm.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+extern int
			
 
				+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
			
 
				+				 unw_word_t ip, unw_word_t segbase,
			
 
				+				 const char *obj_name, unw_word_t start,
			
 
				+				 unw_word_t end);
			
 
				+
			
 
				+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
			
 
				+
			
 
				+int main(void)
			
 
				+{
			
 
				+	dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/build/feature/test-libunwind-x86.c
+++ b/tools/build/feature/test-libunwind-x86.c
@@ -0,0 +1,27 @@
 
				+#include <libunwind-x86.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
			
 
				+					       unw_word_t ip,
			
 
				+					       unw_dyn_info_t *di,
			
 
				+					       unw_proc_info_t *pi,
			
 
				+					       int need_unwind_info, void *arg);
			
 
				+
			
 
				+
			
 
				+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
			
 
				+
			
 
				+static unw_accessors_t accessors;
			
 
				+
			
 
				+int main(void)
			
 
				+{
			
 
				+	unw_addr_space_t addr_space;
			
 
				+
			
 
				+	addr_space = unw_create_addr_space(&accessors, 0);
			
 
				+	if (addr_space)
			
 
				+		return 0;
			
 
				+
			
 
				+	unw_init_remote(NULL, addr_space, NULL);
			
 
				+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/build/feature/test-libunwind-x86_64.c
+++ b/tools/build/feature/test-libunwind-x86_64.c
@@ -0,0 +1,27 @@
 
				+#include <libunwind-x86_64.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
			
 
				+					       unw_word_t ip,
			
 
				+					       unw_dyn_info_t *di,
			
 
				+					       unw_proc_info_t *pi,
			
 
				+					       int need_unwind_info, void *arg);
			
 
				+
			
 
				+
			
 
				+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
			
 
				+
			
 
				+static unw_accessors_t accessors;
			
 
				+
			
 
				+int main(void)
			
 
				+{
			
 
				+	unw_addr_space_t addr_space;
			
 
				+
			
 
				+	addr_space = unw_create_addr_space(&accessors, 0);
			
 
				+	if (addr_space)
			
 
				+		return 0;
			
 
				+
			
 
				+	unw_init_remote(NULL, addr_space, NULL);
			
 
				+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -351,6 +351,19 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				+int procfs__read_str(const char *entry, char **buf, size_t *sizep)
			
 
				+{
			
 
				+	char path[PATH_MAX];
			
 
				+	const char *procfs = procfs__mountpoint();
			
 
				+
			
 
				+	if (!procfs)
			
 
				+		return -1;
			
 
				+
			
 
				+	snprintf(path, sizeof(path), "%s/%s", procfs, entry);
			
 
				+
			
 
				+	return filename__read_str(path, buf, sizep);
			
 
				+}
			
 
				+
			
 
				 int sysfs__read_ull(const char *entry, unsigned long long *value)
			
 
				 {
			
 
				 	char path[PATH_MAX];
			
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -29,6 +29,8 @@ int filename__read_int(const char *filename, int *value);
 
				 int filename__read_ull(const char *filename, unsigned long long *value);
			
 
				 int filename__read_str(const char *filename, char **buf, size_t *sizep);
			
 
				 
			
 
				+int procfs__read_str(const char *entry, char **buf, size_t *sizep);
			
 
				+
			
 
				 int sysctl__read_int(const char *sysctl, int *value);
			
 
				 int sysfs__read_int(const char *entry, int *value);
			
 
				 int sysfs__read_ull(const char *entry, unsigned long long *value);
			
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -672,6 +672,7 @@ The letters are:
 
				 	d	create a debug log
			
 
				 	g	synthesize a call chain (use with i or x)
			
 
				 	l	synthesize last branch entries (use with i or x)
			
 
				+	s	skip initial number of events
			
 
				 
			
 
				 "Instructions" events look like they were recorded by "perf record -e
			
 
				 instructions".
			
@@ -730,6 +731,12 @@ from one sample to the next.
 
				 
			
 
				 To disable trace decoding entirely, use the option --no-itrace.
			
 
				 
			
 
				+It is also possible to skip events generated (instructions, branches, transactions)
			
 
				+at the beginning. This is useful to ignore initialization code.
			
 
				+
			
 
				+	--itrace=i0nss1000000
			
 
				+
			
 
				+skips the first million instructions.
			
 
				 
			
 
				 dump option
			
 
				 -----------
			
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -7,6 +7,7 @@
 
				 		d	create a debug log
			
 
				 		g	synthesize a call chain (use with i or x)
			
 
				 		l	synthesize last branch entries (use with i or x)
			
 
				+		s       skip initial number of events
			
 
				 
			
 
				 	The default is all events i.e. the same as --itrace=ibxe
			
 
				 
			
@@ -24,3 +25,10 @@
 
				 
			
 
				 	Also the number of last branch entries (default 64, max. 1024) for
			
 
				 	instructions or transactions events can be specified.
			
 
				+
			
 
				+	It is also possible to skip events generated (instructions, branches, transactions)
			
 
				+	at the beginning. This is useful to ignore initialization code.
			
 
				+
			
 
				+	--itrace=i0nss1000000
			
 
				+
			
 
				+	skips the first million instructions.
			
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -33,7 +33,7 @@ OPTIONS
 
				 
			
 
				 -f::
			
 
				 --force::
			
 
				-        Don't complain, do it.
			
 
				+        Don't do ownership validation.
			
 
				 
			
 
				 -v::
			
 
				 --verbose::
			
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -75,7 +75,7 @@ OPTIONS
 
				 
			
 
				 -f::
			
 
				 --force::
			
 
				-       Don't complain, do it.
			
 
				+        Don't do ownership validation.
			
 
				 
			
 
				 --symfs=<directory>::
			
 
				         Look for files with symbols relative to this directory.
			
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -93,6 +93,67 @@ raw encoding of 0x1A8 can be used:
 
				 You should refer to the processor specific documentation for getting these
			
 
				 details. Some of them are referenced in the SEE ALSO section below.
			
 
				 
			
 
				+ARBITRARY PMUS
			
 
				+--------------
			
 
				+
			
 
				+perf also supports an extended syntax for specifying raw parameters
			
 
				+to PMUs. Using this typically requires looking up the specific event
			
 
				+in the CPU vendor specific documentation.
			
 
				+
			
 
				+The available PMUs and their raw parameters can be listed with
			
 
				+
			
 
				+  ls /sys/devices/*/format
			
 
				+
			
 
				+For example, the "LSD.UOPS" core PMU event shown above could
			
 
				+be specified as
			
 
				+
			
 
				+  perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ...
			
 
				+
			
 
				+PER SOCKET PMUS
			
 
				+---------------
			
 
				+
			
 
				+Some PMUs are not associated with a core, but with a whole CPU socket.
			
 
				+Events on these PMUs generally cannot be sampled, but only counted globally
			
 
				+with perf stat -a. They can be bound to one logical CPU, but will measure
			
 
				+all the CPUs in the same socket.
			
 
				+
			
 
				+This example measures memory bandwidth every second
			
 
				+on the first memory controller on socket 0 of an Intel Xeon system
			
 
				+
			
 
				+  perf stat -C 0 -a -e uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ...
			
 
				+
			
 
				+Each memory controller has its own PMU.  Measuring the complete system
			
 
				+bandwidth would require specifying all imc PMUs (see perf list output),
			
 
				+and adding the values together.
			
 
				+
			
 
				+This example measures the combined core power every second
			
 
				+
			
 
				+  perf stat -I 1000 -e power/energy-cores/  -a
			
 
				+
			
 
				+ACCESS RESTRICTIONS
			
 
				+-------------------
			
 
				+
			
 
				+For non root users generally only context switched PMU events are available.
			
 
				+This is normally only the events in the cpu PMU, the predefined events
			
 
				+like cycles and instructions and some software events.
			
 
				+
			
 
				+Other PMUs and global measurements are normally root only.
			
 
				+Some event qualifiers, such as "any", are also root only.
			
 
				+
			
 
				+This can be overridden by setting the kernel.perf_event_paranoid
			
 
				+sysctl to -1, which allows non root to use these events.
			
 
				+
			
 
				+For accessing trace point events perf needs to have read access to
			
 
				+/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
			
 
				+setting.
			
 
				+
			
 
				+TRACING
			
 
				+-------
			
 
				+
			
 
				+Some PMUs control advanced hardware tracing capabilities, such as Intel PT,
			
 
				+that allows low overhead execution tracing.  These are described in a separate
			
 
				+intel-pt.txt document.
			
 
				+
			
 
				 PARAMETERIZED EVENTS
			
 
				 --------------------
			
 
				 
			
@@ -106,6 +167,50 @@ also be supplied. For example:
 
				 
			
 
				   perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...
			
 
				 
			
 
				+EVENT GROUPS
			
 
				+------------
			
 
				+
			
 
				+Perf supports time based multiplexing of events, when the number of events
			
 
				+active exceeds the number of hardware performance counters. Multiplexing
			
 
				+can cause measurement errors when the workload changes its execution
			
 
				+profile.
			
 
				+
			
 
				+When metrics are computed using formulas from event counts, it is useful to
			
 
				+ensure some events are always measured together as a group to minimize multiplexing
			
 
				+errors. Event groups can be specified using { }.
			
 
				+
			
 
				+  perf stat -e '{instructions,cycles}' ...
			
 
				+
			
 
				+The number of available performance counters depends on the CPU. A group
			
 
				+cannot contain more events than available counters.
			
 
				+For example Intel Core CPUs typically have four generic performance counters
			
 
				+for the core, plus three fixed counters for instructions, cycles and
			
 
				+ref-cycles. Some special events have restrictions on which counter they
			
 
				+can schedule, and may not support multiple instances in a single group.
			
 
				+When too many events are specified in the group, none of them will
			
 
				+be measured.
			
 
				+
			
 
				+Globally pinned events can limit the number of counters available for
			
 
				+other groups. On x86 systems, the NMI watchdog pins a counter by default.
			
 
				+The nmi watchdog can be disabled as root with
			
 
				+
			
 
				+	echo 0 > /proc/sys/kernel/nmi_watchdog
			
 
				+
			
 
				+Events from multiple different PMUs cannot be mixed in a group, with
			
 
				+some exceptions for software events.
			
 
				+
			
 
				+LEADER SAMPLING
			
 
				+---------------
			
 
				+
			
 
				+perf also supports group leader sampling using the :S specifier.
			
 
				+
			
 
				+  perf record -e '{cycles,instructions}:S' ...
			
 
				+  perf report --group
			
 
				+
			
 
				+Normally all events in an event group sample, but with :S only
			
 
				+the first event (the leader) samples, and it only reads the values of the
			
 
				+other events in the group.
			
 
				+
			
 
				 OPTIONS
			
 
				 -------
			
 
				 
			
@@ -143,5 +248,5 @@ SEE ALSO
 
				 --------
			
 
				 linkperf:perf-stat[1], linkperf:perf-top[1],
			
 
				 linkperf:perf-record[1],
			
 
				-http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
			
 
				+http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
			
 
				 http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
			
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -48,6 +48,14 @@ OPTIONS
 
				 	option can be passed in record mode. It will be interpreted the same way as perf
			
 
				 	record.
			
 
				 
			
 
				+-K::
			
 
				+--all-kernel::
			
 
				+	Configure all used events to run in kernel space.
			
 
				+
			
 
				+-U::
			
 
				+--all-user::
			
 
				+	Configure all used events to run in user space.
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-record[1], linkperf:perf-report[1]
			
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -347,6 +347,19 @@ Configure all used events to run in kernel space.
 
				 --all-user::
			
 
				 Configure all used events to run in user space.
			
 
				 
			
 
				+--timestamp-filename::
			
 
				+Append timestamp to output file name.
			
 
				+
			
 
				+--switch-output::
			
 
				+Generate multiple perf.data files, timestamp prefixed, switching to a new one
			
 
				+when receiving a SIGUSR2.
			
 
				+
			
 
				+A possible use case is to, given an external event, slice the perf.data file
			
 
				+that gets then processed, possibly via a perf script, to decide if that
			
 
				+particular perf.data snapshot should be kept or not.
			
 
				+
			
 
				+Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-stat[1], linkperf:perf-list[1]
			
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -248,7 +248,7 @@ OPTIONS
 
				 	Note that when using the --itrace option the synthesized callchain size
			
 
				 	will override this value if the synthesized callchain size is bigger.
			
 
				 
			
 
				-	Default: 127
			
 
				+	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
			
 
				 
			
 
				 -G::
			
 
				 --inverted::
			
@@ -285,7 +285,7 @@ OPTIONS
 
				 
			
 
				 -f::
			
 
				 --force::
			
 
				-        Don't complain, do it.
			
 
				+        Don't do ownership validation.
			
 
				 
			
 
				 --symfs=<directory>::
			
 
				         Look for files with symbols relative to this directory.
			
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -50,6 +50,22 @@ OPTIONS
 
				 --dump-raw-trace=::
			
 
				         Display verbose dump of the sched data.
			
 
				 
			
 
				+OPTIONS for 'perf sched map'
			
 
				+----------------------------
			
 
				+
			
 
				+--compact::
			
 
				+	Show only CPUs with activity. Helps visualizing on high core
			
 
				+	count systems.
			
 
				+
			
 
				+--cpus::
			
 
				+	Show just entries with activities for the given CPUs.
			
 
				+
			
 
				+--color-cpus::
			
 
				+	Highlight the given cpus.
			
 
				+
			
 
				+--color-pids::
			
 
				+	Highlight the given pids.
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-record[1]
			
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -259,9 +259,23 @@ include::itrace.txt[]
 
				 --full-source-path::
			
 
				 	Show the full path for source files for srcline output.
			
 
				 
			
 
				+--max-stack::
			
 
				+        Set the stack depth limit when parsing the callchain, anything
			
 
				+        beyond the specified depth will be ignored. This is a trade-off
			
 
				+        between information loss and faster processing especially for
			
 
				+        workloads that can have a very long callchain stack.
			
 
				+        Note that when using the --itrace option the synthesized callchain size
			
 
				+        will override this value if the synthesized callchain size is bigger.
			
 
				+
			
 
				+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
			
 
				+
			
 
				 --ns::
			
 
				 	Use 9 decimal places when displaying time (i.e. show the nanoseconds)
			
 
				 
			
 
				+-f::
			
 
				+--force::
			
 
				+	Don't do ownership validation.
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-record[1], linkperf:perf-script-perl[1],
			
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -177,7 +177,7 @@ Default is to monitor all CPUS.
 
				 	between information loss and faster processing especially for
			
 
				 	workloads that can have a very long callchain stack.
			
 
				 
			
 
				-	Default: 127
			
 
				+	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
			
 
				 
			
 
				 --ignore-callees=<regex>::
			
 
				         Ignore callees of the function(s) matching the given regex.
			
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -117,9 +117,41 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 
				 --syscalls::
			
 
				 	Trace system calls. This options is enabled by default.
			
 
				 
			
 
				+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
			
 
				+        Setup and enable call-graph (stack chain/backtrace) recording.
			
 
				+        See `--call-graph` section in perf-record and perf-report
			
 
				+        man pages for details. The ones that are most useful in 'perf trace'
			
 
				+        are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'.
			
 
				+
			
 
				+        Using this will, for the root user, bump the value of --mmap-pages to 4
			
 
				+        times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
			
 
				+        sysctl. This is done only if the user doesn't specify a --mmap-pages value.
			
 
				+
			
 
				+--kernel-syscall-graph::
			
 
				+	 Show the kernel callchains on the syscall exit path.
			
 
				+
			
 
				 --event::
			
 
				 	Trace other events, see 'perf list' for a complete list.
			
 
				 
			
 
				+--max-stack::
			
 
				+        Set the stack depth limit when parsing the callchain, anything
			
 
				+        beyond the specified depth will be ignored. Note that at this point
			
 
				+        this is just about the presentation part, i.e. the kernel is still
			
 
				+        not limiting, the overhead of callchains needs to be set via the
			
 
				+        knobs in --call-graph dwarf.
			
 
				+
			
 
				+        Implies '--call-graph dwarf' when --call-graph not present on the
			
 
				+        command line, on systems where DWARF unwinding was built in.
			
 
				+
			
 
				+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
			
 
				+
			
 
				+--min-stack::
			
 
				+        Set the stack depth limit when parsing the callchain, anything
			
 
				+        below the specified depth will be ignored. Disabled by default.
			
 
				+
			
 
				+        Implies '--call-graph dwarf' when --call-graph not present on the
			
 
				+        command line, on systems where DWARF unwinding was built in.
			
 
				+
			
 
				 --proc-map-timeout::
			
 
				 	When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
			
 
				 	because the file may be huge. A time out is needed in such cases.
			
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -183,6 +183,11 @@ endif
 
				 include config/Makefile
			
 
				 endif
			
 
				 
			
 
				+ifeq ($(config),0)
			
 
				+include $(srctree)/tools/scripts/Makefile.arch
			
 
				+-include arch/$(ARCH)/Makefile
			
 
				+endif
			
 
				+
			
 
				 # The FEATURE_DUMP_EXPORT holds location of the actual
			
 
				 # FEATURE_DUMP file to be used to bypass feature detection
			
 
				 # (for bpf or any other subproject)
			
@@ -297,8 +302,6 @@ endif
 
				 # because maintaining the nesting to match is a pain.  If
			
 
				 # we had "elif" things would have been much nicer...
			
 
				 
			
 
				--include arch/$(ARCH)/Makefile
			
 
				-
			
 
				 ifneq ($(OUTPUT),)
			
 
				   CFLAGS += -I$(OUTPUT)
			
 
				 endif
			
@@ -390,7 +393,7 @@ endif
 
				 __build-dir = $(subst $(OUTPUT),,$(dir $@))
			
 
				 build-dir   = $(if $(__build-dir),$(__build-dir),.)
			
 
				 
			
 
				-prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep
			
 
				+prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders
			
 
				 
			
 
				 $(OUTPUT)%.o: %.c prepare FORCE
			
 
				 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
			
@@ -430,7 +433,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
				 
			
 
				 LIBPERF_IN := $(OUTPUT)libperf-in.o
			
 
				 
			
 
				-$(LIBPERF_IN): fixdep FORCE
			
 
				+$(LIBPERF_IN): prepare fixdep FORCE
			
 
				 	$(Q)$(MAKE) $(build)=libperf
			
 
				 
			
 
				 $(LIB_FILE): $(LIBPERF_IN)
			
@@ -625,7 +628,7 @@ config-clean:
 
				 	$(call QUIET_CLEAN, config)
			
 
				 	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
			
 
				 
			
 
				-clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
			
 
				+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
			
 
				 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
			
 
				 	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
			
 
				 	$(Q)$(RM) $(OUTPUT).config-detected
			
@@ -662,5 +665,5 @@ FORCE:
 
				 .PHONY: all install clean config-clean strip install-gtk
			
 
				 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
			
 
				 .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
			
 
				-.PHONY: libtraceevent_plugins
			
 
				+.PHONY: libtraceevent_plugins archheaders
			
 
				 
			
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -3,4 +3,5 @@ PERF_HAVE_DWARF_REGS := 1
 
				 endif
			
 
				 
			
 
				 HAVE_KVM_STAT_SUPPORT := 1
			
 
				+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
			
 
				 PERF_HAVE_JITDUMP := 1
			
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -10,19 +10,26 @@
 
				  */
			
 
				 
			
 
				 #include <stddef.h>
			
 
				+#include <errno.h>
			
 
				+#include <string.h>
			
 
				 #include <dwarf-regs.h>
			
 
				-
			
 
				+#include <linux/ptrace.h>
			
 
				+#include <linux/kernel.h>
			
 
				+#include "util.h"
			
 
				 
			
 
				 struct pt_regs_dwarfnum {
			
 
				 	const char *name;
			
 
				 	unsigned int dwarfnum;
			
 
				+	unsigned int ptregs_offset;
			
 
				 };
			
 
				 
			
 
				-#define STR(s) #s
			
 
				-#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
			
 
				-#define GPR_DWARFNUM_NAME(num)	\
			
 
				-	{.name = STR(%gpr##num), .dwarfnum = num}
			
 
				-#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
			
 
				+#define REG_DWARFNUM_NAME(r, num)					\
			
 
				+		{.name = STR(%)STR(r), .dwarfnum = num,			\
			
 
				+		.ptregs_offset = offsetof(struct pt_regs, r)}
			
 
				+#define GPR_DWARFNUM_NAME(num)						\
			
 
				+		{.name = STR(%gpr##num), .dwarfnum = num,		\
			
 
				+		.ptregs_offset = offsetof(struct pt_regs, gpr[num])}
			
 
				+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
			
 
				 
			
 
				 /*
			
 
				  * Reference:
			
@@ -61,12 +68,12 @@ static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
 
				 	GPR_DWARFNUM_NAME(29),
			
 
				 	GPR_DWARFNUM_NAME(30),
			
 
				 	GPR_DWARFNUM_NAME(31),
			
 
				-	REG_DWARFNUM_NAME("%msr",   66),
			
 
				-	REG_DWARFNUM_NAME("%ctr",   109),
			
 
				-	REG_DWARFNUM_NAME("%link",  108),
			
 
				-	REG_DWARFNUM_NAME("%xer",   101),
			
 
				-	REG_DWARFNUM_NAME("%dar",   119),
			
 
				-	REG_DWARFNUM_NAME("%dsisr", 118),
			
 
				+	REG_DWARFNUM_NAME(msr,   66),
			
 
				+	REG_DWARFNUM_NAME(ctr,   109),
			
 
				+	REG_DWARFNUM_NAME(link,  108),
			
 
				+	REG_DWARFNUM_NAME(xer,   101),
			
 
				+	REG_DWARFNUM_NAME(dar,   119),
			
 
				+	REG_DWARFNUM_NAME(dsisr, 118),
			
 
				 	REG_DWARFNUM_END,
			
 
				 };
			
 
				 
			
@@ -86,3 +93,12 @@ const char *get_arch_regstr(unsigned int n)
 
				 			return roff->name;
			
 
				 	return NULL;
			
 
				 }
			
 
				+
			
 
				+int regs_query_register_offset(const char *name)
			
 
				+{
			
 
				+	const struct pt_regs_dwarfnum *roff;
			
 
				+	for (roff = regdwarfnum_table; roff->name != NULL; roff++)
			
 
				+		if (!strcmp(roff->name, name))
			
 
				+			return roff->ptregs_offset;
			
 
				+	return -EINVAL;
			
 
				+}
			
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -19,12 +19,6 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
 
				 	       ehdr.e_type == ET_DYN;
			
 
				 }
			
 
				 
			
 
				-#if defined(_CALL_ELF) && _CALL_ELF == 2
			
 
				-void arch__elf_sym_adjust(GElf_Sym *sym)
			
 
				-{
			
 
				-	sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
			
 
				-}
			
 
				-#endif
			
 
				 #endif
			
 
				 
			
 
				 #if !defined(_CALL_ELF) || _CALL_ELF != 2
			
@@ -65,18 +59,45 @@ bool arch__prefers_symtab(void)
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				+#ifdef HAVE_LIBELF_SUPPORT
			
 
				+void arch__sym_update(struct symbol *s, GElf_Sym *sym)
			
 
				+{
			
 
				+	s->arch_sym = sym->st_other;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 #define PPC64LE_LEP_OFFSET	8
			
 
				 
			
 
				 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
			
 
				-			     struct probe_trace_event *tev, struct map *map)
			
 
				+			     struct probe_trace_event *tev, struct map *map,
			
 
				+			     struct symbol *sym)
			
 
				 {
			
 
				+	int lep_offset;
			
 
				+
			
 
				 	/*
			
 
				-	 * ppc64 ABIv2 local entry point is currently always 2 instructions
			
 
				-	 * (8 bytes) after the global entry point.
			
 
				+	 * When probing at a function entry point, we normally always want the
			
 
				+	 * LEP since that catches calls to the function through both the GEP and
			
 
				+	 * the LEP. Hence, we would like to probe at an offset of 8 bytes if
			
 
				+	 * the user only specified the function entry.
			
 
				+	 *
			
 
				+	 * However, if the user specifies an offset, we fall back to using the
			
 
				+	 * GEP since all userspace applications (objdump/readelf) show function
			
 
				+	 * disassembly with offsets from the GEP.
			
 
				+	 *
			
 
				+	 * In addition, we shouldn't specify an offset for kretprobes.
			
 
				 	 */
			
 
				-	if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
			
 
				-		tev->point.address += PPC64LE_LEP_OFFSET;
			
 
				+	if (pev->point.offset || pev->point.retprobe || !map || !sym)
			
 
				+		return;
			
 
				+
			
 
				+	lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
			
 
				+
			
 
				+	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
			
 
				 		tev->point.offset += PPC64LE_LEP_OFFSET;
			
 
				+	else if (lep_offset) {
			
 
				+		if (pev->uprobes)
			
 
				+			tev->point.address += lep_offset;
			
 
				+		else
			
 
				+			tev->point.offset += lep_offset;
			
 
				 	}
			
 
				 }
			
 
				 #endif
			
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -4,3 +4,26 @@ endif
 
				 HAVE_KVM_STAT_SUPPORT := 1
			
 
				 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
			
 
				 PERF_HAVE_JITDUMP := 1
			
 
				+
			
 
				+###
			
 
				+# Syscall table generation
			
 
				+#
			
 
				+
			
 
				+out    := $(OUTPUT)arch/x86/include/generated/asm
			
 
				+header := $(out)/syscalls_64.c
			
 
				+sys    := $(srctree)/tools/perf/arch/x86/entry/syscalls
			
 
				+systbl := $(sys)/syscalltbl.sh
			
 
				+
			
 
				+# Create output directory if not already present
			
 
				+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
			
 
				+
			
 
				+$(header): $(sys)/syscall_64.tbl $(systbl)
			
 
				+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
			
 
				+        (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
			
 
				+        || echo "Warning: x86_64's syscall_64.tbl differs from kernel" >&2 )) || true
			
 
				+	$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
			
 
				+
			
 
				+clean::
			
 
				+	$(call QUIET_CLEAN, x86) $(RM) $(header)
			
 
				+
			
 
				+archheaders: $(header)
			
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -0,0 +1,376 @@
 
				+#
			
 
				+# 64-bit system call numbers and entry vectors
			
 
				+#
			
 
				+# The format is:
			
 
				+# <number> <abi> <name> <entry point>
			
 
				+#
			
 
				+# The abi is "common", "64" or "x32" for this file.
			
 
				+#
			
 
				+0	common	read			sys_read
			
 
				+1	common	write			sys_write
			
 
				+2	common	open			sys_open
			
 
				+3	common	close			sys_close
			
 
				+4	common	stat			sys_newstat
			
 
				+5	common	fstat			sys_newfstat
			
 
				+6	common	lstat			sys_newlstat
			
 
				+7	common	poll			sys_poll
			
 
				+8	common	lseek			sys_lseek
			
 
				+9	common	mmap			sys_mmap
			
 
				+10	common	mprotect		sys_mprotect
			
 
				+11	common	munmap			sys_munmap
			
 
				+12	common	brk			sys_brk
			
 
				+13	64	rt_sigaction		sys_rt_sigaction
			
 
				+14	common	rt_sigprocmask		sys_rt_sigprocmask
			
 
				+15	64	rt_sigreturn		sys_rt_sigreturn/ptregs
			
 
				+16	64	ioctl			sys_ioctl
			
 
				+17	common	pread64			sys_pread64
			
 
				+18	common	pwrite64		sys_pwrite64
			
 
				+19	64	readv			sys_readv
			
 
				+20	64	writev			sys_writev
			
 
				+21	common	access			sys_access
			
 
				+22	common	pipe			sys_pipe
			
 
				+23	common	select			sys_select
			
 
				+24	common	sched_yield		sys_sched_yield
			
 
				+25	common	mremap			sys_mremap
			
 
				+26	common	msync			sys_msync
			
 
				+27	common	mincore			sys_mincore
			
 
				+28	common	madvise			sys_madvise
			
 
				+29	common	shmget			sys_shmget
			
 
				+30	common	shmat			sys_shmat
			
 
				+31	common	shmctl			sys_shmctl
			
 
				+32	common	dup			sys_dup
			
 
				+33	common	dup2			sys_dup2
			
 
				+34	common	pause			sys_pause
			
 
				+35	common	nanosleep		sys_nanosleep
			
 
				+36	common	getitimer		sys_getitimer
			
 
				+37	common	alarm			sys_alarm
			
 
				+38	common	setitimer		sys_setitimer
			
 
				+39	common	getpid			sys_getpid
			
 
				+40	common	sendfile		sys_sendfile64
			
 
				+41	common	socket			sys_socket
			
 
				+42	common	connect			sys_connect
			
 
				+43	common	accept			sys_accept
			
 
				+44	common	sendto			sys_sendto
			
 
				+45	64	recvfrom		sys_recvfrom
			
 
				+46	64	sendmsg			sys_sendmsg
			
 
				+47	64	recvmsg			sys_recvmsg
			
 
				+48	common	shutdown		sys_shutdown
			
 
				+49	common	bind			sys_bind
			
 
				+50	common	listen			sys_listen
			
 
				+51	common	getsockname		sys_getsockname
			
 
				+52	common	getpeername		sys_getpeername
			
 
				+53	common	socketpair		sys_socketpair
			
 
				+54	64	setsockopt		sys_setsockopt
			
 
				+55	64	getsockopt		sys_getsockopt
			
 
				+56	common	clone			sys_clone/ptregs
			
 
				+57	common	fork			sys_fork/ptregs
			
 
				+58	common	vfork			sys_vfork/ptregs
			
 
				+59	64	execve			sys_execve/ptregs
			
 
				+60	common	exit			sys_exit
			
 
				+61	common	wait4			sys_wait4
			
 
				+62	common	kill			sys_kill
			
 
				+63	common	uname			sys_newuname
			
 
				+64	common	semget			sys_semget
			
 
				+65	common	semop			sys_semop
			
 
				+66	common	semctl			sys_semctl
			
 
				+67	common	shmdt			sys_shmdt
			
 
				+68	common	msgget			sys_msgget
			
 
				+69	common	msgsnd			sys_msgsnd
			
 
				+70	common	msgrcv			sys_msgrcv
			
 
				+71	common	msgctl			sys_msgctl
			
 
				+72	common	fcntl			sys_fcntl
			
 
				+73	common	flock			sys_flock
			
 
				+74	common	fsync			sys_fsync
			
 
				+75	common	fdatasync		sys_fdatasync
			
 
				+76	common	truncate		sys_truncate
			
 
				+77	common	ftruncate		sys_ftruncate
			
 
				+78	common	getdents		sys_getdents
			
 
				+79	common	getcwd			sys_getcwd
			
 
				+80	common	chdir			sys_chdir
			
 
				+81	common	fchdir			sys_fchdir
			
 
				+82	common	rename			sys_rename
			
 
				+83	common	mkdir			sys_mkdir
			
 
				+84	common	rmdir			sys_rmdir
			
 
				+85	common	creat			sys_creat
			
 
				+86	common	link			sys_link
			
 
				+87	common	unlink			sys_unlink
			
 
				+88	common	symlink			sys_symlink
			
 
				+89	common	readlink		sys_readlink
			
 
				+90	common	chmod			sys_chmod
			
 
				+91	common	fchmod			sys_fchmod
			
 
				+92	common	chown			sys_chown
			
 
				+93	common	fchown			sys_fchown
			
 
				+94	common	lchown			sys_lchown
			
 
				+95	common	umask			sys_umask
			
 
				+96	common	gettimeofday		sys_gettimeofday
			
 
				+97	common	getrlimit		sys_getrlimit
			
 
				+98	common	getrusage		sys_getrusage
			
 
				+99	common	sysinfo			sys_sysinfo
			
 
				+100	common	times			sys_times
			
 
				+101	64	ptrace			sys_ptrace
			
 
				+102	common	getuid			sys_getuid
			
 
				+103	common	syslog			sys_syslog
			
 
				+104	common	getgid			sys_getgid
			
 
				+105	common	setuid			sys_setuid
			
 
				+106	common	setgid			sys_setgid
			
 
				+107	common	geteuid			sys_geteuid
			
 
				+108	common	getegid			sys_getegid
			
 
				+109	common	setpgid			sys_setpgid
			
 
				+110	common	getppid			sys_getppid
			
 
				+111	common	getpgrp			sys_getpgrp
			
 
				+112	common	setsid			sys_setsid
			
 
				+113	common	setreuid		sys_setreuid
			
 
				+114	common	setregid		sys_setregid
			
 
				+115	common	getgroups		sys_getgroups
			
 
				+116	common	setgroups		sys_setgroups
			
 
				+117	common	setresuid		sys_setresuid
			
 
				+118	common	getresuid		sys_getresuid
			
 
				+119	common	setresgid		sys_setresgid
			
 
				+120	common	getresgid		sys_getresgid
			
 
				+121	common	getpgid			sys_getpgid
			
 
				+122	common	setfsuid		sys_setfsuid
			
 
				+123	common	setfsgid		sys_setfsgid
			
 
				+124	common	getsid			sys_getsid
			
 
				+125	common	capget			sys_capget
			
 
				+126	common	capset			sys_capset
			
 
				+127	64	rt_sigpending		sys_rt_sigpending
			
 
				+128	64	rt_sigtimedwait		sys_rt_sigtimedwait
			
 
				+129	64	rt_sigqueueinfo		sys_rt_sigqueueinfo
			
 
				+130	common	rt_sigsuspend		sys_rt_sigsuspend
			
 
				+131	64	sigaltstack		sys_sigaltstack
			
 
				+132	common	utime			sys_utime
			
 
				+133	common	mknod			sys_mknod
			
 
				+134	64	uselib
			
 
				+135	common	personality		sys_personality
			
 
				+136	common	ustat			sys_ustat
			
 
				+137	common	statfs			sys_statfs
			
 
				+138	common	fstatfs			sys_fstatfs
			
 
				+139	common	sysfs			sys_sysfs
			
 
				+140	common	getpriority		sys_getpriority
			
 
				+141	common	setpriority		sys_setpriority
			
 
				+142	common	sched_setparam		sys_sched_setparam
			
 
				+143	common	sched_getparam		sys_sched_getparam
			
 
				+144	common	sched_setscheduler	sys_sched_setscheduler
			
 
				+145	common	sched_getscheduler	sys_sched_getscheduler
			
 
				+146	common	sched_get_priority_max	sys_sched_get_priority_max
			
 
				+147	common	sched_get_priority_min	sys_sched_get_priority_min
			
 
				+148	common	sched_rr_get_interval	sys_sched_rr_get_interval
			
 
				+149	common	mlock			sys_mlock
			
 
				+150	common	munlock			sys_munlock
			
 
				+151	common	mlockall		sys_mlockall
			
 
				+152	common	munlockall		sys_munlockall
			
 
				+153	common	vhangup			sys_vhangup
			
 
				+154	common	modify_ldt		sys_modify_ldt
			
 
				+155	common	pivot_root		sys_pivot_root
			
 
				+156	64	_sysctl			sys_sysctl
			
 
				+157	common	prctl			sys_prctl
			
 
				+158	common	arch_prctl		sys_arch_prctl
			
 
				+159	common	adjtimex		sys_adjtimex
			
 
				+160	common	setrlimit		sys_setrlimit
			
 
				+161	common	chroot			sys_chroot
			
 
				+162	common	sync			sys_sync
			
 
				+163	common	acct			sys_acct
			
 
				+164	common	settimeofday		sys_settimeofday
			
 
				+165	common	mount			sys_mount
			
 
				+166	common	umount2			sys_umount
			
 
				+167	common	swapon			sys_swapon
			
 
				+168	common	swapoff			sys_swapoff
			
 
				+169	common	reboot			sys_reboot
			
 
				+170	common	sethostname		sys_sethostname
			
 
				+171	common	setdomainname		sys_setdomainname
			
 
				+172	common	iopl			sys_iopl/ptregs
			
 
				+173	common	ioperm			sys_ioperm
			
 
				+174	64	create_module
			
 
				+175	common	init_module		sys_init_module
			
 
				+176	common	delete_module		sys_delete_module
			
 
				+177	64	get_kernel_syms
			
 
				+178	64	query_module
			
 
				+179	common	quotactl		sys_quotactl
			
 
				+180	64	nfsservctl
			
 
				+181	common	getpmsg
			
 
				+182	common	putpmsg
			
 
				+183	common	afs_syscall
			
 
				+184	common	tuxcall
			
 
				+185	common	security
			
 
				+186	common	gettid			sys_gettid
			
 
				+187	common	readahead		sys_readahead
			
 
				+188	common	setxattr		sys_setxattr
			
 
				+189	common	lsetxattr		sys_lsetxattr
			
 
				+190	common	fsetxattr		sys_fsetxattr
			
 
				+191	common	getxattr		sys_getxattr
			
 
				+192	common	lgetxattr		sys_lgetxattr
			
 
				+193	common	fgetxattr		sys_fgetxattr
			
 
				+194	common	listxattr		sys_listxattr
			
 
				+195	common	llistxattr		sys_llistxattr
			
 
				+196	common	flistxattr		sys_flistxattr
			
 
				+197	common	removexattr		sys_removexattr
			
 
				+198	common	lremovexattr		sys_lremovexattr
			
 
				+199	common	fremovexattr		sys_fremovexattr
			
 
				+200	common	tkill			sys_tkill
			
 
				+201	common	time			sys_time
			
 
				+202	common	futex			sys_futex
			
 
				+203	common	sched_setaffinity	sys_sched_setaffinity
			
 
				+204	common	sched_getaffinity	sys_sched_getaffinity
			
 
				+205	64	set_thread_area
			
 
				+206	64	io_setup		sys_io_setup
			
 
				+207	common	io_destroy		sys_io_destroy
			
 
				+208	common	io_getevents		sys_io_getevents
			
 
				+209	64	io_submit		sys_io_submit
			
 
				+210	common	io_cancel		sys_io_cancel
			
 
				+211	64	get_thread_area
			
 
				+212	common	lookup_dcookie		sys_lookup_dcookie
			
 
				+213	common	epoll_create		sys_epoll_create
			
 
				+214	64	epoll_ctl_old
			
 
				+215	64	epoll_wait_old
			
 
				+216	common	remap_file_pages	sys_remap_file_pages
			
 
				+217	common	getdents64		sys_getdents64
			
 
				+218	common	set_tid_address		sys_set_tid_address
			
 
				+219	common	restart_syscall		sys_restart_syscall
			
 
				+220	common	semtimedop		sys_semtimedop
			
 
				+221	common	fadvise64		sys_fadvise64
			
 
				+222	64	timer_create		sys_timer_create
			
 
				+223	common	timer_settime		sys_timer_settime
			
 
				+224	common	timer_gettime		sys_timer_gettime
			
 
				+225	common	timer_getoverrun	sys_timer_getoverrun
			
 
				+226	common	timer_delete		sys_timer_delete
			
 
				+227	common	clock_settime		sys_clock_settime
			
 
				+228	common	clock_gettime		sys_clock_gettime
			
 
				+229	common	clock_getres		sys_clock_getres
			
 
				+230	common	clock_nanosleep		sys_clock_nanosleep
			
 
				+231	common	exit_group		sys_exit_group
			
 
				+232	common	epoll_wait		sys_epoll_wait
			
 
				+233	common	epoll_ctl		sys_epoll_ctl
			
 
				+234	common	tgkill			sys_tgkill
			
 
				+235	common	utimes			sys_utimes
			
 
				+236	64	vserver
			
 
				+237	common	mbind			sys_mbind
			
 
				+238	common	set_mempolicy		sys_set_mempolicy
			
 
				+239	common	get_mempolicy		sys_get_mempolicy
			
 
				+240	common	mq_open			sys_mq_open
			
 
				+241	common	mq_unlink		sys_mq_unlink
			
 
				+242	common	mq_timedsend		sys_mq_timedsend
			
 
				+243	common	mq_timedreceive		sys_mq_timedreceive
			
 
				+244	64	mq_notify		sys_mq_notify
			
 
				+245	common	mq_getsetattr		sys_mq_getsetattr
			
 
				+246	64	kexec_load		sys_kexec_load
			
 
				+247	64	waitid			sys_waitid
			
 
				+248	common	add_key			sys_add_key
			
 
				+249	common	request_key		sys_request_key
			
 
				+250	common	keyctl			sys_keyctl
			
 
				+251	common	ioprio_set		sys_ioprio_set
			
 
				+252	common	ioprio_get		sys_ioprio_get
			
 
				+253	common	inotify_init		sys_inotify_init
			
 
				+254	common	inotify_add_watch	sys_inotify_add_watch
			
 
				+255	common	inotify_rm_watch	sys_inotify_rm_watch
			
 
				+256	common	migrate_pages		sys_migrate_pages
			
 
				+257	common	openat			sys_openat
			
 
				+258	common	mkdirat			sys_mkdirat
			
 
				+259	common	mknodat			sys_mknodat
			
 
				+260	common	fchownat		sys_fchownat
			
 
				+261	common	futimesat		sys_futimesat
			
 
				+262	common	newfstatat		sys_newfstatat
			
 
				+263	common	unlinkat		sys_unlinkat
			
 
				+264	common	renameat		sys_renameat
			
 
				+265	common	linkat			sys_linkat
			
 
				+266	common	symlinkat		sys_symlinkat
			
 
				+267	common	readlinkat		sys_readlinkat
			
 
				+268	common	fchmodat		sys_fchmodat
			
 
				+269	common	faccessat		sys_faccessat
			
 
				+270	common	pselect6		sys_pselect6
			
 
				+271	common	ppoll			sys_ppoll
			
 
				+272	common	unshare			sys_unshare
			
 
				+273	64	set_robust_list		sys_set_robust_list
			
 
				+274	64	get_robust_list		sys_get_robust_list
			
 
				+275	common	splice			sys_splice
			
 
				+276	common	tee			sys_tee
			
 
				+277	common	sync_file_range		sys_sync_file_range
			
 
				+278	64	vmsplice		sys_vmsplice
			
 
				+279	64	move_pages		sys_move_pages
			
 
				+280	common	utimensat		sys_utimensat
			
 
				+281	common	epoll_pwait		sys_epoll_pwait
			
 
				+282	common	signalfd		sys_signalfd
			
 
				+283	common	timerfd_create		sys_timerfd_create
			
 
				+284	common	eventfd			sys_eventfd
			
 
				+285	common	fallocate		sys_fallocate
			
 
				+286	common	timerfd_settime		sys_timerfd_settime
			
 
				+287	common	timerfd_gettime		sys_timerfd_gettime
			
 
				+288	common	accept4			sys_accept4
			
 
				+289	common	signalfd4		sys_signalfd4
			
 
				+290	common	eventfd2		sys_eventfd2
			
 
				+291	common	epoll_create1		sys_epoll_create1
			
 
				+292	common	dup3			sys_dup3
			
 
				+293	common	pipe2			sys_pipe2
			
 
				+294	common	inotify_init1		sys_inotify_init1
			
 
				+295	64	preadv			sys_preadv
			
 
				+296	64	pwritev			sys_pwritev
			
 
				+297	64	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo
			
 
				+298	common	perf_event_open		sys_perf_event_open
			
 
				+299	64	recvmmsg		sys_recvmmsg
			
 
				+300	common	fanotify_init		sys_fanotify_init
			
 
				+301	common	fanotify_mark		sys_fanotify_mark
			
 
				+302	common	prlimit64		sys_prlimit64
			
 
				+303	common	name_to_handle_at	sys_name_to_handle_at
			
 
				+304	common	open_by_handle_at	sys_open_by_handle_at
			
 
				+305	common	clock_adjtime		sys_clock_adjtime
			
 
				+306	common	syncfs			sys_syncfs
			
 
				+307	64	sendmmsg		sys_sendmmsg
			
 
				+308	common	setns			sys_setns
			
 
				+309	common	getcpu			sys_getcpu
			
 
				+310	64	process_vm_readv	sys_process_vm_readv
			
 
				+311	64	process_vm_writev	sys_process_vm_writev
			
 
				+312	common	kcmp			sys_kcmp
			
 
				+313	common	finit_module		sys_finit_module
			
 
				+314	common	sched_setattr		sys_sched_setattr
			
 
				+315	common	sched_getattr		sys_sched_getattr
			
 
				+316	common	renameat2		sys_renameat2
			
 
				+317	common	seccomp			sys_seccomp
			
 
				+318	common	getrandom		sys_getrandom
			
 
				+319	common	memfd_create		sys_memfd_create
			
 
				+320	common	kexec_file_load		sys_kexec_file_load
			
 
				+321	common	bpf			sys_bpf
			
 
				+322	64	execveat		sys_execveat/ptregs
			
 
				+323	common	userfaultfd		sys_userfaultfd
			
 
				+324	common	membarrier		sys_membarrier
			
 
				+325	common	mlock2			sys_mlock2
			
 
				+326	common	copy_file_range		sys_copy_file_range
			
 
				+327	64	preadv2			sys_preadv2
			
 
				+328	64	pwritev2		sys_pwritev2
			
 
				+
			
 
				+#
			
 
				+# x32-specific system call numbers start at 512 to avoid cache impact
			
 
				+# for native 64-bit operation.
			
 
				+#
			
 
				+512	x32	rt_sigaction		compat_sys_rt_sigaction
			
 
				+513	x32	rt_sigreturn		sys32_x32_rt_sigreturn
			
 
				+514	x32	ioctl			compat_sys_ioctl
			
 
				+515	x32	readv			compat_sys_readv
			
 
				+516	x32	writev			compat_sys_writev
			
 
				+517	x32	recvfrom		compat_sys_recvfrom
			
 
				+518	x32	sendmsg			compat_sys_sendmsg
			
 
				+519	x32	recvmsg			compat_sys_recvmsg
			
 
				+520	x32	execve			compat_sys_execve/ptregs
			
 
				+521	x32	ptrace			compat_sys_ptrace
			
 
				+522	x32	rt_sigpending		compat_sys_rt_sigpending
			
 
				+523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
			
 
				+524	x32	rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
			
 
				+525	x32	sigaltstack		compat_sys_sigaltstack
			
 
				+526	x32	timer_create		compat_sys_timer_create
			
 
				+527	x32	mq_notify		compat_sys_mq_notify
			
 
				+528	x32	kexec_load		compat_sys_kexec_load
			
 
				+529	x32	waitid			compat_sys_waitid
			
 
				+530	x32	set_robust_list		compat_sys_set_robust_list
			
 
				+531	x32	get_robust_list		compat_sys_get_robust_list
			
 
				+532	x32	vmsplice		compat_sys_vmsplice
			
 
				+533	x32	move_pages		compat_sys_move_pages
			
 
				+534	x32	preadv			compat_sys_preadv64
			
 
				+535	x32	pwritev			compat_sys_pwritev64
			
 
				+536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
			
 
				+537	x32	recvmmsg		compat_sys_recvmmsg
			
 
				+538	x32	sendmmsg		compat_sys_sendmmsg
			
 
				+539	x32	process_vm_readv	compat_sys_process_vm_readv
			
 
				+540	x32	process_vm_writev	compat_sys_process_vm_writev
			
 
				+541	x32	setsockopt		compat_sys_setsockopt
			
 
				+542	x32	getsockopt		compat_sys_getsockopt
			
 
				+543	x32	io_setup		compat_sys_io_setup
			
 
				+544	x32	io_submit		compat_sys_io_submit
			
 
				+545	x32	execveat		compat_sys_execveat/ptregs
			
--- a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
@@ -0,0 +1,39 @@
 
				+#!/bin/sh
			
 
				+
			
 
				+in="$1"
			
 
				+arch="$2"
			
 
				+
			
 
				+syscall_macro() {
			
 
				+    nr="$1"
			
 
				+    name="$2"
			
 
				+
			
 
				+    echo "	[$nr] = \"$name\","
			
 
				+}
			
 
				+
			
 
				+emit() {
			
 
				+    nr="$1"
			
 
				+    entry="$2"
			
 
				+
			
 
				+    syscall_macro "$nr" "$entry"
			
 
				+}
			
 
				+
			
 
				+echo "static const char *syscalltbl_${arch}[] = {"
			
 
				+
			
 
				+sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
			
 
				+grep '^[0-9]' "$in" | sort -n > $sorted_table
			
 
				+
			
 
				+max_nr=0
			
 
				+while read nr abi name entry compat; do
			
 
				+    if [ $nr -ge 512 ] ; then # discard compat sycalls
			
 
				+        break
			
 
				+    fi
			
 
				+
			
 
				+    emit "$nr" "$name"
			
 
				+    max_nr=$nr
			
 
				+done < $sorted_table
			
 
				+
			
 
				+rm -f $sorted_table
			
 
				+
			
 
				+echo "};"
			
 
				+
			
 
				+echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}"
			
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -71,7 +71,7 @@ int test__perf_time_to_tsc(int subtest __maybe_unused)
 
				 
			
 
				 	CHECK__(parse_events(evlist, "cycles:u", NULL));
			
 
				 
			
 
				-	perf_evlist__config(evlist, &opts);
			
 
				+	perf_evlist__config(evlist, &opts, NULL);
			
 
				 
			
 
				 	evsel = perf_evlist__first(evlist);
			
 
				 
			
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -438,6 +438,11 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
 
				 	if (!intel_bts_pmu)
			
 
				 		return NULL;
			
 
				 
			
 
				+	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
			
 
				+		*err = -errno;
			
 
				+		return NULL;
			
 
				+	}
			
 
				+
			
 
				 	btsr = zalloc(sizeof(struct intel_bts_recording));
			
 
				 	if (!btsr) {
			
 
				 		*err = -ENOMEM;
			
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -1027,6 +1027,11 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
 
				 	if (!intel_pt_pmu)
			
 
				 		return NULL;
			
 
				 
			
 
				+	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
			
 
				+		*err = -errno;
			
 
				+		return NULL;
			
 
				+	}
			
 
				+
			
 
				 	ptr = zalloc(sizeof(struct intel_pt_recording));
			
 
				 	if (!ptr) {
			
 
				 		*err = -ENOMEM;
			
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -7,7 +7,6 @@
 
				 #include <linux/types.h>
			
 
				 #include "../../util/debug.h"
			
 
				 #include "../../util/tsc.h"
			
 
				-#include "tsc.h"
			
 
				 
			
 
				 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
			
 
				 			     struct perf_tsc_conversion *tc)
			
@@ -46,3 +45,34 @@ u64 rdtsc(void)
 
				 
			
 
				 	return low | ((u64)high) << 32;
			
 
				 }
			
 
				+
			
 
				+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
			
 
				+				struct perf_tool *tool,
			
 
				+				perf_event__handler_t process,
			
 
				+				struct machine *machine)
			
 
				+{
			
 
				+	union perf_event event = {	/* the record to synthesize: a PERF_RECORD_TIME_CONV carrying the TSC conversion */
			
 
				+		.time_conv = {
			
 
				+			.header = {
			
 
				+				.type = PERF_RECORD_TIME_CONV,
			
 
				+				.size = sizeof(struct time_conv_event),
			
 
				+			},
			
 
				+		},
			
 
				+	};
			
 
				+	struct perf_tsc_conversion tc;
			
 
				+	int err;
			
 
				+
			
 
				+	err = perf_read_tsc_conversion(pc, &tc);
			
 
				+	if (err == -EOPNOTSUPP)	/* not a failure: report success without synthesizing anything */
			
 
				+		return 0;
			
 
				+	if (err)	/* any other error from perf_read_tsc_conversion() is passed back unchanged */
			
 
				+		return err;
			
 
				+
			
 
				+	pr_debug2("Synthesizing TSC conversion information\n");
			
 
				+
			
 
				+	event.time_conv.time_mult  = tc.time_mult;
			
 
				+	event.time_conv.time_shift = tc.time_shift;
			
 
				+	event.time_conv.time_zero  = tc.time_zero;
			
 
				+
			
 
				+	return process(tool, &event, NULL, machine);	/* hand the filled-in event to the caller's handler; its result is returned as-is */
			
 
				+}
			
--- a/tools/perf/arch/x86/util/tsc.h
+++ b/tools/perf/arch/x86/util/tsc.h
@@ -1,17 +0,0 @@
 
				-#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
			
 
				-#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
			
 
				-
			
 
				-#include <linux/types.h>
			
 
				-
			
 
				-struct perf_tsc_conversion {
			
 
				-	u16 time_shift;
			
 
				-	u32 time_mult;
			
 
				-	u64 time_zero;
			
 
				-};
			
 
				-
			
 
				-struct perf_event_mmap_page;
			
 
				-
			
 
				-int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
			
 
				-			     struct perf_tsc_conversion *tc);
			
 
				-
			
 
				-#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
			
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -83,7 +83,7 @@ static void *workerfn(void *arg)
 
				 	do {
			
 
				 		int ret;
			
 
				 	again:
			
 
				-		ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
			
 
				+		ret = futex_lock_pi(w->futex, NULL, futex_flag);
			
 
				 
			
 
				 		if (ret) { /* handle lock acquisition */
			
 
				 			if (!silent)
			
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -57,13 +57,11 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
 
				 
			
 
				 /**
			
 
				  * futex_lock_pi() - block on uaddr as a PI mutex
			
 
				- * @detect:	whether (1) or not (0) to perform deadlock detection
			
 
				  */
			
 
				 static inline int
			
 
				-futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
			
 
				-	      int opflags)
			
 
				+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags)
			
 
				 {
			
 
				-	return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
			
 
				+	return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags);
			
 
				 }
			
 
				 
			
 
				 /**
			
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -6,6 +6,7 @@
 
				  * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
			
 
				  */
			
 
				 
			
 
				+#include "debug.h"
			
 
				 #include "../perf.h"
			
 
				 #include "../util/util.h"
			
 
				 #include <subcmd/parse-options.h>
			
@@ -63,14 +64,16 @@ static struct perf_event_attr cycle_attr = {
 
				 	.config		= PERF_COUNT_HW_CPU_CYCLES
			
 
				 };
			
 
				 
			
 
				-static void init_cycles(void)
			
 
				+static int init_cycles(void)
			
 
				 {
			
 
				 	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
			
 
				 
			
 
				-	if (cycles_fd < 0 && errno == ENOSYS)
			
 
				-		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			
 
				-	else
			
 
				-		BUG_ON(cycles_fd < 0);
			
 
				+	if (cycles_fd < 0 && errno == ENOSYS) {
			
 
				+		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				+	return cycles_fd;
			
 
				 }
			
 
				 
			
 
				 static u64 get_cycles(void)
			
@@ -155,8 +158,13 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
 
				 
			
 
				 	argc = parse_options(argc, argv, options, info->usage, 0);
			
 
				 
			
 
				-	if (use_cycles)
			
 
				-		init_cycles();
			
 
				+	if (use_cycles) {
			
 
				+		i = init_cycles();
			
 
				+		if (i < 0) {
			
 
				+			fprintf(stderr, "Failed to open cycles counter\n");
			
 
				+			return i;
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	size = (size_t)perf_atoll((char *)size_str);
			
 
				 	size_total = (double)size * nr_loops;
			
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -12,6 +12,7 @@
 
				 #include <subcmd/parse-options.h>
			
 
				 #include "util/util.h"
			
 
				 #include "util/debug.h"
			
 
				+#include "util/config.h"
			
 
				 
			
 
				 static bool use_system_config, use_user_config;
			
 
				 
			
@@ -32,13 +33,28 @@ static struct option config_options[] = {
 
				 	OPT_END()
			
 
				 };
			
 
				 
			
 
				-static int show_config(const char *key, const char *value,
			
 
				-		       void *cb __maybe_unused)
			
 
				+static int show_config(struct perf_config_set *set)
			
 
				 {
			
 
				-	if (value)
			
 
				-		printf("%s=%s\n", key, value);
			
 
				-	else
			
 
				-		printf("%s\n", key);
			
 
				+	struct perf_config_section *section;
			
 
				+	struct perf_config_item *item;
			
 
				+	struct list_head *sections;
			
 
				+
			
 
				+	if (set == NULL)
			
 
				+		return -1;
			
 
				+
			
 
				+	sections = &set->sections;
			
 
				+	if (list_empty(sections))
			
 
				+		return -1;
			
 
				+
			
 
				+	list_for_each_entry(section, sections, node) {
			
 
				+		list_for_each_entry(item, &section->items, node) {
			
 
				+			char *value = item->value;
			
 
				+
			
 
				+			if (value)
			
 
				+				printf("%s.%s=%s\n", section->name,
			
 
				+				       item->name, value);
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -46,6 +62,7 @@ static int show_config(const char *key, const char *value,
 
				 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				+	struct perf_config_set *set;
			
 
				 	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
			
 
				 
			
 
				 	argc = parse_options(argc, argv, config_options, config_usage,
			
@@ -63,13 +80,19 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	else if (use_user_config)
			
 
				 		config_exclusive_filename = user_config;
			
 
				 
			
 
				+	set = perf_config_set__new();
			
 
				+	if (!set) {
			
 
				+		ret = -1;
			
 
				+		goto out_err;
			
 
				+	}
			
 
				+
			
 
				 	switch (actions) {
			
 
				 	case ACTION_LIST:
			
 
				 		if (argc) {
			
 
				 			pr_err("Error: takes no arguments\n");
			
 
				 			parse_options_usage(config_usage, config_options, "l", 1);
			
 
				 		} else {
			
 
				-			ret = perf_config(show_config, NULL);
			
 
				+			ret = show_config(set);
			
 
				 			if (ret < 0) {
			
 
				 				const char * config_filename = config_exclusive_filename;
			
 
				 				if (!config_exclusive_filename)
			
@@ -83,5 +106,7 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		usage_with_options(config_usage, config_options);
			
 
				 	}
			
 
				 
			
 
				+	perf_config_set__delete(set);
			
 
				+out_err:
			
 
				 	return ret;
			
 
				 }
			
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -428,7 +428,7 @@ static void hists__baseline_only(struct hists *hists)
 
				 	struct rb_root *root;
			
 
				 	struct rb_node *next;
			
 
				 
			
 
				-	if (sort__need_collapse)
			
 
				+	if (hists__has(hists, need_collapse))
			
 
				 		root = &hists->entries_collapsed;
			
 
				 	else
			
 
				 		root = hists->entries_in;
			
@@ -450,7 +450,7 @@ static void hists__precompute(struct hists *hists)
 
				 	struct rb_root *root;
			
 
				 	struct rb_node *next;
			
 
				 
			
 
				-	if (sort__need_collapse)
			
 
				+	if (hists__has(hists, need_collapse))
			
 
				 		root = &hists->entries_collapsed;
			
 
				 	else
			
 
				 		root = hists->entries_in;
			
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -61,6 +61,7 @@ static int check_emacsclient_version(void)
 
				 	struct child_process ec_process;
			
 
				 	const char *argv_ec[] = { "emacsclient", "--version", NULL };
			
 
				 	int version;
			
 
				+	int ret = -1;
			
 
				 
			
 
				 	/* emacsclient prints its version number on stderr */
			
 
				 	memset(&ec_process, 0, sizeof(ec_process));
			
@@ -71,7 +72,10 @@ static int check_emacsclient_version(void)
 
				 		fprintf(stderr, "Failed to start emacsclient.\n");
			
 
				 		return -1;
			
 
				 	}
			
 
				-	strbuf_read(&buffer, ec_process.err, 20);
			
 
				+	if (strbuf_read(&buffer, ec_process.err, 20) < 0) {
			
 
				+		fprintf(stderr, "Failed to read emacsclient version\n");
			
 
				+		goto out;
			
 
				+	}
			
 
				 	close(ec_process.err);
			
 
				 
			
 
				 	/*
			
@@ -82,8 +86,7 @@ static int check_emacsclient_version(void)
 
				 
			
 
				 	if (prefixcmp(buffer.buf, "emacsclient")) {
			
 
				 		fprintf(stderr, "Failed to parse emacsclient version.\n");
			
 
				-		strbuf_release(&buffer);
			
 
				-		return -1;
			
 
				+		goto out;
			
 
				 	}
			
 
				 
			
 
				 	version = atoi(buffer.buf + strlen("emacsclient"));
			
@@ -92,12 +95,11 @@ static int check_emacsclient_version(void)
 
				 		fprintf(stderr,
			
 
				 			"emacsclient version '%d' too old (< 22).\n",
			
 
				 			version);
			
 
				-		strbuf_release(&buffer);
			
 
				-		return -1;
			
 
				-	}
			
 
				-
			
 
				+	} else
			
 
				+		ret = 0;
			
 
				+out:
			
 
				 	strbuf_release(&buffer);
			
 
				-	return 0;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 static void exec_woman_emacs(const char *path, const char *page)
			
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -748,6 +748,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 			.auxtrace_info	= perf_event__repipe_op2_synth,
			
 
				 			.auxtrace	= perf_event__repipe_auxtrace,
			
 
				 			.auxtrace_error	= perf_event__repipe_op2_synth,
			
 
				+			.time_conv	= perf_event__repipe_op2_synth,
			
 
				 			.finished_round	= perf_event__repipe_oe_synth,
			
 
				 			.build_id	= perf_event__repipe_op2_synth,
			
 
				 			.id_index	= perf_event__repipe_op2_synth,
			
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -375,7 +375,7 @@ static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
 
				 	}
			
 
				 
			
 
				 	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
			
 
				-	sample__resolve_callchain(sample, NULL, evsel, &al, 16);
			
 
				+	sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
			
 
				 
			
 
				 	callchain_cursor_commit(&callchain_cursor);
			
 
				 	while (true) {
			
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -982,7 +982,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
 
				 	struct perf_evlist *evlist = kvm->evlist;
			
 
				 	char sbuf[STRERR_BUFSIZE];
			
 
				 
			
 
				-	perf_evlist__config(evlist, &kvm->opts);
			
 
				+	perf_evlist__config(evlist, &kvm->opts, NULL);
			
 
				 
			
 
				 	/*
			
 
				 	 * Note: exclude_{guest,host} do not apply here.
			
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -62,19 +62,22 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 
				 	int rec_argc, i = 0, j;
			
 
				 	const char **rec_argv;
			
 
				 	int ret;
			
 
				+	bool all_user = false, all_kernel = false;
			
 
				 	struct option options[] = {
			
 
				 	OPT_CALLBACK('e', "event", &mem, "event",
			
 
				 		     "event selector. use 'perf mem record -e list' to list available events",
			
 
				 		     parse_record_events),
			
 
				 	OPT_INCR('v', "verbose", &verbose,
			
 
				 		 "be more verbose (show counter open errors, etc)"),
			
 
				+	OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"),
			
 
				+	OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				 
			
 
				 	argc = parse_options(argc, argv, options, record_mem_usage,
			
 
				 			     PARSE_OPT_STOP_AT_NON_OPTION);
			
 
				 
			
 
				-	rec_argc = argc + 7; /* max number of arguments */
			
 
				+	rec_argc = argc + 9; /* max number of arguments */
			
 
				 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
			
 
				 	if (!rec_argv)
			
 
				 		return -1;
			
@@ -103,6 +106,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 
				 		rec_argv[i++] = perf_mem_events__name(j);
			
 
				 	};
			
 
				 
			
 
				+	if (all_user)
			
 
				+		rec_argv[i++] = "--all-user";
			
 
				+
			
 
				+	if (all_kernel)
			
 
				+		rec_argv[i++] = "--all-kernel";
			
 
				+
			
 
				 	for (j = 0; j < argc; j++, i++)
			
 
				 		rec_argv[i] = argv[j];
			
 
				 
			
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -29,10 +29,12 @@
 
				 #include "util/data.h"
			
 
				 #include "util/perf_regs.h"
			
 
				 #include "util/auxtrace.h"
			
 
				+#include "util/tsc.h"
			
 
				 #include "util/parse-branch-options.h"
			
 
				 #include "util/parse-regs-options.h"
			
 
				 #include "util/llvm-utils.h"
			
 
				 #include "util/bpf-loader.h"
			
 
				+#include "util/trigger.h"
			
 
				 #include "asm/bug.h"
			
 
				 
			
 
				 #include <unistd.h>
			
@@ -55,6 +57,8 @@ struct record {
 
				 	bool			no_buildid_cache;
			
 
				 	bool			no_buildid_cache_set;
			
 
				 	bool			buildid_all;
			
 
				+	bool			timestamp_filename;
			
 
				+	bool			switch_output;
			
 
				 	unsigned long long	samples;
			
 
				 };
			
 
				 
			
@@ -124,9 +128,10 @@ static int record__mmap_read(struct record *rec, int idx)
 
				 static volatile int done;
			
 
				 static volatile int signr = -1;
			
 
				 static volatile int child_finished;
			
 
				-static volatile int auxtrace_snapshot_enabled;
			
 
				-static volatile int auxtrace_snapshot_err;
			
 
				+
			
 
				 static volatile int auxtrace_record__snapshot_started;
			
 
				+static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
			
 
				+static DEFINE_TRIGGER(switch_output_trigger);
			
 
				 
			
 
				 static void sig_handler(int sig)
			
 
				 {
			
@@ -244,11 +249,12 @@ static void record__read_auxtrace_snapshot(struct record *rec)
 
				 {
			
 
				 	pr_debug("Recording AUX area tracing snapshot\n");
			
 
				 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
			
 
				-		auxtrace_snapshot_err = -1;
			
 
				+		trigger_error(&auxtrace_snapshot_trigger);
			
 
				 	} else {
			
 
				-		auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
			
 
				-		if (!auxtrace_snapshot_err)
			
 
				-			auxtrace_snapshot_enabled = 1;
			
 
				+		if (auxtrace_record__snapshot_finish(rec->itr))
			
 
				+			trigger_error(&auxtrace_snapshot_trigger);
			
 
				+		else
			
 
				+			trigger_ready(&auxtrace_snapshot_trigger);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -283,7 +289,7 @@ static int record__open(struct record *rec)
 
				 	struct record_opts *opts = &rec->opts;
			
 
				 	int rc = 0;
			
 
				 
			
 
				-	perf_evlist__config(evlist, opts);
			
 
				+	perf_evlist__config(evlist, opts, &callchain_param);
			
 
				 
			
 
				 	evlist__for_each(evlist, pos) {
			
 
				 try_again:
			
@@ -494,6 +500,73 @@ record__finish_output(struct record *rec)
 
				 	return;
			
 
				 }
			
 
				 
			
 
				+static int record__synthesize_workload(struct record *rec)
			
 
				+{
			
 
				+	struct {
			
 
				+		struct thread_map map;
			
 
				+		struct thread_map_data map_data;
			
 
				+	} thread_map;
			
 
				+
			
 
				+	thread_map.map.nr = 1;
			
 
				+	thread_map.map.map[0].pid = rec->evlist->workload.pid;
			
 
				+	thread_map.map.map[0].comm = NULL;
			
 
				+	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
			
 
				+						 process_synthesized_event,
			
 
				+						 &rec->session->machines.host,
			
 
				+						 rec->opts.sample_address,
			
 
				+						 rec->opts.proc_map_timeout);
			
 
				+}
			
 
				+
			
 
				+static int record__synthesize(struct record *rec);
			
 
				+
			
 
				+static int
			
 
				+record__switch_output(struct record *rec, bool at_exit)
			
 
				+{
			
 
				+	struct perf_data_file *file = &rec->file;
			
 
				+	int fd, err;
			
 
				+
			
 
				+	/* Same Size:      "2015122520103046"*/
			
 
				+	char timestamp[] = "InvalidTimestamp";
			
 
				+
			
 
				+	rec->samples = 0;
			
 
				+	record__finish_output(rec);
			
 
				+	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
			
 
				+	if (err) {
			
 
				+		pr_err("Failed to get current timestamp\n");
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	fd = perf_data_file__switch(file, timestamp,
			
 
				+				    rec->session->header.data_offset,
			
 
				+				    at_exit);
			
 
				+	if (fd >= 0 && !at_exit) {
			
 
				+		rec->bytes_written = 0;
			
 
				+		rec->session->header.data_size = 0;
			
 
				+	}
			
 
				+
			
 
				+	if (!quiet)
			
 
				+		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			
 
				+			file->path, timestamp);
			
 
				+
			
 
				+	/* Output tracking events */
			
 
				+	if (!at_exit) {
			
 
				+		record__synthesize(rec);
			
 
				+
			
 
				+		/*
			
 
				+		 * In 'perf record --switch-output' without -a,
			
 
				+		 * record__synthesize() in record__switch_output() won't
			
 
				+		 * generate tracking events because there's no thread_map
			
 
				+		 * in evlist. As a result, the newly created perf.data doesn't
			
 
				+		 * contain map and comm information.
			
 
				+		 * Create a fake thread_map and directly call
			
 
				+		 * perf_event__synthesize_thread_map() for those events.
			
 
				+		 */
			
 
				+		if (target__none(&rec->opts.target))
			
 
				+			record__synthesize_workload(rec);
			
 
				+	}
			
 
				+	return fd;
			
 
				+}
			
 
				+
			
 
				 static volatile int workload_exec_errno;
			
 
				 
			
 
				 /*
			
@@ -512,6 +585,15 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 
				 
			
 
				 static void snapshot_sig_handler(int sig);
			
 
				 
			
 
				+int __weak
			
 
				+perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			
 
				+			    struct perf_tool *tool __maybe_unused,
			
 
				+			    perf_event__handler_t process __maybe_unused,
			
 
				+			    struct machine *machine __maybe_unused)
			
 
				+{
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int record__synthesize(struct record *rec)
			
 
				 {
			
 
				 	struct perf_session *session = rec->session;
			
@@ -549,6 +631,11 @@ static int record__synthesize(struct record *rec)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
			
 
				+					  process_synthesized_event, machine);
			
 
				+	if (err)
			
 
				+		goto out;
			
 
				+
			
 
				 	if (rec->opts.full_auxtrace) {
			
 
				 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
			
 
				 					session, process_synthesized_event);
			
@@ -600,10 +687,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
				 	signal(SIGCHLD, sig_handler);
			
 
				 	signal(SIGINT, sig_handler);
			
 
				 	signal(SIGTERM, sig_handler);
			
 
				-	if (rec->opts.auxtrace_snapshot_mode)
			
 
				+
			
 
				+	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
			
 
				 		signal(SIGUSR2, snapshot_sig_handler);
			
 
				-	else
			
 
				+		if (rec->opts.auxtrace_snapshot_mode)
			
 
				+			trigger_on(&auxtrace_snapshot_trigger);
			
 
				+		if (rec->switch_output)
			
 
				+			trigger_on(&switch_output_trigger);
			
 
				+	} else {
			
 
				 		signal(SIGUSR2, SIG_IGN);
			
 
				+	}
			
 
				 
			
 
				 	session = perf_session__new(file, false, tool);
			
 
				 	if (session == NULL) {
			
@@ -729,27 +822,45 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
				 		perf_evlist__enable(rec->evlist);
			
 
				 	}
			
 
				 
			
 
				-	auxtrace_snapshot_enabled = 1;
			
 
				+	trigger_ready(&auxtrace_snapshot_trigger);
			
 
				+	trigger_ready(&switch_output_trigger);
			
 
				 	for (;;) {
			
 
				 		unsigned long long hits = rec->samples;
			
 
				 
			
 
				 		if (record__mmap_read_all(rec) < 0) {
			
 
				-			auxtrace_snapshot_enabled = 0;
			
 
				+			trigger_error(&auxtrace_snapshot_trigger);
			
 
				+			trigger_error(&switch_output_trigger);
			
 
				 			err = -1;
			
 
				 			goto out_child;
			
 
				 		}
			
 
				 
			
 
				 		if (auxtrace_record__snapshot_started) {
			
 
				 			auxtrace_record__snapshot_started = 0;
			
 
				-			if (!auxtrace_snapshot_err)
			
 
				+			if (!trigger_is_error(&auxtrace_snapshot_trigger))
			
 
				 				record__read_auxtrace_snapshot(rec);
			
 
				-			if (auxtrace_snapshot_err) {
			
 
				+			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
			
 
				 				pr_err("AUX area tracing snapshot failed\n");
			
 
				 				err = -1;
			
 
				 				goto out_child;
			
 
				 			}
			
 
				 		}
			
 
				 
			
 
				+		if (trigger_is_hit(&switch_output_trigger)) {
			
 
				+			trigger_ready(&switch_output_trigger);
			
 
				+
			
 
				+			if (!quiet)
			
 
				+				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
			
 
				+					waking);
			
 
				+			waking = 0;
			
 
				+			fd = record__switch_output(rec, false);
			
 
				+			if (fd < 0) {
			
 
				+				pr_err("Failed to switch to new file\n");
			
 
				+				trigger_error(&switch_output_trigger);
			
 
				+				err = fd;
			
 
				+				goto out_child;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				 		if (hits == rec->samples) {
			
 
				 			if (done || draining)
			
 
				 				break;
			
@@ -772,12 +883,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
				 		 * disable events in this case.
			
 
				 		 */
			
 
				 		if (done && !disabled && !target__none(&opts->target)) {
			
 
				-			auxtrace_snapshot_enabled = 0;
			
 
				+			trigger_off(&auxtrace_snapshot_trigger);
			
 
				 			perf_evlist__disable(rec->evlist);
			
 
				 			disabled = true;
			
 
				 		}
			
 
				 	}
			
 
				-	auxtrace_snapshot_enabled = 0;
			
 
				+	trigger_off(&auxtrace_snapshot_trigger);
			
 
				+	trigger_off(&switch_output_trigger);
			
 
				 
			
 
				 	if (forks && workload_exec_errno) {
			
 
				 		char msg[STRERR_BUFSIZE];
			
@@ -811,11 +923,22 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
				 	/* this will be recalculated during process_buildids() */
			
 
				 	rec->samples = 0;
			
 
				 
			
 
				-	if (!err)
			
 
				-		record__finish_output(rec);
			
 
				+	if (!err) {
			
 
				+		if (!rec->timestamp_filename) {
			
 
				+			record__finish_output(rec);
			
 
				+		} else {
			
 
				+			fd = record__switch_output(rec, true);
			
 
				+			if (fd < 0) {
			
 
				+				status = fd;
			
 
				+				goto out_delete_session;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	if (!err && !quiet) {
			
 
				 		char samples[128];
			
 
				+		const char *postfix = rec->timestamp_filename ?
			
 
				+					".<timestamp>" : "";
			
 
				 
			
 
				 		if (rec->samples && !rec->opts.full_auxtrace)
			
 
				 			scnprintf(samples, sizeof(samples),
			
@@ -823,9 +946,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
				 		else
			
 
				 			samples[0] = '\0';
			
 
				 
			
 
				-		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s ]\n",
			
 
				+		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			
 
				 			perf_data_file__size(file) / 1024.0 / 1024.0,
			
 
				-			file->path, samples);
			
 
				+			file->path, postfix, samples);
			
 
				 	}
			
 
				 
			
 
				 out_delete_session:
			
@@ -833,58 +956,61 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
				 	return status;
			
 
				 }
			
 
				 
			
 
				-static void callchain_debug(void)
			
 
				+static void callchain_debug(struct callchain_param *callchain)
			
 
				 {
			
 
				 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
			
 
				 
			
 
				-	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
			
 
				+	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
			
 
				 
			
 
				-	if (callchain_param.record_mode == CALLCHAIN_DWARF)
			
 
				+	if (callchain->record_mode == CALLCHAIN_DWARF)
			
 
				 		pr_debug("callchain: stack dump size %d\n",
			
 
				-			 callchain_param.dump_size);
			
 
				+			 callchain->dump_size);
			
 
				 }
			
 
				 
			
 
				-int record_parse_callchain_opt(const struct option *opt,
			
 
				-			       const char *arg,
			
 
				-			       int unset)
			
 
				+int record_opts__parse_callchain(struct record_opts *record,
			
 
				+				 struct callchain_param *callchain,
			
 
				+				 const char *arg, bool unset)
			
 
				 {
			
 
				 	int ret;
			
 
				-	struct record_opts *record = (struct record_opts *)opt->value;
			
 
				-
			
 
				-	record->callgraph_set = true;
			
 
				-	callchain_param.enabled = !unset;
			
 
				+	callchain->enabled = !unset;
			
 
				 
			
 
				 	/* --no-call-graph */
			
 
				 	if (unset) {
			
 
				-		callchain_param.record_mode = CALLCHAIN_NONE;
			
 
				+		callchain->record_mode = CALLCHAIN_NONE;
			
 
				 		pr_debug("callchain: disabled\n");
			
 
				 		return 0;
			
 
				 	}
			
 
				 
			
 
				-	ret = parse_callchain_record_opt(arg, &callchain_param);
			
 
				+	ret = parse_callchain_record_opt(arg, callchain);
			
 
				 	if (!ret) {
			
 
				 		/* Enable data address sampling for DWARF unwind. */
			
 
				-		if (callchain_param.record_mode == CALLCHAIN_DWARF)
			
 
				+		if (callchain->record_mode == CALLCHAIN_DWARF)
			
 
				 			record->sample_address = true;
			
 
				-		callchain_debug();
			
 
				+		callchain_debug(callchain);
			
 
				 	}
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+int record_parse_callchain_opt(const struct option *opt,
			
 
				+			       const char *arg,
			
 
				+			       int unset)
			
 
				+{
			
 
				+	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
			
 
				+}
			
 
				+
			
 
				 int record_callchain_opt(const struct option *opt,
			
 
				 			 const char *arg __maybe_unused,
			
 
				 			 int unset __maybe_unused)
			
 
				 {
			
 
				-	struct record_opts *record = (struct record_opts *)opt->value;
			
 
				+	struct callchain_param *callchain = opt->value;
			
 
				 
			
 
				-	record->callgraph_set = true;
			
 
				-	callchain_param.enabled = true;
			
 
				+	callchain->enabled = true;
			
 
				 
			
 
				-	if (callchain_param.record_mode == CALLCHAIN_NONE)
			
 
				-		callchain_param.record_mode = CALLCHAIN_FP;
			
 
				+	if (callchain->record_mode == CALLCHAIN_NONE)
			
 
				+		callchain->record_mode = CALLCHAIN_FP;
			
 
				 
			
 
				-	callchain_debug();
			
 
				+	callchain_debug(callchain);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -1122,7 +1248,7 @@ struct option __record_options[] = {
 
				 		     record__parse_mmap_pages),
			
 
				 	OPT_BOOLEAN(0, "group", &record.opts.group,
			
 
				 		    "put the counters into a counter group"),
			
 
				-	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			
 
				+	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			
 
				 			   NULL, "enables call-graph recording" ,
			
 
				 			   &record_callchain_opt),
			
 
				 	OPT_CALLBACK(0, "call-graph", &record.opts,
			
@@ -1195,6 +1321,10 @@ struct option __record_options[] = {
 
				 		   "file", "vmlinux pathname"),
			
 
				 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
			
 
				 		    "Record build-id of all DSOs regardless of hits"),
			
 
				+	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
			
 
				+		    "append timestamp to output filename"),
			
 
				+	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
			
 
				+		    "Switch output when receive SIGUSR2"),
			
 
				 	OPT_END()
			
 
				 };
			
 
				 
			
@@ -1250,6 +1380,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		return -EINVAL;
			
 
				 	}
			
 
				 
			
 
				+	if (rec->switch_output)
			
 
				+		rec->timestamp_filename = true;
			
 
				+
			
 
				 	if (!rec->itr) {
			
 
				 		rec->itr = auxtrace_record__init(rec->evlist, &err);
			
 
				 		if (err)
			
@@ -1261,6 +1394,14 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	if (err)
			
 
				 		return err;
			
 
				 
			
 
				+	err = bpf__setup_stdout(rec->evlist);
			
 
				+	if (err) {
			
 
				+		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
			
 
				+		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			
 
				+			 errbuf);
			
 
				+		return err;
			
 
				+	}
			
 
				+
			
 
				 	err = -ENOMEM;
			
 
				 
			
 
				 	symbol__init(NULL);
			
@@ -1275,8 +1416,36 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
			
 
				 "even with a suitable vmlinux or kallsyms file.\n\n");
			
 
				 
			
 
				-	if (rec->no_buildid_cache || rec->no_buildid)
			
 
				+	if (rec->no_buildid_cache || rec->no_buildid) {
			
 
				 		disable_buildid_cache();
			
 
				+	} else if (rec->switch_output) {
			
 
				+		/*
			
 
				+		 * In 'perf record --switch-output', disable buildid
			
 
				+		 * generation by default to reduce data file switching
			
 
				+		 * overhead. Still generate buildid if they are required
			
 
				+		 * explicitly using
			
 
				+		 *
			
 
				+		 *  perf record --switch-output --no-no-buildid \
			
 
				+		 *              --no-no-buildid-cache
			
 
				+		 *
			
 
				+		 * The following code is equivalent to:
			
 
				+		 *
			
 
				+		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
			
 
				+		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
			
 
				+		 *         disable_buildid_cache();
			
 
				+		 */
			
 
				+		bool disable = true;
			
 
				+
			
 
				+		if (rec->no_buildid_set && !rec->no_buildid)
			
 
				+			disable = false;
			
 
				+		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			
 
				+			disable = false;
			
 
				+		if (disable) {
			
 
				+			rec->no_buildid = true;
			
 
				+			rec->no_buildid_cache = true;
			
 
				+			disable_buildid_cache();
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	if (rec->evlist->nr_entries == 0 &&
			
 
				 	    perf_evlist__add_default(rec->evlist) < 0) {
			
@@ -1335,9 +1504,13 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 
			
 
				 static void snapshot_sig_handler(int sig __maybe_unused)
			
 
				 {
			
 
				-	if (!auxtrace_snapshot_enabled)
			
 
				-		return;
			
 
				-	auxtrace_snapshot_enabled = 0;
			
 
				-	auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
			
 
				-	auxtrace_record__snapshot_started = 1;
			
 
				+	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
			
 
				+		trigger_hit(&auxtrace_snapshot_trigger);
			
 
				+		auxtrace_record__snapshot_started = 1;
			
 
				+		if (auxtrace_record__snapshot_start(record.itr))
			
 
				+			trigger_error(&auxtrace_snapshot_trigger);
			
 
				+	}
			
 
				+
			
 
				+	if (trigger_is_ready(&switch_output_trigger))
			
 
				+		trigger_hit(&switch_output_trigger);
			
 
				 }
			
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -47,7 +47,6 @@ struct report {
 
				 	struct perf_tool	tool;
			
 
				 	struct perf_session	*session;
			
 
				 	bool			use_tui, use_gtk, use_stdio;
			
 
				-	bool			dont_use_callchains;
			
 
				 	bool			show_full_info;
			
 
				 	bool			show_threads;
			
 
				 	bool			inverted_callchain;
			
@@ -235,7 +234,7 @@ static int report__setup_sample_type(struct report *rep)
 
				 		sample_type |= PERF_SAMPLE_BRANCH_STACK;
			
 
				 
			
 
				 	if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
			
 
				-		if (sort__has_parent) {
			
 
				+		if (perf_hpp_list.parent) {
			
 
				 			ui__error("Selected --sort parent, but no "
			
 
				 				    "callchain data. Did you call "
			
 
				 				    "'perf record' without -g?\n");
			
@@ -247,7 +246,7 @@ static int report__setup_sample_type(struct report *rep)
 
				 				  "you call 'perf record' without -g?\n");
			
 
				 			return -1;
			
 
				 		}
			
 
				-	} else if (!rep->dont_use_callchains &&
			
 
				+	} else if (!callchain_param.enabled &&
			
 
				 		   callchain_param.mode != CHAIN_NONE &&
			
 
				 		   !symbol_conf.use_callchain) {
			
 
				 			symbol_conf.use_callchain = true;
			
@@ -599,13 +598,15 @@ static int __cmd_report(struct report *rep)
 
				 static int
			
 
				 report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
			
 
				 {
			
 
				-	struct report *rep = (struct report *)opt->value;
			
 
				+	struct callchain_param *callchain = opt->value;
			
 
				 
			
 
				+	callchain->enabled = !unset;
			
 
				 	/*
			
 
				 	 * --no-call-graph
			
 
				 	 */
			
 
				 	if (unset) {
			
 
				-		rep->dont_use_callchains = true;
			
 
				+		symbol_conf.use_callchain = false;
			
 
				+		callchain->mode = CHAIN_NONE;
			
 
				 		return 0;
			
 
				 	}
			
 
				 
			
@@ -690,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 			.ordered_events	 = true,
			
 
				 			.ordering_requires_timestamps = true,
			
 
				 		},
			
 
				-		.max_stack		 = PERF_MAX_STACK_DEPTH,
			
 
				+		.max_stack		 = sysctl_perf_event_max_stack,
			
 
				 		.pretty_printing_style	 = "normal",
			
 
				 		.socket_filter		 = -1,
			
 
				 	};
			
@@ -734,7 +735,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		   "regex filter to identify parent, see: '--sort parent'"),
			
 
				 	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
			
 
				 		    "Only display entries with parent-match"),
			
 
				-	OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
			
 
				+	OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
			
 
				 			     "print_type,threshold[,print_limit],order,sort_key[,branch],value",
			
 
				 			     report_callchain_help, &report_parse_callchain_opt,
			
 
				 			     callchain_default_opt),
			
@@ -743,7 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	OPT_INTEGER(0, "max-stack", &report.max_stack,
			
 
				 		    "Set the maximum stack depth when parsing the callchain, "
			
 
				 		    "anything beyond the specified depth will be ignored. "
			
 
				-		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
			
 
				+		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
			
 
				 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
			
 
				 		    "alias for inverted call graph"),
			
 
				 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
			
@@ -935,7 +936,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 			goto error;
			
 
				 		}
			
 
				 
			
 
				-		sort__need_collapse = true;
			
 
				+		perf_hpp_list.need_collapse = true;
			
 
				 	}
			
 
				 
			
 
				 	/* Force tty output for header output and per-thread stat. */
			
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -11,6 +11,8 @@
 
				 #include "util/session.h"
			
 
				 #include "util/tool.h"
			
 
				 #include "util/cloexec.h"
			
 
				+#include "util/thread_map.h"
			
 
				+#include "util/color.h"
			
 
				 
			
 
				 #include <subcmd/parse-options.h>
			
 
				 #include "util/trace-event.h"
			
@@ -122,6 +124,21 @@ struct trace_sched_handler {
 
				 				  struct machine *machine);
			
 
				 };
			
 
				 
			
 
				+#define COLOR_PIDS PERF_COLOR_BLUE
			
 
				+#define COLOR_CPUS PERF_COLOR_BG_RED
			
 
				+
			
 
				+struct perf_sched_map {
			
 
				+	DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
			
 
				+	int			*comp_cpus;
			
 
				+	bool			 comp;
			
 
				+	struct thread_map	*color_pids;
			
 
				+	const char		*color_pids_str;
			
 
				+	struct cpu_map		*color_cpus;
			
 
				+	const char		*color_cpus_str;
			
 
				+	struct cpu_map		*cpus;
			
 
				+	const char		*cpus_str;
			
 
				+};
			
 
				+
			
 
				 struct perf_sched {
			
 
				 	struct perf_tool tool;
			
 
				 	const char	 *sort_order;
			
@@ -173,6 +190,7 @@ struct perf_sched {
 
				 	struct list_head sort_list, cmp_pid;
			
 
				 	bool force;
			
 
				 	bool skip_merge;
			
 
				+	struct perf_sched_map map;
			
 
				 };
			
 
				 
			
 
				 static u64 get_nsecs(void)
			
@@ -1339,6 +1357,38 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+union map_priv {
			
 
				+	void	*ptr;
			
 
				+	bool	 color;
			
 
				+};
			
 
				+
			
 
				+static bool thread__has_color(struct thread *thread)
			
 
				+{
			
 
				+	union map_priv priv = {
			
 
				+		.ptr = thread__priv(thread),
			
 
				+	};
			
 
				+
			
 
				+	return priv.color;
			
 
				+}
			
 
				+
			
 
				+static struct thread*
			
 
				+map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
			
 
				+{
			
 
				+	struct thread *thread = machine__findnew_thread(machine, pid, tid);
			
 
				+	union map_priv priv = {
			
 
				+		.color = false,
			
 
				+	};
			
 
				+
			
 
				+	if (!sched->map.color_pids || !thread || thread__priv(thread))
			
 
				+		return thread;
			
 
				+
			
 
				+	if (thread_map__has(sched->map.color_pids, tid))
			
 
				+		priv.color = true;
			
 
				+
			
 
				+	thread__set_priv(thread, priv.ptr);
			
 
				+	return thread;
			
 
				+}
			
 
				+
			
 
				 static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
			
 
				 			    struct perf_sample *sample, struct machine *machine)
			
 
				 {
			
@@ -1347,13 +1397,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 
				 	int new_shortname;
			
 
				 	u64 timestamp0, timestamp = sample->time;
			
 
				 	s64 delta;
			
 
				-	int cpu, this_cpu = sample->cpu;
			
 
				+	int i, this_cpu = sample->cpu;
			
 
				+	int cpus_nr;
			
 
				+	bool new_cpu = false;
			
 
				+	const char *color = PERF_COLOR_NORMAL;
			
 
				 
			
 
				 	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
			
 
				 
			
 
				 	if (this_cpu > sched->max_cpu)
			
 
				 		sched->max_cpu = this_cpu;
			
 
				 
			
 
				+	if (sched->map.comp) {
			
 
				+		cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
			
 
				+		if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
			
 
				+			sched->map.comp_cpus[cpus_nr++] = this_cpu;
			
 
				+			new_cpu = true;
			
 
				+		}
			
 
				+	} else
			
 
				+		cpus_nr = sched->max_cpu;
			
 
				+
			
 
				 	timestamp0 = sched->cpu_last_switched[this_cpu];
			
 
				 	sched->cpu_last_switched[this_cpu] = timestamp;
			
 
				 	if (timestamp0)
			
@@ -1366,7 +1428,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 
				 		return -1;
			
 
				 	}
			
 
				 
			
 
				-	sched_in = machine__findnew_thread(machine, -1, next_pid);
			
 
				+	sched_in = map__findnew_thread(sched, machine, -1, next_pid);
			
 
				 	if (sched_in == NULL)
			
 
				 		return -1;
			
 
				 
			
@@ -1400,26 +1462,52 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 
				 		new_shortname = 1;
			
 
				 	}
			
 
				 
			
 
				-	for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
			
 
				+	for (i = 0; i < cpus_nr; i++) {
			
 
				+		int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
			
 
				+		struct thread *curr_thread = sched->curr_thread[cpu];
			
 
				+		const char *pid_color = color;
			
 
				+		const char *cpu_color = color;
			
 
				+
			
 
				+		if (curr_thread && thread__has_color(curr_thread))
			
 
				+			pid_color = COLOR_PIDS;
			
 
				+
			
 
				+		if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
			
 
				+			continue;
			
 
				+
			
 
				+		if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
			
 
				+			cpu_color = COLOR_CPUS;
			
 
				+
			
 
				 		if (cpu != this_cpu)
			
 
				-			printf(" ");
			
 
				+			color_fprintf(stdout, cpu_color, " ");
			
 
				 		else
			
 
				-			printf("*");
			
 
				+			color_fprintf(stdout, cpu_color, "*");
			
 
				 
			
 
				 		if (sched->curr_thread[cpu])
			
 
				-			printf("%2s ", sched->curr_thread[cpu]->shortname);
			
 
				+			color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
			
 
				 		else
			
 
				-			printf("   ");
			
 
				+			color_fprintf(stdout, color, "   ");
			
 
				 	}
			
 
				 
			
 
				-	printf("  %12.6f secs ", (double)timestamp/1e9);
			
 
				+	if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
			
 
				+		goto out;
			
 
				+
			
 
				+	color_fprintf(stdout, color, "  %12.6f secs ", (double)timestamp/1e9);
			
 
				 	if (new_shortname) {
			
 
				-		printf("%s => %s:%d\n",
			
 
				+		const char *pid_color = color;
			
 
				+
			
 
				+		if (thread__has_color(sched_in))
			
 
				+			pid_color = COLOR_PIDS;
			
 
				+
			
 
				+		color_fprintf(stdout, pid_color, "%s => %s:%d",
			
 
				 		       sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
			
 
				-	} else {
			
 
				-		printf("\n");
			
 
				 	}
			
 
				 
			
 
				+	if (sched->map.comp && new_cpu)
			
 
				+		color_fprintf(stdout, color, " (CPU %d)", this_cpu);
			
 
				+
			
 
				+out:
			
 
				+	color_fprintf(stdout, color, "\n");
			
 
				+
			
 
				 	thread__put(sched_in);
			
 
				 
			
 
				 	return 0;
			
@@ -1675,9 +1763,75 @@ static int perf_sched__lat(struct perf_sched *sched)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static int setup_map_cpus(struct perf_sched *sched)	/* Set up the CPU count and CPU maps for 'perf sched map'; returns 0 on success, -1 on error. */
			
 
				+{
			
 
				+	struct cpu_map *map;
			
 
				+	/* Number of configured processors, as reported by sysconf(). */
			
 
				+	sched->max_cpu  = sysconf(_SC_NPROCESSORS_CONF);
			
 
				+	/* --compact: allocate one int per CPU (zalloc presumably zero-fills -- confirm). */
			
 
				+	if (sched->map.comp) {
			
 
				+		sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
			
 
				+		if (!sched->map.comp_cpus)
			
 
				+			return -1;
			
 
				+	}
			
 
				+	/* No --cpus string given: sched->map.cpus is left untouched. */
			
 
				+	if (!sched->map.cpus_str)
			
 
				+		return 0;
			
 
				+	/* Parse the --cpus string; NOTE(review): on failure comp_cpus allocated above is not released here. */
			
 
				+	map = cpu_map__new(sched->map.cpus_str);
			
 
				+	if (!map) {
			
 
				+		pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
			
 
				+		return -1;
			
 
				+	}
			
 
				+	/* Store the parsed map in sched->map.cpus. */
			
 
				+	sched->map.cpus = map;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int setup_color_pids(struct perf_sched *sched)	/* Build the thread map for --color-pids; returns 0 on success, -1 on error. */
			
 
				+{
			
 
				+	struct thread_map *map;
			
 
				+	/* No --color-pids string given: nothing to set up. */
			
 
				+	if (!sched->map.color_pids_str)
			
 
				+		return 0;
			
 
				+	/* The string is handed to the by-tid-string thread_map constructor. */
			
 
				+	map = thread_map__new_by_tid_str(sched->map.color_pids_str);
			
 
				+	if (!map) {
			
 
				+		pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
			
 
				+		return -1;
			
 
				+	}
			
 
				+	/* Store the parsed map in sched->map.color_pids. */
			
 
				+	sched->map.color_pids = map;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int setup_color_cpus(struct perf_sched *sched)	/* Build the CPU map for --color-cpus; returns 0 on success, -1 on error. */
			
 
				+{
			
 
				+	struct cpu_map *map;
			
 
				+	/* No --color-cpus string given: nothing to set up. */
			
 
				+	if (!sched->map.color_cpus_str)
			
 
				+		return 0;
			
 
				+	/* This is a CPU list, so the error text names a cpus map (same wording as setup_map_cpus). */
			
 
				+	map = cpu_map__new(sched->map.color_cpus_str);
			
 
				+	if (!map) {
			
 
				+		pr_err("failed to get cpus map from %s\n", sched->map.color_cpus_str);
			
 
				+		return -1;
			
 
				+	}
			
 
				+	/* Store the parsed map in sched->map.color_cpus. */
			
 
				+	sched->map.color_cpus = map;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int perf_sched__map(struct perf_sched *sched)
			
 
				 {
			
 
				-	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
			
 
				+	if (setup_map_cpus(sched))
			
 
				+		return -1;
			
 
				+
			
 
				+	if (setup_color_pids(sched))
			
 
				+		return -1;
			
 
				+
			
 
				+	if (setup_color_cpus(sched))
			
 
				+		return -1;
			
 
				 
			
 
				 	setup_pager();
			
 
				 	if (perf_sched__read_events(sched))
			
@@ -1831,6 +1985,17 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		    "dump raw trace in ASCII"),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				+	const struct option map_options[] = {
			
 
				+	OPT_BOOLEAN(0, "compact", &sched.map.comp,
			
 
				+		    "map output in compact mode"),
			
 
				+	OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
			
 
				+		   "highlight given pids in map"),
			
 
				+	OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
			
 
				+                    "highlight given CPUs in map"),
			
 
				+	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
			
 
				+                    "display given CPUs in map"),
			
 
				+	OPT_END()
			
 
				+	};
			
 
				 	const char * const latency_usage[] = {
			
 
				 		"perf sched latency [<options>]",
			
 
				 		NULL
			
@@ -1839,6 +2004,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		"perf sched replay [<options>]",
			
 
				 		NULL
			
 
				 	};
			
 
				+	const char * const map_usage[] = {
			
 
				+		"perf sched map [<options>]",
			
 
				+		NULL
			
 
				+	};
			
 
				 	const char *const sched_subcommands[] = { "record", "latency", "map",
			
 
				 						  "replay", "script", NULL };
			
 
				 	const char *sched_usage[] = {
			
@@ -1887,6 +2056,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		setup_sorting(&sched, latency_options, latency_usage);
			
 
				 		return perf_sched__lat(&sched);
			
 
				 	} else if (!strcmp(argv[0], "map")) {
			
 
				+		if (argc) {
			
 
				+			argc = parse_options(argc, argv, map_options, map_usage, 0);
			
 
				+			if (argc)
			
 
				+				usage_with_options(map_usage, map_options);
			
 
				+		}
			
 
				 		sched.tp_handler = &map_ops;
			
 
				 		setup_sorting(&sched, latency_options, latency_usage);
			
 
				 		return perf_sched__map(&sched);
			
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
 
				 #include "util/thread_map.h"
			
 
				 #include "util/stat.h"
			
 
				 #include <linux/bitmap.h>
			
 
				+#include <linux/stringify.h>
			
 
				 #include "asm/bug.h"
			
 
				 #include "util/mem-events.h"
			
 
				 
			
@@ -317,19 +318,19 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
 
				 
			
 
				 	output[type].print_ip_opts = 0;
			
 
				 	if (PRINT_FIELD(IP))
			
 
				-		output[type].print_ip_opts |= PRINT_IP_OPT_IP;
			
 
				+		output[type].print_ip_opts |= EVSEL__PRINT_IP;
			
 
				 
			
 
				 	if (PRINT_FIELD(SYM))
			
 
				-		output[type].print_ip_opts |= PRINT_IP_OPT_SYM;
			
 
				+		output[type].print_ip_opts |= EVSEL__PRINT_SYM;
			
 
				 
			
 
				 	if (PRINT_FIELD(DSO))
			
 
				-		output[type].print_ip_opts |= PRINT_IP_OPT_DSO;
			
 
				+		output[type].print_ip_opts |= EVSEL__PRINT_DSO;
			
 
				 
			
 
				 	if (PRINT_FIELD(SYMOFFSET))
			
 
				-		output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
			
 
				+		output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET;
			
 
				 
			
 
				 	if (PRINT_FIELD(SRCLINE))
			
 
				-		output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE;
			
 
				+		output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -569,18 +570,23 @@ static void print_sample_bts(struct perf_sample *sample,
 
				 	/* print branch_from information */
			
 
				 	if (PRINT_FIELD(IP)) {
			
 
				 		unsigned int print_opts = output[attr->type].print_ip_opts;
			
 
				+		struct callchain_cursor *cursor = NULL;
			
 
				 
			
 
				-		if (symbol_conf.use_callchain && sample->callchain) {
			
 
				-			printf("\n");
			
 
				-		} else {
			
 
				-			printf(" ");
			
 
				-			if (print_opts & PRINT_IP_OPT_SRCLINE) {
			
 
				+		if (symbol_conf.use_callchain && sample->callchain &&
			
 
				+		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
			
 
				+					      sample, NULL, NULL, scripting_max_stack) == 0)
			
 
				+			cursor = &callchain_cursor;
			
 
				+
			
 
				+		if (cursor == NULL) {
			
 
				+			putchar(' ');
			
 
				+			if (print_opts & EVSEL__PRINT_SRCLINE) {
			
 
				 				print_srcline_last = true;
			
 
				-				print_opts &= ~PRINT_IP_OPT_SRCLINE;
			
 
				+				print_opts &= ~EVSEL__PRINT_SRCLINE;
			
 
				 			}
			
 
				-		}
			
 
				-		perf_evsel__print_ip(evsel, sample, al, print_opts,
			
 
				-				     scripting_max_stack);
			
 
				+		} else
			
 
				+			putchar('\n');
			
 
				+
			
 
				+		sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout);
			
 
				 	}
			
 
				 
			
 
				 	/* print branch_to information */
			
@@ -783,14 +789,15 @@ static void process_event(struct perf_script *script,
 
				 		printf("%16" PRIu64, sample->weight);
			
 
				 
			
 
				 	if (PRINT_FIELD(IP)) {
			
 
				-		if (!symbol_conf.use_callchain)
			
 
				-			printf(" ");
			
 
				-		else
			
 
				-			printf("\n");
			
 
				+		struct callchain_cursor *cursor = NULL;
			
 
				+
			
 
				+		if (symbol_conf.use_callchain && sample->callchain &&
			
 
				+		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
			
 
				+					      sample, NULL, NULL, scripting_max_stack) == 0)
			
 
				+			cursor = &callchain_cursor;
			
 
				 
			
 
				-		perf_evsel__print_ip(evsel, sample, al,
			
 
				-				     output[attr->type].print_ip_opts,
			
 
				-				     scripting_max_stack);
			
 
				+		putchar(cursor ? '\n' : ' ');
			
 
				+		sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
			
 
				 	}
			
 
				 
			
 
				 	if (PRINT_FIELD(IREGS))
			
@@ -1959,6 +1966,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 			.exit		 = perf_event__process_exit,
			
 
				 			.fork		 = perf_event__process_fork,
			
 
				 			.attr		 = process_attr,
			
 
				+			.event_update   = perf_event__process_event_update,
			
 
				 			.tracing_data	 = perf_event__process_tracing_data,
			
 
				 			.build_id	 = perf_event__process_build_id,
			
 
				 			.id_index	 = perf_event__process_id_index,
			
@@ -2020,6 +2028,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		   "only consider symbols in these pids"),
			
 
				 	OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
			
 
				 		   "only consider symbols in these tids"),
			
 
				+	OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
			
 
				+		     "Set the maximum stack depth when parsing the callchain, "
			
 
				+		     "anything beyond the specified depth will be ignored. "
			
 
				+		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
			
 
				 	OPT_BOOLEAN('I', "show-info", &show_full_info,
			
 
				 		    "display extended information from perf.data file"),
			
 
				 	OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
			
@@ -2055,6 +2067,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		NULL
			
 
				 	};
			
 
				 
			
 
				+	scripting_max_stack = sysctl_perf_event_max_stack;
			
 
				+
			
 
				 	setup_scripting();
			
 
				 
			
 
				 	argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
			
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -298,6 +298,14 @@ static int read_counter(struct perf_evsel *counter)
 
				 					return -1;
			
 
				 				}
			
 
				 			}
			
 
				+
			
 
				+			if (verbose > 1) {
			
 
				+				fprintf(stat_config.output,
			
 
				+					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			
 
				+						perf_evsel__name(counter),
			
 
				+						cpu,
			
 
				+						count->val, count->ena, count->run);
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -688,7 +688,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
 
				 	struct hist_entry *he = iter->he;
			
 
				 	struct perf_evsel *evsel = iter->evsel;
			
 
				 
			
 
				-	if (sort__has_sym && single)
			
 
				+	if (perf_hpp_list.sym && single)
			
 
				 		perf_top__record_precise_ip(top, he, evsel->idx, al->addr);
			
 
				 
			
 
				 	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
			
@@ -886,7 +886,7 @@ static int perf_top__start_counters(struct perf_top *top)
 
				 	struct perf_evlist *evlist = top->evlist;
			
 
				 	struct record_opts *opts = &top->record_opts;
			
 
				 
			
 
				-	perf_evlist__config(evlist, opts);
			
 
				+	perf_evlist__config(evlist, opts, &callchain_param);
			
 
				 
			
 
				 	evlist__for_each(evlist, counter) {
			
 
				 try_again:
			
@@ -917,15 +917,15 @@ static int perf_top__start_counters(struct perf_top *top)
 
				 	return -1;
			
 
				 }
			
 
				 
			
 
				-static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused)
			
 
				+static int callchain_param__setup_sample_type(struct callchain_param *callchain)
			
 
				 {
			
 
				-	if (!sort__has_sym) {
			
 
				-		if (symbol_conf.use_callchain) {
			
 
				+	if (!perf_hpp_list.sym) {
			
 
				+		if (callchain->enabled) {
			
 
				 			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
			
 
				 			return -EINVAL;
			
 
				 		}
			
 
				-	} else if (callchain_param.mode != CHAIN_NONE) {
			
 
				-		if (callchain_register_param(&callchain_param) < 0) {
			
 
				+	} else if (callchain->mode != CHAIN_NONE) {
			
 
				+		if (callchain_register_param(callchain) < 0) {
			
 
				 			ui__error("Can't register callchain params.\n");
			
 
				 			return -EINVAL;
			
 
				 		}
			
@@ -952,7 +952,7 @@ static int __cmd_top(struct perf_top *top)
 
				 			goto out_delete;
			
 
				 	}
			
 
				 
			
 
				-	ret = perf_top__setup_sample_type(top);
			
 
				+	ret = callchain_param__setup_sample_type(&callchain_param);
			
 
				 	if (ret)
			
 
				 		goto out_delete;
			
 
				 
			
@@ -962,7 +962,7 @@ static int __cmd_top(struct perf_top *top)
 
				 	machine__synthesize_threads(&top->session->machines.host, &opts->target,
			
 
				 				    top->evlist->threads, false, opts->proc_map_timeout);
			
 
				 
			
 
				-	if (sort__has_socket) {
			
 
				+	if (perf_hpp_list.socket) {
			
 
				 		ret = perf_env__read_cpu_topology_map(&perf_env);
			
 
				 		if (ret < 0)
			
 
				 			goto out_err_cpu_topo;
			
@@ -1045,18 +1045,17 @@ callchain_opt(const struct option *opt, const char *arg, int unset)
 
				 static int
			
 
				 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
			
 
				 {
			
 
				-	struct record_opts *record = (struct record_opts *)opt->value;
			
 
				+	struct callchain_param *callchain = opt->value;
			
 
				 
			
 
				-	record->callgraph_set = true;
			
 
				-	callchain_param.enabled = !unset;
			
 
				-	callchain_param.record_mode = CALLCHAIN_FP;
			
 
				+	callchain->enabled = !unset;
			
 
				+	callchain->record_mode = CALLCHAIN_FP;
			
 
				 
			
 
				 	/*
			
 
				 	 * --no-call-graph
			
 
				 	 */
			
 
				 	if (unset) {
			
 
				 		symbol_conf.use_callchain = false;
			
 
				-		callchain_param.record_mode = CALLCHAIN_NONE;
			
 
				+		callchain->record_mode = CALLCHAIN_NONE;
			
 
				 		return 0;
			
 
				 	}
			
 
				 
			
@@ -1104,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 			},
			
 
				 			.proc_map_timeout    = 500,
			
 
				 		},
			
 
				-		.max_stack	     = PERF_MAX_STACK_DEPTH,
			
 
				+		.max_stack	     = sysctl_perf_event_max_stack,
			
 
				 		.sym_pcnt_filter     = 5,
			
 
				 	};
			
 
				 	struct record_opts *opts = &top.record_opts;
			
@@ -1162,17 +1161,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		   "output field(s): overhead, period, sample plus all of sort keys"),
			
 
				 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
			
 
				 		    "Show a column with the number of samples"),
			
 
				-	OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts,
			
 
				+	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			
 
				 			   NULL, "enables call-graph recording and display",
			
 
				 			   &callchain_opt),
			
 
				-	OPT_CALLBACK(0, "call-graph", &top.record_opts,
			
 
				+	OPT_CALLBACK(0, "call-graph", &callchain_param,
			
 
				 		     "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
			
 
				 		     top_callchain_help, &parse_callchain_opt),
			
 
				 	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
			
 
				 		    "Accumulate callchains of children and show total overhead as well"),
			
 
				 	OPT_INTEGER(0, "max-stack", &top.max_stack,
			
 
				 		    "Set the maximum stack depth when parsing the callchain. "
			
 
				-		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
			
 
				+		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
			
 
				 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
			
 
				 		   "ignore callees of these functions in call graphs",
			
 
				 		   report_parse_ignore_callees_opt),
			
@@ -1256,7 +1255,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 
			
 
				 	sort__mode = SORT_MODE__TOP;
			
 
				 	/* display thread wants entries to be collapsed in a different tree */
			
 
				-	sort__need_collapse = 1;
			
 
				+	perf_hpp_list.need_collapse = 1;
			
 
				 
			
 
				 	if (top.use_stdio)
			
 
				 		use_browser = 0;
			
@@ -1312,7 +1311,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 
			
 
				 	top.sym_evsel = perf_evlist__first(top.evlist);
			
 
				 
			
 
				-	if (!symbol_conf.use_callchain) {
			
 
				+	if (!callchain_param.enabled) {
			
 
				 		symbol_conf.cumulate_callchain = false;
			
 
				 		perf_hpp__cancel_cumulate();
			
 
				 	}
			
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -34,79 +34,76 @@
 
				 #include "trace-event.h"
			
 
				 #include "util/parse-events.h"
			
 
				 #include "util/bpf-loader.h"
			
 
				+#include "callchain.h"
			
 
				+#include "syscalltbl.h"
			
 
				+#include "rb_resort.h"
			
 
				 
			
 
				-#include <libaudit.h>
			
 
				+#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
			
 
				 #include <stdlib.h>
			
 
				-#include <sys/mman.h>
			
 
				-#include <linux/futex.h>
			
 
				 #include <linux/err.h>
			
 
				-
			
 
				-/* For older distros: */
			
 
				-#ifndef MAP_STACK
			
 
				-# define MAP_STACK		0x20000
			
 
				-#endif
			
 
				-
			
 
				-#ifndef MADV_HWPOISON
			
 
				-# define MADV_HWPOISON		100
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-#ifndef MADV_MERGEABLE
			
 
				-# define MADV_MERGEABLE		12
			
 
				-#endif
			
 
				-
			
 
				-#ifndef MADV_UNMERGEABLE
			
 
				-# define MADV_UNMERGEABLE	13
			
 
				-#endif
			
 
				-
			
 
				-#ifndef EFD_SEMAPHORE
			
 
				-# define EFD_SEMAPHORE		1
			
 
				-#endif
			
 
				-
			
 
				-#ifndef EFD_NONBLOCK
			
 
				-# define EFD_NONBLOCK		00004000
			
 
				-#endif
			
 
				-
			
 
				-#ifndef EFD_CLOEXEC
			
 
				-# define EFD_CLOEXEC		02000000
			
 
				-#endif
			
 
				+#include <linux/filter.h>
			
 
				+#include <linux/audit.h>
			
 
				+#include <sys/ptrace.h>
			
 
				+#include <linux/random.h>
			
 
				+#include <linux/stringify.h>
			
 
				 
			
 
				 #ifndef O_CLOEXEC
			
 
				 # define O_CLOEXEC		02000000
			
 
				 #endif
			
 
				 
			
 
				-#ifndef SOCK_DCCP
			
 
				-# define SOCK_DCCP		6
			
 
				-#endif
			
 
				-
			
 
				-#ifndef SOCK_CLOEXEC
			
 
				-# define SOCK_CLOEXEC		02000000
			
 
				-#endif
			
 
				-
			
 
				-#ifndef SOCK_NONBLOCK
			
 
				-# define SOCK_NONBLOCK		00004000
			
 
				-#endif
			
 
				-
			
 
				-#ifndef MSG_CMSG_CLOEXEC
			
 
				-# define MSG_CMSG_CLOEXEC	0x40000000
			
 
				-#endif
			
 
				-
			
 
				-#ifndef PERF_FLAG_FD_NO_GROUP
			
 
				-# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
			
 
				-#endif
			
 
				-
			
 
				-#ifndef PERF_FLAG_FD_OUTPUT
			
 
				-# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
			
 
				-#endif
			
 
				-
			
 
				-#ifndef PERF_FLAG_PID_CGROUP
			
 
				-# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
			
 
				-#endif
			
 
				-
			
 
				-#ifndef PERF_FLAG_FD_CLOEXEC
			
 
				-# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
			
 
				-#endif
			
 
				-
			
 
				+struct trace {	/* State of the trace builtin -- NOTE(review): member notes below are inferred from names/types only; confirm against users. */
			
 
				+	struct perf_tool	tool;
			
 
				+	struct syscalltbl	*sctbl;
			
 
				+	struct {
			
 
				+		int		max;
			
 
				+		struct syscall  *table;
			
 
				+		struct {
			
 
				+			struct perf_evsel *sys_enter,
			
 
				+					  *sys_exit;
			
 
				+		}		events;
			
 
				+	} syscalls;
			
 
				+	struct record_opts	opts;
			
 
				+	struct perf_evlist	*evlist;
			
 
				+	struct machine		*host;
			
 
				+	struct thread		*current;
			
 
				+	u64			base_time;
			
 
				+	FILE			*output;
			
 
				+	unsigned long		nr_events;
			
 
				+	struct strlist		*ev_qualifier;
			
 
				+	struct {
			
 
				+		size_t		nr;
			
 
				+		int		*entries;
			
 
				+	}			ev_qualifier_ids;	/* counted int array -- presumably ids matching ev_qualifier; confirm */
			
 
				+	struct intlist		*tid_list;
			
 
				+	struct intlist		*pid_list;
			
 
				+	struct {
			
 
				+		size_t		nr;
			
 
				+		pid_t		*entries;
			
 
				+	}			filter_pids;	/* counted pid_t array */
			
 
				+	double			duration_filter;
			
 
				+	double			runtime_ms;
			
 
				+	struct {
			
 
				+		u64		vfs_getname,
			
 
				+				proc_getname;
			
 
				+	} stats;
			
 
				+	unsigned int		max_stack;	/* presumably a callchain depth limit -- TODO confirm */
			
 
				+	unsigned int		min_stack;	/* presumably a callchain depth limit -- TODO confirm */
			
 
				+	bool			not_ev_qualifier;
			
 
				+	bool			live;
			
 
				+	bool			full_time;
			
 
				+	bool			sched;
			
 
				+	bool			multiple_threads;
			
 
				+	bool			summary;
			
 
				+	bool			summary_only;
			
 
				+	bool			show_comm;
			
 
				+	bool			show_tool_stats;
			
 
				+	bool			trace_syscalls;
			
 
				+	bool			kernel_syscallchains;
			
 
				+	bool			force;
			
 
				+	bool			vfs_getname;
			
 
				+	int			trace_pgfaults;
			
 
				+	int			open_id;
			
 
				+};
			
 
				 
			
 
				 struct tp_field {
			
 
				 	int offset;
			
@@ -371,221 +368,6 @@ static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
 
				 
			
 
				 #define SCA_INT syscall_arg__scnprintf_int
			
 
				 
			
 
				-static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
			
 
				-					       struct syscall_arg *arg)
			
 
				-{
			
 
				-	int printed = 0, prot = arg->val;
			
 
				-
			
 
				-	if (prot == PROT_NONE)
			
 
				-		return scnprintf(bf, size, "NONE");
			
 
				-#define	P_MMAP_PROT(n) \
			
 
				-	if (prot & PROT_##n) { \
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
			
 
				-		prot &= ~PROT_##n; \
			
 
				-	}
			
 
				-
			
 
				-	P_MMAP_PROT(EXEC);
			
 
				-	P_MMAP_PROT(READ);
			
 
				-	P_MMAP_PROT(WRITE);
			
 
				-#ifdef PROT_SEM
			
 
				-	P_MMAP_PROT(SEM);
			
 
				-#endif
			
 
				-	P_MMAP_PROT(GROWSDOWN);
			
 
				-	P_MMAP_PROT(GROWSUP);
			
 
				-#undef P_MMAP_PROT
			
 
				-
			
 
				-	if (prot)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
			
 
				-
			
 
				-static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
			
 
				-						struct syscall_arg *arg)
			
 
				-{
			
 
				-	int printed = 0, flags = arg->val;
			
 
				-
			
 
				-#define	P_MMAP_FLAG(n) \
			
 
				-	if (flags & MAP_##n) { \
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
			
 
				-		flags &= ~MAP_##n; \
			
 
				-	}
			
 
				-
			
 
				-	P_MMAP_FLAG(SHARED);
			
 
				-	P_MMAP_FLAG(PRIVATE);
			
 
				-#ifdef MAP_32BIT
			
 
				-	P_MMAP_FLAG(32BIT);
			
 
				-#endif
			
 
				-	P_MMAP_FLAG(ANONYMOUS);
			
 
				-	P_MMAP_FLAG(DENYWRITE);
			
 
				-	P_MMAP_FLAG(EXECUTABLE);
			
 
				-	P_MMAP_FLAG(FILE);
			
 
				-	P_MMAP_FLAG(FIXED);
			
 
				-	P_MMAP_FLAG(GROWSDOWN);
			
 
				-#ifdef MAP_HUGETLB
			
 
				-	P_MMAP_FLAG(HUGETLB);
			
 
				-#endif
			
 
				-	P_MMAP_FLAG(LOCKED);
			
 
				-	P_MMAP_FLAG(NONBLOCK);
			
 
				-	P_MMAP_FLAG(NORESERVE);
			
 
				-	P_MMAP_FLAG(POPULATE);
			
 
				-	P_MMAP_FLAG(STACK);
			
 
				-#ifdef MAP_UNINITIALIZED
			
 
				-	P_MMAP_FLAG(UNINITIALIZED);
			
 
				-#endif
			
 
				-#undef P_MMAP_FLAG
			
 
				-
			
 
				-	if (flags)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
			
 
				-
			
 
				-static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
			
 
				-						  struct syscall_arg *arg)
			
 
				-{
			
 
				-	int printed = 0, flags = arg->val;
			
 
				-
			
 
				-#define P_MREMAP_FLAG(n) \
			
 
				-	if (flags & MREMAP_##n) { \
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
			
 
				-		flags &= ~MREMAP_##n; \
			
 
				-	}
			
 
				-
			
 
				-	P_MREMAP_FLAG(MAYMOVE);
			
 
				-#ifdef MREMAP_FIXED
			
 
				-	P_MREMAP_FLAG(FIXED);
			
 
				-#endif
			
 
				-#undef P_MREMAP_FLAG
			
 
				-
			
 
				-	if (flags)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
			
 
				-
			
 
				-static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
			
 
				-						      struct syscall_arg *arg)
			
 
				-{
			
 
				-	int behavior = arg->val;
			
 
				-
			
 
				-	switch (behavior) {
			
 
				-#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
			
 
				-	P_MADV_BHV(NORMAL);
			
 
				-	P_MADV_BHV(RANDOM);
			
 
				-	P_MADV_BHV(SEQUENTIAL);
			
 
				-	P_MADV_BHV(WILLNEED);
			
 
				-	P_MADV_BHV(DONTNEED);
			
 
				-	P_MADV_BHV(REMOVE);
			
 
				-	P_MADV_BHV(DONTFORK);
			
 
				-	P_MADV_BHV(DOFORK);
			
 
				-	P_MADV_BHV(HWPOISON);
			
 
				-#ifdef MADV_SOFT_OFFLINE
			
 
				-	P_MADV_BHV(SOFT_OFFLINE);
			
 
				-#endif
			
 
				-	P_MADV_BHV(MERGEABLE);
			
 
				-	P_MADV_BHV(UNMERGEABLE);
			
 
				-#ifdef MADV_HUGEPAGE
			
 
				-	P_MADV_BHV(HUGEPAGE);
			
 
				-#endif
			
 
				-#ifdef MADV_NOHUGEPAGE
			
 
				-	P_MADV_BHV(NOHUGEPAGE);
			
 
				-#endif
			
 
				-#ifdef MADV_DONTDUMP
			
 
				-	P_MADV_BHV(DONTDUMP);
			
 
				-#endif
			
 
				-#ifdef MADV_DODUMP
			
 
				-	P_MADV_BHV(DODUMP);
			
 
				-#endif
			
 
				-#undef P_MADV_PHV
			
 
				-	default: break;
			
 
				-	}
			
 
				-
			
 
				-	return scnprintf(bf, size, "%#x", behavior);
			
 
				-}
			
 
				-
			
 
				-#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
			
 
				-
			
 
				-static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
			
 
				-					   struct syscall_arg *arg)
			
 
				-{
			
 
				-	int printed = 0, op = arg->val;
			
 
				-
			
 
				-	if (op == 0)
			
 
				-		return scnprintf(bf, size, "NONE");
			
 
				-#define	P_CMD(cmd) \
			
 
				-	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
			
 
				-		op &= ~LOCK_##cmd; \
			
 
				-	}
			
 
				-
			
 
				-	P_CMD(SH);
			
 
				-	P_CMD(EX);
			
 
				-	P_CMD(NB);
			
 
				-	P_CMD(UN);
			
 
				-	P_CMD(MAND);
			
 
				-	P_CMD(RW);
			
 
				-	P_CMD(READ);
			
 
				-	P_CMD(WRITE);
			
 
				-#undef P_OP
			
 
				-
			
 
				-	if (op)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_FLOCK syscall_arg__scnprintf_flock
			
 
				-
			
 
				-static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
			
 
				-{
			
 
				-	enum syscall_futex_args {
			
 
				-		SCF_UADDR   = (1 << 0),
			
 
				-		SCF_OP	    = (1 << 1),
			
 
				-		SCF_VAL	    = (1 << 2),
			
 
				-		SCF_TIMEOUT = (1 << 3),
			
 
				-		SCF_UADDR2  = (1 << 4),
			
 
				-		SCF_VAL3    = (1 << 5),
			
 
				-	};
			
 
				-	int op = arg->val;
			
 
				-	int cmd = op & FUTEX_CMD_MASK;
			
 
				-	size_t printed = 0;
			
 
				-
			
 
				-	switch (cmd) {
			
 
				-#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
			
 
				-	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
			
 
				-	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
			
 
				-	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
			
 
				-	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
			
 
				-	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
			
 
				-	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
			
 
				-	P_FUTEX_OP(WAKE_OP);							  break;
			
 
				-	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
			
 
				-	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
			
 
				-	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
			
 
				-	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
			
 
				-	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
			
 
				-	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
			
 
				-	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
			
 
				-	}
			
 
				-
			
 
				-	if (op & FUTEX_PRIVATE_FLAG)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "|PRIV");
			
 
				-
			
 
				-	if (op & FUTEX_CLOCK_REALTIME)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
			
 
				-
			
 
				 static const char *bpf_cmd[] = {
			
 
				 	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
			
 
				 	"MAP_GET_NEXT_KEY", "PROG_LOAD",
			
@@ -652,110 +434,6 @@ static const char *socket_families[] = {
 
				 };
			
 
				 static DEFINE_STRARRAY(socket_families);
			
 
				 
			
 
				-#ifndef SOCK_TYPE_MASK
			
 
				-#define SOCK_TYPE_MASK 0xf
			
 
				-#endif
			
 
				-
			
 
				-static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
			
 
				-						      struct syscall_arg *arg)
			
 
				-{
			
 
				-	size_t printed;
			
 
				-	int type = arg->val,
			
 
				-	    flags = type & ~SOCK_TYPE_MASK;
			
 
				-
			
 
				-	type &= SOCK_TYPE_MASK;
			
 
				-	/*
			
 
				- 	 * Can't use a strarray, MIPS may override for ABI reasons.
			
 
				- 	 */
			
 
				-	switch (type) {
			
 
				-#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
			
 
				-	P_SK_TYPE(STREAM);
			
 
				-	P_SK_TYPE(DGRAM);
			
 
				-	P_SK_TYPE(RAW);
			
 
				-	P_SK_TYPE(RDM);
			
 
				-	P_SK_TYPE(SEQPACKET);
			
 
				-	P_SK_TYPE(DCCP);
			
 
				-	P_SK_TYPE(PACKET);
			
 
				-#undef P_SK_TYPE
			
 
				-	default:
			
 
				-		printed = scnprintf(bf, size, "%#x", type);
			
 
				-	}
			
 
				-
			
 
				-#define	P_SK_FLAG(n) \
			
 
				-	if (flags & SOCK_##n) { \
			
 
				-		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
			
 
				-		flags &= ~SOCK_##n; \
			
 
				-	}
			
 
				-
			
 
				-	P_SK_FLAG(CLOEXEC);
			
 
				-	P_SK_FLAG(NONBLOCK);
			
 
				-#undef P_SK_FLAG
			
 
				-
			
 
				-	if (flags)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
			
 
				-
			
 
				-#ifndef MSG_PROBE
			
 
				-#define MSG_PROBE	     0x10
			
 
				-#endif
			
 
				-#ifndef MSG_WAITFORONE
			
 
				-#define MSG_WAITFORONE	0x10000
			
 
				-#endif
			
 
				-#ifndef MSG_SENDPAGE_NOTLAST
			
 
				-#define MSG_SENDPAGE_NOTLAST 0x20000
			
 
				-#endif
			
 
				-#ifndef MSG_FASTOPEN
			
 
				-#define MSG_FASTOPEN	     0x20000000
			
 
				-#endif
			
 
				-
			
 
				-static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
			
 
				-					       struct syscall_arg *arg)
			
 
				-{
			
 
				-	int printed = 0, flags = arg->val;
			
 
				-
			
 
				-	if (flags == 0)
			
 
				-		return scnprintf(bf, size, "NONE");
			
 
				-#define	P_MSG_FLAG(n) \
			
 
				-	if (flags & MSG_##n) { \
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
			
 
				-		flags &= ~MSG_##n; \
			
 
				-	}
			
 
				-
			
 
				-	P_MSG_FLAG(OOB);
			
 
				-	P_MSG_FLAG(PEEK);
			
 
				-	P_MSG_FLAG(DONTROUTE);
			
 
				-	P_MSG_FLAG(TRYHARD);
			
 
				-	P_MSG_FLAG(CTRUNC);
			
 
				-	P_MSG_FLAG(PROBE);
			
 
				-	P_MSG_FLAG(TRUNC);
			
 
				-	P_MSG_FLAG(DONTWAIT);
			
 
				-	P_MSG_FLAG(EOR);
			
 
				-	P_MSG_FLAG(WAITALL);
			
 
				-	P_MSG_FLAG(FIN);
			
 
				-	P_MSG_FLAG(SYN);
			
 
				-	P_MSG_FLAG(CONFIRM);
			
 
				-	P_MSG_FLAG(RST);
			
 
				-	P_MSG_FLAG(ERRQUEUE);
			
 
				-	P_MSG_FLAG(NOSIGNAL);
			
 
				-	P_MSG_FLAG(MORE);
			
 
				-	P_MSG_FLAG(WAITFORONE);
			
 
				-	P_MSG_FLAG(SENDPAGE_NOTLAST);
			
 
				-	P_MSG_FLAG(FASTOPEN);
			
 
				-	P_MSG_FLAG(CMSG_CLOEXEC);
			
 
				-#undef P_MSG_FLAG
			
 
				-
			
 
				-	if (flags)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
			
 
				-
			
 
				 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
			
 
				 						 struct syscall_arg *arg)
			
 
				 {
			
@@ -788,116 +466,6 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 
				 
			
 
				 #define SCA_FILENAME syscall_arg__scnprintf_filename
			
 
				 
			
 
				-static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
			
 
				-					       struct syscall_arg *arg)
			
 
				-{
			
 
				-	int printed = 0, flags = arg->val;
			
 
				-
			
 
				-	if (!(flags & O_CREAT))
			
 
				-		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
			
 
				-
			
 
				-	if (flags == 0)
			
 
				-		return scnprintf(bf, size, "RDONLY");
			
 
				-#define	P_FLAG(n) \
			
 
				-	if (flags & O_##n) { \
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
			
 
				-		flags &= ~O_##n; \
			
 
				-	}
			
 
				-
			
 
				-	P_FLAG(APPEND);
			
 
				-	P_FLAG(ASYNC);
			
 
				-	P_FLAG(CLOEXEC);
			
 
				-	P_FLAG(CREAT);
			
 
				-	P_FLAG(DIRECT);
			
 
				-	P_FLAG(DIRECTORY);
			
 
				-	P_FLAG(EXCL);
			
 
				-	P_FLAG(LARGEFILE);
			
 
				-	P_FLAG(NOATIME);
			
 
				-	P_FLAG(NOCTTY);
			
 
				-#ifdef O_NONBLOCK
			
 
				-	P_FLAG(NONBLOCK);
			
 
				-#elif O_NDELAY
			
 
				-	P_FLAG(NDELAY);
			
 
				-#endif
			
 
				-#ifdef O_PATH
			
 
				-	P_FLAG(PATH);
			
 
				-#endif
			
 
				-	P_FLAG(RDWR);
			
 
				-#ifdef O_DSYNC
			
 
				-	if ((flags & O_SYNC) == O_SYNC)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
			
 
				-	else {
			
 
				-		P_FLAG(DSYNC);
			
 
				-	}
			
 
				-#else
			
 
				-	P_FLAG(SYNC);
			
 
				-#endif
			
 
				-	P_FLAG(TRUNC);
			
 
				-	P_FLAG(WRONLY);
			
 
				-#undef P_FLAG
			
 
				-
			
 
				-	if (flags)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
			
 
				-
			
 
				-static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
			
 
				-						struct syscall_arg *arg)
			
 
				-{
			
 
				-	int printed = 0, flags = arg->val;
			
 
				-
			
 
				-	if (flags == 0)
			
 
				-		return 0;
			
 
				-
			
 
				-#define	P_FLAG(n) \
			
 
				-	if (flags & PERF_FLAG_##n) { \
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
			
 
				-		flags &= ~PERF_FLAG_##n; \
			
 
				-	}
			
 
				-
			
 
				-	P_FLAG(FD_NO_GROUP);
			
 
				-	P_FLAG(FD_OUTPUT);
			
 
				-	P_FLAG(PID_CGROUP);
			
 
				-	P_FLAG(FD_CLOEXEC);
			
 
				-#undef P_FLAG
			
 
				-
			
 
				-	if (flags)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
			
 
				-
			
 
				-static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
			
 
				-						   struct syscall_arg *arg)
			
 
				-{
			
 
				-	int printed = 0, flags = arg->val;
			
 
				-
			
 
				-	if (flags == 0)
			
 
				-		return scnprintf(bf, size, "NONE");
			
 
				-#define	P_FLAG(n) \
			
 
				-	if (flags & EFD_##n) { \
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
			
 
				-		flags &= ~EFD_##n; \
			
 
				-	}
			
 
				-
			
 
				-	P_FLAG(SEMAPHORE);
			
 
				-	P_FLAG(CLOEXEC);
			
 
				-	P_FLAG(NONBLOCK);
			
 
				-#undef P_FLAG
			
 
				-
			
 
				-	if (flags)
			
 
				-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
			
 
				-
			
 
				-	return printed;
			
 
				-}
			
 
				-
			
 
				-#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
			
 
				-
			
 
				 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
			
 
				 						struct syscall_arg *arg)
			
 
				 {
			
@@ -921,59 +489,6 @@ static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 
				 
			
 
				 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
			
 
				 
			
 
				-static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
			
 
				-{
			
 
				-	int sig = arg->val;
			
 
				-
			
 
				-	switch (sig) {
			
 
				-#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
			
 
				-	P_SIGNUM(HUP);
			
 
				-	P_SIGNUM(INT);
			
 
				-	P_SIGNUM(QUIT);
			
 
				-	P_SIGNUM(ILL);
			
 
				-	P_SIGNUM(TRAP);
			
 
				-	P_SIGNUM(ABRT);
			
 
				-	P_SIGNUM(BUS);
			
 
				-	P_SIGNUM(FPE);
			
 
				-	P_SIGNUM(KILL);
			
 
				-	P_SIGNUM(USR1);
			
 
				-	P_SIGNUM(SEGV);
			
 
				-	P_SIGNUM(USR2);
			
 
				-	P_SIGNUM(PIPE);
			
 
				-	P_SIGNUM(ALRM);
			
 
				-	P_SIGNUM(TERM);
			
 
				-	P_SIGNUM(CHLD);
			
 
				-	P_SIGNUM(CONT);
			
 
				-	P_SIGNUM(STOP);
			
 
				-	P_SIGNUM(TSTP);
			
 
				-	P_SIGNUM(TTIN);
			
 
				-	P_SIGNUM(TTOU);
			
 
				-	P_SIGNUM(URG);
			
 
				-	P_SIGNUM(XCPU);
			
 
				-	P_SIGNUM(XFSZ);
			
 
				-	P_SIGNUM(VTALRM);
			
 
				-	P_SIGNUM(PROF);
			
 
				-	P_SIGNUM(WINCH);
			
 
				-	P_SIGNUM(IO);
			
 
				-	P_SIGNUM(PWR);
			
 
				-	P_SIGNUM(SYS);
			
 
				-#ifdef SIGEMT
			
 
				-	P_SIGNUM(EMT);
			
 
				-#endif
			
 
				-#ifdef SIGSTKFLT
			
 
				-	P_SIGNUM(STKFLT);
			
 
				-#endif
			
 
				-#ifdef SIGSWI
			
 
				-	P_SIGNUM(SWI);
			
 
				-#endif
			
 
				-	default: break;
			
 
				-	}
			
 
				-
			
 
				-	return scnprintf(bf, size, "%#x", sig);
			
 
				-}
			
 
				-
			
 
				-#define SCA_SIGNUM syscall_arg__scnprintf_signum
			
 
				-
			
 
				 #if defined(__i386__) || defined(__x86_64__)
			
 
				 /*
			
 
				  * FIXME: Make this available to all arches.
			
@@ -1001,16 +516,62 @@ static const char *tioctls[] = {
 
				 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
			
 
				 #endif /* defined(__i386__) || defined(__x86_64__) */
			
 
				 
			
 
				+#ifndef GRND_NONBLOCK
			
 
				+#define GRND_NONBLOCK	0x0001
			
 
				+#endif
			
 
				+#ifndef GRND_RANDOM
			
 
				+#define GRND_RANDOM	0x0002
			
 
				+#endif
			
 
				+
			
 
				+static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
			
 
				+						   struct syscall_arg *arg)
			
 
				+{
			
 
				+	int printed = 0, flags = arg->val;
			
 
				+
			
 
				+#define	P_FLAG(n) \
			
 
				+	if (flags & GRND_##n) { \
			
 
				+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
			
 
				+		flags &= ~GRND_##n; \
			
 
				+	}
			
 
				+
			
 
				+	P_FLAG(RANDOM);
			
 
				+	P_FLAG(NONBLOCK);
			
 
				+#undef P_FLAG
			
 
				+
			
 
				+	if (flags)
			
 
				+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
			
 
				+
			
 
				+	return printed;
			
 
				+}
			
 
				+
			
 
				+#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
			
 
				+
			
 
				 #define STRARRAY(arg, name, array) \
			
 
				 	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
			
 
				 	  .arg_parm	 = { [arg] = &strarray__##array, }
			
 
				 
			
 
				+#include "trace/beauty/eventfd.c"
			
 
				+#include "trace/beauty/flock.c"
			
 
				+#include "trace/beauty/futex_op.c"
			
 
				+#include "trace/beauty/mmap.c"
			
 
				+#include "trace/beauty/mode_t.c"
			
 
				+#include "trace/beauty/msg_flags.c"
			
 
				+#include "trace/beauty/open_flags.c"
			
 
				+#include "trace/beauty/perf_event_open.c"
			
 
				+#include "trace/beauty/pid.c"
			
 
				+#include "trace/beauty/sched_policy.c"
			
 
				+#include "trace/beauty/seccomp.c"
			
 
				+#include "trace/beauty/signum.c"
			
 
				+#include "trace/beauty/socket_type.c"
			
 
				+#include "trace/beauty/waitid_options.c"
			
 
				+
			
 
				 static struct syscall_fmt {
			
 
				 	const char *name;
			
 
				 	const char *alias;
			
 
				 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
			
 
				 	void	   *arg_parm[6];
			
 
				 	bool	   errmsg;
			
 
				+	bool	   errpid;
			
 
				 	bool	   timeout;
			
 
				 	bool	   hexret;
			
 
				 } syscall_fmts[] = {
			
@@ -1028,6 +589,7 @@ static struct syscall_fmt {
 
				 	{ .name	    = "chroot",	    .errmsg = true,
			
 
				 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
			
 
				 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
			
 
				+	{ .name	    = "clone",	    .errpid = true, },
			
 
				 	{ .name	    = "close",	    .errmsg = true,
			
 
				 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
			
 
				 	{ .name	    = "connect",    .errmsg = true, },
			
@@ -1093,6 +655,11 @@ static struct syscall_fmt {
 
				 	{ .name	    = "getdents64", .errmsg = true,
			
 
				 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
			
 
				 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
			
 
				+	{ .name	    = "getpid",	    .errpid = true, },
			
 
				+	{ .name	    = "getpgid",    .errpid = true, },
			
 
				+	{ .name	    = "getppid",    .errpid = true, },
			
 
				+	{ .name	    = "getrandom",  .errmsg = true,
			
 
				+	  .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
			
 
				 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
			
 
				 	{ .name	    = "getxattr",    .errmsg = true,
			
 
				 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
			
@@ -1186,8 +753,7 @@ static struct syscall_fmt {
 
				 			     [1] = SCA_FILENAME, /* filename */
			
 
				 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
			
 
				 	{ .name	    = "perf_event_open", .errmsg = true,
			
 
				-	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
			
 
				-			     [2] = SCA_INT, /* cpu */
			
 
				+	  .arg_scnprintf = { [2] = SCA_INT, /* cpu */
			
 
				 			     [3] = SCA_FD,  /* group_fd */
			
 
				 			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
			
 
				 	{ .name	    = "pipe2",	    .errmsg = true,
			
@@ -1234,6 +800,11 @@ static struct syscall_fmt {
 
				 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
			
 
				 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
			
 
				 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
			
 
				+	{ .name	    = "sched_setscheduler",   .errmsg = true,
			
 
				+	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
			
 
				+	{ .name	    = "seccomp", .errmsg = true,
			
 
				+	  .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
			
 
				+			     [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
			
 
				 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
			
 
				 	{ .name	    = "sendmmsg",    .errmsg = true,
			
 
				 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			
@@ -1244,7 +815,9 @@ static struct syscall_fmt {
 
				 	{ .name	    = "sendto",	    .errmsg = true,
			
 
				 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			
 
				 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
			
 
				+	{ .name	    = "set_tid_address", .errpid = true, },
			
 
				 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
			
 
				+	{ .name	    = "setpgid",    .errmsg = true, },
			
 
				 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
			
 
				 	{ .name	    = "setxattr",   .errmsg = true,
			
 
				 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
			
@@ -1287,6 +860,10 @@ static struct syscall_fmt {
 
				 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
			
 
				 	{ .name	    = "vmsplice",  .errmsg = true,
			
 
				 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
			
 
				+	{ .name	    = "wait4",	    .errpid = true,
			
 
				+	  .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
			
 
				+	{ .name	    = "waitid",	    .errpid = true,
			
 
				+	  .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
			
 
				 	{ .name	    = "write",	    .errmsg = true,
			
 
				 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
			
 
				 	{ .name	    = "writev",	    .errmsg = true,
			
@@ -1398,59 +975,6 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
 
				 
			
 
				 static const size_t trace__entry_str_size = 2048;
			
 
				 
			
 
				-struct trace {
			
 
				-	struct perf_tool	tool;
			
 
				-	struct {
			
 
				-		int		machine;
			
 
				-		int		open_id;
			
 
				-	}			audit;
			
 
				-	struct {
			
 
				-		int		max;
			
 
				-		struct syscall  *table;
			
 
				-		struct {
			
 
				-			struct perf_evsel *sys_enter,
			
 
				-					  *sys_exit;
			
 
				-		}		events;
			
 
				-	} syscalls;
			
 
				-	struct record_opts	opts;
			
 
				-	struct perf_evlist	*evlist;
			
 
				-	struct machine		*host;
			
 
				-	struct thread		*current;
			
 
				-	u64			base_time;
			
 
				-	FILE			*output;
			
 
				-	unsigned long		nr_events;
			
 
				-	struct strlist		*ev_qualifier;
			
 
				-	struct {
			
 
				-		size_t		nr;
			
 
				-		int		*entries;
			
 
				-	}			ev_qualifier_ids;
			
 
				-	struct intlist		*tid_list;
			
 
				-	struct intlist		*pid_list;
			
 
				-	struct {
			
 
				-		size_t		nr;
			
 
				-		pid_t		*entries;
			
 
				-	}			filter_pids;
			
 
				-	double			duration_filter;
			
 
				-	double			runtime_ms;
			
 
				-	struct {
			
 
				-		u64		vfs_getname,
			
 
				-				proc_getname;
			
 
				-	} stats;
			
 
				-	bool			not_ev_qualifier;
			
 
				-	bool			live;
			
 
				-	bool			full_time;
			
 
				-	bool			sched;
			
 
				-	bool			multiple_threads;
			
 
				-	bool			summary;
			
 
				-	bool			summary_only;
			
 
				-	bool			show_comm;
			
 
				-	bool			show_tool_stats;
			
 
				-	bool			trace_syscalls;
			
 
				-	bool			force;
			
 
				-	bool			vfs_getname;
			
 
				-	int			trace_pgfaults;
			
 
				-};
			
 
				-
			
 
				 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
			
 
				 {
			
 
				 	struct thread_trace *ttrace = thread__priv(thread);
			
@@ -1618,6 +1142,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
 
				 		color_fprintf(trace->output, PERF_COLOR_RED,
			
 
				 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
			
 
				 		ret = machine__process_lost_event(machine, event, sample);
			
 
				+		break;
			
 
				 	default:
			
 
				 		ret = machine__process_event(machine, event, sample);
			
 
				 		break;
			
@@ -1675,6 +1200,10 @@ static int syscall__set_arg_fmts(struct syscall *sc)
 
				 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
			
 
				 		else if (field->flags & FIELD_IS_POINTER)
			
 
				 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
			
 
				+		else if (strcmp(field->type, "pid_t") == 0)
			
 
				+			sc->arg_scnprintf[idx] = SCA_PID;
			
 
				+		else if (strcmp(field->type, "umode_t") == 0)
			
 
				+			sc->arg_scnprintf[idx] = SCA_MODE_T;
			
 
				 		++idx;
			
 
				 	}
			
 
				 
			
@@ -1685,7 +1214,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 
				 {
			
 
				 	char tp_name[128];
			
 
				 	struct syscall *sc;
			
 
				-	const char *name = audit_syscall_to_name(id, trace->audit.machine);
			
 
				+	const char *name = syscalltbl__name(trace->sctbl, id);
			
 
				 
			
 
				 	if (name == NULL)
			
 
				 		return -1;
			
@@ -1760,7 +1289,7 @@ static int trace__validate_ev_qualifier(struct trace *trace)
 
				 
			
 
				 	strlist__for_each(pos, trace->ev_qualifier) {
			
 
				 		const char *sc = pos->s;
			
 
				-		int id = audit_name_to_syscall(sc, trace->audit.machine);
			
 
				+		int id = syscalltbl__id(trace->sctbl, sc);
			
 
				 
			
 
				 		if (id < 0) {
			
 
				 			if (err == 0) {
			
@@ -1846,7 +1375,12 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 
				 						     "%ld", val);
			
 
				 			}
			
 
				 		}
			
 
				-	} else {
			
 
				+	} else if (IS_ERR(sc->tp_format)) {
			
 
				+		/*
			
 
				+		 * If we managed to read the tracepoint /format file, then we
			
 
				+		 * may end up not having any args, like with gettid(), so only
			
 
				+		 * print the raw args when we didn't manage to read it.
			
 
				+		 */
			
 
				 		int i = 0;
			
 
				 
			
 
				 		while (i < 6) {
			
@@ -1987,7 +1521,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
				 			goto out_put;
			
 
				 	}
			
 
				 
			
 
				-	if (!trace->summary_only)
			
 
				+	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
			
 
				 		trace__printf_interrupted_entry(trace, sample);
			
 
				 
			
 
				 	ttrace->entry_time = sample->time;
			
@@ -1998,7 +1532,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
				 					   args, trace, thread);
			
 
				 
			
 
				 	if (sc->is_exit) {
			
 
				-		if (!trace->duration_filter && !trace->summary_only) {
			
 
				+		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
			
 
				 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			
 
				 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
			
 
				 		}
			
@@ -2018,6 +1552,29 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				+static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
			
 
				+				    struct perf_sample *sample,
			
 
				+				    struct callchain_cursor *cursor)
			
 
				+{
			
 
				+	struct addr_location al;
			
 
				+
			
 
				+	if (machine__resolve(trace->host, &al, sample) < 0 ||
			
 
				+	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
			
 
				+		return -1;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
			
 
				+{
			
 
				+	/* TODO: user-configurable print_opts */
			
 
				+	const unsigned int print_opts = EVSEL__PRINT_SYM |
			
 
				+				        EVSEL__PRINT_DSO |
			
 
				+				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
			
 
				+
			
 
				+	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
			
 
				+}
			
 
				+
			
 
				 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			
 
				 			   union perf_event *event __maybe_unused,
			
 
				 			   struct perf_sample *sample)
			
@@ -2025,7 +1582,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
				 	long ret;
			
 
				 	u64 duration = 0;
			
 
				 	struct thread *thread;
			
 
				-	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
			
 
				+	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
			
 
				 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
			
 
				 	struct thread_trace *ttrace;
			
 
				 
			
@@ -2042,7 +1599,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
				 
			
 
				 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
			
 
				 
			
 
				-	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
			
 
				+	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
			
 
				 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
			
 
				 		ttrace->filename.pending_open = false;
			
 
				 		++trace->stats.vfs_getname;
			
@@ -2057,6 +1614,15 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
				 	} else if (trace->duration_filter)
			
 
				 		goto out;
			
 
				 
			
 
				+	if (sample->callchain) {
			
 
				+		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
			
 
				+		if (callchain_ret == 0) {
			
 
				+			if (callchain_cursor.nr < trace->min_stack)
			
 
				+				goto out;
			
 
				+			callchain_ret = 1;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	if (trace->summary_only)
			
 
				 		goto out;
			
 
				 
			
@@ -2073,7 +1639,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
				 	if (sc->fmt == NULL) {
			
 
				 signed_print:
			
 
				 		fprintf(trace->output, ") = %ld", ret);
			
 
				-	} else if (ret < 0 && sc->fmt->errmsg) {
			
 
				+	} else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
			
 
				 		char bf[STRERR_BUFSIZE];
			
 
				 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			
 
				 			   *e = audit_errno_to_name(-ret);
			
@@ -2083,10 +1649,24 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
				 		fprintf(trace->output, ") = 0 Timeout");
			
 
				 	else if (sc->fmt->hexret)
			
 
				 		fprintf(trace->output, ") = %#lx", ret);
			
 
				-	else
			
 
				+	else if (sc->fmt->errpid) {
			
 
				+		struct thread *child = machine__find_thread(trace->host, ret, ret);
			
 
				+
			
 
				+		if (child != NULL) {
			
 
				+			fprintf(trace->output, ") = %ld", ret);
			
 
				+			if (child->comm_set)
			
 
				+				fprintf(trace->output, " (%s)", thread__comm_str(child));
			
 
				+			thread__put(child);
			
 
				+		}
			
 
				+	} else
			
 
				 		goto signed_print;
			
 
				 
			
 
				 	fputc('\n', trace->output);
			
 
				+
			
 
				+	if (callchain_ret > 0)
			
 
				+		trace__fprintf_callchain(trace, sample);
			
 
				+	else if (callchain_ret < 0)
			
 
				+		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
			
 
				 out:
			
 
				 	ttrace->entry_pending = false;
			
 
				 	err = 0;
			
@@ -2217,6 +1797,17 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 
				 				union perf_event *event __maybe_unused,
			
 
				 				struct perf_sample *sample)
			
 
				 {
			
 
				+	int callchain_ret = 0;
			
 
				+
			
 
				+	if (sample->callchain) {
			
 
				+		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
			
 
				+		if (callchain_ret == 0) {
			
 
				+			if (callchain_cursor.nr < trace->min_stack)
			
 
				+				goto out;
			
 
				+			callchain_ret = 1;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	trace__printf_interrupted_entry(trace, sample);
			
 
				 	trace__fprintf_tstamp(trace, sample->time, trace->output);
			
 
				 
			
@@ -2234,6 +1825,12 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 
				 	}
			
 
				 
			
 
				 	fprintf(trace->output, ")\n");
			
 
				+
			
 
				+	if (callchain_ret > 0)
			
 
				+		trace__fprintf_callchain(trace, sample);
			
 
				+	else if (callchain_ret < 0)
			
 
				+		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
			
 
				+out:
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -2264,8 +1861,19 @@ static int trace__pgfault(struct trace *trace,
 
				 	char map_type = 'd';
			
 
				 	struct thread_trace *ttrace;
			
 
				 	int err = -1;
			
 
				+	int callchain_ret = 0;
			
 
				 
			
 
				 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
			
 
				+
			
 
				+	if (sample->callchain) {
			
 
				+		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
			
 
				+		if (callchain_ret == 0) {
			
 
				+			if (callchain_cursor.nr < trace->min_stack)
			
 
				+				goto out_put;
			
 
				+			callchain_ret = 1;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	ttrace = thread__trace(thread, trace->output);
			
 
				 	if (ttrace == NULL)
			
 
				 		goto out_put;
			
@@ -2307,6 +1915,11 @@ static int trace__pgfault(struct trace *trace,
 
				 	print_location(trace->output, sample, &al, true, false);
			
 
				 
			
 
				 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
			
 
				+
			
 
				+	if (callchain_ret > 0)
			
 
				+		trace__fprintf_callchain(trace, sample);
			
 
				+	else if (callchain_ret < 0)
			
 
				+		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
			
 
				 out:
			
 
				 	err = 0;
			
 
				 out_put:
			
@@ -2326,6 +1939,23 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 
				 	return false;
			
 
				 }
			
 
				 
			
 
				+static void trace__set_base_time(struct trace *trace,
			
 
				+				 struct perf_evsel *evsel,
			
 
				+				 struct perf_sample *sample)
			
 
				+{
			
 
				+	/*
			
 
				+	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
			
 
				+	 * and don't use sample->time unconditionally, we may end up having
			
 
				+	 * some other event in the future without PERF_SAMPLE_TIME for good
			
 
				+	 * reason, i.e. we may not be interested in its timestamps, just in
			
 
				+	 * it taking place, picking some piece of information when it
			
 
				+	 * appears in our event stream (vfs_getname comes to mind).
			
 
				+	 */
			
 
				+	if (trace->base_time == 0 && !trace->full_time &&
			
 
				+	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
			
 
				+		trace->base_time = sample->time;
			
 
				+}
			
 
				+
			
 
				 static int trace__process_sample(struct perf_tool *tool,
			
 
				 				 union perf_event *event,
			
 
				 				 struct perf_sample *sample,
			
@@ -2340,8 +1970,7 @@ static int trace__process_sample(struct perf_tool *tool,
 
				 	if (skip_sample(trace, sample))
			
 
				 		return 0;
			
 
				 
			
 
				-	if (!trace->full_time && trace->base_time == 0)
			
 
				-		trace->base_time = sample->time;
			
 
				+	trace__set_base_time(trace, evsel, sample);
			
 
				 
			
 
				 	if (handler) {
			
 
				 		++trace->nr_events;
			
@@ -2450,8 +2079,7 @@ static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				-static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
			
 
				-				    u64 config)
			
 
				+static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
			
 
				 {
			
 
				 	struct perf_evsel *evsel;
			
 
				 	struct perf_event_attr attr = {
			
@@ -2465,13 +2093,10 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
 
				 	event_attr_init(&attr);
			
 
				 
			
 
				 	evsel = perf_evsel__new(&attr);
			
 
				-	if (!evsel)
			
 
				-		return -ENOMEM;
			
 
				-
			
 
				-	evsel->handler = trace__pgfault;
			
 
				-	perf_evlist__add(evlist, evsel);
			
 
				+	if (evsel)
			
 
				+		evsel->handler = trace__pgfault;
			
 
				 
			
 
				-	return 0;
			
 
				+	return evsel;
			
 
				 }
			
 
				 
			
 
				 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
			
@@ -2479,9 +2104,6 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 
				 	const u32 type = event->header.type;
			
 
				 	struct perf_evsel *evsel;
			
 
				 
			
 
				-	if (!trace->full_time && trace->base_time == 0)
			
 
				-		trace->base_time = sample->time;
			
 
				-
			
 
				 	if (type != PERF_RECORD_SAMPLE) {
			
 
				 		trace__process_event(trace, trace->host, event, sample);
			
 
				 		return;
			
@@ -2493,6 +2115,8 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				+	trace__set_base_time(trace, evsel, sample);
			
 
				+
			
 
				 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
			
 
				 	    sample->raw_data == NULL) {
			
 
				 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
			
@@ -2527,6 +2151,15 @@ static int trace__add_syscall_newtp(struct trace *trace)
 
				 	perf_evlist__add(evlist, sys_enter);
			
 
				 	perf_evlist__add(evlist, sys_exit);
			
 
				 
			
 
				+	if (callchain_param.enabled && !trace->kernel_syscallchains) {
			
 
				+		/*
			
 
				+		 * We're interested only in the user space callchain
			
 
				+		 * leading to the syscall, allow overriding that for
			
 
				+		 * debugging reasons using --kernel_syscall_callchains
			
 
				+		 */
			
 
				+		sys_exit->attr.exclude_callchain_kernel = 1;
			
 
				+	}
			
 
				+
			
 
				 	trace->syscalls.events.sys_enter = sys_enter;
			
 
				 	trace->syscalls.events.sys_exit  = sys_exit;
			
 
				 
			
@@ -2565,7 +2198,7 @@ static int trace__set_ev_qualifier_filter(struct trace *trace)
 
				 static int trace__run(struct trace *trace, int argc, const char **argv)
			
 
				 {
			
 
				 	struct perf_evlist *evlist = trace->evlist;
			
 
				-	struct perf_evsel *evsel;
			
 
				+	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
			
 
				 	int err = -1, i;
			
 
				 	unsigned long before;
			
 
				 	const bool forks = argc > 0;
			
@@ -2579,14 +2212,19 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
				 	if (trace->trace_syscalls)
			
 
				 		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
			
 
				 
			
 
				-	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
			
 
				-	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
			
 
				-		goto out_error_mem;
			
 
				+	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
			
 
				+		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
			
 
				+		if (pgfault_maj == NULL)
			
 
				+			goto out_error_mem;
			
 
				+		perf_evlist__add(evlist, pgfault_maj);
			
 
				 	}
			
 
				 
			
 
				-	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
			
 
				-	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
			
 
				-		goto out_error_mem;
			
 
				+	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
			
 
				+		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
			
 
				+		if (pgfault_min == NULL)
			
 
				+			goto out_error_mem;
			
 
				+		perf_evlist__add(evlist, pgfault_min);
			
 
				+	}
			
 
				 
			
 
				 	if (trace->sched &&
			
 
				 	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
			
@@ -2605,7 +2243,45 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
				 		goto out_delete_evlist;
			
 
				 	}
			
 
				 
			
 
				-	perf_evlist__config(evlist, &trace->opts);
			
 
				+	perf_evlist__config(evlist, &trace->opts, NULL);
			
 
				+
			
 
				+	if (callchain_param.enabled) {
			
 
				+		bool use_identifier = false;
			
 
				+
			
 
				+		if (trace->syscalls.events.sys_exit) {
			
 
				+			perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
			
 
				+						     &trace->opts, &callchain_param);
			
 
				+			use_identifier = true;
			
 
				+		}
			
 
				+
			
 
				+		if (pgfault_maj) {
			
 
				+			perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
			
 
				+			use_identifier = true;
			
 
				+		}
			
 
				+
			
 
				+		if (pgfault_min) {
			
 
				+			perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
			
 
				+			use_identifier = true;
			
 
				+		}
			
 
				+
			
 
				+		if (use_identifier) {
			
 
				+		       /*
			
 
				+			* Now we have evsels with different sample_ids, use
			
 
				+			* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
			
 
				+			* from a fixed position in each ring buffer record.
			
 
				+			*
			
 
				+			* As of the changeset introducing this comment, this
			
 
				+			* isn't strictly needed, as the fields that can come before
			
 
				+			* PERF_SAMPLE_ID are all used, but we'll probably disable
			
 
				+			* some of those for things like copying the payload of
			
 
				+			* pointer syscall arguments, and for vfs_getname we don't
			
 
				+			* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
			
 
				+			* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
			
 
				+			*/
			
 
				+			perf_evlist__set_sample_bit(evlist, IDENTIFIER);
			
 
				+			perf_evlist__reset_sample_bit(evlist, ID);
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	signal(SIGCHLD, sig_handler);
			
 
				 	signal(SIGINT, sig_handler);
			
@@ -2883,15 +2559,29 @@ static size_t trace__fprintf_threads_header(FILE *fp)
 
				 	return printed;
			
 
				 }
			
 
				 
			
 
				+DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
			
 
				+	struct stats 	*stats;
			
 
				+	double		msecs;
			
 
				+	int		syscall;
			
 
				+)
			
 
				+{
			
 
				+	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
			
 
				+	struct stats *stats = source->priv;
			
 
				+
			
 
				+	entry->syscall = source->i;
			
 
				+	entry->stats   = stats;
			
 
				+	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
			
 
				+}
			
 
				+
			
 
				 static size_t thread__dump_stats(struct thread_trace *ttrace,
			
 
				 				 struct trace *trace, FILE *fp)
			
 
				 {
			
 
				-	struct stats *stats;
			
 
				 	size_t printed = 0;
			
 
				 	struct syscall *sc;
			
 
				-	struct int_node *inode = intlist__first(ttrace->syscall_stats);
			
 
				+	struct rb_node *nd;
			
 
				+	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
			
 
				 
			
 
				-	if (inode == NULL)
			
 
				+	if (syscall_stats == NULL)
			
 
				 		return 0;
			
 
				 
			
 
				 	printed += fprintf(fp, "\n");
			
@@ -2900,9 +2590,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
 
				 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
			
 
				 	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
			
 
				 
			
 
				-	/* each int_node is a syscall */
			
 
				-	while (inode) {
			
 
				-		stats = inode->priv;
			
 
				+	resort_rb__for_each(nd, syscall_stats) {
			
 
				+		struct stats *stats = syscall_stats_entry->stats;
			
 
				 		if (stats) {
			
 
				 			double min = (double)(stats->min) / NSEC_PER_MSEC;
			
 
				 			double max = (double)(stats->max) / NSEC_PER_MSEC;
			
@@ -2913,34 +2602,23 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
 
				 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			
 
				 			avg /= NSEC_PER_MSEC;
			
 
				 
			
 
				-			sc = &trace->syscalls.table[inode->i];
			
 
				+			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			
 
				 			printed += fprintf(fp, "   %-15s", sc->name);
			
 
				 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
			
 
				-					   n, avg * n, min, avg);
			
 
				+					   n, syscall_stats_entry->msecs, min, avg);
			
 
				 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
			
 
				 		}
			
 
				-
			
 
				-		inode = intlist__next(inode);
			
 
				 	}
			
 
				 
			
 
				+	resort_rb__delete(syscall_stats);
			
 
				 	printed += fprintf(fp, "\n\n");
			
 
				 
			
 
				 	return printed;
			
 
				 }
			
 
				 
			
 
				-/* struct used to pass data to per-thread function */
			
 
				-struct summary_data {
			
 
				-	FILE *fp;
			
 
				-	struct trace *trace;
			
 
				-	size_t printed;
			
 
				-};
			
 
				-
			
 
				-static int trace__fprintf_one_thread(struct thread *thread, void *priv)
			
 
				+static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
			
 
				 {
			
 
				-	struct summary_data *data = priv;
			
 
				-	FILE *fp = data->fp;
			
 
				-	size_t printed = data->printed;
			
 
				-	struct trace *trace = data->trace;
			
 
				+	size_t printed = 0;
			
 
				 	struct thread_trace *ttrace = thread__priv(thread);
			
 
				 	double ratio;
			
 
				 
			
@@ -2956,25 +2634,45 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv)
 
				 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
			
 
				 	if (ttrace->pfmin)
			
 
				 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
			
 
				-	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
			
 
				+	if (trace->sched)
			
 
				+		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
			
 
				+	else if (fputc('\n', fp) != EOF)
			
 
				+		++printed;
			
 
				+
			
 
				 	printed += thread__dump_stats(ttrace, trace, fp);
			
 
				 
			
 
				-	data->printed += printed;
			
 
				+	return printed;
			
 
				+}
			
 
				 
			
 
				-	return 0;
			
 
				+static unsigned long thread__nr_events(struct thread_trace *ttrace)
			
 
				+{
			
 
				+	return ttrace ? ttrace->nr_events : 0;
			
 
				+}
			
 
				+
			
 
				+DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
			
 
				+	struct thread *thread;
			
 
				+)
			
 
				+{
			
 
				+	entry->thread = rb_entry(nd, struct thread, rb_node);
			
 
				 }
			
 
				 
			
 
				 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
			
 
				 {
			
 
				-	struct summary_data data = {
			
 
				-		.fp = fp,
			
 
				-		.trace = trace
			
 
				-	};
			
 
				-	data.printed = trace__fprintf_threads_header(fp);
			
 
				+	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
			
 
				+	size_t printed = trace__fprintf_threads_header(fp);
			
 
				+	struct rb_node *nd;
			
 
				+
			
 
				+	if (threads == NULL) {
			
 
				+		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
			
 
				+		return 0;
			
 
				+	}
			
 
				 
			
 
				-	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
			
 
				+	resort_rb__for_each(nd, threads)
			
 
				+		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
			
 
				 
			
 
				-	return data.printed;
			
 
				+	resort_rb__delete(threads);
			
 
				+
			
 
				+	return printed;
			
 
				 }
			
 
				 
			
 
				 static int trace__set_duration(const struct option *opt, const char *str,
			
@@ -3070,10 +2768,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		NULL
			
 
				 	};
			
 
				 	struct trace trace = {
			
 
				-		.audit = {
			
 
				-			.machine = audit_detect_machine(),
			
 
				-			.open_id = audit_name_to_syscall("open", trace.audit.machine),
			
 
				-		},
			
 
				 		.syscalls = {
			
 
				 			. max = -1,
			
 
				 		},
			
@@ -3091,6 +2785,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		.output = stderr,
			
 
				 		.show_comm = true,
			
 
				 		.trace_syscalls = true,
			
 
				+		.kernel_syscallchains = false,
			
 
				+		.max_stack = UINT_MAX,
			
 
				 	};
			
 
				 	const char *output_name = NULL;
			
 
				 	const char *ev_qualifier_str = NULL;
			
@@ -3136,10 +2832,24 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		     "Trace pagefaults", parse_pagefaults, "maj"),
			
 
				 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
			
 
				 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
			
 
				+	OPT_CALLBACK(0, "call-graph", &trace.opts,
			
 
				+		     "record_mode[,record_size]", record_callchain_help,
			
 
				+		     &record_parse_callchain_opt),
			
 
				+	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
			
 
				+		    "Show the kernel callchains on the syscall exit path"),
			
 
				+	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
			
 
				+		     "Set the minimum stack depth when parsing the callchain, "
			
 
				+		     "anything below the specified depth will be ignored."),
			
 
				+	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
			
 
				+		     "Set the maximum stack depth when parsing the callchain, "
			
 
				+		     "anything beyond the specified depth will be ignored. "
			
 
				+		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
			
 
				 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
			
 
				 			"per thread proc mmap processing timeout in ms"),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				+	bool __maybe_unused max_stack_user_set = true;
			
 
				+	bool mmap_pages_user_set = true;
			
 
				 	const char * const trace_subcommands[] = { "record", NULL };
			
 
				 	int err;
			
 
				 	char bf[BUFSIZ];
			
@@ -3148,8 +2858,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	signal(SIGFPE, sighandler_dump_stack);
			
 
				 
			
 
				 	trace.evlist = perf_evlist__new();
			
 
				+	trace.sctbl = syscalltbl__new();
			
 
				 
			
 
				-	if (trace.evlist == NULL) {
			
 
				+	if (trace.evlist == NULL || trace.sctbl == NULL) {
			
 
				 		pr_err("Not enough memory to run!\n");
			
 
				 		err = -ENOMEM;
			
 
				 		goto out;
			
@@ -3158,11 +2869,40 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
			
 
				 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
			
 
				 
			
 
				+	err = bpf__setup_stdout(trace.evlist);
			
 
				+	if (err) {
			
 
				+		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
			
 
				+		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	err = -1;
			
 
				+
			
 
				 	if (trace.trace_pgfaults) {
			
 
				 		trace.opts.sample_address = true;
			
 
				 		trace.opts.sample_time = true;
			
 
				 	}
			
 
				 
			
 
				+	if (trace.opts.mmap_pages == UINT_MAX)
			
 
				+		mmap_pages_user_set = false;
			
 
				+
			
 
				+	if (trace.max_stack == UINT_MAX) {
			
 
				+		trace.max_stack = sysctl_perf_event_max_stack;
			
 
				+		max_stack_user_set = false;
			
 
				+	}
			
 
				+
			
 
				+#ifdef HAVE_DWARF_UNWIND_SUPPORT
			
 
				+	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled)
			
 
				+		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
			
 
				+#endif
			
 
				+
			
 
				+	if (callchain_param.enabled) {
			
 
				+		if (!mmap_pages_user_set && geteuid() == 0)
			
 
				+			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
			
 
				+
			
 
				+		symbol_conf.use_callchain = true;
			
 
				+	}
			
 
				+
			
 
				 	if (trace.evlist->nr_entries > 0)
			
 
				 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
			
 
				 
			
@@ -3179,6 +2919,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		return -1;
			
 
				 	}
			
 
				 
			
 
				+	if (!trace.trace_syscalls && ev_qualifier_str) {
			
 
				+		pr_err("The -e option can't be used with --no-syscalls.\n");
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				 	if (output_name != NULL) {
			
 
				 		err = trace__open_output(&trace, output_name);
			
 
				 		if (err < 0) {
			
@@ -3187,6 +2932,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	trace.open_id = syscalltbl__id(trace.sctbl, "open");
			
 
				+
			
 
				 	if (ev_qualifier_str != NULL) {
			
 
				 		const char *s = ev_qualifier_str;
			
 
				 		struct strlist_config slist_config = {
			
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -27,7 +27,7 @@ NO_PERF_REGS := 1
 
				 ifeq ($(ARCH),x86)
			
 
				   $(call detected,CONFIG_X86)
			
 
				   ifeq (${IS_64_BIT}, 1)
			
 
				-    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
			
 
				+    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
			
 
				     ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
			
 
				     LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
			
 
				     $(call detected,CONFIG_X86_64)
			
@@ -295,9 +295,6 @@ ifndef NO_LIBELF
 
				     CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
			
 
				   endif
			
 
				 
			
 
				-  # include ARCH specific config
			
 
				-  -include $(src-perf)/arch/$(ARCH)/Makefile
			
 
				-
			
 
				   ifndef NO_DWARF
			
 
				     ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
			
 
				       msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
			
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -92,6 +92,22 @@ static int get_e_machine(struct jitheader *hdr)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static int use_arch_timestamp;
			
 
				+
			
 
				+static inline uint64_t
			
 
				+get_arch_timestamp(void)
			
 
				+{
			
 
				+#if defined(__i386__) || defined(__x86_64__)
			
 
				+	unsigned int low, high;
			
 
				+
			
 
				+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
			
 
				+
			
 
				+	return low | ((uint64_t)high) << 32;
			
 
				+#else
			
 
				+	return 0;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				 #define NSEC_PER_SEC	1000000000
			
 
				 static int perf_clk_id = CLOCK_MONOTONIC;
			
 
				 
			
@@ -107,6 +123,9 @@ perf_get_timestamp(void)
 
				 	struct timespec ts;
			
 
				 	int ret;
			
 
				 
			
 
				+	if (use_arch_timestamp)
			
 
				+		return get_arch_timestamp();
			
 
				+
			
 
				 	ret = clock_gettime(perf_clk_id, &ts);
			
 
				 	if (ret)
			
 
				 		return 0;
			
@@ -203,6 +222,17 @@ perf_close_marker_file(void)
 
				 	munmap(marker_addr, pgsz);
			
 
				 }
			
 
				 
			
 
				+static void
			
 
				+init_arch_timestamp(void)
			
 
				+{
			
 
				+	char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP");
			
 
				+
			
 
				+	if (!str || !*str || !strcmp(str, "0"))
			
 
				+		return;
			
 
				+
			
 
				+	use_arch_timestamp = 1;
			
 
				+}
			
 
				+
			
 
				 void *jvmti_open(void)
			
 
				 {
			
 
				 	int pad_cnt;
			
@@ -211,11 +241,17 @@ void *jvmti_open(void)
 
				 	int fd;
			
 
				 	FILE *fp;
			
 
				 
			
 
				+	init_arch_timestamp();
			
 
				+
			
 
				 	/*
			
 
				 	 * check if clockid is supported
			
 
				 	 */
			
 
				-	if (!perf_get_timestamp())
			
 
				-		warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
			
 
				+	if (!perf_get_timestamp()) {
			
 
				+		if (use_arch_timestamp)
			
 
				+			warnx("jvmti: arch timestamp not supported");
			
 
				+		else
			
 
				+			warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
			
 
				+	}
			
 
				 
			
 
				 	memset(&header, 0, sizeof(header));
			
 
				 
			
@@ -263,6 +299,9 @@ void *jvmti_open(void)
 
				 
			
 
				 	header.timestamp = perf_get_timestamp();
			
 
				 
			
 
				+	if (use_arch_timestamp)
			
 
				+		header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP;
			
 
				+
			
 
				 	if (!fwrite(&header, sizeof(header), 1, fp)) {
			
 
				 		warn("jvmti: cannot write dumpfile header");
			
 
				 		goto error;
			
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -17,6 +17,7 @@
 
				 #include <subcmd/parse-options.h>
			
 
				 #include "util/bpf-loader.h"
			
 
				 #include "util/debug.h"
			
 
				+#include <api/fs/fs.h>
			
 
				 #include <api/fs/tracing_path.h>
			
 
				 #include <pthread.h>
			
 
				 #include <stdlib.h>
			
@@ -308,9 +309,11 @@ static int handle_alias(int *argcp, const char ***argv)
 
				 			if (*argcp > 1) {
			
 
				 				struct strbuf buf;
			
 
				 
			
 
				-				strbuf_init(&buf, PATH_MAX);
			
 
				-				strbuf_addstr(&buf, alias_string);
			
 
				-				sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
			
 
				+				if (strbuf_init(&buf, PATH_MAX) < 0 ||
			
 
				+				    strbuf_addstr(&buf, alias_string) < 0 ||
			
 
				+				    sq_quote_argv(&buf, (*argv) + 1,
			
 
				+						  PATH_MAX) < 0)
			
 
				+					die("Failed to allocate memory.");
			
 
				 				free(alias_string);
			
 
				 				alias_string = buf.buf;
			
 
				 			}
			
@@ -533,6 +536,7 @@ int main(int argc, const char **argv)
 
				 {
			
 
				 	const char *cmd;
			
 
				 	char sbuf[STRERR_BUFSIZE];
			
 
				+	int value;
			
 
				 
			
 
				 	/* libsubcmd init */
			
 
				 	exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
			
@@ -542,6 +546,9 @@ int main(int argc, const char **argv)
 
				 	page_size = sysconf(_SC_PAGE_SIZE);
			
 
				 	cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
			
 
				 
			
 
				+	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
			
 
				+		sysctl_perf_event_max_stack = value;
			
 
				+
			
 
				 	cmd = extract_argv0_path(argv[0]);
			
 
				 	if (!cmd)
			
 
				 		cmd = "perf-help";
			
@@ -549,6 +556,7 @@ int main(int argc, const char **argv)
 
				 	srandom(time(NULL));
			
 
				 
			
 
				 	perf_config(perf_default_config, NULL);
			
 
				+	set_buildid_dir(NULL);
			
 
				 
			
 
				 	/* get debugfs/tracefs mount point from /proc/mounts */
			
 
				 	tracing_path_mount();
			
@@ -572,7 +580,6 @@ int main(int argc, const char **argv)
 
				 	}
			
 
				 	if (!prefixcmp(cmd, "trace")) {
			
 
				 #ifdef HAVE_LIBAUDIT_SUPPORT
			
 
				-		set_buildid_dir(NULL);
			
 
				 		setup_path();
			
 
				 		argv[0] = "trace";
			
 
				 		return cmd_trace(argc, argv, NULL);
			
@@ -587,7 +594,6 @@ int main(int argc, const char **argv)
 
				 	argc--;
			
 
				 	handle_options(&argv, &argc, NULL);
			
 
				 	commit_pager_choice();
			
 
				-	set_buildid_dir(NULL);
			
 
				 
			
 
				 	if (argc > 0) {
			
 
				 		if (!prefixcmp(argv[0], "--"))
			
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -52,7 +52,6 @@ struct record_opts {
 
				 	bool	     sample_weight;
			
 
				 	bool	     sample_time;
			
 
				 	bool	     sample_time_set;
			
 
				-	bool	     callgraph_set;
			
 
				 	bool	     period;
			
 
				 	bool	     running_time;
			
 
				 	bool	     full_auxtrace;
			
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -34,10 +34,9 @@ import datetime
 
				 #
			
 
				 # ubuntu:
			
 
				 #
			
 
				-#	$ sudo apt-get install postgresql
			
 
				+#	$ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
			
 
				 #	$ sudo su - postgres
			
 
				-#	$ createuser <your user id here>
			
 
				-#	Shall the new role be a superuser? (y/n) y
			
 
				+#	$ createuser -s <your user id here>
			
 
				 #
			
 
				 # An example of using this script with Intel PT:
			
 
				 #
			
@@ -224,11 +223,14 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
 
				 
			
 
				 perf_db_export_mode = True
			
 
				 perf_db_export_calls = False
			
 
				+perf_db_export_callchains = False
			
 
				+
			
 
				 
			
 
				 def usage():
			
 
				-	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
			
 
				+	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
			
 
				 	print >> sys.stderr, "where:	columns		'all' or 'branches'"
			
 
				-	print >> sys.stderr, "		calls		'calls' => create calls table"
			
 
				+	print >> sys.stderr, "		calls		'calls' => create calls and call_paths table"
			
 
				+	print >> sys.stderr, "		callchains	'callchains' => create call_paths table"
			
 
				 	raise Exception("Too few arguments")
			
 
				 
			
 
				 if (len(sys.argv) < 2):
			
@@ -246,9 +248,11 @@ if columns not in ("all", "branches"):
 
				 
			
 
				 branches = (columns == "branches")
			
 
				 
			
 
				-if (len(sys.argv) >= 4):
			
 
				-	if (sys.argv[3] == "calls"):
			
 
				+for i in range(3,len(sys.argv)):
			
 
				+	if (sys.argv[i] == "calls"):
			
 
				 		perf_db_export_calls = True
			
 
				+	elif (sys.argv[i] == "callchains"):
			
 
				+		perf_db_export_callchains = True
			
 
				 	else:
			
 
				 		usage()
			
 
				 
			
@@ -359,14 +363,16 @@ else:
 
				 		'transaction	bigint,'
			
 
				 		'data_src	bigint,'
			
 
				 		'branch_type	integer,'
			
 
				-		'in_tx		boolean)')
			
 
				+		'in_tx		boolean,'
			
 
				+		'call_path_id	bigint)')
			
 
				 
			
 
				-if perf_db_export_calls:
			
 
				+if perf_db_export_calls or perf_db_export_callchains:
			
 
				 	do_query(query, 'CREATE TABLE call_paths ('
			
 
				 		'id		bigint		NOT NULL,'
			
 
				 		'parent_id	bigint,'
			
 
				 		'symbol_id	bigint,'
			
 
				 		'ip		bigint)')
			
 
				+if perf_db_export_calls:
			
 
				 	do_query(query, 'CREATE TABLE calls ('
			
 
				 		'id		bigint		NOT NULL,'
			
 
				 		'thread_id	bigint,'
			
@@ -428,7 +434,7 @@ do_query(query, 'CREATE VIEW comm_threads_view AS '
 
				 		'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
			
 
				 	' FROM comm_threads')
			
 
				 
			
 
				-if perf_db_export_calls:
			
 
				+if perf_db_export_calls or perf_db_export_callchains:
			
 
				 	do_query(query, 'CREATE VIEW call_paths_view AS '
			
 
				 		'SELECT '
			
 
				 			'c.id,'
			
@@ -444,6 +450,7 @@ if perf_db_export_calls:
 
				 			'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
			
 
				 			'(SELECT dso FROM symbols_view  WHERE id = p.symbol_id) AS parent_dso_short_name'
			
 
				 		' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
			
 
				+if perf_db_export_calls:
			
 
				 	do_query(query, 'CREATE VIEW calls_view AS '
			
 
				 		'SELECT '
			
 
				 			'calls.id,'
			
@@ -541,8 +548,9 @@ dso_file		= open_output_file("dso_table.bin")
 
				 symbol_file		= open_output_file("symbol_table.bin")
			
 
				 branch_type_file	= open_output_file("branch_type_table.bin")
			
 
				 sample_file		= open_output_file("sample_table.bin")
			
 
				-if perf_db_export_calls:
			
 
				+if perf_db_export_calls or perf_db_export_callchains:
			
 
				 	call_path_file		= open_output_file("call_path_table.bin")
			
 
				+if perf_db_export_calls:
			
 
				 	call_file		= open_output_file("call_table.bin")
			
 
				 
			
 
				 def trace_begin():
			
@@ -554,8 +562,8 @@ def trace_begin():
 
				 	comm_table(0, "unknown")
			
 
				 	dso_table(0, 0, "unknown", "unknown", "")
			
 
				 	symbol_table(0, 0, 0, 0, 0, "unknown")
			
 
				-	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
			
 
				-	if perf_db_export_calls:
			
 
				+	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
			
 
				+	if perf_db_export_calls or perf_db_export_callchains:
			
 
				 		call_path_table(0, 0, 0, 0)
			
 
				 
			
 
				 unhandled_count = 0
			
@@ -571,8 +579,9 @@ def trace_end():
 
				 	copy_output_file(symbol_file,		"symbols")
			
 
				 	copy_output_file(branch_type_file,	"branch_types")
			
 
				 	copy_output_file(sample_file,		"samples")
			
 
				-	if perf_db_export_calls:
			
 
				+	if perf_db_export_calls or perf_db_export_callchains:
			
 
				 		copy_output_file(call_path_file,	"call_paths")
			
 
				+	if perf_db_export_calls:
			
 
				 		copy_output_file(call_file,		"calls")
			
 
				 
			
 
				 	print datetime.datetime.today(), "Removing intermediate files..."
			
@@ -585,8 +594,9 @@ def trace_end():
 
				 	remove_output_file(symbol_file)
			
 
				 	remove_output_file(branch_type_file)
			
 
				 	remove_output_file(sample_file)
			
 
				-	if perf_db_export_calls:
			
 
				+	if perf_db_export_calls or perf_db_export_callchains:
			
 
				 		remove_output_file(call_path_file)
			
 
				+	if perf_db_export_calls:
			
 
				 		remove_output_file(call_file)
			
 
				 	os.rmdir(output_dir_name)
			
 
				 	print datetime.datetime.today(), "Adding primary keys"
			
@@ -599,8 +609,9 @@ def trace_end():
 
				 	do_query(query, 'ALTER TABLE symbols         ADD PRIMARY KEY (id)')
			
 
				 	do_query(query, 'ALTER TABLE branch_types    ADD PRIMARY KEY (id)')
			
 
				 	do_query(query, 'ALTER TABLE samples         ADD PRIMARY KEY (id)')
			
 
				-	if perf_db_export_calls:
			
 
				+	if perf_db_export_calls or perf_db_export_callchains:
			
 
				 		do_query(query, 'ALTER TABLE call_paths      ADD PRIMARY KEY (id)')
			
 
				+	if perf_db_export_calls:
			
 
				 		do_query(query, 'ALTER TABLE calls           ADD PRIMARY KEY (id)')
			
 
				 
			
 
				 	print datetime.datetime.today(), "Adding foreign keys"
			
@@ -623,10 +634,11 @@ def trace_end():
 
				 					'ADD CONSTRAINT symbolfk   FOREIGN KEY (symbol_id)    REFERENCES symbols    (id),'
			
 
				 					'ADD CONSTRAINT todsofk    FOREIGN KEY (to_dso_id)    REFERENCES dsos       (id),'
			
 
				 					'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols    (id)')
			
 
				-	if perf_db_export_calls:
			
 
				+	if perf_db_export_calls or perf_db_export_callchains:
			
 
				 		do_query(query, 'ALTER TABLE call_paths '
			
 
				 					'ADD CONSTRAINT parentfk    FOREIGN KEY (parent_id)    REFERENCES call_paths (id),'
			
 
				 					'ADD CONSTRAINT symbolfk    FOREIGN KEY (symbol_id)    REFERENCES symbols    (id)')
			
 
				+	if perf_db_export_calls:
			
 
				 		do_query(query, 'ALTER TABLE calls '
			
 
				 					'ADD CONSTRAINT threadfk    FOREIGN KEY (thread_id)    REFERENCES threads    (id),'
			
 
				 					'ADD CONSTRAINT commfk      FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
			
@@ -694,11 +706,11 @@ def branch_type_table(branch_type, name, *x):
 
				 	value = struct.pack(fmt, 2, 4, branch_type, n, name)
			
 
				 	branch_type_file.write(value)
			
 
				 
			
 
				-def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
			
 
				+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
			
 
				 	if branches:
			
 
				-		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
			
 
				+		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
			
 
				 	else:
			
 
				-		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
			
 
				+		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
			
 
				 	sample_file.write(value)
			
 
				 
			
 
				 def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
			
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -37,6 +37,8 @@ perf-y += topology.o
 
				 perf-y += cpumap.o
			
 
				 perf-y += stat.o
			
 
				 perf-y += event_update.o
			
 
				+perf-y += event-times.o
			
 
				+perf-y += backward-ring-buffer.o
			
 
				 
			
 
				 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
			
 
				 	$(call rule_mkdir)