11 years ago · ef35ad26f8
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -99,10 +99,6 @@ static int arc_pmu_event_init(struct perf_event *event)
 
				 	struct hw_perf_event *hwc = &event->hw;
			
 
				 	int ret;
			
 
				 
			
 
				-	/* ARC 700 PMU does not support sampling events */
			
 
				-	if (is_sampling_event(event))
			
 
				-		return -ENOENT;
			
 
				-
			
 
				 	switch (event->attr.type) {
			
 
				 	case PERF_TYPE_HARDWARE:
			
 
				 		if (event->attr.config >= PERF_COUNT_HW_MAX)
			
@@ -298,6 +294,9 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 
				 		.read		= arc_pmu_read,
			
 
				 	};
			
 
				 
			
 
				+	/* ARC 700 PMU does not support sampling events */
			
 
				+	arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
			
 
				+
			
 
				 	ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
			
 
				 
			
 
				 	return ret;
			
--- a/arch/blackfin/kernel/perf_event.c
+++ b/arch/blackfin/kernel/perf_event.c
@@ -389,14 +389,6 @@ static int bfin_pmu_event_init(struct perf_event *event)
 
				 	if (attr->exclude_hv || attr->exclude_idle)
			
 
				 		return -EPERM;
			
 
				 
			
 
				-	/*
			
 
				-	 * All of the on-chip counters are "limited", in that they have
			
 
				-	 * no interrupts, and are therefore unable to do sampling without
			
 
				-	 * further work and timer assistance.
			
 
				-	 */
			
 
				-	if (hwc->sample_period)
			
 
				-		return -EINVAL;
			
 
				-
			
 
				 	ret = 0;
			
 
				 	switch (attr->type) {
			
 
				 	case PERF_TYPE_RAW:
			
@@ -490,6 +482,13 @@ static int __init bfin_pmu_init(void)
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				+	/*
			
 
				+	 * All of the on-chip counters are "limited", in that they have
			
 
				+	 * no interrupts, and are therefore unable to do sampling without
			
 
				+	 * further work and timer assistance.
			
 
				+	 */
			
 
				+	pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
			
 
				+
			
 
				 	ret = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
			
 
				 	if (!ret)
			
 
				 		perf_cpu_notifier(bfin_pmu_notifier);
			
--- a/arch/metag/kernel/perf/perf_event.c
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -567,16 +567,6 @@ static int _hw_perf_event_init(struct perf_event *event)
 
				 	if (mapping == -1)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	/*
			
 
				-	 * Early cores have "limited" counters - they have no overflow
			
 
				-	 * interrupts - and so are unable to do sampling without extra work
			
 
				-	 * and timer assistance.
			
 
				-	 */
			
 
				-	if (metag_pmu->max_period == 0) {
			
 
				-		if (hwc->sample_period)
			
 
				-			return -EINVAL;
			
 
				-	}
			
 
				-
			
 
				 	/*
			
 
				 	 * Don't assign an index until the event is placed into the hardware.
			
 
				 	 * -1 signifies that we're still deciding where to put it. On SMP
			
@@ -866,6 +856,15 @@ static int __init init_hw_perf_events(void)
 
				 	pr_info("enabled with %s PMU driver, %d counters available\n",
			
 
				 			metag_pmu->name, metag_pmu->max_events);
			
 
				 
			
 
				+	/*
			
 
				+	 * Early cores have "limited" counters - they have no overflow
			
 
				+	 * interrupts - and so are unable to do sampling without extra work
			
 
				+	 * and timer assistance.
			
 
				+	 */
			
 
				+	if (metag_pmu->max_period == 0) {
			
 
				+		metag_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
			
 
				+	}
			
 
				+
			
 
				 	/* Initialise the active events and reservation mutex */
			
 
				 	atomic_set(&metag_pmu->active_events, 0);
			
 
				 	mutex_init(&metag_pmu->reserve_mutex);
			
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -387,8 +387,7 @@ static int h_24x7_event_init(struct perf_event *event)
 
				 	    event->attr.exclude_hv     ||
			
 
				 	    event->attr.exclude_idle   ||
			
 
				 	    event->attr.exclude_host   ||
			
 
				-	    event->attr.exclude_guest  ||
			
 
				-	    is_sampling_event(event)) /* no sampling */
			
 
				+	    event->attr.exclude_guest)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				 	/* no branch sampling */
			
@@ -513,6 +512,9 @@ static int hv_24x7_init(void)
 
				 	if (!hv_page_cache)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				+	/* sampling not supported */
			
 
				+	h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
			
 
				+
			
 
				 	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
			
 
				 	if (r)
			
 
				 		return r;
			
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -210,8 +210,7 @@ static int h_gpci_event_init(struct perf_event *event)
 
				 	    event->attr.exclude_hv     ||
			
 
				 	    event->attr.exclude_idle   ||
			
 
				 	    event->attr.exclude_host   ||
			
 
				-	    event->attr.exclude_guest  ||
			
 
				-	    is_sampling_event(event)) /* no sampling */
			
 
				+	    event->attr.exclude_guest)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				 	/* no branch sampling */
			
@@ -284,6 +283,9 @@ static int hv_gpci_init(void)
 
				 		return -ENODEV;
			
 
				 	}
			
 
				 
			
 
				+	/* sampling not supported */
			
 
				+	h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
			
 
				+
			
 
				 	r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
			
 
				 	if (r)
			
 
				 		return r;
			
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -16,6 +16,7 @@ header-y += ioctls.h
 
				 header-y += ipcbuf.h
			
 
				 header-y += kvm.h
			
 
				 header-y += kvm_para.h
			
 
				+header-y += kvm_perf.h
			
 
				 header-y += kvm_virtio.h
			
 
				 header-y += mman.h
			
 
				 header-y += monwriter.h
			
--- a/arch/s390/include/uapi/asm/kvm_perf.h
+++ b/arch/s390/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,25 @@
 
				+/*
			
 
				+ * Definitions for perf-kvm on s390
			
 
				+ *
			
 
				+ * Copyright 2014 IBM Corp.
			
 
				+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License (version 2 only)
			
 
				+ * as published by the Free Software Foundation.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __LINUX_KVM_PERF_S390_H
			
 
				+#define __LINUX_KVM_PERF_S390_H
			
 
				+
			
 
				+#include <asm/sie.h>
			
 
				+
			
 
				+#define DECODE_STR_LEN 40
			
 
				+
			
 
				+#define VCPU_ID "id"
			
 
				+
			
 
				+#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter"
			
 
				+#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit"
			
 
				+#define KVM_EXIT_REASON "icptcode"
			
 
				+
			
 
				+#endif
			
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -411,12 +411,6 @@ static int cpumf_pmu_event_init(struct perf_event *event)
 
				 	case PERF_TYPE_HARDWARE:
			
 
				 	case PERF_TYPE_HW_CACHE:
			
 
				 	case PERF_TYPE_RAW:
			
 
				-		/* The CPU measurement counter facility does not have overflow
			
 
				-		 * interrupts to do sampling.  Sampling must be provided by
			
 
				-		 * external means, for example, by timers.
			
 
				-		 */
			
 
				-		if (is_sampling_event(event))
			
 
				-			return -ENOENT;
			
 
				 		err = __hw_perf_event_init(event);
			
 
				 		break;
			
 
				 	default:
			
@@ -681,6 +675,12 @@ static int __init cpumf_pmu_init(void)
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				+	/* The CPU measurement counter facility does not have overflow
			
 
				+	 * interrupts to do sampling.  Sampling must be provided by
			
 
				+	 * external means, for example, by timers.
			
 
				+	 */
			
 
				+	cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
			
 
				+
			
 
				 	cpumf_pmu.attr_groups = cpumf_cf_event_group();
			
 
				 	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
			
 
				 	if (rc) {
			
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -128,14 +128,6 @@ static int __hw_perf_event_init(struct perf_event *event)
 
				 	if (!sh_pmu_initialized())
			
 
				 		return -ENODEV;
			
 
				 
			
 
				-	/*
			
 
				-	 * All of the on-chip counters are "limited", in that they have
			
 
				-	 * no interrupts, and are therefore unable to do sampling without
			
 
				-	 * further work and timer assistance.
			
 
				-	 */
			
 
				-	if (hwc->sample_period)
			
 
				-		return -EINVAL;
			
 
				-
			
 
				 	/*
			
 
				 	 * See if we need to reserve the counter.
			
 
				 	 *
			
@@ -392,6 +384,13 @@ int register_sh_pmu(struct sh_pmu *_pmu)
 
				 
			
 
				 	pr_info("Performance Events: %s support registered\n", _pmu->name);
			
 
				 
			
 
				+	/*
			
 
				+	 * All of the on-chip counters are "limited", in that they have
			
 
				+	 * no interrupts, and are therefore unable to do sampling without
			
 
				+	 * further work and timer assistance.
			
 
				+	 */
			
 
				+	pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
			
 
				+
			
 
				 	WARN_ON(_pmu->num_events > MAX_HWEVENTS);
			
 
				 
			
 
				 	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
			
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -22,6 +22,7 @@ header-y += ipcbuf.h
 
				 header-y += ist.h
			
 
				 header-y += kvm.h
			
 
				 header-y += kvm_para.h
			
 
				+header-y += kvm_perf.h
			
 
				 header-y += ldt.h
			
 
				 header-y += mce.h
			
 
				 header-y += mman.h
			
--- a/arch/x86/include/uapi/asm/kvm_perf.h
+++ b/arch/x86/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,16 @@
 
				+#ifndef _ASM_X86_KVM_PERF_H
			
 
				+#define _ASM_X86_KVM_PERF_H
			
 
				+
			
 
				+#include <asm/svm.h>
			
 
				+#include <asm/vmx.h>
			
 
				+#include <asm/kvm.h>
			
 
				+
			
 
				+#define DECODE_STR_LEN 20
			
 
				+
			
 
				+#define VCPU_ID "vcpu_id"
			
 
				+
			
 
				+#define KVM_ENTRY_TRACE "kvm:kvm_entry"
			
 
				+#define KVM_EXIT_TRACE "kvm:kvm_exit"
			
 
				+#define KVM_EXIT_REASON "exit_reason"
			
 
				+
			
 
				+#endif /* _ASM_X86_KVM_PERF_H */
			
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -294,31 +294,41 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
 
				 			cpu_to_node(cpu));
			
 
				 }
			
 
				 
			
 
				-static void amd_uncore_cpu_up_prepare(unsigned int cpu)
			
 
				+static int amd_uncore_cpu_up_prepare(unsigned int cpu)
			
 
				 {
			
 
				-	struct amd_uncore *uncore;
			
 
				+	struct amd_uncore *uncore_nb = NULL, *uncore_l2;
			
 
				 
			
 
				 	if (amd_uncore_nb) {
			
 
				-		uncore = amd_uncore_alloc(cpu);
			
 
				-		uncore->cpu = cpu;
			
 
				-		uncore->num_counters = NUM_COUNTERS_NB;
			
 
				-		uncore->rdpmc_base = RDPMC_BASE_NB;
			
 
				-		uncore->msr_base = MSR_F15H_NB_PERF_CTL;
			
 
				-		uncore->active_mask = &amd_nb_active_mask;
			
 
				-		uncore->pmu = &amd_nb_pmu;
			
 
				-		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
			
 
				+		uncore_nb = amd_uncore_alloc(cpu);
			
 
				+		if (!uncore_nb)
			
 
				+			goto fail;
			
 
				+		uncore_nb->cpu = cpu;
			
 
				+		uncore_nb->num_counters = NUM_COUNTERS_NB;
			
 
				+		uncore_nb->rdpmc_base = RDPMC_BASE_NB;
			
 
				+		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
			
 
				+		uncore_nb->active_mask = &amd_nb_active_mask;
			
 
				+		uncore_nb->pmu = &amd_nb_pmu;
			
 
				+		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
			
 
				 	}
			
 
				 
			
 
				 	if (amd_uncore_l2) {
			
 
				-		uncore = amd_uncore_alloc(cpu);
			
 
				-		uncore->cpu = cpu;
			
 
				-		uncore->num_counters = NUM_COUNTERS_L2;
			
 
				-		uncore->rdpmc_base = RDPMC_BASE_L2;
			
 
				-		uncore->msr_base = MSR_F16H_L2I_PERF_CTL;
			
 
				-		uncore->active_mask = &amd_l2_active_mask;
			
 
				-		uncore->pmu = &amd_l2_pmu;
			
 
				-		*per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
			
 
				+		uncore_l2 = amd_uncore_alloc(cpu);
			
 
				+		if (!uncore_l2)
			
 
				+			goto fail;
			
 
				+		uncore_l2->cpu = cpu;
			
 
				+		uncore_l2->num_counters = NUM_COUNTERS_L2;
			
 
				+		uncore_l2->rdpmc_base = RDPMC_BASE_L2;
			
 
				+		uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
			
 
				+		uncore_l2->active_mask = &amd_l2_active_mask;
			
 
				+		uncore_l2->pmu = &amd_l2_pmu;
			
 
				+		*per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
			
 
				 	}
			
 
				+
			
 
				+	return 0;
			
 
				+
			
 
				+fail:
			
 
				+	kfree(uncore_nb);
			
 
				+	return -ENOMEM;
			
 
				 }
			
 
				 
			
 
				 static struct amd_uncore *
			
@@ -441,7 +451,7 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
 
				 
			
 
				 	if (!--uncore->refcnt)
			
 
				 		kfree(uncore);
			
 
				-	*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
			
 
				+	*per_cpu_ptr(uncores, cpu) = NULL;
			
 
				 }
			
 
				 
			
 
				 static void amd_uncore_cpu_dead(unsigned int cpu)
			
@@ -461,7 +471,8 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
 
				 
			
 
				 	switch (action & ~CPU_TASKS_FROZEN) {
			
 
				 	case CPU_UP_PREPARE:
			
 
				-		amd_uncore_cpu_up_prepare(cpu);
			
 
				+		if (amd_uncore_cpu_up_prepare(cpu))
			
 
				+			return notifier_from_errno(-ENOMEM);
			
 
				 		break;
			
 
				 
			
 
				 	case CPU_STARTING:
			
@@ -501,20 +512,33 @@ static void __init init_cpu_already_online(void *dummy)
 
				 	amd_uncore_cpu_online(cpu);
			
 
				 }
			
 
				 
			
 
				+static void cleanup_cpu_online(void *dummy)
			
 
				+{
			
 
				+	unsigned int cpu = smp_processor_id();
			
 
				+
			
 
				+	amd_uncore_cpu_dead(cpu);
			
 
				+}
			
 
				+
			
 
				 static int __init amd_uncore_init(void)
			
 
				 {
			
 
				-	unsigned int cpu;
			
 
				+	unsigned int cpu, cpu2;
			
 
				 	int ret = -ENODEV;
			
 
				 
			
 
				 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
			
 
				-		return -ENODEV;
			
 
				+		goto fail_nodev;
			
 
				 
			
 
				 	if (!cpu_has_topoext)
			
 
				-		return -ENODEV;
			
 
				+		goto fail_nodev;
			
 
				 
			
 
				 	if (cpu_has_perfctr_nb) {
			
 
				 		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
			
 
				-		perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
			
 
				+		if (!amd_uncore_nb) {
			
 
				+			ret = -ENOMEM;
			
 
				+			goto fail_nb;
			
 
				+		}
			
 
				+		ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
			
 
				+		if (ret)
			
 
				+			goto fail_nb;
			
 
				 
			
 
				 		printk(KERN_INFO "perf: AMD NB counters detected\n");
			
 
				 		ret = 0;
			
@@ -522,20 +546,28 @@ static int __init amd_uncore_init(void)
 
				 
			
 
				 	if (cpu_has_perfctr_l2) {
			
 
				 		amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
			
 
				-		perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
			
 
				+		if (!amd_uncore_l2) {
			
 
				+			ret = -ENOMEM;
			
 
				+			goto fail_l2;
			
 
				+		}
			
 
				+		ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
			
 
				+		if (ret)
			
 
				+			goto fail_l2;
			
 
				 
			
 
				 		printk(KERN_INFO "perf: AMD L2I counters detected\n");
			
 
				 		ret = 0;
			
 
				 	}
			
 
				 
			
 
				 	if (ret)
			
 
				-		return -ENODEV;
			
 
				+		goto fail_nodev;
			
 
				 
			
 
				 	cpu_notifier_register_begin();
			
 
				 
			
 
				 	/* init cpus already online before registering for hotplug notifier */
			
 
				 	for_each_online_cpu(cpu) {
			
 
				-		amd_uncore_cpu_up_prepare(cpu);
			
 
				+		ret = amd_uncore_cpu_up_prepare(cpu);
			
 
				+		if (ret)
			
 
				+			goto fail_online;
			
 
				 		smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
			
 
				 	}
			
 
				 
			
@@ -543,5 +575,30 @@ static int __init amd_uncore_init(void)
 
				 	cpu_notifier_register_done();
			
 
				 
			
 
				 	return 0;
			
 
				+
			
 
				+
			
 
				+fail_online:
			
 
				+	for_each_online_cpu(cpu2) {
			
 
				+		if (cpu2 == cpu)
			
 
				+			break;
			
 
				+		smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
			
 
				+	}
			
 
				+	cpu_notifier_register_done();
			
 
				+
			
 
				+	/* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
			
 
				+	amd_uncore_nb = amd_uncore_l2 = NULL;
			
 
				+	if (cpu_has_perfctr_l2)
			
 
				+		perf_pmu_unregister(&amd_l2_pmu);
			
 
				+fail_l2:
			
 
				+	if (cpu_has_perfctr_nb)
			
 
				+		perf_pmu_unregister(&amd_nb_pmu);
			
 
				+	if (amd_uncore_l2)
			
 
				+		free_percpu(amd_uncore_l2);
			
 
				+fail_nb:
			
 
				+	if (amd_uncore_nb)
			
 
				+		free_percpu(amd_uncore_nb);
			
 
				+
			
 
				+fail_nodev:
			
 
				+	return ret;
			
 
				 }
			
 
				 device_initcall(amd_uncore_init);
			
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2947,10 +2947,7 @@ again:
 
				 		 * extra registers. If we failed to take an extra
			
 
				 		 * register, try the alternative.
			
 
				 		 */
			
 
				-		if (idx % 2)
			
 
				-			idx--;
			
 
				-		else
			
 
				-			idx++;
			
 
				+		idx ^= 1;
			
 
				 		if (idx != reg1->idx % 6) {
			
 
				 			if (idx == 2)
			
 
				 				config1 >>= 8;
			
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5266,6 +5266,12 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 
				 
			
 
				 		goto got_name;
			
 
				 	} else {
			
 
				+		if (vma->vm_ops && vma->vm_ops->name) {
			
 
				+			name = (char *) vma->vm_ops->name(vma);
			
 
				+			if (name)
			
 
				+				goto cpy_name;
			
 
				+		}
			
 
				+
			
 
				 		name = (char *)arch_vma_name(vma);
			
 
				 		if (name)
			
 
				 			goto cpy_name;
			
@@ -7804,7 +7810,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
 
				 /*
			
 
				  * Initialize the perf_event context in task_struct
			
 
				  */
			
 
				-int perf_event_init_context(struct task_struct *child, int ctxn)
			
 
				+static int perf_event_init_context(struct task_struct *child, int ctxn)
			
 
				 {
			
 
				 	struct perf_event_context *child_ctx, *parent_ctx;
			
 
				 	struct perf_event_context *cloned_ctx;
			
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -30,6 +30,18 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
 
				 			return ret;
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * We checked and allowed to create parent,
			
 
				+	 * allow children without checking.
			
 
				+	 */
			
 
				+	if (p_event->parent)
			
 
				+		return 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * It's ok to check current process (owner) permissions in here,
			
 
				+	 * because code below is called only via perf_event_open syscall.
			
 
				+	 */
			
 
				+
			
 
				 	/* The ftrace function trace is allowed only for root. */
			
 
				 	if (ftrace_event_is_function(tp_event)) {
			
 
				 		if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
			
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -2395,7 +2395,7 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok)
 
				 {
			
 
				 	struct print_arg *field;
			
 
				 	enum event_type type;
			
 
				-	char *token;
			
 
				+	char *token = NULL;
			
 
				 
			
 
				 	memset(arg, 0, sizeof(*arg));
			
 
				 	arg->type = PRINT_FLAGS;
			
@@ -2448,7 +2448,7 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok)
 
				 {
			
 
				 	struct print_arg *field;
			
 
				 	enum event_type type;
			
 
				-	char *token;
			
 
				+	char *token = NULL;
			
 
				 
			
 
				 	memset(arg, 0, sizeof(*arg));
			
 
				 	arg->type = PRINT_SYMBOL;
			
@@ -2487,7 +2487,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok)
 
				 {
			
 
				 	struct print_arg *field;
			
 
				 	enum event_type type;
			
 
				-	char *token;
			
 
				+	char *token = NULL;
			
 
				 
			
 
				 	memset(arg, 0, sizeof(*arg));
			
 
				 	arg->type = PRINT_HEX;
			
--- a/tools/lib/traceevent/plugin_cfg80211.c
+++ b/tools/lib/traceevent/plugin_cfg80211.c
@@ -5,8 +5,7 @@
 
				 #include "event-parse.h"
			
 
				 
			
 
				 static unsigned long long
			
 
				-process___le16_to_cpup(struct trace_seq *s,
			
 
				-		       unsigned long long *args)
			
 
				+process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
			
 
				 {
			
 
				 	uint16_t *val = (uint16_t *) (unsigned long) args[0];
			
 
				 	return val ? (long long) le16toh(*val) : 0;
			
--- a/tools/lib/traceevent/plugin_jbd2.c
+++ b/tools/lib/traceevent/plugin_jbd2.c
@@ -30,8 +30,7 @@
 
				 #define MINOR(dev)	((unsigned int) ((dev) & MINORMASK))
			
 
				 
			
 
				 static unsigned long long
			
 
				-process_jbd2_dev_to_name(struct trace_seq *s,
			
 
				-			 unsigned long long *args)
			
 
				+process_jbd2_dev_to_name(struct trace_seq *s, unsigned long long *args)
			
 
				 {
			
 
				 	unsigned int dev = args[0];
			
 
				 
			
@@ -40,8 +39,7 @@ process_jbd2_dev_to_name(struct trace_seq *s,
 
				 }
			
 
				 
			
 
				 static unsigned long long
			
 
				-process_jiffies_to_msecs(struct trace_seq *s,
			
 
				-			 unsigned long long *args)
			
 
				+process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args)
			
 
				 {
			
 
				 	unsigned long long jiffies = args[0];
			
 
				 
			
--- a/tools/lib/traceevent/plugin_kvm.c
+++ b/tools/lib/traceevent/plugin_kvm.c
@@ -240,25 +240,38 @@ static const char *find_exit_reason(unsigned isa, int val)
 
				 	for (i = 0; strings[i].val >= 0; i++)
			
 
				 		if (strings[i].val == val)
			
 
				 			break;
			
 
				-	if (strings[i].str)
			
 
				-		return strings[i].str;
			
 
				-	return "UNKNOWN";
			
 
				+
			
 
				+	return strings[i].str;
			
 
				 }
			
 
				 
			
 
				-static int kvm_exit_handler(struct trace_seq *s, struct pevent_record *record,
			
 
				-			    struct event_format *event, void *context)
			
 
				+static int print_exit_reason(struct trace_seq *s, struct pevent_record *record,
			
 
				+			     struct event_format *event, const char *field)
			
 
				 {
			
 
				 	unsigned long long isa;
			
 
				 	unsigned long long val;
			
 
				-	unsigned long long info1 = 0, info2 = 0;
			
 
				+	const char *reason;
			
 
				 
			
 
				-	if (pevent_get_field_val(s, event, "exit_reason", record, &val, 1) < 0)
			
 
				+	if (pevent_get_field_val(s, event, field, record, &val, 1) < 0)
			
 
				 		return -1;
			
 
				 
			
 
				 	if (pevent_get_field_val(s, event, "isa", record, &isa, 0) < 0)
			
 
				 		isa = 1;
			
 
				 
			
 
				-	trace_seq_printf(s, "reason %s", find_exit_reason(isa, val));
			
 
				+	reason = find_exit_reason(isa, val);
			
 
				+	if (reason)
			
 
				+		trace_seq_printf(s, "reason %s", reason);
			
 
				+	else
			
 
				+		trace_seq_printf(s, "reason UNKNOWN (%llu)", val);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int kvm_exit_handler(struct trace_seq *s, struct pevent_record *record,
			
 
				+			    struct event_format *event, void *context)
			
 
				+{
			
 
				+	unsigned long long info1 = 0, info2 = 0;
			
 
				+
			
 
				+	if (print_exit_reason(s, record, event, "exit_reason") < 0)
			
 
				+		return -1;
			
 
				 
			
 
				 	pevent_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
			
 
				 
			
@@ -313,6 +326,29 @@ static int kvm_emulate_insn_handler(struct trace_seq *s,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+
			
 
				+static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct pevent_record *record,
			
 
				+					    struct event_format *event, void *context)
			
 
				+{
			
 
				+	if (print_exit_reason(s, record, event, "exit_code") < 0)
			
 
				+		return -1;
			
 
				+
			
 
				+	pevent_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
			
 
				+	pevent_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
			
 
				+	pevent_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
			
 
				+	pevent_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int kvm_nested_vmexit_handler(struct trace_seq *s, struct pevent_record *record,
			
 
				+				     struct event_format *event, void *context)
			
 
				+{
			
 
				+	pevent_print_num_field(s, "rip %llx ", event, "rip", record, 1);
			
 
				+
			
 
				+	return kvm_nested_vmexit_inject_handler(s, record, event, context);
			
 
				+}
			
 
				+
			
 
				 union kvm_mmu_page_role {
			
 
				 	unsigned word;
			
 
				 	struct {
			
@@ -409,6 +445,12 @@ int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
 
				 	pevent_register_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
			
 
				 				      kvm_emulate_insn_handler, NULL);
			
 
				 
			
 
				+	pevent_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit",
			
 
				+				      kvm_nested_vmexit_handler, NULL);
			
 
				+
			
 
				+	pevent_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject",
			
 
				+				      kvm_nested_vmexit_inject_handler, NULL);
			
 
				+
			
 
				 	pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
			
 
				 				      kvm_mmu_get_page_handler, NULL);
			
 
				 
			
@@ -443,6 +485,12 @@ void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
 
				 	pevent_unregister_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
			
 
				 					kvm_emulate_insn_handler, NULL);
			
 
				 
			
 
				+	pevent_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit",
			
 
				+					kvm_nested_vmexit_handler, NULL);
			
 
				+
			
 
				+	pevent_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject",
			
 
				+					kvm_nested_vmexit_inject_handler, NULL);
			
 
				+
			
 
				 	pevent_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
			
 
				 					kvm_mmu_get_page_handler, NULL);
			
 
				 
			
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -16,6 +16,10 @@ This 'perf bench' command is a general framework for benchmark suites.
 
				 
			
 
				 COMMON OPTIONS
			
 
				 --------------
			
 
				+-r::
			
 
				+--repeat=::
			
 
				+Specify the number of times to repeat the run (default 10).
			
 
				+
			
 
				 -f::
			
 
				 --format=::
			
 
				 Specify format style.
			
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -41,6 +41,9 @@ OPTIONS
 
				 	tasks slept. sched_switch contains a callchain where a task slept and
			
 
				 	sched_stat contains a timeslice how long a task slept.
			
 
				 
			
 
				+--kallsyms=<file>::
			
 
				+	kallsyms pathname
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
			
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -51,9 +51,9 @@ There are a couple of variants of perf kvm:
 
				   'perf kvm stat <command>' to run a command and gather performance counter
			
 
				   statistics.
			
 
				   Especially, perf 'kvm stat record/report' generates a statistical analysis
			
 
				-  of KVM events. Currently, vmexit, mmio and ioport events are supported.
			
 
				-  'perf kvm stat record <command>' records kvm events and the events between
			
 
				-  start and end <command>.
			
 
				+  of KVM events. Currently, vmexit, mmio (x86 only) and ioport (x86 only)
			
 
				+  events are supported. 'perf kvm stat record <command>' records kvm events
			
 
				+  and the events between start and end <command>.
			
 
				   And this command produces a file which contains tracing results of kvm
			
 
				   events.
			
 
				 
			
@@ -103,8 +103,8 @@ STAT REPORT OPTIONS
 
				        analyze events which occures on this vcpu. (default: all vcpus)
			
 
				 
			
 
				 --event=<value>::
			
 
				-       event to be analyzed. Possible values: vmexit, mmio, ioport.
			
 
				-       (default: vmexit)
			
 
				+       event to be analyzed. Possible values: vmexit, mmio (x86 only),
			
 
				+       ioport (x86 only). (default: vmexit)
			
 
				 -k::
			
 
				 --key=<value>::
			
 
				        Sorting key. Possible values: sample (default, sort by samples
			
@@ -138,7 +138,8 @@ STAT LIVE OPTIONS
 
				 
			
 
				 
			
 
				 --event=<value>::
			
 
				-       event to be analyzed. Possible values: vmexit, mmio, ioport.
			
 
				+       event to be analyzed. Possible values: vmexit,
			
 
				+       mmio (x86 only), ioport (x86 only).
			
 
				        (default: vmexit)
			
 
				 
			
 
				 -k::
			
@@ -147,7 +148,8 @@ STAT LIVE OPTIONS
 
				        number), time (sort by average time).
			
 
				 
			
 
				 --duration=<value>::
			
 
				-       Show events other than HLT that take longer than duration usecs.
			
 
				+       Show events other than HLT (x86 only) or Wait state (s390 only)
			
 
				+       that take longer than duration usecs.
			
 
				 
			
 
				 SEE ALSO
			
 
				 --------
			
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -15,10 +15,20 @@ DESCRIPTION
 
				 There are two variants of perf timechart:
			
 
				 
			
 
				   'perf timechart record <command>' to record the system level events
			
 
				-  of an arbitrary workload.
			
 
				+  of an arbitrary workload. By default timechart records only scheduler
			
 
				+  and CPU events (task switches, running times, CPU power states, etc),
			
 
				+  but it's possible to record IO (disk, network) activity using the -I argument.
			
 
				 
			
 
				   'perf timechart' to turn a trace into a Scalable Vector Graphics file,
			
 
				-  that can be viewed with popular SVG viewers such as 'Inkscape'.
			
 
				+  that can be viewed with popular SVG viewers such as 'Inkscape'. Depending
			
 
				+  on the events in the perf.data file, timechart will contain scheduler/cpu
			
 
				+  events or IO events.
			
 
				+
			
 
				+  In IO mode, every bar has two charts: upper and lower.
			
 
				+  Upper bar shows incoming events (disk reads, ingress network packets).
			
 
				+  Lower bar shows outgoing events (disk writes, egress network packets).
			
 
				+  There are also poll bars which show how much time the application spent
			
 
				+  in poll/epoll/select syscalls.
			
 
				 
			
 
				 TIMECHART OPTIONS
			
 
				 -----------------
			
@@ -54,6 +64,19 @@ TIMECHART OPTIONS
 
				 	duration or tasks with given name. If number is given it's interpreted
			
 
				 	as number of nanoseconds. If non-numeric string is given it's
			
 
				 	interpreted as task name.
			
 
				+--io-skip-eagain::
			
 
				+	Don't draw EAGAIN IO events.
			
 
				+--io-min-time=<nsecs>::
			
 
				+	Draw small events as if they lasted min-time. Useful when you need
			
 
				+	to see very small and fast IO. It's possible to specify ms or us
			
 
				+	suffix to specify time in milliseconds or microseconds.
			
 
				+	Default value is 1ms.
			
 
				+--io-merge-dist=<nsecs>::
			
 
				+	Merge events that are merge-dist nanoseconds apart.
			
 
				+	Reduces number of figures on the SVG and makes it more render-friendly.
			
 
				+	It's possible to specify ms or us suffix to specify time in
			
 
				+	milliseconds or microseconds.
			
 
				+	Default value is 1us.
			
 
				 
			
 
				 RECORD OPTIONS
			
 
				 --------------
			
@@ -63,6 +86,9 @@ RECORD OPTIONS
 
				 -T::
			
 
				 --tasks-only::
			
 
				         Record only tasks-related events
			
 
				+-I::
			
 
				+--io-only::
			
 
				+        Record only io-related events
			
 
				 -g::
			
 
				 --callchain::
			
 
				         Do call-graph (stack chain/backtrace) recording
			
@@ -87,6 +113,14 @@ Record system-wide timechart:
 
				 
			
 
				   $ perf timechart --highlight gcc
			
 
				 
			
 
				+Record system-wide IO events:
			
 
				+
			
 
				+  $ perf timechart record -I
			
 
				+
			
 
				+  then generate timechart:
			
 
				+
			
 
				+  $ perf timechart
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-record[1]
			
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -107,6 +107,52 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 
				 	Show tool stats such as number of times fd->pathname was discovered thru
			
 
				 	hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
			
 
				 
			
 
				+-F=[all|min|maj]::
			
 
				+--pf=[all|min|maj]::
			
 
				+	Trace pagefaults. Optionally, you can specify whether you want minor,
			
 
				+	major or all pagefaults. Default value is maj.
			
 
				+
			
 
				+--syscalls::
			
 
				+	Trace system calls. This option is enabled by default.
			
 
				+
			
 
				+PAGEFAULTS
			
 
				+----------
			
 
				+
			
 
				+When tracing pagefaults, the format of the trace is as follows:
			
 
				+
			
 
				+<min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>).
			
 
				+
			
 
				+- min/maj indicates whether fault event is minor or major;
			
 
				+- ip.symbol shows symbol for instruction pointer (the code that generated the
			
 
				+  fault); if no debug symbols available, perf trace will print raw IP;
			
 
				+- addr.dso shows DSO for the faulted address;
			
 
				+- map type is either 'd' for non-executable maps or 'x' for executable maps;
			
 
				+- addr level is either 'k' for kernel dso or '.' for user dso.
			
 
				+
			
 
				+For symbol resolution you may need to install debugging symbols.
			
 
				+
			
 
				+Please be aware that duration is currently always 0 and doesn't reflect actual
			
 
				+time it took for fault to be handled!
			
 
				+
			
 
				+When --verbose is specified, perf trace tries to print all available information
			
 
				+for both IP and fault address in the form of dso@symbol+offset.
			
 
				+
			
 
				+EXAMPLES
			
 
				+--------
			
 
				+
			
 
				+Trace only major pagefaults:
			
 
				+
			
 
				+ $ perf trace --no-syscalls -F
			
 
				+
			
 
				+Trace syscalls, major and minor pagefaults:
			
 
				+
			
 
				+ $ perf trace -F all
			
 
				+
			
 
				+  1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.)
			
 
				+
			
 
				+  As you can see, there was a major pagefault in the python process, from
			
 
				+  CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-record[1], linkperf:perf-script[1]
			
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -8,7 +8,15 @@ perf - Performance analysis tools for Linux
 
				 SYNOPSIS
			
 
				 --------
			
 
				 [verse]
			
 
				-'perf' [--version] [--help] COMMAND [ARGS]
			
 
				+'perf' [--version] [--help] [OPTIONS] COMMAND [ARGS]
			
 
				+
			
 
				+OPTIONS
			
 
				+-------
			
 
				+--debug::
			
 
				+	Setup debug variable (just verbose for now) in value
			
 
				+	range (0, 10). Use like:
			
 
				+	  --debug verbose   # sets verbose = 1
			
 
				+	  --debug verbose=2 # sets verbose = 2
			
 
				 
			
 
				 DESCRIPTION
			
 
				 -----------
			
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -37,3 +37,6 @@ arch/x86/include/asm/kvm_host.h
 
				 arch/x86/include/uapi/asm/svm.h
			
 
				 arch/x86/include/uapi/asm/vmx.h
			
 
				 arch/x86/include/uapi/asm/kvm.h
			
 
				+arch/x86/include/uapi/asm/kvm_perf.h
			
 
				+arch/s390/include/uapi/asm/sie.h
			
 
				+arch/s390/include/uapi/asm/kvm_perf.h
			
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -295,11 +295,13 @@ LIB_H += util/intlist.h
 
				 LIB_H += util/perf_regs.h
			
 
				 LIB_H += util/unwind.h
			
 
				 LIB_H += util/vdso.h
			
 
				+LIB_H += util/tsc.h
			
 
				 LIB_H += ui/helpline.h
			
 
				 LIB_H += ui/progress.h
			
 
				 LIB_H += ui/util.h
			
 
				 LIB_H += ui/ui.h
			
 
				 LIB_H += util/data.h
			
 
				+LIB_H += util/kvm-stat.h
			
 
				 
			
 
				 LIB_OBJS += $(OUTPUT)util/abspath.o
			
 
				 LIB_OBJS += $(OUTPUT)util/alias.o
			
@@ -373,6 +375,8 @@ LIB_OBJS += $(OUTPUT)util/stat.o
 
				 LIB_OBJS += $(OUTPUT)util/record.o
			
 
				 LIB_OBJS += $(OUTPUT)util/srcline.o
			
 
				 LIB_OBJS += $(OUTPUT)util/data.o
			
 
				+LIB_OBJS += $(OUTPUT)util/tsc.o
			
 
				+LIB_OBJS += $(OUTPUT)util/cloexec.o
			
 
				 
			
 
				 LIB_OBJS += $(OUTPUT)ui/setup.o
			
 
				 LIB_OBJS += $(OUTPUT)ui/helpline.o
			
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1
 
				 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
			
 
				 endif
			
 
				 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
			
 
				+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o
			
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -5,9 +5,7 @@
 
				 #include <string.h>
			
 
				 
			
 
				 #include "../../util/header.h"
			
 
				-
			
 
				-#define __stringify_1(x)        #x
			
 
				-#define __stringify(x)          __stringify_1(x)
			
 
				+#include "../../util/util.h"
			
 
				 
			
 
				 #define mfspr(rn)       ({unsigned long rval; \
			
 
				 			 asm volatile("mfspr %0," __stringify(rn) \
			
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -0,0 +1,266 @@
 
				+/*
			
 
				+ * Use DWARF Debug information to skip unnecessary callchain entries.
			
 
				+ *
			
 
				+ * Copyright (C) 2014 Sukadev Bhattiprolu, IBM Corporation.
			
 
				+ * Copyright (C) 2014 Ulrich Weigand, IBM Corporation.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License
			
 
				+ * as published by the Free Software Foundation; either version
			
 
				+ * 2 of the License, or (at your option) any later version.
			
 
				+ */
			
 
				+#include <inttypes.h>
			
 
				+#include <dwarf.h>
			
 
				+#include <elfutils/libdwfl.h>
			
 
				+
			
 
				+#include "util/thread.h"
			
 
				+#include "util/callchain.h"
			
 
				+
			
 
				+/*
			
 
				+ * When saving the callchain on Power, the kernel conservatively saves
			
 
				+ * excess entries in the callchain. A few of these entries are needed
			
 
				+ * in some cases but not others. If the unnecessary entries are not
			
 
				+ * ignored, we end up with duplicate arcs in the call-graphs. Use
			
 
				+ * DWARF debug information to skip over any unnecessary callchain
			
 
				+ * entries.
			
 
				+ *
			
 
				+ * See function header for arch_skip_callchain_idx() below for more details.
			
 
				+ *
			
 
				+ * The libdwfl code in this file is based on code from elfutils
			
 
				+ * (libdwfl/argp-std.c, libdwfl/tests/addrcfi.c, etc).
			
 
				+ */
			
 
				+static char *debuginfo_path;
			
 
				+
			
 
				+static const Dwfl_Callbacks offline_callbacks = {
			
 
				+	.debuginfo_path = &debuginfo_path,
			
 
				+	.find_debuginfo = dwfl_standard_find_debuginfo,
			
 
				+	.section_address = dwfl_offline_section_address,
			
 
				+};
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * Use the DWARF expression for the Call-frame-address and determine
			
 
				+ * if return address is in LR and if a new frame was allocated.
			
 
				+ */
			
 
				+static int check_return_reg(int ra_regno, Dwarf_Frame *frame)
			
 
				+{
			
 
				+	Dwarf_Op ops_mem[2];
			
 
				+	Dwarf_Op dummy;
			
 
				+	Dwarf_Op *ops = &dummy;
			
 
				+	size_t nops;
			
 
				+	int result;
			
 
				+
			
 
				+	result = dwarf_frame_register(frame, ra_regno, ops_mem, &ops, &nops);
			
 
				+	if (result < 0) {
			
 
				+		pr_debug("dwarf_frame_register() %s\n", dwarf_errmsg(-1));
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Check if return address is on the stack.
			
 
				+	 */
			
 
				+	if (nops != 0 || ops != NULL)
			
 
				+		return 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Return address is in LR. Check if a frame was allocated
			
 
				+	 * but not-yet used.
			
 
				+	 */
			
 
				+	result = dwarf_frame_cfa(frame, &ops, &nops);
			
 
				+	if (result < 0) {
			
 
				+		pr_debug("dwarf_frame_cfa() returns %d, %s\n", result,
			
 
				+					dwarf_errmsg(-1));
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * If call frame address is in r1, no new frame was allocated.
			
 
				+	 */
			
 
				+	if (nops == 1 && ops[0].atom == DW_OP_bregx && ops[0].number == 1 &&
			
 
				+				ops[0].number2 == 0)
			
 
				+		return 1;
			
 
				+
			
 
				+	/*
			
 
				+	 * A new frame was allocated but has not yet been used.
			
 
				+	 */
			
 
				+	return 2;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Get the DWARF frame from the .eh_frame section.
			
 
				+ */
			
 
				+static Dwarf_Frame *get_eh_frame(Dwfl_Module *mod, Dwarf_Addr pc)
			
 
				+{
			
 
				+	int		result;
			
 
				+	Dwarf_Addr	bias;
			
 
				+	Dwarf_CFI	*cfi;
			
 
				+	Dwarf_Frame	*frame;
			
 
				+
			
 
				+	cfi = dwfl_module_eh_cfi(mod, &bias);
			
 
				+	if (!cfi) {
			
 
				+		pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1));
			
 
				+		return NULL;
			
 
				+	}
			
 
				+
			
 
				+	result = dwarf_cfi_addrframe(cfi, pc, &frame);
			
 
				+	if (result) {
			
 
				+		pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1));
			
 
				+		return NULL;
			
 
				+	}
			
 
				+
			
 
				+	return frame;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Get the DWARF frame from the .debug_frame section.
			
 
				+ */
			
 
				+static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc)
			
 
				+{
			
 
				+	Dwarf_CFI       *cfi;
			
 
				+	Dwarf_Addr      bias;
			
 
				+	Dwarf_Frame     *frame;
			
 
				+	int             result;
			
 
				+
			
 
				+	cfi = dwfl_module_dwarf_cfi(mod, &bias);
			
 
				+	if (!cfi) {
			
 
				+		pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1));
			
 
				+		return NULL;
			
 
				+	}
			
 
				+
			
 
				+	result = dwarf_cfi_addrframe(cfi, pc, &frame);
			
 
				+	if (result) {
			
 
				+		pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1));
			
 
				+		return NULL;
			
 
				+	}
			
 
				+
			
 
				+	return frame;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return:
			
 
				+ *	0 if return address for the program counter @pc is on stack
			
 
				+ *	1 if return address is in LR and no new stack frame was allocated
			
 
				+ *	2 if return address is in LR and a new frame was allocated (but not
			
 
				+ *		yet used)
			
 
				+ *	-1 in case of errors
			
 
				+ */
			
 
				+static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
			
 
				+{
			
 
				+	int		rc = -1;
			
 
				+	Dwfl		*dwfl;
			
 
				+	Dwfl_Module	*mod;
			
 
				+	Dwarf_Frame	*frame;
			
 
				+	int		ra_regno;
			
 
				+	Dwarf_Addr	start = pc;
			
 
				+	Dwarf_Addr	end = pc;
			
 
				+	bool		signalp;
			
 
				+
			
 
				+	dwfl = dwfl_begin(&offline_callbacks);
			
 
				+	if (!dwfl) {
			
 
				+		pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				+	if (dwfl_report_offline(dwfl, "",  exec_file, -1) == NULL) {
			
 
				+		pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1));
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	mod = dwfl_addrmodule(dwfl, pc);
			
 
				+	if (!mod) {
			
 
				+		pr_debug("dwfl_addrmodule() failed, %s\n", dwarf_errmsg(-1));
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * To work with split debug info files (eg: glibc), check both
			
 
				+	 * .eh_frame and .debug_frame sections of the ELF header.
			
 
				+	 */
			
 
				+	frame = get_eh_frame(mod, pc);
			
 
				+	if (!frame) {
			
 
				+		frame = get_dwarf_frame(mod, pc);
			
 
				+		if (!frame)
			
 
				+			goto out;
			
 
				+	}
			
 
				+
			
 
				+	ra_regno = dwarf_frame_info(frame, &start, &end, &signalp);
			
 
				+	if (ra_regno < 0) {
			
 
				+		pr_debug("Return address register unavailable: %s\n",
			
 
				+				dwarf_errmsg(-1));
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	rc = check_return_reg(ra_regno, frame);
			
 
				+
			
 
				+out:
			
 
				+	dwfl_end(dwfl);
			
 
				+	return rc;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * The callchain saved by the kernel always includes the link register (LR).
			
 
				+ *
			
 
				+ *	0:	PERF_CONTEXT_USER
			
 
				+ *	1:	Program counter (Next instruction pointer)
			
 
				+ *	2:	LR value
			
 
				+ *	3:	Caller's caller
			
 
				+ *	4:	...
			
 
				+ *
			
 
				+ * The value in LR is only needed when it holds a return address. If the
			
 
				+ * return address is on the stack, we should ignore the LR value.
			
 
				+ *
			
 
				+ * Further, when the return address is in the LR, if a new frame was just
			
 
				+ * allocated but the LR was not saved into it, then the LR contains the
			
 
				+ * caller, slot 4: contains the caller's caller and the contents of slot 3:
			
 
				+ * (chain->ips[3]) is undefined and must be ignored.
			
 
				+ *
			
 
				+ * Use DWARF debug information to determine if any entries need to be skipped.
			
 
				+ *
			
 
				+ * Return:
			
 
				+ *	index:	of callchain entry that needs to be ignored (if any)
			
 
				+ *	-1	if no entry needs to be ignored or in case of errors
			
 
				+ */
			
 
				+int arch_skip_callchain_idx(struct machine *machine, struct thread *thread,
			
 
				+				struct ip_callchain *chain)
			
 
				+{
			
 
				+	struct addr_location al;
			
 
				+	struct dso *dso = NULL;
			
 
				+	int rc;
			
 
				+	u64 ip;
			
 
				+	u64 skip_slot = -1;
			
 
				+
			
 
				+	if (chain->nr < 3)
			
 
				+		return skip_slot;
			
 
				+
			
 
				+	ip = chain->ips[2];
			
 
				+
			
 
				+	thread__find_addr_location(thread, machine, PERF_RECORD_MISC_USER,
			
 
				+			MAP__FUNCTION, ip, &al);
			
 
				+
			
 
				+	if (al.map)
			
 
				+		dso = al.map->dso;
			
 
				+
			
 
				+	if (!dso) {
			
 
				+		pr_debug("%" PRIx64 " dso is NULL\n", ip);
			
 
				+		return skip_slot;
			
 
				+	}
			
 
				+
			
 
				+	rc = check_return_addr(dso->long_name, ip);
			
 
				+
			
 
				+	pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n",
			
 
				+				dso->long_name, chain->nr, ip, rc);
			
 
				+
			
 
				+	if (rc == 0) {
			
 
				+		/*
			
 
				+		 * Return address on stack. Ignore LR value in callchain
			
 
				+		 */
			
 
				+		skip_slot = 2;
			
 
				+	} else if (rc == 2) {
			
 
				+		/*
			
 
				+		 * New frame allocated but return address still in LR.
			
 
				+		 * Ignore the caller's caller entry in callchain.
			
 
				+		 */
			
 
				+		skip_slot = 3;
			
 
				+	}
			
 
				+	return skip_slot;
			
 
				+}
			
--- a/tools/perf/arch/s390/Makefile
+++ b/tools/perf/arch/s390/Makefile
@@ -2,3 +2,6 @@ ifndef NO_DWARF
 
				 PERF_HAVE_DWARF_REGS := 1
			
 
				 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
			
 
				 endif
			
 
				+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
			
 
				+HAVE_KVM_STAT_SUPPORT := 1
			
 
				+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o
			
--- a/tools/perf/arch/s390/util/header.c
+++ b/tools/perf/arch/s390/util/header.c
@@ -0,0 +1,28 @@
 
				+/*
			
 
				+ * Implementation of get_cpuid().
			
 
				+ *
			
 
				+ * Copyright 2014 IBM Corp.
			
 
				+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License (version 2 only)
			
 
				+ * as published by the Free Software Foundation.
			
 
				+ */
			
 
				+
			
 
				+#include <sys/types.h>
			
 
				+#include <unistd.h>
			
 
				+#include <stdio.h>
			
 
				+#include <string.h>
			
 
				+
			
 
				+#include "../../util/header.h"
			
 
				+
			
 
				+int get_cpuid(char *buffer, size_t sz)
			
 
				+{
			
 
				+	const char *cpuid = "IBM/S390";
			
 
				+
			
 
				+	if (strlen(cpuid) + 1 > sz)
			
 
				+		return -1;
			
 
				+
			
 
				+	strcpy(buffer, cpuid);
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -0,0 +1,105 @@
 
				+/*
			
 
				+ * Arch specific functions for perf kvm stat.
			
 
				+ *
			
 
				+ * Copyright 2014 IBM Corp.
			
 
				+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License (version 2 only)
			
 
				+ * as published by the Free Software Foundation.
			
 
				+ */
			
 
				+
			
 
				+#include "../../util/kvm-stat.h"
			
 
				+#include <asm/kvm_perf.h>
			
 
				+
			
 
				+define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
			
 
				+define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
			
 
				+define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes);
			
 
				+define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
			
 
				+define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
			
 
				+
			
 
				+static void event_icpt_insn_get_key(struct perf_evsel *evsel,
			
 
				+				    struct perf_sample *sample,
			
 
				+				    struct event_key *key)
			
 
				+{
			
 
				+	unsigned long insn;
			
 
				+
			
 
				+	insn = perf_evsel__intval(evsel, sample, "instruction");
			
 
				+	key->key = icpt_insn_decoder(insn);
			
 
				+	key->exit_reasons = sie_icpt_insn_codes;
			
 
				+}
			
 
				+
			
 
				+static void event_sigp_get_key(struct perf_evsel *evsel,
			
 
				+			       struct perf_sample *sample,
			
 
				+			       struct event_key *key)
			
 
				+{
			
 
				+	key->key = perf_evsel__intval(evsel, sample, "order_code");
			
 
				+	key->exit_reasons = sie_sigp_order_codes;
			
 
				+}
			
 
				+
			
 
				+static void event_diag_get_key(struct perf_evsel *evsel,
			
 
				+			       struct perf_sample *sample,
			
 
				+			       struct event_key *key)
			
 
				+{
			
 
				+	key->key = perf_evsel__intval(evsel, sample, "code");
			
 
				+	key->exit_reasons = sie_diagnose_codes;
			
 
				+}
			
 
				+
			
 
				+static void event_icpt_prog_get_key(struct perf_evsel *evsel,
			
 
				+				    struct perf_sample *sample,
			
 
				+				    struct event_key *key)
			
 
				+{
			
 
				+	key->key = perf_evsel__intval(evsel, sample, "code");
			
 
				+	key->exit_reasons = sie_icpt_prog_codes;
			
 
				+}
			
 
				+
			
 
				+static struct child_event_ops child_events[] = {
			
 
				+	{ .name = "kvm:kvm_s390_intercept_instruction",
			
 
				+	  .get_key = event_icpt_insn_get_key },
			
 
				+	{ .name = "kvm:kvm_s390_handle_sigp",
			
 
				+	  .get_key = event_sigp_get_key },
			
 
				+	{ .name = "kvm:kvm_s390_handle_diag",
			
 
				+	  .get_key = event_diag_get_key },
			
 
				+	{ .name = "kvm:kvm_s390_intercept_prog",
			
 
				+	  .get_key = event_icpt_prog_get_key },
			
 
				+	{ NULL, NULL },
			
 
				+};
			
 
				+
			
 
				+static struct kvm_events_ops exit_events = {
			
 
				+	.is_begin_event = exit_event_begin,
			
 
				+	.is_end_event = exit_event_end,
			
 
				+	.child_ops = child_events,
			
 
				+	.decode_key = exit_event_decode_key,
			
 
				+	.name = "VM-EXIT"
			
 
				+};
			
 
				+
			
 
				+const char * const kvm_events_tp[] = {
			
 
				+	"kvm:kvm_s390_sie_enter",
			
 
				+	"kvm:kvm_s390_sie_exit",
			
 
				+	"kvm:kvm_s390_intercept_instruction",
			
 
				+	"kvm:kvm_s390_handle_sigp",
			
 
				+	"kvm:kvm_s390_handle_diag",
			
 
				+	"kvm:kvm_s390_intercept_prog",
			
 
				+	NULL,
			
 
				+};
			
 
				+
			
 
				+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
			
 
				+	{ .name = "vmexit", .ops = &exit_events },
			
 
				+	{ NULL, NULL },
			
 
				+};
			
 
				+
			
 
				+const char * const kvm_skip_events[] = {
			
 
				+	"Wait state",
			
 
				+	NULL,
			
 
				+};
			
 
				+
			
 
				+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
			
 
				+{
			
 
				+	if (strstr(cpuid, "IBM/S390")) {
			
 
				+		kvm->exit_reasons = sie_exit_reasons;
			
 
				+		kvm->exit_reasons_isa = "SIE";
			
 
				+	} else
			
 
				+		return -ENOTSUP;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -15,3 +15,5 @@ endif
 
				 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
			
 
				 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o
			
 
				 LIB_H += arch/$(ARCH)/util/tsc.h
			
 
				+HAVE_KVM_STAT_SUPPORT := 1
			
 
				+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o
			
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -3,6 +3,7 @@
 
				 #include "thread.h"
			
 
				 #include "map.h"
			
 
				 #include "event.h"
			
 
				+#include "debug.h"
			
 
				 #include "tests/tests.h"
			
 
				 
			
 
				 #define STACK_SIZE 8192
			
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -0,0 +1,156 @@
 
				+#include "../../util/kvm-stat.h"
			
 
				+#include <asm/kvm_perf.h>
			
 
				+
			
 
				+define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
			
 
				+define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
			
 
				+
			
 
				+static struct kvm_events_ops exit_events = {
			
 
				+	.is_begin_event = exit_event_begin,
			
 
				+	.is_end_event = exit_event_end,
			
 
				+	.decode_key = exit_event_decode_key,
			
 
				+	.name = "VM-EXIT"
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * For the mmio events, we treat:
			
 
				+ * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
			
 
				+ * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...).
			
 
				+ */
			
 
				+static void mmio_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample,
			
 
				+			       struct event_key *key)
			
 
				+{
			
 
				+	key->key  = perf_evsel__intval(evsel, sample, "gpa");
			
 
				+	key->info = perf_evsel__intval(evsel, sample, "type");
			
 
				+}
			
 
				+
			
 
				+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
			
 
				+#define KVM_TRACE_MMIO_READ 1
			
 
				+#define KVM_TRACE_MMIO_WRITE 2
			
 
				+
			
 
				+static bool mmio_event_begin(struct perf_evsel *evsel,
			
 
				+			     struct perf_sample *sample, struct event_key *key)
			
 
				+{
			
 
				+	/* MMIO read begin event in kernel. */
			
 
				+	if (kvm_exit_event(evsel))
			
 
				+		return true;
			
 
				+
			
 
				+	/* MMIO write begin event in kernel. */
			
 
				+	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
			
 
				+	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
			
 
				+		mmio_event_get_key(evsel, sample, key);
			
 
				+		return true;
			
 
				+	}
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample,
			
 
				+			   struct event_key *key)
			
 
				+{
			
 
				+	/* MMIO write end event in kernel. */
			
 
				+	if (kvm_entry_event(evsel))
			
 
				+		return true;
			
 
				+
			
 
				+	/* MMIO read end event in kernel.*/
			
 
				+	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
			
 
				+	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
			
 
				+		mmio_event_get_key(evsel, sample, key);
			
 
				+		return true;
			
 
				+	}
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
			
 
				+				  struct event_key *key,
			
 
				+				  char *decode)
			
 
				+{
			
 
				+	scnprintf(decode, DECODE_STR_LEN, "%#lx:%s",
			
 
				+		  (unsigned long)key->key,
			
 
				+		  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
			
 
				+}
			
 
				+
			
 
				+static struct kvm_events_ops mmio_events = {
			
 
				+	.is_begin_event = mmio_event_begin,
			
 
				+	.is_end_event = mmio_event_end,
			
 
				+	.decode_key = mmio_event_decode_key,
			
 
				+	.name = "MMIO Access"
			
 
				+};
			
 
				+
			
 
				+ /* The time of emulation pio access is from kvm_pio to kvm_entry. */
			
 
				+static void ioport_event_get_key(struct perf_evsel *evsel,
			
 
				+				 struct perf_sample *sample,
			
 
				+				 struct event_key *key)
			
 
				+{
			
 
				+	key->key  = perf_evsel__intval(evsel, sample, "port");
			
 
				+	key->info = perf_evsel__intval(evsel, sample, "rw");
			
 
				+}
			
 
				+
			
 
				+static bool ioport_event_begin(struct perf_evsel *evsel,
			
 
				+			       struct perf_sample *sample,
			
 
				+			       struct event_key *key)
			
 
				+{
			
 
				+	if (!strcmp(evsel->name, "kvm:kvm_pio")) {
			
 
				+		ioport_event_get_key(evsel, sample, key);
			
 
				+		return true;
			
 
				+	}
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+static bool ioport_event_end(struct perf_evsel *evsel,
			
 
				+			     struct perf_sample *sample __maybe_unused,
			
 
				+			     struct event_key *key __maybe_unused)
			
 
				+{
			
 
				+	return kvm_entry_event(evsel);
			
 
				+}
			
 
				+
			
 
				+static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
			
 
				+				    struct event_key *key,
			
 
				+				    char *decode)
			
 
				+{
			
 
				+	scnprintf(decode, DECODE_STR_LEN, "%#llx:%s",
			
 
				+		  (unsigned long long)key->key,
			
 
				+		  key->info ? "POUT" : "PIN");
			
 
				+}
			
 
				+
			
 
				+static struct kvm_events_ops ioport_events = {
			
 
				+	.is_begin_event = ioport_event_begin,
			
 
				+	.is_end_event = ioport_event_end,
			
 
				+	.decode_key = ioport_event_decode_key,
			
 
				+	.name = "IO Port Access"
			
 
				+};
			
 
				+
			
 
				+const char * const kvm_events_tp[] = {
			
 
				+	"kvm:kvm_entry",
			
 
				+	"kvm:kvm_exit",
			
 
				+	"kvm:kvm_mmio",
			
 
				+	"kvm:kvm_pio",
			
 
				+	NULL,
			
 
				+};
			
 
				+
			
 
				+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
			
 
				+	{ .name = "vmexit", .ops = &exit_events },
			
 
				+	{ .name = "mmio", .ops = &mmio_events },
			
 
				+	{ .name = "ioport", .ops = &ioport_events },
			
 
				+	{ NULL, NULL },
			
 
				+};
			
 
				+
			
 
				+const char * const kvm_skip_events[] = {
			
 
				+	"HLT",
			
 
				+	NULL,
			
 
				+};
			
 
				+
			
 
				+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
			
 
				+{
			
 
				+	if (strstr(cpuid, "Intel")) {
			
 
				+		kvm->exit_reasons = vmx_exit_reasons;
			
 
				+		kvm->exit_reasons_isa = "VMX";
			
 
				+	} else if (strstr(cpuid, "AMD")) {
			
 
				+		kvm->exit_reasons = svm_exit_reasons;
			
 
				+		kvm->exit_reasons_isa = "SVM";
			
 
				+	} else
			
 
				+		return -ENOTSUP;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -6,29 +6,9 @@
 
				 #include "../../perf.h"
			
 
				 #include <linux/types.h>
			
 
				 #include "../../util/debug.h"
			
 
				+#include "../../util/tsc.h"
			
 
				 #include "tsc.h"
			
 
				 
			
 
				-u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
			
 
				-{
			
 
				-	u64 t, quot, rem;
			
 
				-
			
 
				-	t = ns - tc->time_zero;
			
 
				-	quot = t / tc->time_mult;
			
 
				-	rem  = t % tc->time_mult;
			
 
				-	return (quot << tc->time_shift) +
			
 
				-	       (rem << tc->time_shift) / tc->time_mult;
			
 
				-}
			
 
				-
			
 
				-u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
			
 
				-{
			
 
				-	u64 quot, rem;
			
 
				-
			
 
				-	quot = cyc >> tc->time_shift;
			
 
				-	rem  = cyc & ((1 << tc->time_shift) - 1);
			
 
				-	return tc->time_zero + quot * tc->time_mult +
			
 
				-	       ((rem * tc->time_mult) >> tc->time_shift);
			
 
				-}
			
 
				-
			
 
				 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
			
 
				 			     struct perf_tsc_conversion *tc)
			
 
				 {
			
@@ -57,3 +37,12 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				+
			
 
				+u64 rdtsc(void)
			
 
				+{
			
 
				+	unsigned int low, high;
			
 
				+
			
 
				+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
			
 
				+
			
 
				+	return low | ((u64)high) << 32;
			
 
				+}
			
--- a/tools/perf/arch/x86/util/tsc.h
+++ b/tools/perf/arch/x86/util/tsc.h
@@ -14,7 +14,4 @@ struct perf_event_mmap_page;
 
				 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
			
 
				 			     struct perf_tsc_conversion *tc);
			
 
				 
			
 
				-u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
			
 
				-u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
			
 
				-
			
 
				 #endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
			
--- a/tools/perf/arch/x86/util/unwind-libunwind.c
+++ b/tools/perf/arch/x86/util/unwind-libunwind.c
@@ -3,6 +3,7 @@
 
				 #include <libunwind.h>
			
 
				 #include "perf_regs.h"
			
 
				 #include "../../util/unwind.h"
			
 
				+#include "../../util/debug.h"
			
 
				 
			
 
				 #ifdef HAVE_ARCH_X86_64_SUPPORT
			
 
				 int libunwind__arch_reg_id(int regnum)
			
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -43,5 +43,6 @@ extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
 
				 #define BENCH_FORMAT_UNKNOWN		-1
			
 
				 
			
 
				 extern int bench_format;
			
 
				+extern unsigned int bench_repeat;
			
 
				 
			
 
				 #endif
			
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -29,13 +29,6 @@ static u_int32_t futex1 = 0, futex2 = 0;
 
				  */
			
 
				 static unsigned int nrequeue = 1;
			
 
				 
			
 
				-/*
			
 
				- * There can be significant variance from run to run,
			
 
				- * the more repeats, the more exact the overall avg and
			
 
				- * the better idea of the futex latency.
			
 
				- */
			
 
				-static unsigned int repeat = 10;
			
 
				-
			
 
				 static pthread_t *worker;
			
 
				 static bool done = 0, silent = 0;
			
 
				 static pthread_mutex_t thread_lock;
			
@@ -46,7 +39,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
 
				 static const struct option options[] = {
			
 
				 	OPT_UINTEGER('t', "threads",  &nthreads, "Specify amount of threads"),
			
 
				 	OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
			
 
				-	OPT_UINTEGER('r', "repeat",   &repeat,   "Specify amount of times to repeat the run"),
			
 
				 	OPT_BOOLEAN( 's', "silent",   &silent,   "Silent mode: do not display data/details"),
			
 
				 	OPT_END()
			
 
				 };
			
@@ -146,7 +138,7 @@ int bench_futex_requeue(int argc, const char **argv,
 
				 	pthread_cond_init(&thread_parent, NULL);
			
 
				 	pthread_cond_init(&thread_worker, NULL);
			
 
				 
			
 
				-	for (j = 0; j < repeat && !done; j++) {
			
 
				+	for (j = 0; j < bench_repeat && !done; j++) {
			
 
				 		unsigned int nrequeued = 0;
			
 
				 		struct timeval start, end, runtime;
			
 
				 
			
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -30,15 +30,8 @@ static u_int32_t futex1 = 0;
 
				  */
			
 
				 static unsigned int nwakes = 1;
			
 
				 
			
 
				-/*
			
 
				- * There can be significant variance from run to run,
			
 
				- * the more repeats, the more exact the overall avg and
			
 
				- * the better idea of the futex latency.
			
 
				- */
			
 
				-static unsigned int repeat = 10;
			
 
				-
			
 
				 pthread_t *worker;
			
 
				-static bool done = 0, silent = 0;
			
 
				+static bool done = false, silent = false;
			
 
				 static pthread_mutex_t thread_lock;
			
 
				 static pthread_cond_t thread_parent, thread_worker;
			
 
				 static struct stats waketime_stats, wakeup_stats;
			
@@ -47,7 +40,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
 
				 static const struct option options[] = {
			
 
				 	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
			
 
				 	OPT_UINTEGER('w', "nwakes",  &nwakes,   "Specify amount of threads to wake at once"),
			
 
				-	OPT_UINTEGER('r', "repeat",  &repeat,   "Specify amount of times to repeat the run"),
			
 
				 	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
			
 
				 	OPT_END()
			
 
				 };
			
@@ -149,7 +141,7 @@ int bench_futex_wake(int argc, const char **argv,
 
				 	pthread_cond_init(&thread_parent, NULL);
			
 
				 	pthread_cond_init(&thread_worker, NULL);
			
 
				 
			
 
				-	for (j = 0; j < repeat && !done; j++) {
			
 
				+	for (j = 0; j < bench_repeat && !done; j++) {
			
 
				 		unsigned int nwoken = 0;
			
 
				 		struct timeval start, end, runtime;
			
 
				 
			
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -10,6 +10,7 @@
 
				 #include "../util/util.h"
			
 
				 #include "../util/parse-options.h"
			
 
				 #include "../util/header.h"
			
 
				+#include "../util/cloexec.h"
			
 
				 #include "bench.h"
			
 
				 #include "mem-memcpy-arch.h"
			
 
				 
			
@@ -83,7 +84,8 @@ static struct perf_event_attr cycle_attr = {
 
				 
			
 
				 static void init_cycle(void)
			
 
				 {
			
 
				-	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0);
			
 
				+	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1,
			
 
				+				       perf_event_open_cloexec_flag());
			
 
				 
			
 
				 	if (cycle_fd < 0 && errno == ENOSYS)
			
 
				 		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			
@@ -189,6 +191,11 @@ int bench_mem_memcpy(int argc, const char **argv,
 
				 	argc = parse_options(argc, argv, options,
			
 
				 			     bench_mem_memcpy_usage, 0);
			
 
				 
			
 
				+	if (no_prefault && only_prefault) {
			
 
				+		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
			
 
				+		return 1;
			
 
				+	}
			
 
				+
			
 
				 	if (use_cycle)
			
 
				 		init_cycle();
			
 
				 
			
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -10,6 +10,7 @@
 
				 #include "../util/util.h"
			
 
				 #include "../util/parse-options.h"
			
 
				 #include "../util/header.h"
			
 
				+#include "../util/cloexec.h"
			
 
				 #include "bench.h"
			
 
				 #include "mem-memset-arch.h"
			
 
				 
			
@@ -83,7 +84,8 @@ static struct perf_event_attr cycle_attr = {
 
				 
			
 
				 static void init_cycle(void)
			
 
				 {
			
 
				-	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0);
			
 
				+	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1,
			
 
				+				       perf_event_open_cloexec_flag());
			
 
				 
			
 
				 	if (cycle_fd < 0 && errno == ENOSYS)
			
 
				 		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			
@@ -181,6 +183,11 @@ int bench_mem_memset(int argc, const char **argv,
 
				 	argc = parse_options(argc, argv, options,
			
 
				 			     bench_mem_memset_usage, 0);
			
 
				 
			
 
				+	if (no_prefault && only_prefault) {
			
 
				+		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
			
 
				+		return 1;
			
 
				+	}
			
 
				+
			
 
				 	if (use_cycle)
			
 
				 		init_cycle();
			
 
				 
			
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -28,6 +28,7 @@
 
				 #include <sys/time.h>
			
 
				 #include <sys/poll.h>
			
 
				 #include <limits.h>
			
 
				+#include <err.h>
			
 
				 
			
 
				 #define DATASIZE 100
			
 
				 
			
@@ -50,12 +51,6 @@ struct receiver_context {
 
				 	int wakefd;
			
 
				 };
			
 
				 
			
 
				-static void barf(const char *msg)
			
 
				-{
			
 
				-	fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno));
			
 
				-	exit(1);
			
 
				-}
			
 
				-
			
 
				 static void fdpair(int fds[2])
			
 
				 {
			
 
				 	if (use_pipes) {
			
@@ -66,7 +61,7 @@ static void fdpair(int fds[2])
 
				 			return;
			
 
				 	}
			
 
				 
			
 
				-	barf(use_pipes ? "pipe()" : "socketpair()");
			
 
				+	err(EXIT_FAILURE, use_pipes ? "pipe()" : "socketpair()");
			
 
				 }
			
 
				 
			
 
				 /* Block until we're ready to go */
			
@@ -77,11 +72,11 @@ static void ready(int ready_out, int wakefd)
 
				 
			
 
				 	/* Tell them we're ready. */
			
 
				 	if (write(ready_out, &dummy, 1) != 1)
			
 
				-		barf("CLIENT: ready write");
			
 
				+		err(EXIT_FAILURE, "CLIENT: ready write");
			
 
				 
			
 
				 	/* Wait for "GO" signal */
			
 
				 	if (poll(&pollfd, 1, -1) != 1)
			
 
				-		barf("poll");
			
 
				+		err(EXIT_FAILURE, "poll");
			
 
				 }
			
 
				 
			
 
				 /* Sender sprays loops messages down each file descriptor */
			
@@ -101,7 +96,7 @@ again:
 
				 			ret = write(ctx->out_fds[j], data + done,
			
 
				 				    sizeof(data)-done);
			
 
				 			if (ret < 0)
			
 
				-				barf("SENDER: write");
			
 
				+				err(EXIT_FAILURE, "SENDER: write");
			
 
				 			done += ret;
			
 
				 			if (done < DATASIZE)
			
 
				 				goto again;
			
@@ -131,7 +126,7 @@ static void *receiver(struct receiver_context* ctx)
 
				 again:
			
 
				 		ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
			
 
				 		if (ret < 0)
			
 
				-			barf("SERVER: read");
			
 
				+			err(EXIT_FAILURE, "SERVER: read");
			
 
				 		done += ret;
			
 
				 		if (done < DATASIZE)
			
 
				 			goto again;
			
@@ -144,14 +139,14 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *))
 
				 {
			
 
				 	pthread_attr_t attr;
			
 
				 	pthread_t childid;
			
 
				-	int err;
			
 
				+	int ret;
			
 
				 
			
 
				 	if (!thread_mode) {
			
 
				 		/* process mode */
			
 
				 		/* Fork the receiver. */
			
 
				 		switch (fork()) {
			
 
				 		case -1:
			
 
				-			barf("fork()");
			
 
				+			err(EXIT_FAILURE, "fork()");
			
 
				 			break;
			
 
				 		case 0:
			
 
				 			(*func) (ctx);
			
@@ -165,19 +160,17 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *))
 
				 	}
			
 
				 
			
 
				 	if (pthread_attr_init(&attr) != 0)
			
 
				-		barf("pthread_attr_init:");
			
 
				+		err(EXIT_FAILURE, "pthread_attr_init:");
			
 
				 
			
 
				 #ifndef __ia64__
			
 
				 	if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
			
 
				-		barf("pthread_attr_setstacksize");
			
 
				+		err(EXIT_FAILURE, "pthread_attr_setstacksize");
			
 
				 #endif
			
 
				 
			
 
				-	err = pthread_create(&childid, &attr, func, ctx);
			
 
				-	if (err != 0) {
			
 
				-		fprintf(stderr, "pthread_create failed: %s (%d)\n",
			
 
				-			strerror(err), err);
			
 
				-		exit(-1);
			
 
				-	}
			
 
				+	ret = pthread_create(&childid, &attr, func, ctx);
			
 
				+	if (ret != 0)
			
 
				+		err(EXIT_FAILURE, "pthread_create failed");
			
 
				+
			
 
				 	return childid;
			
 
				 }
			
 
				 
			
@@ -207,14 +200,14 @@ static unsigned int group(pthread_t *pth,
 
				 			+ num_fds * sizeof(int));
			
 
				 
			
 
				 	if (!snd_ctx)
			
 
				-		barf("malloc()");
			
 
				+		err(EXIT_FAILURE, "malloc()");
			
 
				 
			
 
				 	for (i = 0; i < num_fds; i++) {
			
 
				 		int fds[2];
			
 
				 		struct receiver_context *ctx = malloc(sizeof(*ctx));
			
 
				 
			
 
				 		if (!ctx)
			
 
				-			barf("malloc()");
			
 
				+			err(EXIT_FAILURE, "malloc()");
			
 
				 
			
 
				 
			
 
				 		/* Create the pipe between client and server */
			
@@ -281,7 +274,7 @@ int bench_sched_messaging(int argc, const char **argv,
 
				 
			
 
				 	pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
			
 
				 	if (!pth_tab)
			
 
				-		barf("main:malloc()");
			
 
				+		err(EXIT_FAILURE, "main:malloc()");
			
 
				 
			
 
				 	fdpair(readyfds);
			
 
				 	fdpair(wakefds);
			
@@ -294,13 +287,13 @@ int bench_sched_messaging(int argc, const char **argv,
 
				 	/* Wait for everyone to be ready */
			
 
				 	for (i = 0; i < total_children; i++)
			
 
				 		if (read(readyfds[0], &dummy, 1) != 1)
			
 
				-			barf("Reading for readyfds");
			
 
				+			err(EXIT_FAILURE, "Reading for readyfds");
			
 
				 
			
 
				 	gettimeofday(&start, NULL);
			
 
				 
			
 
				 	/* Kick them off */
			
 
				 	if (write(wakefds[1], &dummy, 1) != 1)
			
 
				-		barf("Writing to start them");
			
 
				+		err(EXIT_FAILURE, "Writing to start them");
			
 
				 
			
 
				 	/* Reap them all */
			
 
				 	for (i = 0; i < total_children; i++)
			
@@ -332,5 +325,7 @@ int bench_sched_messaging(int argc, const char **argv,
 
				 		break;
			
 
				 	}
			
 
				 
			
 
				+	free(pth_tab);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -104,9 +104,11 @@ static const char *bench_format_str;
 
				 
			
 
				 /* Output/formatting style, exported to benchmark modules: */
			
 
				 int bench_format = BENCH_FORMAT_DEFAULT;
			
 
				+unsigned int bench_repeat = 10; /* default number of times to repeat the run */
			
 
				 
			
 
				 static const struct option bench_options[] = {
			
 
				 	OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"),
			
 
				+	OPT_UINTEGER('r', "repeat",  &bench_repeat,   "Specify amount of times to repeat the run"),
			
 
				 	OPT_END()
			
 
				 };
			
 
				 
			
@@ -226,6 +228,11 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		goto end;
			
 
				 	}
			
 
				 
			
 
				+	if (bench_repeat == 0) {
			
 
				+		printf("Invalid repeat option: Must specify a positive value\n");
			
 
				+		goto end;
			
 
				+	}
			
 
				+
			
 
				 	if (argc < 1) {
			
 
				 		print_usage();
			
 
				 		goto end;
			
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -125,7 +125,8 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static int build_id_cache__add_kcore(const char *filename, const char *debugdir)
			
 
				+static int build_id_cache__add_kcore(const char *filename, const char *debugdir,
			
 
				+				     bool force)
			
 
				 {
			
 
				 	char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1];
			
 
				 	char from_dir[PATH_MAX], to_dir[PATH_MAX];
			
@@ -144,7 +145,8 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir)
 
				 	scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s",
			
 
				 		  debugdir, sbuildid);
			
 
				 
			
 
				-	if (!build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) {
			
 
				+	if (!force &&
			
 
				+	    !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) {
			
 
				 		pr_debug("same kcore found in %s\n", to_dir);
			
 
				 		return 0;
			
 
				 	}
			
@@ -389,7 +391,7 @@ int cmd_buildid_cache(int argc, const char **argv,
 
				 	}
			
 
				 
			
 
				 	if (kcore_filename &&
			
 
				-	    build_id_cache__add_kcore(kcore_filename, debugdir))
			
 
				+	    build_id_cache__add_kcore(kcore_filename, debugdir, force))
			
 
				 		pr_warning("Couldn't add %s\n", kcore_filename);
			
 
				 
			
 
				 	return ret;
			
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -15,6 +15,7 @@
 
				 #include "util/parse-options.h"
			
 
				 #include "util/session.h"
			
 
				 #include "util/data.h"
			
 
				+#include "util/debug.h"
			
 
				 
			
 
				 static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
			
 
				 {
			
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -11,6 +11,7 @@
 
				 #include "util/parse-options.h"
			
 
				 #include "util/run-command.h"
			
 
				 #include "util/help.h"
			
 
				+#include "util/debug.h"
			
 
				 
			
 
				 static struct man_viewer_list {
			
 
				 	struct man_viewer_list *next;
			
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -389,6 +389,9 @@ static int __cmd_inject(struct perf_inject *inject)
 
				 	ret = perf_session__process_events(session, &inject->tool);
			
 
				 
			
 
				 	if (!file_out->is_pipe) {
			
 
				+		if (inject->build_ids)
			
 
				+			perf_header__set_feat(&session->header,
			
 
				+					      HEADER_BUILD_ID);
			
 
				 		session->header.data_size = inject->bytes_written;
			
 
				 		perf_session__write_header(session, session->evlist, file_out->fd, true);
			
 
				 	}
			
@@ -436,6 +439,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 			    "where and how long tasks slept"),
			
 
				 		OPT_INCR('v', "verbose", &verbose,
			
 
				 			 "be more verbose (show build ids, etc)"),
			
 
				+		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
			
 
				+			   "kallsyms pathname"),
			
 
				 		OPT_END()
			
 
				 	};
			
 
				 	const char * const inject_usage[] = {
			
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -29,114 +29,25 @@
 
				 #include <pthread.h>
			
 
				 #include <math.h>
			
 
				 
			
 
				-#if defined(__i386__) || defined(__x86_64__)
			
 
				-#include <asm/svm.h>
			
 
				-#include <asm/vmx.h>
			
 
				-#include <asm/kvm.h>
			
 
				-
			
 
				-struct event_key {
			
 
				-	#define INVALID_KEY     (~0ULL)
			
 
				-	u64 key;
			
 
				-	int info;
			
 
				-};
			
 
				-
			
 
				-struct kvm_event_stats {
			
 
				-	u64 time;
			
 
				-	struct stats stats;
			
 
				-};
			
 
				-
			
 
				-struct kvm_event {
			
 
				-	struct list_head hash_entry;
			
 
				-	struct rb_node rb;
			
 
				-
			
 
				-	struct event_key key;
			
 
				-
			
 
				-	struct kvm_event_stats total;
			
 
				-
			
 
				-	#define DEFAULT_VCPU_NUM 8
			
 
				-	int max_vcpu;
			
 
				-	struct kvm_event_stats *vcpu;
			
 
				-};
			
 
				-
			
 
				-typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int);
			
 
				-
			
 
				-struct kvm_event_key {
			
 
				-	const char *name;
			
 
				-	key_cmp_fun key;
			
 
				-};
			
 
				-
			
 
				-
			
 
				-struct perf_kvm_stat;
			
 
				-
			
 
				-struct kvm_events_ops {
			
 
				-	bool (*is_begin_event)(struct perf_evsel *evsel,
			
 
				-			       struct perf_sample *sample,
			
 
				-			       struct event_key *key);
			
 
				-	bool (*is_end_event)(struct perf_evsel *evsel,
			
 
				-			     struct perf_sample *sample, struct event_key *key);
			
 
				-	void (*decode_key)(struct perf_kvm_stat *kvm, struct event_key *key,
			
 
				-			   char decode[20]);
			
 
				-	const char *name;
			
 
				-};
			
 
				-
			
 
				-struct exit_reasons_table {
			
 
				-	unsigned long exit_code;
			
 
				-	const char *reason;
			
 
				-};
			
 
				+#ifdef HAVE_KVM_STAT_SUPPORT
			
 
				+#include <asm/kvm_perf.h>
			
 
				+#include "util/kvm-stat.h"
			
 
				 
			
 
				-#define EVENTS_BITS		12
			
 
				-#define EVENTS_CACHE_SIZE	(1UL << EVENTS_BITS)
			
 
				-
			
 
				-struct perf_kvm_stat {
			
 
				-	struct perf_tool    tool;
			
 
				-	struct record_opts  opts;
			
 
				-	struct perf_evlist  *evlist;
			
 
				-	struct perf_session *session;
			
 
				-
			
 
				-	const char *file_name;
			
 
				-	const char *report_event;
			
 
				-	const char *sort_key;
			
 
				-	int trace_vcpu;
			
 
				-
			
 
				-	struct exit_reasons_table *exit_reasons;
			
 
				-	int exit_reasons_size;
			
 
				-	const char *exit_reasons_isa;
			
 
				-
			
 
				-	struct kvm_events_ops *events_ops;
			
 
				-	key_cmp_fun compare;
			
 
				-	struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
			
 
				-
			
 
				-	u64 total_time;
			
 
				-	u64 total_count;
			
 
				-	u64 lost_events;
			
 
				-	u64 duration;
			
 
				-
			
 
				-	const char *pid_str;
			
 
				-	struct intlist *pid_list;
			
 
				-
			
 
				-	struct rb_root result;
			
 
				-
			
 
				-	int timerfd;
			
 
				-	unsigned int display_time;
			
 
				-	bool live;
			
 
				-};
			
 
				-
			
 
				-
			
 
				-static void exit_event_get_key(struct perf_evsel *evsel,
			
 
				-			       struct perf_sample *sample,
			
 
				-			       struct event_key *key)
			
 
				+void exit_event_get_key(struct perf_evsel *evsel,
			
 
				+			struct perf_sample *sample,
			
 
				+			struct event_key *key)
			
 
				 {
			
 
				 	key->info = 0;
			
 
				-	key->key = perf_evsel__intval(evsel, sample, "exit_reason");
			
 
				+	key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
			
 
				 }
			
 
				 
			
 
				-static bool kvm_exit_event(struct perf_evsel *evsel)
			
 
				+bool kvm_exit_event(struct perf_evsel *evsel)
			
 
				 {
			
 
				-	return !strcmp(evsel->name, "kvm:kvm_exit");
			
 
				+	return !strcmp(evsel->name, KVM_EXIT_TRACE);
			
 
				 }
			
 
				 
			
 
				-static bool exit_event_begin(struct perf_evsel *evsel,
			
 
				-			     struct perf_sample *sample, struct event_key *key)
			
 
				+bool exit_event_begin(struct perf_evsel *evsel,
			
 
				+		      struct perf_sample *sample, struct event_key *key)
			
 
				 {
			
 
				 	if (kvm_exit_event(evsel)) {
			
 
				 		exit_event_get_key(evsel, sample, key);
			
@@ -146,32 +57,23 @@ static bool exit_event_begin(struct perf_evsel *evsel,
 
				 	return false;
			
 
				 }
			
 
				 
			
 
				-static bool kvm_entry_event(struct perf_evsel *evsel)
			
 
				+bool kvm_entry_event(struct perf_evsel *evsel)
			
 
				 {
			
 
				-	return !strcmp(evsel->name, "kvm:kvm_entry");
			
 
				+	return !strcmp(evsel->name, KVM_ENTRY_TRACE);
			
 
				 }
			
 
				 
			
 
				-static bool exit_event_end(struct perf_evsel *evsel,
			
 
				-			   struct perf_sample *sample __maybe_unused,
			
 
				-			   struct event_key *key __maybe_unused)
			
 
				+bool exit_event_end(struct perf_evsel *evsel,
			
 
				+		    struct perf_sample *sample __maybe_unused,
			
 
				+		    struct event_key *key __maybe_unused)
			
 
				 {
			
 
				 	return kvm_entry_event(evsel);
			
 
				 }
			
 
				 
			
 
				-static struct exit_reasons_table vmx_exit_reasons[] = {
			
 
				-	VMX_EXIT_REASONS
			
 
				-};
			
 
				-
			
 
				-static struct exit_reasons_table svm_exit_reasons[] = {
			
 
				-	SVM_EXIT_REASONS
			
 
				-};
			
 
				-
			
 
				-static const char *get_exit_reason(struct perf_kvm_stat *kvm, u64 exit_code)
			
 
				+static const char *get_exit_reason(struct perf_kvm_stat *kvm,
			
 
				+				   struct exit_reasons_table *tbl,
			
 
				+				   u64 exit_code)
			
 
				 {
			
 
				-	int i = kvm->exit_reasons_size;
			
 
				-	struct exit_reasons_table *tbl = kvm->exit_reasons;
			
 
				-
			
 
				-	while (i--) {
			
 
				+	while (tbl->reason != NULL) {
			
 
				 		if (tbl->exit_code == exit_code)
			
 
				 			return tbl->reason;
			
 
				 		tbl++;
			
@@ -182,148 +84,30 @@ static const char *get_exit_reason(struct perf_kvm_stat *kvm, u64 exit_code)
 
				 	return "UNKNOWN";
			
 
				 }
			
 
				 
			
 
				-static void exit_event_decode_key(struct perf_kvm_stat *kvm,
			
 
				-				  struct event_key *key,
			
 
				-				  char decode[20])
			
 
				+void exit_event_decode_key(struct perf_kvm_stat *kvm,
			
 
				+			   struct event_key *key,
			
 
				+			   char *decode)
			
 
				 {
			
 
				-	const char *exit_reason = get_exit_reason(kvm, key->key);
			
 
				+	const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
			
 
				+						  key->key);
			
 
				 
			
 
				-	scnprintf(decode, 20, "%s", exit_reason);
			
 
				+	scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason);
			
 
				 }
			
 
				 
			
 
				-static struct kvm_events_ops exit_events = {
			
 
				-	.is_begin_event = exit_event_begin,
			
 
				-	.is_end_event = exit_event_end,
			
 
				-	.decode_key = exit_event_decode_key,
			
 
				-	.name = "VM-EXIT"
			
 
				-};
			
 
				-
			
 
				-/*
			
 
				- * For the mmio events, we treat:
			
 
				- * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
			
 
				- * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...).
			
 
				- */
			
 
				-static void mmio_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample,
			
 
				-			       struct event_key *key)
			
 
				-{
			
 
				-	key->key  = perf_evsel__intval(evsel, sample, "gpa");
			
 
				-	key->info = perf_evsel__intval(evsel, sample, "type");
			
 
				-}
			
 
				-
			
 
				-#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
			
 
				-#define KVM_TRACE_MMIO_READ 1
			
 
				-#define KVM_TRACE_MMIO_WRITE 2
			
 
				-
			
 
				-static bool mmio_event_begin(struct perf_evsel *evsel,
			
 
				-			     struct perf_sample *sample, struct event_key *key)
			
 
				-{
			
 
				-	/* MMIO read begin event in kernel. */
			
 
				-	if (kvm_exit_event(evsel))
			
 
				-		return true;
			
 
				-
			
 
				-	/* MMIO write begin event in kernel. */
			
 
				-	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
			
 
				-	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
			
 
				-		mmio_event_get_key(evsel, sample, key);
			
 
				-		return true;
			
 
				-	}
			
 
				-
			
 
				-	return false;
			
 
				-}
			
 
				-
			
 
				-static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample,
			
 
				-			   struct event_key *key)
			
 
				-{
			
 
				-	/* MMIO write end event in kernel. */
			
 
				-	if (kvm_entry_event(evsel))
			
 
				-		return true;
			
 
				-
			
 
				-	/* MMIO read end event in kernel.*/
			
 
				-	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
			
 
				-	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
			
 
				-		mmio_event_get_key(evsel, sample, key);
			
 
				-		return true;
			
 
				-	}
			
 
				-
			
 
				-	return false;
			
 
				-}
			
 
				-
			
 
				-static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
			
 
				-				  struct event_key *key,
			
 
				-				  char decode[20])
			
 
				-{
			
 
				-	scnprintf(decode, 20, "%#lx:%s", (unsigned long)key->key,
			
 
				-				key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
			
 
				-}
			
 
				-
			
 
				-static struct kvm_events_ops mmio_events = {
			
 
				-	.is_begin_event = mmio_event_begin,
			
 
				-	.is_end_event = mmio_event_end,
			
 
				-	.decode_key = mmio_event_decode_key,
			
 
				-	.name = "MMIO Access"
			
 
				-};
			
 
				-
			
 
				- /* The time of emulation pio access is from kvm_pio to kvm_entry. */
			
 
				-static void ioport_event_get_key(struct perf_evsel *evsel,
			
 
				-				 struct perf_sample *sample,
			
 
				-				 struct event_key *key)
			
 
				+static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
			
 
				 {
			
 
				-	key->key  = perf_evsel__intval(evsel, sample, "port");
			
 
				-	key->info = perf_evsel__intval(evsel, sample, "rw");
			
 
				-}
			
 
				+	struct kvm_reg_events_ops *events_ops = kvm_reg_events_ops;
			
 
				 
			
 
				-static bool ioport_event_begin(struct perf_evsel *evsel,
			
 
				-			       struct perf_sample *sample,
			
 
				-			       struct event_key *key)
			
 
				-{
			
 
				-	if (!strcmp(evsel->name, "kvm:kvm_pio")) {
			
 
				-		ioport_event_get_key(evsel, sample, key);
			
 
				-		return true;
			
 
				+	for (events_ops = kvm_reg_events_ops; events_ops->name; events_ops++) {
			
 
				+		if (!strcmp(events_ops->name, kvm->report_event)) {
			
 
				+			kvm->events_ops = events_ops->ops;
			
 
				+			return true;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	return false;
			
 
				 }
			
 
				 
			
 
				-static bool ioport_event_end(struct perf_evsel *evsel,
			
 
				-			     struct perf_sample *sample __maybe_unused,
			
 
				-			     struct event_key *key __maybe_unused)
			
 
				-{
			
 
				-	return kvm_entry_event(evsel);
			
 
				-}
			
 
				-
			
 
				-static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
			
 
				-				    struct event_key *key,
			
 
				-				    char decode[20])
			
 
				-{
			
 
				-	scnprintf(decode, 20, "%#llx:%s", (unsigned long long)key->key,
			
 
				-				key->info ? "POUT" : "PIN");
			
 
				-}
			
 
				-
			
 
				-static struct kvm_events_ops ioport_events = {
			
 
				-	.is_begin_event = ioport_event_begin,
			
 
				-	.is_end_event = ioport_event_end,
			
 
				-	.decode_key = ioport_event_decode_key,
			
 
				-	.name = "IO Port Access"
			
 
				-};
			
 
				-
			
 
				-static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
			
 
				-{
			
 
				-	bool ret = true;
			
 
				-
			
 
				-	if (!strcmp(kvm->report_event, "vmexit"))
			
 
				-		kvm->events_ops = &exit_events;
			
 
				-	else if (!strcmp(kvm->report_event, "mmio"))
			
 
				-		kvm->events_ops = &mmio_events;
			
 
				-	else if (!strcmp(kvm->report_event, "ioport"))
			
 
				-		kvm->events_ops = &ioport_events;
			
 
				-	else {
			
 
				-		pr_err("Unknown report event:%s\n", kvm->report_event);
			
 
				-		ret = false;
			
 
				-	}
			
 
				-
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				 struct vcpu_event_record {
			
 
				 	int vcpu_id;
			
 
				 	u64 start_time;
			
@@ -477,6 +261,54 @@ static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				+static bool is_child_event(struct perf_kvm_stat *kvm,
			
 
				+			   struct perf_evsel *evsel,
			
 
				+			   struct perf_sample *sample,
			
 
				+			   struct event_key *key)
			
 
				+{
			
 
				+	struct child_event_ops *child_ops;
			
 
				+
			
 
				+	child_ops = kvm->events_ops->child_ops;
			
 
				+
			
 
				+	if (!child_ops)
			
 
				+		return false;
			
 
				+
			
 
				+	for (; child_ops->name; child_ops++) {
			
 
				+		if (!strcmp(evsel->name, child_ops->name)) {
			
 
				+			child_ops->get_key(evsel, sample, key);
			
 
				+			return true;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+static bool handle_child_event(struct perf_kvm_stat *kvm,
			
 
				+			       struct vcpu_event_record *vcpu_record,
			
 
				+			       struct event_key *key,
			
 
				+			       struct perf_sample *sample __maybe_unused)
			
 
				+{
			
 
				+	struct kvm_event *event = NULL;
			
 
				+
			
 
				+	if (key->key != INVALID_KEY)
			
 
				+		event = find_create_kvm_event(kvm, key);
			
 
				+
			
 
				+	vcpu_record->last_event = event;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static bool skip_event(const char *event)
			
 
				+{
			
 
				+	const char * const *skip_events;
			
 
				+
			
 
				+	for (skip_events = kvm_skip_events; *skip_events; skip_events++)
			
 
				+		if (!strcmp(event, *skip_events))
			
 
				+			return true;
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				 static bool handle_end_event(struct perf_kvm_stat *kvm,
			
 
				 			     struct vcpu_event_record *vcpu_record,
			
 
				 			     struct event_key *key,
			
@@ -525,10 +357,10 @@ static bool handle_end_event(struct perf_kvm_stat *kvm,
 
				 	time_diff = sample->time - time_begin;
			
 
				 
			
 
				 	if (kvm->duration && time_diff > kvm->duration) {
			
 
				-		char decode[32];
			
 
				+		char decode[DECODE_STR_LEN];
			
 
				 
			
 
				 		kvm->events_ops->decode_key(kvm, &event->key, decode);
			
 
				-		if (strcmp(decode, "HLT")) {
			
 
				+		if (!skip_event(decode)) {
			
 
				 			pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
			
 
				 				 sample->time, sample->pid, vcpu_record->vcpu_id,
			
 
				 				 decode, time_diff/1000);
			
@@ -553,7 +385,7 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread,
 
				 			return NULL;
			
 
				 		}
			
 
				 
			
 
				-		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, "vcpu_id");
			
 
				+		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID);
			
 
				 		thread->priv = vcpu_record;
			
 
				 	}
			
 
				 
			
@@ -566,7 +398,8 @@ static bool handle_kvm_event(struct perf_kvm_stat *kvm,
 
				 			     struct perf_sample *sample)
			
 
				 {
			
 
				 	struct vcpu_event_record *vcpu_record;
			
 
				-	struct event_key key = {.key = INVALID_KEY};
			
 
				+	struct event_key key = { .key = INVALID_KEY,
			
 
				+				 .exit_reasons = kvm->exit_reasons };
			
 
				 
			
 
				 	vcpu_record = per_vcpu_record(thread, evsel, sample);
			
 
				 	if (!vcpu_record)
			
@@ -580,6 +413,9 @@ static bool handle_kvm_event(struct perf_kvm_stat *kvm,
 
				 	if (kvm->events_ops->is_begin_event(evsel, sample, &key))
			
 
				 		return handle_begin_event(kvm, vcpu_record, &key, sample->time);
			
 
				 
			
 
				+	if (is_child_event(kvm, evsel, sample, &key))
			
 
				+		return handle_child_event(kvm, vcpu_record, &key, sample);
			
 
				+
			
 
				 	if (kvm->events_ops->is_end_event(evsel, sample, &key))
			
 
				 		return handle_end_event(kvm, vcpu_record, &key, sample);
			
 
				 
			
@@ -740,7 +576,7 @@ static void show_timeofday(void)
 
				 
			
 
				 static void print_result(struct perf_kvm_stat *kvm)
			
 
				 {
			
 
				-	char decode[20];
			
 
				+	char decode[DECODE_STR_LEN];
			
 
				 	struct kvm_event *event;
			
 
				 	int vcpu = kvm->trace_vcpu;
			
 
				 
			
@@ -751,7 +587,7 @@ static void print_result(struct perf_kvm_stat *kvm)
 
				 
			
 
				 	pr_info("\n\n");
			
 
				 	print_vcpu_info(kvm);
			
 
				-	pr_info("%20s ", kvm->events_ops->name);
			
 
				+	pr_info("%*s ", DECODE_STR_LEN, kvm->events_ops->name);
			
 
				 	pr_info("%10s ", "Samples");
			
 
				 	pr_info("%9s ", "Samples%");
			
 
				 
			
@@ -770,7 +606,7 @@ static void print_result(struct perf_kvm_stat *kvm)
 
				 		min = get_event_min(event, vcpu);
			
 
				 
			
 
				 		kvm->events_ops->decode_key(kvm, &event->key, decode);
			
 
				-		pr_info("%20s ", decode);
			
 
				+		pr_info("%*s ", DECODE_STR_LEN, decode);
			
 
				 		pr_info("%10llu ", (unsigned long long)ecount);
			
 
				 		pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
			
 
				 		pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
			
@@ -839,34 +675,28 @@ static int process_sample_event(struct perf_tool *tool,
 
				 static int cpu_isa_config(struct perf_kvm_stat *kvm)
			
 
				 {
			
 
				 	char buf[64], *cpuid;
			
 
				-	int err, isa;
			
 
				+	int err;
			
 
				 
			
 
				 	if (kvm->live) {
			
 
				 		err = get_cpuid(buf, sizeof(buf));
			
 
				 		if (err != 0) {
			
 
				-			pr_err("Failed to look up CPU type (Intel or AMD)\n");
			
 
				+			pr_err("Failed to look up CPU type\n");
			
 
				 			return err;
			
 
				 		}
			
 
				 		cpuid = buf;
			
 
				 	} else
			
 
				 		cpuid = kvm->session->header.env.cpuid;
			
 
				 
			
 
				-	if (strstr(cpuid, "Intel"))
			
 
				-		isa = 1;
			
 
				-	else if (strstr(cpuid, "AMD"))
			
 
				-		isa = 0;
			
 
				-	else {
			
 
				-		pr_err("CPU %s is not supported.\n", cpuid);
			
 
				-		return -ENOTSUP;
			
 
				+	if (!cpuid) {
			
 
				+		pr_err("Failed to look up CPU type\n");
			
 
				+		return -EINVAL;
			
 
				 	}
			
 
				 
			
 
				-	if (isa == 1) {
			
 
				-		kvm->exit_reasons = vmx_exit_reasons;
			
 
				-		kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons);
			
 
				-		kvm->exit_reasons_isa = "VMX";
			
 
				-	}
			
 
				+	err = cpu_isa_init(kvm, cpuid);
			
 
				+	if (err == -ENOTSUP)
			
 
				+		pr_err("CPU %s is not supported.\n", cpuid);
			
 
				 
			
 
				-	return 0;
			
 
				+	return err;
			
 
				 }
			
 
				 
			
 
				 static bool verify_vcpu(int vcpu)
			
@@ -1300,13 +1130,6 @@ exit:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static const char * const kvm_events_tp[] = {
			
 
				-	"kvm:kvm_entry",
			
 
				-	"kvm:kvm_exit",
			
 
				-	"kvm:kvm_mmio",
			
 
				-	"kvm:kvm_pio",
			
 
				-};
			
 
				-
			
 
				 #define STRDUP_FAIL_EXIT(s)		\
			
 
				 	({	char *_p;		\
			
 
				 	_p = strdup(s);		\
			
@@ -1318,7 +1141,7 @@ static const char * const kvm_events_tp[] = {
 
				 static int
			
 
				 kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
			
 
				 {
			
 
				-	unsigned int rec_argc, i, j;
			
 
				+	unsigned int rec_argc, i, j, events_tp_size;
			
 
				 	const char **rec_argv;
			
 
				 	const char * const record_args[] = {
			
 
				 		"record",
			
@@ -1326,9 +1149,14 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 
				 		"-m", "1024",
			
 
				 		"-c", "1",
			
 
				 	};
			
 
				+	const char * const *events_tp;
			
 
				+	events_tp_size = 0;
			
 
				+
			
 
				+	for (events_tp = kvm_events_tp; *events_tp; events_tp++)
			
 
				+		events_tp_size++;
			
 
				 
			
 
				 	rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
			
 
				-		   2 * ARRAY_SIZE(kvm_events_tp);
			
 
				+		   2 * events_tp_size;
			
 
				 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
			
 
				 
			
 
				 	if (rec_argv == NULL)
			
@@ -1337,7 +1165,7 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 
				 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
			
 
				 		rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
			
 
				 
			
 
				-	for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
			
 
				+	for (j = 0; j < events_tp_size; j++) {
			
 
				 		rec_argv[i++] = "-e";
			
 
				 		rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
			
 
				 	}
			
@@ -1356,7 +1184,8 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
 
				 {
			
 
				 	const struct option kvm_events_report_options[] = {
			
 
				 		OPT_STRING(0, "event", &kvm->report_event, "report event",
			
 
				-			    "event for reporting: vmexit, mmio, ioport"),
			
 
				+			   "event for reporting: vmexit, "
			
 
				+			   "mmio (x86 only), ioport (x86 only)"),
			
 
				 		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
			
 
				 			    "vcpu id to report"),
			
 
				 		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
			
@@ -1391,16 +1220,16 @@ static struct perf_evlist *kvm_live_event_list(void)
 
				 {
			
 
				 	struct perf_evlist *evlist;
			
 
				 	char *tp, *name, *sys;
			
 
				-	unsigned int j;
			
 
				 	int err = -1;
			
 
				+	const char * const *events_tp;
			
 
				 
			
 
				 	evlist = perf_evlist__new();
			
 
				 	if (evlist == NULL)
			
 
				 		return NULL;
			
 
				 
			
 
				-	for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
			
 
				+	for (events_tp = kvm_events_tp; *events_tp; events_tp++) {
			
 
				 
			
 
				-		tp = strdup(kvm_events_tp[j]);
			
 
				+		tp = strdup(*events_tp);
			
 
				 		if (tp == NULL)
			
 
				 			goto out;
			
 
				 
			
@@ -1409,7 +1238,7 @@ static struct perf_evlist *kvm_live_event_list(void)
 
				 		name = strchr(tp, ':');
			
 
				 		if (name == NULL) {
			
 
				 			pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
			
 
				-				kvm_events_tp[j]);
			
 
				+			       *events_tp);
			
 
				 			free(tp);
			
 
				 			goto out;
			
 
				 		}
			
@@ -1417,7 +1246,7 @@ static struct perf_evlist *kvm_live_event_list(void)
 
				 		name++;
			
 
				 
			
 
				 		if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
			
 
				-			pr_err("Failed to add %s tracepoint to the list\n", kvm_events_tp[j]);
			
 
				+			pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
			
 
				 			free(tp);
			
 
				 			goto out;
			
 
				 		}
			
@@ -1462,7 +1291,9 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 
				 			"key for sorting: sample(sort by samples number)"
			
 
				 			" time (sort by avg time)"),
			
 
				 		OPT_U64(0, "duration", &kvm->duration,
			
 
				-		    "show events other than HALT that take longer than duration usecs"),
			
 
				+			"show events other than"
			
 
				+			" HLT (x86 only) or Wait state (s390 only)"
			
 
				+			" that take longer than duration usecs"),
			
 
				 		OPT_END()
			
 
				 	};
			
 
				 	const char * const live_usage[] = {
			
@@ -1585,9 +1416,6 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
 
				 		.report_event	= "vmexit",
			
 
				 		.sort_key	= "sample",
			
 
				 
			
 
				-		.exit_reasons = svm_exit_reasons,
			
 
				-		.exit_reasons_size = ARRAY_SIZE(svm_exit_reasons),
			
 
				-		.exit_reasons_isa = "SVM",
			
 
				 	};
			
 
				 
			
 
				 	if (argc == 1) {
			
@@ -1609,7 +1437,7 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
 
				 perf_stat:
			
 
				 	return cmd_stat(argc, argv, NULL);
			
 
				 }
			
 
				-#endif
			
 
				+#endif /* HAVE_KVM_STAT_SUPPORT */
			
 
				 
			
 
				 static int __cmd_record(const char *file_name, int argc, const char **argv)
			
 
				 {
			
@@ -1726,7 +1554,7 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		return cmd_top(argc, argv, NULL);
			
 
				 	else if (!strncmp(argv[0], "buildid-list", 12))
			
 
				 		return __cmd_buildid_list(file_name, argc, argv);
			
 
				-#if defined(__i386__) || defined(__x86_64__)
			
 
				+#ifdef HAVE_KVM_STAT_SUPPORT
			
 
				 	else if (!strncmp(argv[0], "stat", 4))
			
 
				 		return kvm_cmd_stat(file_name, argc, argv);
			
 
				 #endif
			
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -238,6 +238,7 @@ static struct perf_event_header finished_round_event = {
 
				 
			
 
				 static int record__mmap_read_all(struct record *rec)
			
 
				 {
			
 
				+	u64 bytes_written = rec->bytes_written;
			
 
				 	int i;
			
 
				 	int rc = 0;
			
 
				 
			
@@ -250,7 +251,11 @@ static int record__mmap_read_all(struct record *rec)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
			
 
				+	/*
			
 
				+	 * Mark the round finished in case we wrote
			
 
				+	 * at least one event.
			
 
				+	 */
			
 
				+	if (bytes_written != rec->bytes_written)
			
 
				 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
			
 
				 
			
 
				 out:
			
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -10,6 +10,7 @@
 
				 #include "util/header.h"
			
 
				 #include "util/session.h"
			
 
				 #include "util/tool.h"
			
 
				+#include "util/cloexec.h"
			
 
				 
			
 
				 #include "util/parse-options.h"
			
 
				 #include "util/trace-event.h"
			
@@ -434,7 +435,8 @@ static int self_open_counters(void)
 
				 	attr.type = PERF_TYPE_SOFTWARE;
			
 
				 	attr.config = PERF_COUNT_SW_TASK_CLOCK;
			
 
				 
			
 
				-	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
			
 
				+	fd = sys_perf_event_open(&attr, 0, -1, -1,
			
 
				+				 perf_event_open_cloexec_flag());
			
 
				 
			
 
				 	if (fd < 0)
			
 
				 		pr_err("Error: sys_perf_event_open() syscall returned "
			
@@ -935,8 +937,8 @@ static int latency_switch_event(struct perf_sched *sched,
 
				 		return -1;
			
 
				 	}
			
 
				 
			
 
				-	sched_out = machine__findnew_thread(machine, 0, prev_pid);
			
 
				-	sched_in = machine__findnew_thread(machine, 0, next_pid);
			
 
				+	sched_out = machine__findnew_thread(machine, -1, prev_pid);
			
 
				+	sched_in = machine__findnew_thread(machine, -1, next_pid);
			
 
				 
			
 
				 	out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
			
 
				 	if (!out_events) {
			
@@ -979,7 +981,7 @@ static int latency_runtime_event(struct perf_sched *sched,
 
				 {
			
 
				 	const u32 pid	   = perf_evsel__intval(evsel, sample, "pid");
			
 
				 	const u64 runtime  = perf_evsel__intval(evsel, sample, "runtime");
			
 
				-	struct thread *thread = machine__findnew_thread(machine, 0, pid);
			
 
				+	struct thread *thread = machine__findnew_thread(machine, -1, pid);
			
 
				 	struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
			
 
				 	u64 timestamp = sample->time;
			
 
				 	int cpu = sample->cpu;
			
@@ -1012,7 +1014,7 @@ static int latency_wakeup_event(struct perf_sched *sched,
 
				 	struct thread *wakee;
			
 
				 	u64 timestamp = sample->time;
			
 
				 
			
 
				-	wakee = machine__findnew_thread(machine, 0, pid);
			
 
				+	wakee = machine__findnew_thread(machine, -1, pid);
			
 
				 	atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
			
 
				 	if (!atoms) {
			
 
				 		if (thread_atoms_insert(sched, wakee))
			
@@ -1072,7 +1074,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
 
				 	if (sched->profile_cpu == -1)
			
 
				 		return 0;
			
 
				 
			
 
				-	migrant = machine__findnew_thread(machine, 0, pid);
			
 
				+	migrant = machine__findnew_thread(machine, -1, pid);
			
 
				 	atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
			
 
				 	if (!atoms) {
			
 
				 		if (thread_atoms_insert(sched, migrant))
			
@@ -1290,7 +1292,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 
				 		return -1;
			
 
				 	}
			
 
				 
			
 
				-	sched_in = machine__findnew_thread(machine, 0, next_pid);
			
 
				+	sched_in = machine__findnew_thread(machine, -1, next_pid);
			
 
				 
			
 
				 	sched->curr_thread[this_cpu] = sched_in;
			
 
				 
			
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -358,27 +358,6 @@ static void print_sample_start(struct perf_sample *sample,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static bool is_bts_event(struct perf_event_attr *attr)
			
 
				-{
			
 
				-	return ((attr->type == PERF_TYPE_HARDWARE) &&
			
 
				-		(attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
			
 
				-		(attr->sample_period == 1));
			
 
				-}
			
 
				-
			
 
				-static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
			
 
				-{
			
 
				-	if ((attr->type == PERF_TYPE_SOFTWARE) &&
			
 
				-	    ((attr->config == PERF_COUNT_SW_PAGE_FAULTS) ||
			
 
				-	     (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN) ||
			
 
				-	     (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
			
 
				-		return true;
			
 
				-
			
 
				-	if (is_bts_event(attr))
			
 
				-		return true;
			
 
				-
			
 
				-	return false;
			
 
				-}
			
 
				-
			
 
				 static void print_sample_addr(union perf_event *event,
			
 
				 			  struct perf_sample *sample,
			
 
				 			  struct machine *machine,
			
@@ -386,24 +365,13 @@ static void print_sample_addr(union perf_event *event,
 
				 			  struct perf_event_attr *attr)
			
 
				 {
			
 
				 	struct addr_location al;
			
 
				-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
			
 
				 
			
 
				 	printf("%16" PRIx64, sample->addr);
			
 
				 
			
 
				 	if (!sample_addr_correlates_sym(attr))
			
 
				 		return;
			
 
				 
			
 
				-	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
			
 
				-			      sample->addr, &al);
			
 
				-	if (!al.map)
			
 
				-		thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE,
			
 
				-				      sample->addr, &al);
			
 
				-
			
 
				-	al.cpu = sample->cpu;
			
 
				-	al.sym = NULL;
			
 
				-
			
 
				-	if (al.map)
			
 
				-		al.sym = map__find_symbol(al.map, al.addr, NULL);
			
 
				+	perf_event__preprocess_sample_addr(event, sample, machine, thread, &al);
			
 
				 
			
 
				 	if (PRINT_FIELD(SYM)) {
			
 
				 		printf(" ");
			
@@ -427,25 +395,35 @@ static void print_sample_bts(union perf_event *event,
 
				 			     struct addr_location *al)
			
 
				 {
			
 
				 	struct perf_event_attr *attr = &evsel->attr;
			
 
				+	bool print_srcline_last = false;
			
 
				 
			
 
				 	/* print branch_from information */
			
 
				 	if (PRINT_FIELD(IP)) {
			
 
				-		if (!symbol_conf.use_callchain)
			
 
				-			printf(" ");
			
 
				-		else
			
 
				+		unsigned int print_opts = output[attr->type].print_ip_opts;
			
 
				+
			
 
				+		if (symbol_conf.use_callchain && sample->callchain) {
			
 
				 			printf("\n");
			
 
				-		perf_evsel__print_ip(evsel, sample, al,
			
 
				-				     output[attr->type].print_ip_opts,
			
 
				+		} else {
			
 
				+			printf(" ");
			
 
				+			if (print_opts & PRINT_IP_OPT_SRCLINE) {
			
 
				+				print_srcline_last = true;
			
 
				+				print_opts &= ~PRINT_IP_OPT_SRCLINE;
			
 
				+			}
			
 
				+		}
			
 
				+		perf_evsel__print_ip(evsel, sample, al, print_opts,
			
 
				 				     PERF_MAX_STACK_DEPTH);
			
 
				 	}
			
 
				 
			
 
				-	printf(" => ");
			
 
				-
			
 
				 	/* print branch_to information */
			
 
				 	if (PRINT_FIELD(ADDR) ||
			
 
				 	    ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
			
 
				-	     !output[attr->type].user_set))
			
 
				+	     !output[attr->type].user_set)) {
			
 
				+		printf(" => ");
			
 
				 		print_sample_addr(event, sample, al->machine, thread, attr);
			
 
				+	}
			
 
				+
			
 
				+	if (print_srcline_last)
			
 
				+		map__fprintf_srcline(al->map, al->addr, "\n  ", stdout);
			
 
				 
			
 
				 	printf("\n");
			
 
				 }
			
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -184,7 +184,7 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 
				 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
			
 
				 {
			
 
				 	evsel->priv = zalloc(sizeof(struct perf_stat));
			
 
				-	if (evsel == NULL)
			
 
				+	if (evsel->priv == NULL)
			
 
				 		return -ENOMEM;
			
 
				 	perf_evsel__reset_stat_priv(evsel);
			
 
				 	return 0;
			
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -37,6 +37,7 @@
 
				 #include "util/svghelper.h"
			
 
				 #include "util/tool.h"
			
 
				 #include "util/data.h"
			
 
				+#include "util/debug.h"
			
 
				 
			
 
				 #define SUPPORT_OLD_POWER_EVENTS 1
			
 
				 #define PWR_EVENT_EXIT -1
			
@@ -60,10 +61,17 @@ struct timechart {
 
				 				tasks_only,
			
 
				 				with_backtrace,
			
 
				 				topology;
			
 
				+	/* IO related settings */
			
 
				+	u64			io_events;
			
 
				+	bool			io_only,
			
 
				+				skip_eagain;
			
 
				+	u64			min_time,
			
 
				+				merge_dist;
			
 
				 };
			
 
				 
			
 
				 struct per_pidcomm;
			
 
				 struct cpu_sample;
			
 
				+struct io_sample;
			
 
				 
			
 
				 /*
			
 
				  * Datastructure layout:
			
@@ -84,6 +92,7 @@ struct per_pid {
 
				 	u64		start_time;
			
 
				 	u64		end_time;
			
 
				 	u64		total_time;
			
 
				+	u64		total_bytes;
			
 
				 	int		display;
			
 
				 
			
 
				 	struct per_pidcomm *all;
			
@@ -97,6 +106,8 @@ struct per_pidcomm {
 
				 	u64		start_time;
			
 
				 	u64		end_time;
			
 
				 	u64		total_time;
			
 
				+	u64		max_bytes;
			
 
				+	u64		total_bytes;
			
 
				 
			
 
				 	int		Y;
			
 
				 	int		display;
			
@@ -107,6 +118,7 @@ struct per_pidcomm {
 
				 	char		*comm;
			
 
				 
			
 
				 	struct cpu_sample *samples;
			
 
				+	struct io_sample  *io_samples;
			
 
				 };
			
 
				 
			
 
				 struct sample_wrapper {
			
@@ -131,6 +143,27 @@ struct cpu_sample {
 
				 	const char *backtrace;
			
 
				 };
			
 
				 
			
 
				+enum {
			
 
				+	IOTYPE_READ,
			
 
				+	IOTYPE_WRITE,
			
 
				+	IOTYPE_SYNC,
			
 
				+	IOTYPE_TX,
			
 
				+	IOTYPE_RX,
			
 
				+	IOTYPE_POLL,
			
 
				+};
			
 
				+
			
 
				+struct io_sample {
			
 
				+	struct io_sample *next;
			
 
				+
			
 
				+	u64 start_time;
			
 
				+	u64 end_time;
			
 
				+	u64 bytes;
			
 
				+	int type;
			
 
				+	int fd;
			
 
				+	int err;
			
 
				+	int merges;
			
 
				+};
			
 
				+
			
 
				 #define CSTATE 1
			
 
				 #define PSTATE 2
			
 
				 
			
@@ -213,7 +246,7 @@ static void pid_fork(struct timechart *tchart, int pid, int ppid, u64 timestamp)
 
				 		pid_set_comm(tchart, pid, pp->current->comm);
			
 
				 
			
 
				 	p->start_time = timestamp;
			
 
				-	if (p->current) {
			
 
				+	if (p->current && !p->current->start_time) {
			
 
				 		p->current->start_time = timestamp;
			
 
				 		p->current->state_since = timestamp;
			
 
				 	}
			
@@ -682,6 +715,249 @@ static void end_sample_processing(struct timechart *tchart)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static int pid_begin_io_sample(struct timechart *tchart, int pid, int type,
			
 
				+			       u64 start, int fd)
			
 
				+{
			
 
				+	struct per_pid *p = find_create_pid(tchart, pid);
			
 
				+	struct per_pidcomm *c = p->current;
			
 
				+	struct io_sample *sample;
			
 
				+	struct io_sample *prev;
			
 
				+
			
 
				+	if (!c) {
			
 
				+		c = zalloc(sizeof(*c));
			
 
				+		if (!c)
			
 
				+			return -ENOMEM;
			
 
				+		p->current = c;
			
 
				+		c->next = p->all;
			
 
				+		p->all = c;
			
 
				+	}
			
 
				+
			
 
				+	prev = c->io_samples;
			
 
				+
			
 
				+	if (prev && prev->start_time && !prev->end_time) {
			
 
				+		pr_warning("Skip invalid start event: "
			
 
				+			   "previous event already started!\n");
			
 
				+
			
 
				+		/* remove previous event that has been started,
			
 
				+		 * we are not sure we will ever get an end for it */
			
 
				+		c->io_samples = prev->next;
			
 
				+		free(prev);
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	sample = zalloc(sizeof(*sample));
			
 
				+	if (!sample)
			
 
				+		return -ENOMEM;
			
 
				+	sample->start_time = start;
			
 
				+	sample->type = type;
			
 
				+	sample->fd = fd;
			
 
				+	sample->next = c->io_samples;
			
 
				+	c->io_samples = sample;
			
 
				+
			
 
				+	if (c->start_time == 0 || c->start_time > start)
			
 
				+		c->start_time = start;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int pid_end_io_sample(struct timechart *tchart, int pid, int type,
			
 
				+			     u64 end, long ret)
			
 
				+{
			
 
				+	struct per_pid *p = find_create_pid(tchart, pid);
			
 
				+	struct per_pidcomm *c = p->current;
			
 
				+	struct io_sample *sample, *prev;
			
 
				+
			
 
				+	if (!c) {
			
 
				+		pr_warning("Invalid pidcomm!\n");
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				+	sample = c->io_samples;
			
 
				+
			
 
				+	if (!sample) /* skip partially captured events */
			
 
				+		return 0;
			
 
				+
			
 
				+	if (sample->end_time) {
			
 
				+		pr_warning("Skip invalid end event: "
			
 
				+			   "previous event already ended!\n");
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	if (sample->type != type) {
			
 
				+		pr_warning("Skip invalid end event: invalid event type!\n");
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	sample->end_time = end;
			
 
				+	prev = sample->next;
			
 
				+
			
 
				+	/* we want to be able to see small and fast transfers, so make them
			
 
				+	 * at least min_time long, but don't overlap them */
			
 
				+	if (sample->end_time - sample->start_time < tchart->min_time)
			
 
				+		sample->end_time = sample->start_time + tchart->min_time;
			
 
				+	if (prev && sample->start_time < prev->end_time) {
			
 
				+		if (prev->err) /* try to make errors more visible */
			
 
				+			sample->start_time = prev->end_time;
			
 
				+		else
			
 
				+			prev->end_time = sample->start_time;
			
 
				+	}
			
 
				+
			
 
				+	if (ret < 0) {
			
 
				+		sample->err = ret;
			
 
				+	} else if (type == IOTYPE_READ || type == IOTYPE_WRITE ||
			
 
				+		   type == IOTYPE_TX || type == IOTYPE_RX) {
			
 
				+
			
 
				+		if ((u64)ret > c->max_bytes)
			
 
				+			c->max_bytes = ret;
			
 
				+
			
 
				+		c->total_bytes += ret;
			
 
				+		p->total_bytes += ret;
			
 
				+		sample->bytes = ret;
			
 
				+	}
			
 
				+
			
 
				+	/* merge two requests to make svg smaller and render-friendly */
			
 
				+	if (prev &&
			
 
				+	    prev->type == sample->type &&
			
 
				+	    prev->err == sample->err &&
			
 
				+	    prev->fd == sample->fd &&
			
 
				+	    prev->end_time + tchart->merge_dist >= sample->start_time) {
			
 
				+
			
 
				+		sample->bytes += prev->bytes;
			
 
				+		sample->merges += prev->merges + 1;
			
 
				+
			
 
				+		sample->start_time = prev->start_time;
			
 
				+		sample->next = prev->next;
			
 
				+		free(prev);
			
 
				+
			
 
				+		if (!sample->err && sample->bytes > c->max_bytes)
			
 
				+			c->max_bytes = sample->bytes;
			
 
				+	}
			
 
				+
			
 
				+	tchart->io_events++;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_enter_read(struct timechart *tchart,
			
 
				+		   struct perf_evsel *evsel,
			
 
				+		   struct perf_sample *sample)
			
 
				+{
			
 
				+	long fd = perf_evsel__intval(evsel, sample, "fd");
			
 
				+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_READ,
			
 
				+				   sample->time, fd);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_exit_read(struct timechart *tchart,
			
 
				+		  struct perf_evsel *evsel,
			
 
				+		  struct perf_sample *sample)
			
 
				+{
			
 
				+	long ret = perf_evsel__intval(evsel, sample, "ret");
			
 
				+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_READ,
			
 
				+				 sample->time, ret);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_enter_write(struct timechart *tchart,
			
 
				+		    struct perf_evsel *evsel,
			
 
				+		    struct perf_sample *sample)
			
 
				+{
			
 
				+	long fd = perf_evsel__intval(evsel, sample, "fd");
			
 
				+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_WRITE,
			
 
				+				   sample->time, fd);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_exit_write(struct timechart *tchart,
			
 
				+		   struct perf_evsel *evsel,
			
 
				+		   struct perf_sample *sample)
			
 
				+{
			
 
				+	long ret = perf_evsel__intval(evsel, sample, "ret");
			
 
				+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_WRITE,
			
 
				+				 sample->time, ret);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_enter_sync(struct timechart *tchart,
			
 
				+		   struct perf_evsel *evsel,
			
 
				+		   struct perf_sample *sample)
			
 
				+{
			
 
				+	long fd = perf_evsel__intval(evsel, sample, "fd");
			
 
				+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_SYNC,
			
 
				+				   sample->time, fd);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_exit_sync(struct timechart *tchart,
			
 
				+		  struct perf_evsel *evsel,
			
 
				+		  struct perf_sample *sample)
			
 
				+{
			
 
				+	long ret = perf_evsel__intval(evsel, sample, "ret");
			
 
				+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_SYNC,
			
 
				+				 sample->time, ret);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_enter_tx(struct timechart *tchart,
			
 
				+		 struct perf_evsel *evsel,
			
 
				+		 struct perf_sample *sample)
			
 
				+{
			
 
				+	long fd = perf_evsel__intval(evsel, sample, "fd");
			
 
				+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_TX,
			
 
				+				   sample->time, fd);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_exit_tx(struct timechart *tchart,
			
 
				+		struct perf_evsel *evsel,
			
 
				+		struct perf_sample *sample)
			
 
				+{
			
 
				+	long ret = perf_evsel__intval(evsel, sample, "ret");
			
 
				+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_TX,
			
 
				+				 sample->time, ret);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_enter_rx(struct timechart *tchart,
			
 
				+		 struct perf_evsel *evsel,
			
 
				+		 struct perf_sample *sample)
			
 
				+{
			
 
				+	long fd = perf_evsel__intval(evsel, sample, "fd");
			
 
				+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_RX,
			
 
				+				   sample->time, fd);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_exit_rx(struct timechart *tchart,
			
 
				+		struct perf_evsel *evsel,
			
 
				+		struct perf_sample *sample)
			
 
				+{
			
 
				+	long ret = perf_evsel__intval(evsel, sample, "ret");
			
 
				+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_RX,
			
 
				+				 sample->time, ret);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_enter_poll(struct timechart *tchart,
			
 
				+		   struct perf_evsel *evsel,
			
 
				+		   struct perf_sample *sample)
			
 
				+{
			
 
				+	long fd = perf_evsel__intval(evsel, sample, "fd");
			
 
				+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_POLL,
			
 
				+				   sample->time, fd);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+process_exit_poll(struct timechart *tchart,
			
 
				+		  struct perf_evsel *evsel,
			
 
				+		  struct perf_sample *sample)
			
 
				+{
			
 
				+	long ret = perf_evsel__intval(evsel, sample, "ret");
			
 
				+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_POLL,
			
 
				+				 sample->time, ret);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Sort the pid datastructure
			
 
				  */
			
@@ -852,6 +1128,121 @@ static void draw_cpu_usage(struct timechart *tchart)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static void draw_io_bars(struct timechart *tchart)
			
 
				+{
			
 
				+	const char *suf;
			
 
				+	double bytes;
			
 
				+	char comm[256];
			
 
				+	struct per_pid *p;
			
 
				+	struct per_pidcomm *c;
			
 
				+	struct io_sample *sample;
			
 
				+	int Y = 1;
			
 
				+
			
 
				+	p = tchart->all_data;
			
 
				+	while (p) {
			
 
				+		c = p->all;
			
 
				+		while (c) {
			
 
				+			if (!c->display) {
			
 
				+				c->Y = 0;
			
 
				+				c = c->next;
			
 
				+				continue;
			
 
				+			}
			
 
				+
			
 
				+			svg_box(Y, c->start_time, c->end_time, "process3");
			
 
				+			sample = c->io_samples;
			
 
				+			for (sample = c->io_samples; sample; sample = sample->next) {
			
 
				+				double h = (double)sample->bytes / c->max_bytes;
			
 
				+
			
 
				+				if (tchart->skip_eagain &&
			
 
				+				    sample->err == -EAGAIN)
			
 
				+					continue;
			
 
				+
			
 
				+				if (sample->err)
			
 
				+					h = 1;
			
 
				+
			
 
				+				if (sample->type == IOTYPE_SYNC)
			
 
				+					svg_fbox(Y,
			
 
				+						sample->start_time,
			
 
				+						sample->end_time,
			
 
				+						1,
			
 
				+						sample->err ? "error" : "sync",
			
 
				+						sample->fd,
			
 
				+						sample->err,
			
 
				+						sample->merges);
			
 
				+				else if (sample->type == IOTYPE_POLL)
			
 
				+					svg_fbox(Y,
			
 
				+						sample->start_time,
			
 
				+						sample->end_time,
			
 
				+						1,
			
 
				+						sample->err ? "error" : "poll",
			
 
				+						sample->fd,
			
 
				+						sample->err,
			
 
				+						sample->merges);
			
 
				+				else if (sample->type == IOTYPE_READ)
			
 
				+					svg_ubox(Y,
			
 
				+						sample->start_time,
			
 
				+						sample->end_time,
			
 
				+						h,
			
 
				+						sample->err ? "error" : "disk",
			
 
				+						sample->fd,
			
 
				+						sample->err,
			
 
				+						sample->merges);
			
 
				+				else if (sample->type == IOTYPE_WRITE)
			
 
				+					svg_lbox(Y,
			
 
				+						sample->start_time,
			
 
				+						sample->end_time,
			
 
				+						h,
			
 
				+						sample->err ? "error" : "disk",
			
 
				+						sample->fd,
			
 
				+						sample->err,
			
 
				+						sample->merges);
			
 
				+				else if (sample->type == IOTYPE_RX)
			
 
				+					svg_ubox(Y,
			
 
				+						sample->start_time,
			
 
				+						sample->end_time,
			
 
				+						h,
			
 
				+						sample->err ? "error" : "net",
			
 
				+						sample->fd,
			
 
				+						sample->err,
			
 
				+						sample->merges);
			
 
				+				else if (sample->type == IOTYPE_TX)
			
 
				+					svg_lbox(Y,
			
 
				+						sample->start_time,
			
 
				+						sample->end_time,
			
 
				+						h,
			
 
				+						sample->err ? "error" : "net",
			
 
				+						sample->fd,
			
 
				+						sample->err,
			
 
				+						sample->merges);
			
 
				+			}
			
 
				+
			
 
				+			suf = "";
			
 
				+			bytes = c->total_bytes;
			
 
				+			if (bytes > 1024) {
			
 
				+				bytes = bytes / 1024;
			
 
				+				suf = "K";
			
 
				+			}
			
 
				+			if (bytes > 1024) {
			
 
				+				bytes = bytes / 1024;
			
 
				+				suf = "M";
			
 
				+			}
			
 
				+			if (bytes > 1024) {
			
 
				+				bytes = bytes / 1024;
			
 
				+				suf = "G";
			
 
				+			}
			
 
				+
			
 
				+
			
 
				+			sprintf(comm, "%s:%i (%3.1f %sbytes)", c->comm ?: "", p->pid, bytes, suf);
			
 
				+			svg_text(Y, c->start_time, comm);
			
 
				+
			
 
				+			c->Y = Y;
			
 
				+			Y++;
			
 
				+			c = c->next;
			
 
				+		}
			
 
				+		p = p->next;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 static void draw_process_bars(struct timechart *tchart)
			
 
				 {
			
 
				 	struct per_pid *p;
			
@@ -987,9 +1378,6 @@ static int determine_display_tasks(struct timechart *tchart, u64 threshold)
 
				 	struct per_pidcomm *c;
			
 
				 	int count = 0;
			
 
				 
			
 
				-	if (process_filter)
			
 
				-		return determine_display_tasks_filtered(tchart);
			
 
				-
			
 
				 	p = tchart->all_data;
			
 
				 	while (p) {
			
 
				 		p->display = 0;
			
@@ -1025,15 +1413,46 @@ static int determine_display_tasks(struct timechart *tchart, u64 threshold)
 
				 	return count;
			
 
				 }
			
 
				 
			
 
				+static int determine_display_io_tasks(struct timechart *timechart, u64 threshold)
			
 
				+{
			
 
				+	struct per_pid *p;
			
 
				+	struct per_pidcomm *c;
			
 
				+	int count = 0;
			
 
				+
			
 
				+	p = timechart->all_data;
			
 
				+	while (p) {
			
 
				+		/* no exit marker, task kept running to the end */
			
 
				+		if (p->end_time == 0)
			
 
				+			p->end_time = timechart->last_time;
			
 
				 
			
 
				+		c = p->all;
			
 
				 
			
 
				+		while (c) {
			
 
				+			c->display = 0;
			
 
				+
			
 
				+			if (c->total_bytes >= threshold) {
			
 
				+				c->display = 1;
			
 
				+				count++;
			
 
				+			}
			
 
				+
			
 
				+			if (c->end_time == 0)
			
 
				+				c->end_time = timechart->last_time;
			
 
				+
			
 
				+			c = c->next;
			
 
				+		}
			
 
				+		p = p->next;
			
 
				+	}
			
 
				+	return count;
			
 
				+}
			
 
				+
			
 
				+#define BYTES_THRESH (1 * 1024 * 1024)
			
 
				 #define TIME_THRESH 10000000
			
 
				 
			
 
				 static void write_svg_file(struct timechart *tchart, const char *filename)
			
 
				 {
			
 
				 	u64 i;
			
 
				 	int count;
			
 
				-	int thresh = TIME_THRESH;
			
 
				+	int thresh = tchart->io_events ? BYTES_THRESH : TIME_THRESH;
			
 
				 
			
 
				 	if (tchart->power_only)
			
 
				 		tchart->proc_num = 0;
			
@@ -1041,28 +1460,43 @@ static void write_svg_file(struct timechart *tchart, const char *filename)
 
				 	/* We'd like to show at least proc_num tasks;
			
 
				 	 * be less picky if we have fewer */
			
 
				 	do {
			
 
				-		count = determine_display_tasks(tchart, thresh);
			
 
				+		if (process_filter)
			
 
				+			count = determine_display_tasks_filtered(tchart);
			
 
				+		else if (tchart->io_events)
			
 
				+			count = determine_display_io_tasks(tchart, thresh);
			
 
				+		else
			
 
				+			count = determine_display_tasks(tchart, thresh);
			
 
				 		thresh /= 10;
			
 
				 	} while (!process_filter && thresh && count < tchart->proc_num);
			
 
				 
			
 
				 	if (!tchart->proc_num)
			
 
				 		count = 0;
			
 
				 
			
 
				-	open_svg(filename, tchart->numcpus, count, tchart->first_time, tchart->last_time);
			
 
				+	if (tchart->io_events) {
			
 
				+		open_svg(filename, 0, count, tchart->first_time, tchart->last_time);
			
 
				 
			
 
				-	svg_time_grid();
			
 
				-	svg_legenda();
			
 
				+		svg_time_grid(0.5);
			
 
				+		svg_io_legenda();
			
 
				+
			
 
				+		draw_io_bars(tchart);
			
 
				+	} else {
			
 
				+		open_svg(filename, tchart->numcpus, count, tchart->first_time, tchart->last_time);
			
 
				 
			
 
				-	for (i = 0; i < tchart->numcpus; i++)
			
 
				-		svg_cpu_box(i, tchart->max_freq, tchart->turbo_frequency);
			
 
				+		svg_time_grid(0);
			
 
				 
			
 
				-	draw_cpu_usage(tchart);
			
 
				-	if (tchart->proc_num)
			
 
				-		draw_process_bars(tchart);
			
 
				-	if (!tchart->tasks_only)
			
 
				-		draw_c_p_states(tchart);
			
 
				-	if (tchart->proc_num)
			
 
				-		draw_wakeups(tchart);
			
 
				+		svg_legenda();
			
 
				+
			
 
				+		for (i = 0; i < tchart->numcpus; i++)
			
 
				+			svg_cpu_box(i, tchart->max_freq, tchart->turbo_frequency);
			
 
				+
			
 
				+		draw_cpu_usage(tchart);
			
 
				+		if (tchart->proc_num)
			
 
				+			draw_process_bars(tchart);
			
 
				+		if (!tchart->tasks_only)
			
 
				+			draw_c_p_states(tchart);
			
 
				+		if (tchart->proc_num)
			
 
				+			draw_wakeups(tchart);
			
 
				+	}
			
 
				 
			
 
				 	svg_close();
			
 
				 }
			
@@ -1110,6 +1544,56 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name)
 
				 		{ "power:power_end",		process_sample_power_end },
			
 
				 		{ "power:power_frequency",	process_sample_power_frequency },
			
 
				 #endif
			
 
				+
			
 
				+		{ "syscalls:sys_enter_read",		process_enter_read },
			
 
				+		{ "syscalls:sys_enter_pread64",		process_enter_read },
			
 
				+		{ "syscalls:sys_enter_readv",		process_enter_read },
			
 
				+		{ "syscalls:sys_enter_preadv",		process_enter_read },
			
 
				+		{ "syscalls:sys_enter_write",		process_enter_write },
			
 
				+		{ "syscalls:sys_enter_pwrite64",	process_enter_write },
			
 
				+		{ "syscalls:sys_enter_writev",		process_enter_write },
			
 
				+		{ "syscalls:sys_enter_pwritev",		process_enter_write },
			
 
				+		{ "syscalls:sys_enter_sync",		process_enter_sync },
			
 
				+		{ "syscalls:sys_enter_sync_file_range",	process_enter_sync },
			
 
				+		{ "syscalls:sys_enter_fsync",		process_enter_sync },
			
 
				+		{ "syscalls:sys_enter_msync",		process_enter_sync },
			
 
				+		{ "syscalls:sys_enter_recvfrom",	process_enter_rx },
			
 
				+		{ "syscalls:sys_enter_recvmmsg",	process_enter_rx },
			
 
				+		{ "syscalls:sys_enter_recvmsg",		process_enter_rx },
			
 
				+		{ "syscalls:sys_enter_sendto",		process_enter_tx },
			
 
				+		{ "syscalls:sys_enter_sendmsg",		process_enter_tx },
			
 
				+		{ "syscalls:sys_enter_sendmmsg",	process_enter_tx },
			
 
				+		{ "syscalls:sys_enter_epoll_pwait",	process_enter_poll },
			
 
				+		{ "syscalls:sys_enter_epoll_wait",	process_enter_poll },
			
 
				+		{ "syscalls:sys_enter_poll",		process_enter_poll },
			
 
				+		{ "syscalls:sys_enter_ppoll",		process_enter_poll },
			
 
				+		{ "syscalls:sys_enter_pselect6",	process_enter_poll },
			
 
				+		{ "syscalls:sys_enter_select",		process_enter_poll },
			
 
				+
			
 
				+		{ "syscalls:sys_exit_read",		process_exit_read },
			
 
				+		{ "syscalls:sys_exit_pread64",		process_exit_read },
			
 
				+		{ "syscalls:sys_exit_readv",		process_exit_read },
			
 
				+		{ "syscalls:sys_exit_preadv",		process_exit_read },
			
 
				+		{ "syscalls:sys_exit_write",		process_exit_write },
			
 
				+		{ "syscalls:sys_exit_pwrite64",		process_exit_write },
			
 
				+		{ "syscalls:sys_exit_writev",		process_exit_write },
			
 
				+		{ "syscalls:sys_exit_pwritev",		process_exit_write },
			
 
				+		{ "syscalls:sys_exit_sync",		process_exit_sync },
			
 
				+		{ "syscalls:sys_exit_sync_file_range",	process_exit_sync },
			
 
				+		{ "syscalls:sys_exit_fsync",		process_exit_sync },
			
 
				+		{ "syscalls:sys_exit_msync",		process_exit_sync },
			
 
				+		{ "syscalls:sys_exit_recvfrom",		process_exit_rx },
			
 
				+		{ "syscalls:sys_exit_recvmmsg",		process_exit_rx },
			
 
				+		{ "syscalls:sys_exit_recvmsg",		process_exit_rx },
			
 
				+		{ "syscalls:sys_exit_sendto",		process_exit_tx },
			
 
				+		{ "syscalls:sys_exit_sendmsg",		process_exit_tx },
			
 
				+		{ "syscalls:sys_exit_sendmmsg",		process_exit_tx },
			
 
				+		{ "syscalls:sys_exit_epoll_pwait",	process_exit_poll },
			
 
				+		{ "syscalls:sys_exit_epoll_wait",	process_exit_poll },
			
 
				+		{ "syscalls:sys_exit_poll",		process_exit_poll },
			
 
				+		{ "syscalls:sys_exit_ppoll",		process_exit_poll },
			
 
				+		{ "syscalls:sys_exit_pselect6",		process_exit_poll },
			
 
				+		{ "syscalls:sys_exit_select",		process_exit_poll },
			
 
				 	};
			
 
				 	struct perf_data_file file = {
			
 
				 		.path = input_name,
			
@@ -1154,6 +1638,139 @@ out_delete:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static int timechart__io_record(int argc, const char **argv)
			
 
				+{
			
 
				+	unsigned int rec_argc, i;
			
 
				+	const char **rec_argv;
			
 
				+	const char **p;
			
 
				+	char *filter = NULL;
			
 
				+
			
 
				+	const char * const common_args[] = {
			
 
				+		"record", "-a", "-R", "-c", "1",
			
 
				+	};
			
 
				+	unsigned int common_args_nr = ARRAY_SIZE(common_args);
			
 
				+
			
 
				+	const char * const disk_events[] = {
			
 
				+		"syscalls:sys_enter_read",
			
 
				+		"syscalls:sys_enter_pread64",
			
 
				+		"syscalls:sys_enter_readv",
			
 
				+		"syscalls:sys_enter_preadv",
			
 
				+		"syscalls:sys_enter_write",
			
 
				+		"syscalls:sys_enter_pwrite64",
			
 
				+		"syscalls:sys_enter_writev",
			
 
				+		"syscalls:sys_enter_pwritev",
			
 
				+		"syscalls:sys_enter_sync",
			
 
				+		"syscalls:sys_enter_sync_file_range",
			
 
				+		"syscalls:sys_enter_fsync",
			
 
				+		"syscalls:sys_enter_msync",
			
 
				+
			
 
				+		"syscalls:sys_exit_read",
			
 
				+		"syscalls:sys_exit_pread64",
			
 
				+		"syscalls:sys_exit_readv",
			
 
				+		"syscalls:sys_exit_preadv",
			
 
				+		"syscalls:sys_exit_write",
			
 
				+		"syscalls:sys_exit_pwrite64",
			
 
				+		"syscalls:sys_exit_writev",
			
 
				+		"syscalls:sys_exit_pwritev",
			
 
				+		"syscalls:sys_exit_sync",
			
 
				+		"syscalls:sys_exit_sync_file_range",
			
 
				+		"syscalls:sys_exit_fsync",
			
 
				+		"syscalls:sys_exit_msync",
			
 
				+	};
			
 
				+	unsigned int disk_events_nr = ARRAY_SIZE(disk_events);
			
 
				+
			
 
				+	const char * const net_events[] = {
			
 
				+		"syscalls:sys_enter_recvfrom",
			
 
				+		"syscalls:sys_enter_recvmmsg",
			
 
				+		"syscalls:sys_enter_recvmsg",
			
 
				+		"syscalls:sys_enter_sendto",
			
 
				+		"syscalls:sys_enter_sendmsg",
			
 
				+		"syscalls:sys_enter_sendmmsg",
			
 
				+
			
 
				+		"syscalls:sys_exit_recvfrom",
			
 
				+		"syscalls:sys_exit_recvmmsg",
			
 
				+		"syscalls:sys_exit_recvmsg",
			
 
				+		"syscalls:sys_exit_sendto",
			
 
				+		"syscalls:sys_exit_sendmsg",
			
 
				+		"syscalls:sys_exit_sendmmsg",
			
 
				+	};
			
 
				+	unsigned int net_events_nr = ARRAY_SIZE(net_events);
			
 
				+
			
 
				+	const char * const poll_events[] = {
			
 
				+		"syscalls:sys_enter_epoll_pwait",
			
 
				+		"syscalls:sys_enter_epoll_wait",
			
 
				+		"syscalls:sys_enter_poll",
			
 
				+		"syscalls:sys_enter_ppoll",
			
 
				+		"syscalls:sys_enter_pselect6",
			
 
				+		"syscalls:sys_enter_select",
			
 
				+
			
 
				+		"syscalls:sys_exit_epoll_pwait",
			
 
				+		"syscalls:sys_exit_epoll_wait",
			
 
				+		"syscalls:sys_exit_poll",
			
 
				+		"syscalls:sys_exit_ppoll",
			
 
				+		"syscalls:sys_exit_pselect6",
			
 
				+		"syscalls:sys_exit_select",
			
 
				+	};
			
 
				+	unsigned int poll_events_nr = ARRAY_SIZE(poll_events);
			
 
				+
			
 
				+	rec_argc = common_args_nr +
			
 
				+		disk_events_nr * 4 +
			
 
				+		net_events_nr * 4 +
			
 
				+		poll_events_nr * 4 +
			
 
				+		argc;
			
 
				+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
			
 
				+
			
 
				+	if (rec_argv == NULL)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	if (asprintf(&filter, "common_pid != %d", getpid()) < 0)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	p = rec_argv;
			
 
				+	for (i = 0; i < common_args_nr; i++)
			
 
				+		*p++ = strdup(common_args[i]);
			
 
				+
			
 
				+	for (i = 0; i < disk_events_nr; i++) {
			
 
				+		if (!is_valid_tracepoint(disk_events[i])) {
			
 
				+			rec_argc -= 4;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		*p++ = "-e";
			
 
				+		*p++ = strdup(disk_events[i]);
			
 
				+		*p++ = "--filter";
			
 
				+		*p++ = filter;
			
 
				+	}
			
 
				+	for (i = 0; i < net_events_nr; i++) {
			
 
				+		if (!is_valid_tracepoint(net_events[i])) {
			
 
				+			rec_argc -= 4;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		*p++ = "-e";
			
 
				+		*p++ = strdup(net_events[i]);
			
 
				+		*p++ = "--filter";
			
 
				+		*p++ = filter;
			
 
				+	}
			
 
				+	for (i = 0; i < poll_events_nr; i++) {
			
 
				+		if (!is_valid_tracepoint(poll_events[i])) {
			
 
				+			rec_argc -= 4;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		*p++ = "-e";
			
 
				+		*p++ = strdup(poll_events[i]);
			
 
				+		*p++ = "--filter";
			
 
				+		*p++ = filter;
			
 
				+	}
			
 
				+
			
 
				+	for (i = 0; i < (unsigned int)argc; i++)
			
 
				+		*p++ = argv[i];
			
 
				+
			
 
				+	return cmd_record(rec_argc, rec_argv, NULL);
			
 
				+}
			
 
				+
			
 
				+
			
 
				 static int timechart__record(struct timechart *tchart, int argc, const char **argv)
			
 
				 {
			
 
				 	unsigned int rec_argc, i, j;
			
@@ -1270,6 +1887,30 @@ parse_highlight(const struct option *opt __maybe_unused, const char *arg,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static int
			
 
				+parse_time(const struct option *opt, const char *arg, int __maybe_unused unset)
			
 
				+{
			
 
				+	char unit = 'n';
			
 
				+	u64 *value = opt->value;
			
 
				+
			
 
				+	if (sscanf(arg, "%" PRIu64 "%cs", value, &unit) > 0) {
			
 
				+		switch (unit) {
			
 
				+		case 'm':
			
 
				+			*value *= 1000000;
			
 
				+			break;
			
 
				+		case 'u':
			
 
				+			*value *= 1000;
			
 
				+			break;
			
 
				+		case 'n':
			
 
				+			break;
			
 
				+		default:
			
 
				+			return -1;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 int cmd_timechart(int argc, const char **argv,
			
 
				 		  const char *prefix __maybe_unused)
			
 
				 {
			
@@ -1282,6 +1923,8 @@ int cmd_timechart(int argc, const char **argv,
 
				 			.ordered_samples = true,
			
 
				 		},
			
 
				 		.proc_num = 15,
			
 
				+		.min_time = 1000000,
			
 
				+		.merge_dist = 1000,
			
 
				 	};
			
 
				 	const char *output_name = "output.svg";
			
 
				 	const struct option timechart_options[] = {
			
@@ -1303,6 +1946,14 @@ int cmd_timechart(int argc, const char **argv,
 
				 		    "min. number of tasks to print"),
			
 
				 	OPT_BOOLEAN('t', "topology", &tchart.topology,
			
 
				 		    "sort CPUs according to topology"),
			
 
				+	OPT_BOOLEAN(0, "io-skip-eagain", &tchart.skip_eagain,
			
 
				+		    "skip EAGAIN errors"),
			
 
				+	OPT_CALLBACK(0, "io-min-time", &tchart.min_time, "time",
			
 
				+		     "all IO faster than min-time will visually appear longer",
			
 
				+		     parse_time),
			
 
				+	OPT_CALLBACK(0, "io-merge-dist", &tchart.merge_dist, "time",
			
 
				+		     "merge events that are merge-dist us apart",
			
 
				+		     parse_time),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				 	const char * const timechart_usage[] = {
			
@@ -1314,6 +1965,8 @@ int cmd_timechart(int argc, const char **argv,
 
				 	OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"),
			
 
				 	OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only,
			
 
				 		    "output processes data only"),
			
 
				+	OPT_BOOLEAN('I', "io-only", &tchart.io_only,
			
 
				+		    "record only IO data"),
			
 
				 	OPT_BOOLEAN('g', "callchain", &tchart.with_backtrace, "record callchain"),
			
 
				 	OPT_END()
			
 
				 	};
			
@@ -1340,7 +1993,10 @@ int cmd_timechart(int argc, const char **argv,
 
				 			return -1;
			
 
				 		}
			
 
				 
			
 
				-		return timechart__record(&tchart, argc, argv);
			
 
				+		if (tchart.io_only)
			
 
				+			return timechart__io_record(argc, argv);
			
 
				+		else
			
 
				+			return timechart__record(&tchart, argc, argv);
			
 
				 	} else if (argc)
			
 
				 		usage_with_options(timechart_usage, timechart_options);
			
 
				 
			
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1108,6 +1108,7 @@ struct syscall {
 
				 	struct event_format *tp_format;
			
 
				 	const char	    *name;
			
 
				 	bool		    filtered;
			
 
				+	bool		    is_exit;
			
 
				 	struct syscall_fmt  *fmt;
			
 
				 	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
			
 
				 	void		    **arg_parm;
			
@@ -1132,6 +1133,7 @@ struct thread_trace {
 
				 	u64		  exit_time;
			
 
				 	bool		  entry_pending;
			
 
				 	unsigned long	  nr_events;
			
 
				+	unsigned long	  pfmaj, pfmin;
			
 
				 	char		  *entry_str;
			
 
				 	double		  runtime_ms;
			
 
				 	struct {
			
@@ -1177,6 +1179,9 @@ fail:
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				+#define TRACE_PFMAJ		(1 << 0)
			
 
				+#define TRACE_PFMIN		(1 << 1)
			
 
				+
			
 
				 struct trace {
			
 
				 	struct perf_tool	tool;
			
 
				 	struct {
			
@@ -1211,6 +1216,8 @@ struct trace {
 
				 	bool			summary_only;
			
 
				 	bool			show_comm;
			
 
				 	bool			show_tool_stats;
			
 
				+	bool			trace_syscalls;
			
 
				+	int			trace_pgfaults;
			
 
				 };
			
 
				 
			
 
				 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
			
@@ -1276,11 +1283,11 @@ static const char *thread__fd_path(struct thread *thread, int fd,
 
				 	if (fd < 0)
			
 
				 		return NULL;
			
 
				 
			
 
				-	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
			
 
				+	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
			
 
				 		if (!trace->live)
			
 
				 			return NULL;
			
 
				 		++trace->stats.proc_getname;
			
 
				-		if (thread__read_fd_path(thread, fd)) {
			
 
				+		if (thread__read_fd_path(thread, fd))
			
 
				 			return NULL;
			
 
				 	}
			
 
				 
			
@@ -1473,6 +1480,8 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 
				 	if (sc->tp_format == NULL)
			
 
				 		return -1;
			
 
				 
			
 
				+	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
			
 
				+
			
 
				 	return syscall__set_arg_fmts(sc);
			
 
				 }
			
 
				 
			
@@ -1535,6 +1544,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 
				 }
			
 
				 
			
 
				 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
			
 
				+				  union perf_event *event,
			
 
				 				  struct perf_sample *sample);
			
 
				 
			
 
				 static struct syscall *trace__syscall_info(struct trace *trace,
			
@@ -1607,6 +1617,7 @@ static void thread__update_stats(struct thread_trace *ttrace,
 
				 }
			
 
				 
			
 
				 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			
 
				+			    union perf_event *event __maybe_unused,
			
 
				 			    struct perf_sample *sample)
			
 
				 {
			
 
				 	char *msg;
			
@@ -1629,7 +1640,6 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
				 		return -1;
			
 
				 
			
 
				 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
			
 
				-	ttrace = thread->priv;
			
 
				 
			
 
				 	if (ttrace->entry_str == NULL) {
			
 
				 		ttrace->entry_str = malloc(1024);
			
@@ -1644,7 +1654,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
				 	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
			
 
				 					   args, trace, thread);
			
 
				 
			
 
				-	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
			
 
				+	if (sc->is_exit) {
			
 
				 		if (!trace->duration_filter && !trace->summary_only) {
			
 
				 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			
 
				 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
			
@@ -1656,6 +1666,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
				 }
			
 
				 
			
 
				 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			
 
				+			   union perf_event *event __maybe_unused,
			
 
				 			   struct perf_sample *sample)
			
 
				 {
			
 
				 	int ret;
			
@@ -1687,8 +1698,6 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
				 		++trace->stats.vfs_getname;
			
 
				 	}
			
 
				 
			
 
				-	ttrace = thread->priv;
			
 
				-
			
 
				 	ttrace->exit_time = sample->time;
			
 
				 
			
 
				 	if (ttrace->entry_time) {
			
@@ -1735,6 +1744,7 @@ out:
 
				 }
			
 
				 
			
 
				 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			
 
				+			      union perf_event *event __maybe_unused,
			
 
				 			      struct perf_sample *sample)
			
 
				 {
			
 
				 	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
			
@@ -1742,6 +1752,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
 
				 }
			
 
				 
			
 
				 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
			
 
				+				     union perf_event *event __maybe_unused,
			
 
				 				     struct perf_sample *sample)
			
 
				 {
			
 
				         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
			
@@ -1768,6 +1779,80 @@ out_dump:
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static void print_location(FILE *f, struct perf_sample *sample,
			
 
				+			   struct addr_location *al,
			
 
				+			   bool print_dso, bool print_sym)
			
 
				+{
			
 
				+
			
 
				+	if ((verbose || print_dso) && al->map)
			
 
				+		fprintf(f, "%s@", al->map->dso->long_name);
			
 
				+
			
 
				+	if ((verbose || print_sym) && al->sym)
			
 
				+		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
			
 
				+			al->addr - al->sym->start);
			
 
				+	else if (al->map)
			
 
				+		fprintf(f, "0x%" PRIx64, al->addr);
			
 
				+	else
			
 
				+		fprintf(f, "0x%" PRIx64, sample->addr);
			
 
				+}
			
 
				+
			
 
				+static int trace__pgfault(struct trace *trace,
			
 
				+			  struct perf_evsel *evsel,
			
 
				+			  union perf_event *event,
			
 
				+			  struct perf_sample *sample)
			
 
				+{
			
 
				+	struct thread *thread;
			
 
				+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
			
 
				+	struct addr_location al;
			
 
				+	char map_type = 'd';
			
 
				+	struct thread_trace *ttrace;
			
 
				+
			
 
				+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
			
 
				+	ttrace = thread__trace(thread, trace->output);
			
 
				+	if (ttrace == NULL)
			
 
				+		return -1;
			
 
				+
			
 
				+	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
			
 
				+		ttrace->pfmaj++;
			
 
				+	else
			
 
				+		ttrace->pfmin++;
			
 
				+
			
 
				+	if (trace->summary_only)
			
 
				+		return 0;
			
 
				+
			
 
				+	thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
			
 
				+			      sample->ip, &al);
			
 
				+
			
 
				+	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
			
 
				+
			
 
				+	fprintf(trace->output, "%sfault [",
			
 
				+		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
			
 
				+		"maj" : "min");
			
 
				+
			
 
				+	print_location(trace->output, sample, &al, false, true);
			
 
				+
			
 
				+	fprintf(trace->output, "] => ");
			
 
				+
			
 
				+	thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
			
 
				+				   sample->addr, &al);
			
 
				+
			
 
				+	if (!al.map) {
			
 
				+		thread__find_addr_location(thread, trace->host, cpumode,
			
 
				+					   MAP__FUNCTION, sample->addr, &al);
			
 
				+
			
 
				+		if (al.map)
			
 
				+			map_type = 'x';
			
 
				+		else
			
 
				+			map_type = '?';
			
 
				+	}
			
 
				+
			
 
				+	print_location(trace->output, sample, &al, true, false);
			
 
				+
			
 
				+	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
			
 
				 {
			
 
				 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
			
@@ -1781,7 +1866,7 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 
				 }
			
 
				 
			
 
				 static int trace__process_sample(struct perf_tool *tool,
			
 
				-				 union perf_event *event __maybe_unused,
			
 
				+				 union perf_event *event,
			
 
				 				 struct perf_sample *sample,
			
 
				 				 struct perf_evsel *evsel,
			
 
				 				 struct machine *machine __maybe_unused)
			
@@ -1799,7 +1884,7 @@ static int trace__process_sample(struct perf_tool *tool,
 
				 
			
 
				 	if (handler) {
			
 
				 		++trace->nr_events;
			
 
				-		handler(trace, evsel, sample);
			
 
				+		handler(trace, evsel, event, sample);
			
 
				 	}
			
 
				 
			
 
				 	return err;
			
@@ -1826,7 +1911,7 @@ static int parse_target_str(struct trace *trace)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int trace__record(int argc, const char **argv)
			
 
				+static int trace__record(struct trace *trace, int argc, const char **argv)
			
 
				 {
			
 
				 	unsigned int rec_argc, i, j;
			
 
				 	const char **rec_argv;
			
@@ -1835,34 +1920,54 @@ static int trace__record(int argc, const char **argv)
 
				 		"-R",
			
 
				 		"-m", "1024",
			
 
				 		"-c", "1",
			
 
				-		"-e",
			
 
				 	};
			
 
				 
			
 
				+	const char * const sc_args[] = { "-e", };
			
 
				+	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
			
 
				+	const char * const majpf_args[] = { "-e", "major-faults" };
			
 
				+	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
			
 
				+	const char * const minpf_args[] = { "-e", "minor-faults" };
			
 
				+	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
			
 
				+
			
 
				 	/* +1 is for the event string below */
			
 
				-	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
			
 
				+	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
			
 
				+		majpf_args_nr + minpf_args_nr + argc;
			
 
				 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
			
 
				 
			
 
				 	if (rec_argv == NULL)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				+	j = 0;
			
 
				 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
			
 
				-		rec_argv[i] = record_args[i];
			
 
				-
			
 
				-	/* event string may be different for older kernels - e.g., RHEL6 */
			
 
				-	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
			
 
				-		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
			
 
				-	else if (is_valid_tracepoint("syscalls:sys_enter"))
			
 
				-		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
			
 
				-	else {
			
 
				-		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
			
 
				-		return -1;
			
 
				+		rec_argv[j++] = record_args[i];
			
 
				+
			
 
				+	if (trace->trace_syscalls) {
			
 
				+		for (i = 0; i < sc_args_nr; i++)
			
 
				+			rec_argv[j++] = sc_args[i];
			
 
				+
			
 
				+		/* event string may be different for older kernels - e.g., RHEL6 */
			
 
				+		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
			
 
				+			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
			
 
				+		else if (is_valid_tracepoint("syscalls:sys_enter"))
			
 
				+			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
			
 
				+		else {
			
 
				+			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
			
 
				+			return -1;
			
 
				+		}
			
 
				 	}
			
 
				-	i++;
			
 
				 
			
 
				-	for (j = 0; j < (unsigned int)argc; j++, i++)
			
 
				-		rec_argv[i] = argv[j];
			
 
				+	if (trace->trace_pgfaults & TRACE_PFMAJ)
			
 
				+		for (i = 0; i < majpf_args_nr; i++)
			
 
				+			rec_argv[j++] = majpf_args[i];
			
 
				+
			
 
				+	if (trace->trace_pgfaults & TRACE_PFMIN)
			
 
				+		for (i = 0; i < minpf_args_nr; i++)
			
 
				+			rec_argv[j++] = minpf_args[i];
			
 
				+
			
 
				+	for (i = 0; i < (unsigned int)argc; i++)
			
 
				+		rec_argv[j++] = argv[i];
			
 
				 
			
 
				-	return cmd_record(i, rec_argv, NULL);
			
 
				+	return cmd_record(j, rec_argv, NULL);
			
 
				 }
			
 
				 
			
 
				 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
			
@@ -1882,6 +1987,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 
				 	perf_evlist__add(evlist, evsel);
			
 
				 }
			
 
				 
			
 
				+static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
			
 
				+				    u64 config)
			
 
				+{
			
 
				+	struct perf_evsel *evsel;
			
 
				+	struct perf_event_attr attr = {
			
 
				+		.type = PERF_TYPE_SOFTWARE,
			
 
				+		.mmap_data = 1,
			
 
				+	};
			
 
				+
			
 
				+	attr.config = config;
			
 
				+	attr.sample_period = 1;
			
 
				+
			
 
				+	event_attr_init(&attr);
			
 
				+
			
 
				+	evsel = perf_evsel__new(&attr);
			
 
				+	if (!evsel)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	evsel->handler = trace__pgfault;
			
 
				+	perf_evlist__add(evlist, evsel);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int trace__run(struct trace *trace, int argc, const char **argv)
			
 
				 {
			
 
				 	struct perf_evlist *evlist = perf_evlist__new();
			
@@ -1897,10 +2026,21 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
			
 
				+	if (trace->trace_syscalls &&
			
 
				+	    perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
			
 
				+					   trace__sys_exit))
			
 
				 		goto out_error_tp;
			
 
				 
			
 
				-	perf_evlist__add_vfs_getname(evlist);
			
 
				+	if (trace->trace_syscalls)
			
 
				+		perf_evlist__add_vfs_getname(evlist);
			
 
				+
			
 
				+	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
			
 
				+	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
			
 
				+		goto out_error_tp;
			
 
				+
			
 
				+	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
			
 
				+	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
			
 
				+		goto out_error_tp;
			
 
				 
			
 
				 	if (trace->sched &&
			
 
				 		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
			
@@ -1982,7 +2122,8 @@ again:
 
				 				goto next_event;
			
 
				 			}
			
 
				 
			
 
				-			if (sample.raw_data == NULL) {
			
 
				+			if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
			
 
				+			    sample.raw_data == NULL) {
			
 
				 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
			
 
				 				       perf_evsel__name(evsel), sample.tid,
			
 
				 				       sample.cpu, sample.raw_size);
			
@@ -1990,7 +2131,7 @@ again:
 
				 			}
			
 
				 
			
 
				 			handler = evsel->handler;
			
 
				-			handler(trace, evsel, &sample);
			
 
				+			handler(trace, evsel, event, &sample);
			
 
				 next_event:
			
 
				 			perf_evlist__mmap_consume(evlist, i);
			
 
				 
			
@@ -2093,13 +2234,10 @@ static int trace__replay(struct trace *trace)
 
				 	if (evsel == NULL)
			
 
				 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
			
 
				 							     "syscalls:sys_enter");
			
 
				-	if (evsel == NULL) {
			
 
				-		pr_err("Data file does not have raw_syscalls:sys_enter event\n");
			
 
				-		goto out;
			
 
				-	}
			
 
				 
			
 
				-	if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
			
 
				-	    perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
			
 
				+	if (evsel &&
			
 
				+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
			
 
				+	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
			
 
				 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
			
 
				 		goto out;
			
 
				 	}
			
@@ -2109,15 +2247,19 @@ static int trace__replay(struct trace *trace)
 
				 	if (evsel == NULL)
			
 
				 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
			
 
				 							     "syscalls:sys_exit");
			
 
				-	if (evsel == NULL) {
			
 
				-		pr_err("Data file does not have raw_syscalls:sys_exit event\n");
			
 
				+	if (evsel &&
			
 
				+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
			
 
				+	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
			
 
				+		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
			
 
				-	    perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
			
 
				-		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
			
 
				-		goto out;
			
 
				+	evlist__for_each(session->evlist, evsel) {
			
 
				+		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
			
 
				+		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
			
 
				+		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
			
 
				+		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			
 
				+			evsel->handler = trace__pgfault;
			
 
				 	}
			
 
				 
			
 
				 	err = parse_target_str(trace);
			
@@ -2217,6 +2359,10 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv)
 
				 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
			
 
				 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
			
 
				 	printed += fprintf(fp, "%.1f%%", ratio);
			
 
				+	if (ttrace->pfmaj)
			
 
				+		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
			
 
				+	if (ttrace->pfmin)
			
 
				+		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
			
 
				 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
			
 
				 	printed += thread__dump_stats(ttrace, trace, fp);
			
 
				 
			
@@ -2264,6 +2410,23 @@ static int trace__open_output(struct trace *trace, const char *filename)
 
				 	return trace->output == NULL ? -errno : 0;
			
 
				 }
			
 
				 
			
 
				+static int parse_pagefaults(const struct option *opt, const char *str,
			
 
				+			    int unset __maybe_unused)
			
 
				+{
			
 
				+	int *trace_pgfaults = opt->value;
			
 
				+
			
 
				+	if (strcmp(str, "all") == 0)
			
 
				+		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
			
 
				+	else if (strcmp(str, "maj") == 0)
			
 
				+		*trace_pgfaults |= TRACE_PFMAJ;
			
 
				+	else if (strcmp(str, "min") == 0)
			
 
				+		*trace_pgfaults |= TRACE_PFMIN;
			
 
				+	else
			
 
				+		return -1;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
			
 
				 {
			
 
				 	const char * const trace_usage[] = {
			
@@ -2293,6 +2456,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		},
			
 
				 		.output = stdout,
			
 
				 		.show_comm = true,
			
 
				+		.trace_syscalls = true,
			
 
				 	};
			
 
				 	const char *output_name = NULL;
			
 
				 	const char *ev_qualifier_str = NULL;
			
@@ -2330,20 +2494,34 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		    "Show only syscall summary with statistics"),
			
 
				 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
			
 
				 		    "Show all syscalls and summary with statistics"),
			
 
				+	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
			
 
				+		     "Trace pagefaults", parse_pagefaults, "maj"),
			
 
				+	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				 	int err;
			
 
				 	char bf[BUFSIZ];
			
 
				 
			
 
				-	if ((argc > 1) && (strcmp(argv[1], "record") == 0))
			
 
				-		return trace__record(argc-2, &argv[2]);
			
 
				+	argc = parse_options(argc, argv, trace_options, trace_usage,
			
 
				+			     PARSE_OPT_STOP_AT_NON_OPTION);
			
 
				 
			
 
				-	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
			
 
				+	if (trace.trace_pgfaults) {
			
 
				+		trace.opts.sample_address = true;
			
 
				+		trace.opts.sample_time = true;
			
 
				+	}
			
 
				+
			
 
				+	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
			
 
				+		return trace__record(&trace, argc-1, &argv[1]);
			
 
				 
			
 
				 	/* summary_only implies summary option, but don't overwrite summary if set */
			
 
				 	if (trace.summary_only)
			
 
				 		trace.summary = trace.summary_only;
			
 
				 
			
 
				+	if (!trace.trace_syscalls && !trace.trace_pgfaults) {
			
 
				+		pr_err("Please specify something to trace.\n");
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				 	if (output_name != NULL) {
			
 
				 		err = trace__open_output(&trace, output_name);
			
 
				 		if (err < 0) {
			
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -48,6 +48,10 @@ ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
 
				   NO_LIBDW_DWARF_UNWIND := 1
			
 
				 endif
			
 
				 
			
 
				+ifeq ($(ARCH),powerpc)
			
 
				+  CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
			
 
				+endif
			
 
				+
			
 
				 ifeq ($(LIBUNWIND_LIBS),)
			
 
				   NO_LIBUNWIND := 1
			
 
				 else
			
@@ -160,6 +164,7 @@ CORE_FEATURE_TESTS =			\
 
				 	backtrace			\
			
 
				 	dwarf				\
			
 
				 	fortify-source			\
			
 
				+	sync-compare-and-swap		\
			
 
				 	glibc				\
			
 
				 	gtk2				\
			
 
				 	gtk2-infobar			\
			
@@ -195,6 +200,7 @@ LIB_FEATURE_TESTS =			\
 
				 VF_FEATURE_TESTS =			\
			
 
				 	backtrace			\
			
 
				 	fortify-source			\
			
 
				+	sync-compare-and-swap		\
			
 
				 	gtk2-infobar			\
			
 
				 	libelf-getphdrnum		\
			
 
				 	libelf-mmap			\
			
@@ -268,6 +274,10 @@ CFLAGS += -I$(LIB_INCLUDE)
 
				 
			
 
				 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
			
 
				 
			
 
				+ifeq ($(feature-sync-compare-and-swap), 1)
			
 
				+  CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT
			
 
				+endif
			
 
				+
			
 
				 ifndef NO_BIONIC
			
 
				   $(call feature_check,bionic)
			
 
				   ifeq ($(feature-bionic), 1)
			
@@ -590,6 +600,10 @@ ifndef NO_LIBNUMA
 
				   endif
			
 
				 endif
			
 
				 
			
 
				+ifdef HAVE_KVM_STAT_SUPPORT
			
 
				+    CFLAGS += -DHAVE_KVM_STAT_SUPPORT
			
 
				+endif
			
 
				+
			
 
				 # Among the variables below, these:
			
 
				 #   perfexecdir
			
 
				 #   template_dir
			
--- a/tools/perf/config/feature-checks/Makefile
+++ b/tools/perf/config/feature-checks/Makefile
@@ -5,6 +5,7 @@ FILES=					\
 
				 	test-bionic.bin			\
			
 
				 	test-dwarf.bin			\
			
 
				 	test-fortify-source.bin		\
			
 
				+	test-sync-compare-and-swap.bin	\
			
 
				 	test-glibc.bin			\
			
 
				 	test-gtk2.bin			\
			
 
				 	test-gtk2-infobar.bin		\
			
@@ -141,6 +142,9 @@ test-timerfd.bin:
 
				 test-libdw-dwarf-unwind.bin:
			
 
				 	$(BUILD)
			
 
				 
			
 
				+test-sync-compare-and-swap.bin:
			
 
				+	$(BUILD) -Werror
			
 
				+
			
 
				 -include *.d
			
 
				 
			
 
				 ###############################
			
--- a/tools/perf/config/feature-checks/test-all.c
+++ b/tools/perf/config/feature-checks/test-all.c
@@ -89,6 +89,10 @@
 
				 # include "test-libdw-dwarf-unwind.c"
			
 
				 #undef main
			
 
				 
			
 
				+#define main main_test_sync_compare_and_swap
			
 
				+# include "test-sync-compare-and-swap.c"
			
 
				+#undef main
			
 
				+
			
 
				 int main(int argc, char *argv[])
			
 
				 {
			
 
				 	main_test_libpython();
			
@@ -111,6 +115,7 @@ int main(int argc, char *argv[])
 
				 	main_test_timerfd();
			
 
				 	main_test_stackprotector_all();
			
 
				 	main_test_libdw_dwarf_unwind();
			
 
				+	main_test_sync_compare_and_swap(argc, argv);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/tools/perf/config/feature-checks/test-sync-compare-and-swap.c
+++ b/tools/perf/config/feature-checks/test-sync-compare-and-swap.c
@@ -0,0 +1,20 @@
 
				+/*
			
 
				+ * Feature probe: exercises the __sync_val_compare_and_swap() and
			
 
				+ * __sync_bool_compare_and_swap() builtins on a 64-bit operand.
			
 
				+ */
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+volatile uint64_t x;
			
 
				+
			
 
				+int main(int argc, char *argv[])
			
 
				+{
			
 
				+	uint64_t old, new = argc;
			
 
				+
			
 
				+	/* unused; a void cast replaces the self-assignment some compilers warn about */
			
 
				+	(void)argv;
			
 
				+	do {
			
 
				+		/* CAS 0 -> 0 never changes x; it just returns the current value */
			
 
				+		old = __sync_val_compare_and_swap(&x, 0, 0);
			
 
				+	} while (!__sync_bool_compare_and_swap(&x, old, new));
			
 
				+	return old == new;
			
 
				+}
			
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -54,6 +54,7 @@
 
				 #define mb()		asm volatile("bcr 15,0" ::: "memory")
			
 
				 #define wmb()		asm volatile("bcr 15,0" ::: "memory")
			
 
				 #define rmb()		asm volatile("bcr 15,0" ::: "memory")
			
 
				+#define CPUINFO_PROC	"vendor_id"
			
 
				 #endif
			
 
				 
			
 
				 #ifdef __sh__
			
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -13,11 +13,12 @@
 
				 #include "util/quote.h"
			
 
				 #include "util/run-command.h"
			
 
				 #include "util/parse-events.h"
			
 
				+#include "util/debug.h"
			
 
				 #include <api/fs/debugfs.h>
			
 
				 #include <pthread.h>
			
 
				 
			
 
				 const char perf_usage_string[] =
			
 
				-	"perf [--version] [--help] COMMAND [ARGS]";
			
 
				+	"perf [--version] [--help] [OPTIONS] COMMAND [ARGS]";
			
 
				 
			
 
				 const char perf_more_info_string[] =
			
 
				 	"See 'perf help COMMAND' for more information on a specific command.";
			
@@ -212,6 +213,16 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 
				 				printf("%s ", p->cmd);
			
 
				 			}
			
 
				 			exit(0);
			
 
				+		} else if (!strcmp(cmd, "--debug")) {
			
 
				+			if (*argc < 2) {
			
 
				+				fprintf(stderr, "No variable specified for --debug.\n");
			
 
				+				usage(perf_usage_string);
			
 
				+			}
			
 
				+			if (perf_debug_option((*argv)[1]))
			
 
				+				usage(perf_usage_string);
			
 
				+
			
 
				+			(*argv)++;
			
 
				+			(*argc)--;
			
 
				 		} else {
			
 
				 			fprintf(stderr, "Unknown option: %s\n", cmd);
			
 
				 			usage(perf_usage_string);
			
--- a/tools/perf/scripts/perl/bin/failed-syscalls-record
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-record
@@ -1,2 +1,5 @@
 
				 #!/bin/bash
			
 
				-perf record -e raw_syscalls:sys_exit $@
			
 
				+# Record raw_syscalls:sys_exit; if that perf record fails, retry with
			
 
				+# syscalls:sys_exit.  "$@" is quoted so every argument stays one word.
			
 
				+(perf record -e raw_syscalls:sys_exit "$@" || \
			
 
				+ perf record -e syscalls:sys_exit "$@") 2> /dev/null
			
--- a/tools/perf/scripts/perl/failed-syscalls.pl
+++ b/tools/perf/scripts/perl/failed-syscalls.pl
@@ -26,6 +26,13 @@ sub raw_syscalls::sys_exit
 
				 	}
			
 
				 }
			
 
				 
			
 
				+# Handler for syscalls:sys_exit events: hands the untouched argument
			
 
				+# list to the raw_syscalls::sys_exit handler.
			
 
				+sub syscalls::sys_exit
			
 
				+{
			
 
				+	return raw_syscalls::sys_exit(@_);
			
 
				+}
			
 
				+
			
 
				 sub trace_end
			
 
				 {
			
 
				     printf("\nfailed syscalls by comm:\n\n");
			
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -107,12 +107,13 @@ def taskState(state):
 
				 
			
 
				 class EventHeaders:
			
 
				 	def __init__(self, common_cpu, common_secs, common_nsecs,
			
 
				-		     common_pid, common_comm):
			
 
				+		     common_pid, common_comm, common_callchain):
			
 
				 		self.cpu = common_cpu
			
 
				 		self.secs = common_secs
			
 
				 		self.nsecs = common_nsecs
			
 
				 		self.pid = common_pid
			
 
				 		self.comm = common_comm
			
 
				+		self.callchain = common_callchain
			
 
				 
			
 
				 	def ts(self):
			
 
				 		return (self.secs * (10 ** 9)) + self.nsecs
			
--- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
@@ -1,2 +1,5 @@
 
				 #!/bin/bash
			
 
				-perf record -e raw_syscalls:sys_exit $@
			
 
				+# Record raw_syscalls:sys_exit; if that perf record fails, retry with
			
 
				+# syscalls:sys_exit.  "$@" is quoted so every argument stays one word.
			
 
				+(perf record -e raw_syscalls:sys_exit "$@" || \
			
 
				+ perf record -e syscalls:sys_exit "$@") 2> /dev/null
			
--- a/tools/perf/scripts/python/bin/sctop-record
+++ b/tools/perf/scripts/python/bin/sctop-record
@@ -1,2 +1,5 @@
 
				 #!/bin/bash
			
 
				-perf record -e raw_syscalls:sys_enter $@
			
 
				+# Record raw_syscalls:sys_enter; if that perf record fails, retry with
			
 
				+# syscalls:sys_enter.  "$@" is quoted so every argument stays one word.
			
 
				+(perf record -e raw_syscalls:sys_enter "$@" || \
			
 
				+ perf record -e syscalls:sys_enter "$@") 2> /dev/null
			
--- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
@@ -1,2 +1,5 @@
 
				 #!/bin/bash
			
 
				-perf record -e raw_syscalls:sys_enter $@
			
 
				+# Record raw_syscalls:sys_enter; if that perf record fails, retry with
			
 
				+# syscalls:sys_enter.  "$@" is quoted so every argument stays one word.
			
 
				+(perf record -e raw_syscalls:sys_enter "$@" || \
			
 
				+ perf record -e syscalls:sys_enter "$@") 2> /dev/null
			
--- a/tools/perf/scripts/python/bin/syscall-counts-record
+++ b/tools/perf/scripts/python/bin/syscall-counts-record
@@ -1,2 +1,5 @@
 
				 #!/bin/bash
			
 
				-perf record -e raw_syscalls:sys_enter $@
			
 
				+# Record raw_syscalls:sys_enter; if that perf record fails, retry with
			
 
				+# syscalls:sys_enter.  "$@" is quoted so every argument stays one word.
			
 
				+(perf record -e raw_syscalls:sys_enter "$@" || \
			
 
				+ perf record -e syscalls:sys_enter "$@") 2> /dev/null
			
--- a/tools/perf/scripts/python/check-perf-trace.py
+++ b/tools/perf/scripts/python/check-perf-trace.py
@@ -27,7 +27,7 @@ def trace_end():
 
				 
			
 
				 def irq__softirq_entry(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	vec):
			
 
				+	common_callchain, vec):
			
 
				 		print_header(event_name, common_cpu, common_secs, common_nsecs,
			
 
				 			common_pid, common_comm)
			
 
				 
			
@@ -38,7 +38,7 @@ def irq__softirq_entry(event_name, context, common_cpu,
 
				 
			
 
				 def kmem__kmalloc(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	call_site, ptr, bytes_req, bytes_alloc,
			
 
				+	common_callchain, call_site, ptr, bytes_req, bytes_alloc,
			
 
				 	gfp_flags):
			
 
				 		print_header(event_name, common_cpu, common_secs, common_nsecs,
			
 
				 			common_pid, common_comm)
			
--- a/tools/perf/scripts/python/failed-syscalls-by-pid.py
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -39,7 +39,7 @@ def trace_end():
 
				 
			
 
				 def raw_syscalls__sys_exit(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	id, ret):
			
 
				+	common_callchain, id, ret):
			
 
				 	if (for_comm and common_comm != for_comm) or \
			
 
				 	   (for_pid  and common_pid  != for_pid ):
			
 
				 		return
			
@@ -50,6 +50,14 @@ def raw_syscalls__sys_exit(event_name, context, common_cpu,
 
				 		except TypeError:
			
 
				 			syscalls[common_comm][common_pid][id][ret] = 1
			
 
				 
			
 
				+# Handler for syscalls:sys_exit.  The parameter list has to mirror
			
 
				+# raw_syscalls__sys_exit (common_callchain included) because locals()
			
 
				+# is forwarded as keyword arguments; bind no other local before the call.
			
 
				+def syscalls__sys_exit(event_name, context, common_cpu,
			
 
				+	common_secs, common_nsecs, common_pid, common_comm,
			
 
				+	common_callchain, id, ret):
			
 
				+	raw_syscalls__sys_exit(**locals())
			
 
				+
			
 
				 def print_error_totals():
			
 
				     if for_comm is not None:
			
 
				 	    print "\nsyscall errors for %s:\n\n" % (for_comm),
			
--- a/tools/perf/scripts/python/futex-contention.py
+++ b/tools/perf/scripts/python/futex-contention.py
@@ -21,7 +21,7 @@ thread_blocktime = {}
 
				 lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
			
 
				 process_names = {} # long-lived pid-to-execname mapping
			
 
				 
			
 
				-def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm,
			
 
				+def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
			
 
				 			      nr, uaddr, op, val, utime, uaddr2, val3):
			
 
				 	cmd = op & FUTEX_CMD_MASK
			
 
				 	if cmd != FUTEX_WAIT:
			
@@ -31,7 +31,7 @@ def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm,
 
				 	thread_thislock[tid] = uaddr
			
 
				 	thread_blocktime[tid] = nsecs(s, ns)
			
 
				 
			
 
				-def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm,
			
 
				+def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
			
 
				 			     nr, ret):
			
 
				 	if thread_blocktime.has_key(tid):
			
 
				 		elapsed = nsecs(s, ns) - thread_blocktime[tid]
			
--- a/tools/perf/scripts/python/net_dropmonitor.py
+++ b/tools/perf/scripts/python/net_dropmonitor.py
@@ -66,7 +66,7 @@ def trace_end():
 
				 	print_drop_table()
			
 
				 
			
 
				 # called from perf, when it finds a correspoinding event
			
 
				-def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm,
			
 
				+def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
			
 
				 		   skbaddr, location, protocol):
			
 
				 	slocation = str(location)
			
 
				 	try:
			
--- a/tools/perf/scripts/python/netdev-times.py
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -224,75 +224,75 @@ def trace_end():
 
				 			(len(rx_skb_list), of_count_rx_skb_list)
			
 
				 
			
 
				 # called from perf, when it finds a correspoinding event
			
 
				-def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, vec):
			
 
				+def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
			
 
				 	if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
			
 
				 		return
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, vec):
			
 
				+def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
			
 
				 	if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
			
 
				 		return
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, vec):
			
 
				+def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
			
 
				 	if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
			
 
				 		return
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				 def irq__irq_handler_entry(name, context, cpu, sec, nsec, pid, comm,
			
 
				-			irq, irq_name):
			
 
				+			callchain, irq, irq_name):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
			
 
				 			irq, irq_name)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, irq, ret):
			
 
				+def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, callchain, irq, ret):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, napi, dev_name):
			
 
				+def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi, dev_name):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
			
 
				 			napi, dev_name)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr,
			
 
				+def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr,
			
 
				 			skblen, dev_name):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
			
 
				 			skbaddr, skblen, dev_name)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, skbaddr,
			
 
				+def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr,
			
 
				 			skblen, dev_name):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
			
 
				 			skbaddr, skblen, dev_name)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm,
			
 
				+def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm, callchain,
			
 
				 			skbaddr, skblen, dev_name):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
			
 
				 			skbaddr, skblen, dev_name)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm,
			
 
				+def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm, callchain,
			
 
				 			skbaddr, skblen, rc, dev_name):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
			
 
				 			skbaddr, skblen, rc ,dev_name)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm,
			
 
				+def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
			
 
				 			skbaddr, protocol, location):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
			
 
				 			skbaddr, protocol, location)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr):
			
 
				+def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
			
 
				 			skbaddr)
			
 
				 	all_event_list.append(event_info)
			
 
				 
			
 
				-def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm,
			
 
				+def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm, callchain,
			
 
				 	skbaddr, skblen):
			
 
				 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
			
 
				 			skbaddr, skblen)
			
--- a/tools/perf/scripts/python/sched-migration.py
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -369,93 +369,92 @@ def trace_end():
 
				 
			
 
				 def sched__sched_stat_runtime(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, runtime, vruntime):
			
 
				+	common_callchain, comm, pid, runtime, vruntime):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_stat_iowait(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, delay):
			
 
				+	common_callchain, comm, pid, delay):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_stat_sleep(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, delay):
			
 
				+	common_callchain, comm, pid, delay):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_stat_wait(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, delay):
			
 
				+	common_callchain, comm, pid, delay):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_process_fork(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	parent_comm, parent_pid, child_comm, child_pid):
			
 
				+	common_callchain, parent_comm, parent_pid, child_comm, child_pid):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_process_wait(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, prio):
			
 
				+	common_callchain, comm, pid, prio):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_process_exit(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, prio):
			
 
				+	common_callchain, comm, pid, prio):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_process_free(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, prio):
			
 
				+	common_callchain, comm, pid, prio):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_migrate_task(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, prio, orig_cpu,
			
 
				+	common_callchain, comm, pid, prio, orig_cpu,
			
 
				 	dest_cpu):
			
 
				 	headers = EventHeaders(common_cpu, common_secs, common_nsecs,
			
 
				-				common_pid, common_comm)
			
 
				+				common_pid, common_comm, common_callchain)
			
 
				 	parser.migrate(headers, pid, prio, orig_cpu, dest_cpu)
			
 
				 
			
 
				 def sched__sched_switch(event_name, context, common_cpu,
			
 
				-	common_secs, common_nsecs, common_pid, common_comm,
			
 
				+	common_secs, common_nsecs, common_pid, common_comm, common_callchain,
			
 
				 	prev_comm, prev_pid, prev_prio, prev_state,
			
 
				 	next_comm, next_pid, next_prio):
			
 
				 
			
 
				 	headers = EventHeaders(common_cpu, common_secs, common_nsecs,
			
 
				-				common_pid, common_comm)
			
 
				+				common_pid, common_comm, common_callchain)
			
 
				 	parser.sched_switch(headers, prev_comm, prev_pid, prev_prio, prev_state,
			
 
				 			 next_comm, next_pid, next_prio)
			
 
				 
			
 
				 def sched__sched_wakeup_new(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, prio, success,
			
 
				+	common_callchain, comm, pid, prio, success,
			
 
				 	target_cpu):
			
 
				 	headers = EventHeaders(common_cpu, common_secs, common_nsecs,
			
 
				-				common_pid, common_comm)
			
 
				+				common_pid, common_comm, common_callchain)
			
 
				 	parser.wake_up(headers, comm, pid, success, target_cpu, 1)
			
 
				 
			
 
				 def sched__sched_wakeup(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, prio, success,
			
 
				+	common_callchain, comm, pid, prio, success,
			
 
				 	target_cpu):
			
 
				 	headers = EventHeaders(common_cpu, common_secs, common_nsecs,
			
 
				-				common_pid, common_comm)
			
 
				+				common_pid, common_comm, common_callchain)
			
 
				 	parser.wake_up(headers, comm, pid, success, target_cpu, 0)
			
 
				 
			
 
				 def sched__sched_wait_task(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid, prio):
			
 
				+	common_callchain, comm, pid, prio):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_kthread_stop_ret(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	ret):
			
 
				+	common_callchain, ret):
			
 
				 	pass
			
 
				 
			
 
				 def sched__sched_kthread_stop(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	comm, pid):
			
 
				+	common_callchain, comm, pid):
			
 
				 	pass
			
 
				 
			
 
				-def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
			
 
				-		common_pid, common_comm):
			
 
				+def trace_unhandled(event_name, context, event_fields_dict):
			
 
				 	pass
			
--- a/tools/perf/scripts/python/sctop.py
+++ b/tools/perf/scripts/python/sctop.py
@@ -44,7 +44,7 @@ def trace_begin():
 
				 
			
 
				 def raw_syscalls__sys_enter(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	id, args):
			
 
				+	common_callchain, id, args):
			
 
				 	if for_comm is not None:
			
 
				 		if common_comm != for_comm:
			
 
				 			return
			
@@ -53,6 +53,14 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu,
 
				 	except TypeError:
			
 
				 		syscalls[id] = 1
			
 
				 
			
 
				+# Handler for syscalls:sys_enter.  The parameter list has to mirror
			
 
				+# raw_syscalls__sys_enter (common_callchain included) because locals()
			
 
				+# is forwarded as keyword arguments; bind no other local before the call.
			
 
				+def syscalls__sys_enter(event_name, context, common_cpu,
			
 
				+	common_secs, common_nsecs, common_pid, common_comm,
			
 
				+	common_callchain, id, args):
			
 
				+	raw_syscalls__sys_enter(**locals())
			
 
				+
			
 
				 def print_syscall_totals(interval):
			
 
				 	while 1:
			
 
				 		clear_term()
			
--- a/tools/perf/scripts/python/syscall-counts-by-pid.py
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -38,7 +38,7 @@ def trace_end():
 
				 
			
 
				 def raw_syscalls__sys_enter(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	id, args):
			
 
				+	common_callchain, id, args):
			
 
				 
			
 
				 	if (for_comm and common_comm != for_comm) or \
			
 
				 	   (for_pid  and common_pid  != for_pid ):
			
@@ -48,6 +48,14 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu,
 
				 	except TypeError:
			
 
				 		syscalls[common_comm][common_pid][id] = 1
			
 
				 
			
 
				+# Handler for syscalls:sys_enter.  The parameter list has to mirror
			
 
				+# raw_syscalls__sys_enter (common_callchain included) because locals()
			
 
				+# is forwarded as keyword arguments; bind no other local before the call.
			
 
				+def syscalls__sys_enter(event_name, context, common_cpu,
			
 
				+	common_secs, common_nsecs, common_pid, common_comm,
			
 
				+	common_callchain, id, args):
			
 
				+	raw_syscalls__sys_enter(**locals())
			
 
				+
			
 
				 def print_syscall_totals():
			
 
				     if for_comm is not None:
			
 
				 	    print "\nsyscall events for %s:\n\n" % (for_comm),
			
--- a/tools/perf/scripts/python/syscall-counts.py
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -35,7 +35,7 @@ def trace_end():
 
				 
			
 
				 def raw_syscalls__sys_enter(event_name, context, common_cpu,
			
 
				 	common_secs, common_nsecs, common_pid, common_comm,
			
 
				-	id, args):
			
 
				+	common_callchain, id, args):
			
 
				 	if for_comm is not None:
			
 
				 		if common_comm != for_comm:
			
 
				 			return
			
@@ -44,6 +44,14 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu,
 
				 	except TypeError:
			
 
				 		syscalls[id] = 1
			
 
				 
			
 
				+# Handler for syscalls:sys_enter.  The parameter list has to mirror
			
 
				+# raw_syscalls__sys_enter (common_callchain included) because locals()
			
 
				+# is forwarded as keyword arguments; bind no other local before the call.
			
 
				+def syscalls__sys_enter(event_name, context, common_cpu,
			
 
				+	common_secs, common_nsecs, common_pid, common_comm,
			
 
				+	common_callchain, id, args):
			
 
				+	raw_syscalls__sys_enter(**locals())
			
 
				+
			
 
				 def print_syscall_totals():
			
 
				     if for_comm is not None:
			
 
				 	    print "\nsyscall events for %s:\n\n" % (for_comm),
			
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -1,7 +1,8 @@
 
				 [event]
			
 
				 fd=1
			
 
				 group_fd=-1
			
 
				-flags=0
			
 
				+# 0 or PERF_FLAG_FD_CLOEXEC flag
			
 
				+flags=0|8
			
 
				 cpu=*
			
 
				 type=0|1
			
 
				 size=96
			
--- a/tools/perf/tests/attr/base-stat
+++ b/tools/perf/tests/attr/base-stat
@@ -1,7 +1,8 @@
 
				 [event]
			
 
				 fd=1
			
 
				 group_fd=-1
			
 
				-flags=0
			
 
				+# 0 or PERF_FLAG_FD_CLOEXEC flag
			
 
				+flags=0|8
			
 
				 cpu=*
			
 
				 type=0
			
 
				 size=96
			
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -25,6 +25,7 @@
 
				 #include "tests.h"
			
 
				 #include "debug.h"
			
 
				 #include "perf.h"
			
 
				+#include "cloexec.h"
			
 
				 
			
 
				 static int fd1;
			
 
				 static int fd2;
			
@@ -78,7 +79,8 @@ static int bp_event(void *fn, int setup_signal)
 
				 	pe.exclude_kernel = 1;
			
 
				 	pe.exclude_hv = 1;
			
 
				 
			
 
				-	fd = sys_perf_event_open(&pe, 0, -1, -1, 0);
			
 
				+	fd = sys_perf_event_open(&pe, 0, -1, -1,
			
 
				+				 perf_event_open_cloexec_flag());
			
 
				 	if (fd < 0) {
			
 
				 		pr_debug("failed opening event %llx\n", pe.config);
			
 
				 		return TEST_FAIL;
			
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -24,6 +24,7 @@
 
				 #include "tests.h"
			
 
				 #include "debug.h"
			
 
				 #include "perf.h"
			
 
				+#include "cloexec.h"
			
 
				 
			
 
				 static int overflows;
			
 
				 
			
@@ -91,7 +92,8 @@ int test__bp_signal_overflow(void)
 
				 	pe.exclude_kernel = 1;
			
 
				 	pe.exclude_hv = 1;
			
 
				 
			
 
				-	fd = sys_perf_event_open(&pe, 0, -1, -1, 0);
			
 
				+	fd = sys_perf_event_open(&pe, 0, -1, -1,
			
 
				+				 perf_event_open_cloexec_flag());
			
 
				 	if (fd < 0) {
			
 
				 		pr_debug("failed opening event %llx\n", pe.config);
			
 
				 		return TEST_FAIL;
			
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -10,6 +10,7 @@
 
				 #include "machine.h"
			
 
				 #include "symbol.h"
			
 
				 #include "tests.h"
			
 
				+#include "debug.h"
			
 
				 
			
 
				 static char *test_file(int size)
			
 
				 {
			
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -2,6 +2,7 @@
 
				 #include "evsel.h"
			
 
				 #include "parse-events.h"
			
 
				 #include "tests.h"
			
 
				+#include "debug.h"
			
 
				 
			
 
				 static int perf_evsel__roundtrip_cache_name_test(void)
			
 
				 {
			
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -1,6 +1,7 @@
 
				 #include <traceevent/event-parse.h>
			
 
				 #include "evsel.h"
			
 
				 #include "tests.h"
			
 
				+#include "debug.h"
			
 
				 
			
 
				 static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
			
 
				 				  int size, bool should_be_signed)
			
--- a/tools/perf/tests/open-syscall-tp-fields.c
+++ b/tools/perf/tests/open-syscall-tp-fields.c
@@ -3,6 +3,7 @@
 
				 #include "evsel.h"
			
 
				 #include "thread_map.h"
			
 
				 #include "tests.h"
			
 
				+#include "debug.h"
			
 
				 
			
 
				 int test__syscall_open_tp_fields(void)
			
 
				 {
			
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -5,6 +5,7 @@
 
				 #include <api/fs/fs.h>
			
 
				 #include <api/fs/debugfs.h>
			
 
				 #include "tests.h"
			
 
				+#include "debug.h"
			
 
				 #include <linux/hw_breakpoint.h>
			
 
				 
			
 
				 #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
			
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -7,6 +7,7 @@
 
				 #include "evlist.h"
			
 
				 #include "header.h"
			
 
				 #include "util.h"
			
 
				+#include "debug.h"
			
 
				 
			
 
				 static int process_event(struct perf_evlist **pevlist, union perf_event *event)
			
 
				 {
			
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -8,10 +8,9 @@
 
				 #include "evsel.h"
			
 
				 #include "thread_map.h"
			
 
				 #include "cpumap.h"
			
 
				+#include "tsc.h"
			
 
				 #include "tests.h"
			
 
				 
			
 
				-#include "../arch/x86/util/tsc.h"
			
 
				-
			
 
				 #define CHECK__(x) {				\
			
 
				 	while ((x) < 0) {			\
			
 
				 		pr_debug(#x " failed!\n");	\
			
@@ -26,15 +25,6 @@
 
				 	}					\
			
 
				 }
			
 
				 
			
 
				-static u64 rdtsc(void)
			
 
				-{
			
 
				-	unsigned int low, high;
			
 
				-
			
 
				-	asm volatile("rdtsc" : "=a" (low), "=d" (high));
			
 
				-
			
 
				-	return low | ((u64)high) << 32;
			
 
				-}
			
 
				-
			
 
				 /**
			
 
				  * test__perf_time_to_tsc - test converting perf time to TSC.
			
 
				  *
			
--- a/tools/perf/tests/rdpmc.c
+++ b/tools/perf/tests/rdpmc.c
@@ -6,6 +6,7 @@
 
				 #include "perf.h"
			
 
				 #include "debug.h"
			
 
				 #include "tests.h"
			
 
				+#include "cloexec.h"
			
 
				 
			
 
				 #if defined(__x86_64__) || defined(__i386__)
			
 
				 
			
@@ -104,7 +105,8 @@ static int __test__rdpmc(void)
 
				 	sa.sa_sigaction = segfault_handler;
			
 
				 	sigaction(SIGSEGV, &sa, NULL);
			
 
				 
			
 
				-	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
			
 
				+	fd = sys_perf_event_open(&attr, 0, -1, -1,
			
 
				+				 perf_event_open_cloexec_flag());
			
 
				 	if (fd < 0) {
			
 
				 		pr_err("Error: sys_perf_event_open() syscall returned "
			
 
				 		       "with %d (%s)\n", fd, strerror(errno));
			
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -4,6 +4,7 @@
 
				 #include "util.h"
			
 
				 #include "event.h"
			
 
				 #include "evsel.h"
			
 
				+#include "debug.h"
			
 
				 
			
 
				 #include "tests.h"
			
 
				 
			
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -2,6 +2,7 @@
 
				 #include "machine.h"
			
 
				 #include "thread.h"
			
 
				 #include "map.h"
			
 
				+#include "debug.h"
			
 
				 
			
 
				 int test__thread_mg_share(void)
			
 
				 {
			
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -150,7 +150,7 @@ unsigned int ui_browser__rb_tree_refresh(struct ui_browser *browser)
 
				 	while (nd != NULL) {
			
 
				 		ui_browser__gotorc(browser, row, 0);
			
 
				 		browser->write(browser, nd, row);
			
 
				-		if (++row == browser->height)
			
 
				+		if (++row == browser->rows)
			
 
				 			break;
			
 
				 		nd = rb_next(nd);
			
 
				 	}
			
@@ -166,7 +166,7 @@ bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row)
 
				 void ui_browser__refresh_dimensions(struct ui_browser *browser)
			
 
				 {
			
 
				 	browser->width = SLtt_Screen_Cols - 1;
			
 
				-	browser->height = SLtt_Screen_Rows - 2;
			
 
				+	browser->height = browser->rows = SLtt_Screen_Rows - 2;
			
 
				 	browser->y = 1;
			
 
				 	browser->x = 0;
			
 
				 }
			
@@ -250,7 +250,10 @@ int ui_browser__show(struct ui_browser *browser, const char *title,
 
				 	int err;
			
 
				 	va_list ap;
			
 
				 
			
 
				-	ui_browser__refresh_dimensions(browser);
			
 
				+	if (browser->refresh_dimensions == NULL)
			
 
				+		browser->refresh_dimensions = ui_browser__refresh_dimensions;
			
 
				+
			
 
				+	browser->refresh_dimensions(browser);
			
 
				 
			
 
				 	pthread_mutex_lock(&ui__lock);
			
 
				 	__ui_browser__show_title(browser, title);
			
@@ -279,7 +282,7 @@ static void ui_browser__scrollbar_set(struct ui_browser *browser)
 
				 {
			
 
				 	int height = browser->height, h = 0, pct = 0,
			
 
				 	    col = browser->width,
			
 
				-	    row = browser->y - 1;
			
 
				+	    row = 0;
			
 
				 
			
 
				 	if (browser->nr_entries > 1) {
			
 
				 		pct = ((browser->index * (browser->height - 1)) /
			
@@ -367,7 +370,7 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs)
 
				 
			
 
				 		if (key == K_RESIZE) {
			
 
				 			ui__refresh_dimensions(false);
			
 
				-			ui_browser__refresh_dimensions(browser);
			
 
				+			browser->refresh_dimensions(browser);
			
 
				 			__ui_browser__show_title(browser, browser->title);
			
 
				 			ui_helpline__puts(browser->helpline);
			
 
				 			continue;
			
@@ -389,7 +392,7 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs)
 
				 			if (browser->index == browser->nr_entries - 1)
			
 
				 				break;
			
 
				 			++browser->index;
			
 
				-			if (browser->index == browser->top_idx + browser->height) {
			
 
				+			if (browser->index == browser->top_idx + browser->rows) {
			
 
				 				++browser->top_idx;
			
 
				 				browser->seek(browser, +1, SEEK_CUR);
			
 
				 			}
			
@@ -405,10 +408,10 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs)
 
				 			break;
			
 
				 		case K_PGDN:
			
 
				 		case ' ':
			
 
				-			if (browser->top_idx + browser->height > browser->nr_entries - 1)
			
 
				+			if (browser->top_idx + browser->rows > browser->nr_entries - 1)
			
 
				 				break;
			
 
				 
			
 
				-			offset = browser->height;
			
 
				+			offset = browser->rows;
			
 
				 			if (browser->index + offset > browser->nr_entries - 1)
			
 
				 				offset = browser->nr_entries - 1 - browser->index;
			
 
				 			browser->index += offset;
			
@@ -419,10 +422,10 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs)
 
				 			if (browser->top_idx == 0)
			
 
				 				break;
			
 
				 
			
 
				-			if (browser->top_idx < browser->height)
			
 
				+			if (browser->top_idx < browser->rows)
			
 
				 				offset = browser->top_idx;
			
 
				 			else
			
 
				-				offset = browser->height;
			
 
				+				offset = browser->rows;
			
 
				 
			
 
				 			browser->index -= offset;
			
 
				 			browser->top_idx -= offset;
			
@@ -432,7 +435,7 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs)
 
				 			ui_browser__reset_index(browser);
			
 
				 			break;
			
 
				 		case K_END:
			
 
				-			offset = browser->height - 1;
			
 
				+			offset = browser->rows - 1;
			
 
				 			if (offset >= browser->nr_entries)
			
 
				 				offset = browser->nr_entries - 1;
			
 
				 
			
@@ -462,7 +465,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *browser)
 
				 		if (!browser->filter || !browser->filter(browser, pos)) {
			
 
				 			ui_browser__gotorc(browser, row, 0);
			
 
				 			browser->write(browser, pos, row);
			
 
				-			if (++row == browser->height)
			
 
				+			if (++row == browser->rows)
			
 
				 				break;
			
 
				 		}
			
 
				 	}
			
@@ -587,7 +590,7 @@ unsigned int ui_browser__argv_refresh(struct ui_browser *browser)
 
				 		if (!browser->filter || !browser->filter(browser, *pos)) {
			
 
				 			ui_browser__gotorc(browser, row, 0);
			
 
				 			browser->write(browser, pos, row);
			
 
				-			if (++row == browser->height)
			
 
				+			if (++row == browser->rows)
			
 
				 				break;
			
 
				 		}
			
 
				 
			
@@ -623,7 +626,7 @@ static void __ui_browser__line_arrow_up(struct ui_browser *browser,
 
				 
			
 
				 	SLsmg_set_char_set(1);
			
 
				 
			
 
				-	if (start < browser->top_idx + browser->height) {
			
 
				+	if (start < browser->top_idx + browser->rows) {
			
 
				 		row = start - browser->top_idx;
			
 
				 		ui_browser__gotorc(browser, row, column);
			
 
				 		SLsmg_write_char(SLSMG_LLCORN_CHAR);
			
@@ -633,7 +636,7 @@ static void __ui_browser__line_arrow_up(struct ui_browser *browser,
 
				 		if (row-- == 0)
			
 
				 			goto out;
			
 
				 	} else
			
 
				-		row = browser->height - 1;
			
 
				+		row = browser->rows - 1;
			
 
				 
			
 
				 	if (end > browser->top_idx)
			
 
				 		end_row = end - browser->top_idx;
			
@@ -675,8 +678,8 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser,
 
				 	} else
			
 
				 		row = 0;
			
 
				 
			
 
				-	if (end >= browser->top_idx + browser->height)
			
 
				-		end_row = browser->height - 1;
			
 
				+	if (end >= browser->top_idx + browser->rows)
			
 
				+		end_row = browser->rows - 1;
			
 
				 	else
			
 
				 		end_row = end - browser->top_idx;
			
 
				 
			
@@ -684,7 +687,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser,
 
				 	SLsmg_draw_vline(end_row - row + 1);
			
 
				 
			
 
				 	ui_browser__gotorc(browser, end_row, column);
			
 
				-	if (end < browser->top_idx + browser->height) {
			
 
				+	if (end < browser->top_idx + browser->rows) {
			
 
				 		SLsmg_write_char(SLSMG_LLCORN_CHAR);
			
 
				 		ui_browser__gotorc(browser, end_row, column + 1);
			
 
				 		SLsmg_write_char(SLSMG_HLINE_CHAR);
			
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -14,11 +14,12 @@
 
				 struct ui_browser {
			
 
				 	u64	      index, top_idx;
			
 
				 	void	      *top, *entries;
			
 
				-	u16	      y, x, width, height;
			
 
				+	u16	      y, x, width, height, rows;
			
 
				 	int	      current_color;
			
 
				 	void	      *priv;
			
 
				 	const char    *title;
			
 
				 	char	      *helpline;
			
 
				+	void 	      (*refresh_dimensions)(struct ui_browser *browser);
			
 
				 	unsigned int  (*refresh)(struct ui_browser *browser);
			
 
				 	void	      (*write)(struct ui_browser *browser, void *entry, int row);
			
 
				 	void	      (*seek)(struct ui_browser *browser, off_t offset, int whence);
			
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -26,6 +26,7 @@ struct hist_browser {
 
				 	struct map_symbol   *selection;
			
 
				 	int		     print_seq;
			
 
				 	bool		     show_dso;
			
 
				+	bool		     show_headers;
			
 
				 	float		     min_pcnt;
			
 
				 	u64		     nr_non_filtered_entries;
			
 
				 	u64		     nr_callchain_rows;
			
@@ -33,8 +34,7 @@ struct hist_browser {
 
				 
			
 
				 extern void hist_browser__init_hpp(void);
			
 
				 
			
 
				-static int hists__browser_title(struct hists *hists, char *bf, size_t size,
			
 
				-				const char *ev_name);
			
 
				+static int hists__browser_title(struct hists *hists, char *bf, size_t size);
			
 
				 static void hist_browser__update_nr_entries(struct hist_browser *hb);
			
 
				 
			
 
				 static struct rb_node *hists__filter_entries(struct rb_node *nd,
			
@@ -57,11 +57,42 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb)
 
				 	return nr_entries + hb->nr_callchain_rows;
			
 
				 }
			
 
				 
			
 
				-static void hist_browser__refresh_dimensions(struct hist_browser *browser)
			
 
				+static void hist_browser__update_rows(struct hist_browser *hb)
			
 
				 {
			
 
				+	struct ui_browser *browser = &hb->b;
			
 
				+	u16 header_offset = hb->show_headers ? 1 : 0, index_row;
			
 
				+
			
 
				+	browser->rows = browser->height - header_offset;
			
 
				+	/*
			
 
				+	 * Verify if we were at the last line and that line isn't
			
 
				+	 * visibe because we now show the header line(s).
			
 
				+	 */
			
 
				+	index_row = browser->index - browser->top_idx;
			
 
				+	if (index_row >= browser->rows)
			
 
				+		browser->index -= index_row - browser->rows + 1;
			
 
				+}
			
 
				+
			
 
				+static void hist_browser__refresh_dimensions(struct ui_browser *browser)
			
 
				+{
			
 
				+	struct hist_browser *hb = container_of(browser, struct hist_browser, b);
			
 
				+
			
 
				 	/* 3 == +/- toggle symbol before actual hist_entry rendering */
			
 
				-	browser->b.width = 3 + (hists__sort_list_width(browser->hists) +
			
 
				-			     sizeof("[k]"));
			
 
				+	browser->width = 3 + (hists__sort_list_width(hb->hists) + sizeof("[k]"));
			
 
				+	/*
			
 
				+ 	 * FIXME: Just keeping existing behaviour, but this really should be
			
 
				+ 	 *	  before updating browser->width, as it will invalidate the
			
 
				+ 	 *	  calculation above. Fix this and the fallout in another
			
 
				+ 	 *	  changeset.
			
 
				+ 	 */
			
 
				+	ui_browser__refresh_dimensions(browser);
			
 
				+	hist_browser__update_rows(hb);
			
 
				+}
			
 
				+
			
 
				+static void hist_browser__gotorc(struct hist_browser *browser, int row, int column)
			
 
				+{
			
 
				+	u16 header_offset = browser->show_headers ? 1 : 0;
			
 
				+
			
 
				+	ui_browser__gotorc(&browser->b, row + header_offset, column);
			
 
				 }
			
 
				 
			
 
				 static void hist_browser__reset(struct hist_browser *browser)
			
@@ -74,7 +105,7 @@ static void hist_browser__reset(struct hist_browser *browser)
 
				 
			
 
				 	hist_browser__update_nr_entries(browser);
			
 
				 	browser->b.nr_entries = hist_browser__nr_entries(browser);
			
 
				-	hist_browser__refresh_dimensions(browser);
			
 
				+	hist_browser__refresh_dimensions(&browser->b);
			
 
				 	ui_browser__reset_index(&browser->b);
			
 
				 }
			
 
				 
			
@@ -346,7 +377,7 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser)
 
				 		"Or reduce the sampling frequency.");
			
 
				 }
			
 
				 
			
 
				-static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
			
 
				+static int hist_browser__run(struct hist_browser *browser,
			
 
				 			     struct hist_browser_timer *hbt)
			
 
				 {
			
 
				 	int key;
			
@@ -356,8 +387,7 @@ static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
 
				 	browser->b.entries = &browser->hists->entries;
			
 
				 	browser->b.nr_entries = hist_browser__nr_entries(browser);
			
 
				 
			
 
				-	hist_browser__refresh_dimensions(browser);
			
 
				-	hists__browser_title(browser->hists, title, sizeof(title), ev_name);
			
 
				+	hists__browser_title(browser->hists, title, sizeof(title));
			
 
				 
			
 
				 	if (ui_browser__show(&browser->b, title,
			
 
				 			     "Press '?' for help on key bindings") < 0)
			
@@ -384,7 +414,7 @@ static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
 
				 				ui_browser__warn_lost_events(&browser->b);
			
 
				 			}
			
 
				 
			
 
				-			hists__browser_title(browser->hists, title, sizeof(title), ev_name);
			
 
				+			hists__browser_title(browser->hists, title, sizeof(title));
			
 
				 			ui_browser__show_title(&browser->b, title);
			
 
				 			continue;
			
 
				 		}
			
@@ -393,10 +423,10 @@ static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
 
				 			struct hist_entry *h = rb_entry(browser->b.top,
			
 
				 							struct hist_entry, rb_node);
			
 
				 			ui_helpline__pop();
			
 
				-			ui_helpline__fpush("%d: nr_ent=(%d,%d), height=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
			
 
				+			ui_helpline__fpush("%d: nr_ent=(%d,%d), rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
			
 
				 					   seq++, browser->b.nr_entries,
			
 
				 					   browser->hists->nr_entries,
			
 
				-					   browser->b.height,
			
 
				+					   browser->b.rows,
			
 
				 					   browser->b.index,
			
 
				 					   browser->b.top_idx,
			
 
				 					   h->row_offset, h->nr_rows);
			
@@ -410,6 +440,10 @@ static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
 
				 			/* Expand the whole world. */
			
 
				 			hist_browser__set_folding(browser, true);
			
 
				 			break;
			
 
				+		case 'H':
			
 
				+			browser->show_headers = !browser->show_headers;
			
 
				+			hist_browser__update_rows(browser);
			
 
				+			break;
			
 
				 		case K_ENTER:
			
 
				 			if (hist_browser__toggle_fold(browser))
			
 
				 				break;
			
@@ -509,13 +543,13 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *browse
 
				 			}
			
 
				 
			
 
				 			ui_browser__set_color(&browser->b, color);
			
 
				-			ui_browser__gotorc(&browser->b, row, 0);
			
 
				+			hist_browser__gotorc(browser, row, 0);
			
 
				 			slsmg_write_nstring(" ", offset + extra_offset);
			
 
				 			slsmg_printf("%c ", folded_sign);
			
 
				 			slsmg_write_nstring(str, width);
			
 
				 			free(alloc_str);
			
 
				 
			
 
				-			if (++row == browser->b.height)
			
 
				+			if (++row == browser->b.rows)
			
 
				 				goto out;
			
 
				 do_next:
			
 
				 			if (folded_sign == '+')
			
@@ -528,7 +562,7 @@ do_next:
 
				 									 new_level, row, row_offset,
			
 
				 									 is_current_entry);
			
 
				 		}
			
 
				-		if (row == browser->b.height)
			
 
				+		if (row == browser->b.rows)
			
 
				 			goto out;
			
 
				 		node = next;
			
 
				 	}
			
@@ -568,13 +602,13 @@ static int hist_browser__show_callchain_node(struct hist_browser *browser,
 
				 
			
 
				 		s = callchain_list__sym_name(chain, bf, sizeof(bf),
			
 
				 					     browser->show_dso);
			
 
				-		ui_browser__gotorc(&browser->b, row, 0);
			
 
				+		hist_browser__gotorc(browser, row, 0);
			
 
				 		ui_browser__set_color(&browser->b, color);
			
 
				 		slsmg_write_nstring(" ", offset);
			
 
				 		slsmg_printf("%c ", folded_sign);
			
 
				 		slsmg_write_nstring(s, width - 2);
			
 
				 
			
 
				-		if (++row == browser->b.height)
			
 
				+		if (++row == browser->b.rows)
			
 
				 			goto out;
			
 
				 	}
			
 
				 
			
@@ -603,7 +637,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
 
				 		row += hist_browser__show_callchain_node(browser, node, level,
			
 
				 							 row, row_offset,
			
 
				 							 is_current_entry);
			
 
				-		if (row == browser->b.height)
			
 
				+		if (row == browser->b.rows)
			
 
				 			break;
			
 
				 	}
			
 
				 
			
@@ -733,7 +767,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 
				 			.ptr		= &arg,
			
 
				 		};
			
 
				 
			
 
				-		ui_browser__gotorc(&browser->b, row, 0);
			
 
				+		hist_browser__gotorc(browser, row, 0);
			
 
				 
			
 
				 		perf_hpp__for_each_format(fmt) {
			
 
				 			if (perf_hpp__should_skip(fmt))
			
@@ -777,7 +811,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 
				 	} else
			
 
				 		--row_offset;
			
 
				 
			
 
				-	if (folded_sign == '-' && row != browser->b.height) {
			
 
				+	if (folded_sign == '-' && row != browser->b.rows) {
			
 
				 		printed += hist_browser__show_callchain(browser, &entry->sorted_chain,
			
 
				 							1, row, &row_offset,
			
 
				 							&current_entry);
			
@@ -788,6 +822,56 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 
				 	return printed;
			
 
				 }
			
 
				 
			
 
				+static int advance_hpp_check(struct perf_hpp *hpp, int inc)
			
 
				+{
			
 
				+	advance_hpp(hpp, inc);
			
 
				+	return hpp->size <= 0;
			
 
				+}
			
 
				+
			
 
				+static int hists__scnprintf_headers(char *buf, size_t size, struct hists *hists)
			
 
				+{
			
 
				+	struct perf_hpp dummy_hpp = {
			
 
				+		.buf    = buf,
			
 
				+		.size   = size,
			
 
				+	};
			
 
				+	struct perf_hpp_fmt *fmt;
			
 
				+	size_t ret = 0;
			
 
				+
			
 
				+	if (symbol_conf.use_callchain) {
			
 
				+		ret = scnprintf(buf, size, "  ");
			
 
				+		if (advance_hpp_check(&dummy_hpp, ret))
			
 
				+			return ret;
			
 
				+	}
			
 
				+
			
 
				+	perf_hpp__for_each_format(fmt) {
			
 
				+		if (perf_hpp__should_skip(fmt))
			
 
				+			continue;
			
 
				+
			
 
				+		/* We need to add the length of the columns header. */
			
 
				+		perf_hpp__reset_width(fmt, hists);
			
 
				+
			
 
				+		ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
			
 
				+		if (advance_hpp_check(&dummy_hpp, ret))
			
 
				+			break;
			
 
				+
			
 
				+		ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
			
 
				+		if (advance_hpp_check(&dummy_hpp, ret))
			
 
				+			break;
			
 
				+	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static void hist_browser__show_headers(struct hist_browser *browser)
			
 
				+{
			
 
				+	char headers[1024];
			
 
				+
			
 
				+	hists__scnprintf_headers(headers, sizeof(headers), browser->hists);
			
 
				+	ui_browser__gotorc(&browser->b, 0, 0);
			
 
				+	ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
			
 
				+	slsmg_write_nstring(headers, browser->b.width + 1);
			
 
				+}
			
 
				+
			
 
				 static void ui_browser__hists_init_top(struct ui_browser *browser)
			
 
				 {
			
 
				 	if (browser->top == NULL) {
			
@@ -801,9 +885,15 @@ static void ui_browser__hists_init_top(struct ui_browser *browser)
 
				 static unsigned int hist_browser__refresh(struct ui_browser *browser)
			
 
				 {
			
 
				 	unsigned row = 0;
			
 
				+	u16 header_offset = 0;
			
 
				 	struct rb_node *nd;
			
 
				 	struct hist_browser *hb = container_of(browser, struct hist_browser, b);
			
 
				 
			
 
				+	if (hb->show_headers) {
			
 
				+		hist_browser__show_headers(hb);
			
 
				+		header_offset = 1;
			
 
				+	}
			
 
				+
			
 
				 	ui_browser__hists_init_top(browser);
			
 
				 
			
 
				 	for (nd = browser->top; nd; nd = rb_next(nd)) {
			
@@ -818,11 +908,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
 
				 			continue;
			
 
				 
			
 
				 		row += hist_browser__show_entry(hb, h, row);
			
 
				-		if (row == browser->height)
			
 
				+		if (row == browser->rows)
			
 
				 			break;
			
 
				 	}
			
 
				 
			
 
				-	return row;
			
 
				+	return row + header_offset;
			
 
				 }
			
 
				 
			
 
				 static struct rb_node *hists__filter_entries(struct rb_node *nd,
			
@@ -1191,8 +1281,10 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
 
				 	if (browser) {
			
 
				 		browser->hists = hists;
			
 
				 		browser->b.refresh = hist_browser__refresh;
			
 
				+		browser->b.refresh_dimensions = hist_browser__refresh_dimensions;
			
 
				 		browser->b.seek = ui_browser__hists_seek;
			
 
				 		browser->b.use_navkeypressed = true;
			
 
				+		browser->show_headers = symbol_conf.show_hist_headers;
			
 
				 	}
			
 
				 
			
 
				 	return browser;
			
@@ -1213,8 +1305,7 @@ static struct thread *hist_browser__selected_thread(struct hist_browser *browser
 
				 	return browser->he_selection->thread;
			
 
				 }
			
 
				 
			
 
				-static int hists__browser_title(struct hists *hists, char *bf, size_t size,
			
 
				-				const char *ev_name)
			
 
				+static int hists__browser_title(struct hists *hists, char *bf, size_t size)
			
 
				 {
			
 
				 	char unit;
			
 
				 	int printed;
			
@@ -1223,6 +1314,7 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
 
				 	unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
			
 
				 	u64 nr_events = hists->stats.total_period;
			
 
				 	struct perf_evsel *evsel = hists_to_evsel(hists);
			
 
				+	const char *ev_name = perf_evsel__name(evsel);
			
 
				 	char buf[512];
			
 
				 	size_t buflen = sizeof(buf);
			
 
				 
			
@@ -1390,7 +1482,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb)
 
				 }
			
 
				 
			
 
				 static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
			
 
				-				    const char *helpline, const char *ev_name,
			
 
				+				    const char *helpline,
			
 
				 				    bool left_exits,
			
 
				 				    struct hist_browser_timer *hbt,
			
 
				 				    float min_pcnt,
			
@@ -1422,6 +1514,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
				 	"d             Zoom into current DSO\n"				\
			
 
				 	"E             Expand all callchains\n"				\
			
 
				 	"F             Toggle percentage of filtered entries\n"		\
			
 
				+	"H             Display column headers\n"			\
			
 
				 
			
 
				 	/* help messages are sorted by lexical order of the hotkey */
			
 
				 	const char report_help[] = HIST_BROWSER_HELP_COMMON
			
@@ -1465,7 +1558,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
				 
			
 
				 		nr_options = 0;
			
 
				 
			
 
				-		key = hist_browser__run(browser, ev_name, hbt);
			
 
				+		key = hist_browser__run(browser, hbt);
			
 
				 
			
 
				 		if (browser->he_selection != NULL) {
			
 
				 			thread = hist_browser__selected_thread(browser);
			
@@ -1843,7 +1936,7 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
 
				 {
			
 
				 	struct perf_evlist *evlist = menu->b.priv;
			
 
				 	struct perf_evsel *pos;
			
 
				-	const char *ev_name, *title = "Available samples";
			
 
				+	const char *title = "Available samples";
			
 
				 	int delay_secs = hbt ? hbt->refresh : 0;
			
 
				 	int key;
			
 
				 
			
@@ -1876,9 +1969,8 @@ browse_hists:
 
				 			 */
			
 
				 			if (hbt)
			
 
				 				hbt->timer(hbt->arg);
			
 
				-			ev_name = perf_evsel__name(pos);
			
 
				 			key = perf_evsel__hists_browse(pos, nr_events, help,
			
 
				-						       ev_name, true, hbt,
			
 
				+						       true, hbt,
			
 
				 						       menu->min_pcnt,
			
 
				 						       menu->env);
			
 
				 			ui_browser__show_title(&menu->b, title);
			
@@ -1982,10 +2074,9 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 
				 single_entry:
			
 
				 	if (nr_entries == 1) {
			
 
				 		struct perf_evsel *first = perf_evlist__first(evlist);
			
 
				-		const char *ev_name = perf_evsel__name(first);
			
 
				 
			
 
				 		return perf_evsel__hists_browse(first, nr_entries, help,
			
 
				-						ev_name, false, hbt, min_pcnt,
			
 
				+						false, hbt, min_pcnt,
			
 
				 						env);
			
 
				 	}
			
 
				 
			
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -479,7 +479,7 @@ print_entries:
 
				 
			
 
				 		if (h->ms.map == NULL && verbose > 1) {
			
 
				 			__map_groups__fprintf_maps(h->thread->mg,
			
 
				-						   MAP__FUNCTION, verbose, fp);
			
 
				+						   MAP__FUNCTION, fp);
			
 
				 			fprintf(fp, "%.10s end\n", graph_dotted_line);
			
 
				 		}
			
 
				 	}
			
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -626,7 +626,7 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
 
				 
			
 
				 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample)
			
 
				 {
			
 
				-	if (!symbol_conf.use_callchain)
			
 
				+	if (!symbol_conf.use_callchain || sample->callchain == NULL)
			
 
				 		return 0;
			
 
				 	return callchain_append(he->callchain, &callchain_cursor, sample->period);
			
 
				 }
			
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -176,4 +176,17 @@ static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
 
				 	dest->first = src->curr;
			
 
				 	dest->nr -= src->pos;
			
 
				 }
			
 
				+
			
 
				+#ifdef HAVE_SKIP_CALLCHAIN_IDX
			
 
				+extern int arch_skip_callchain_idx(struct machine *machine,
			
 
				+			struct thread *thread, struct ip_callchain *chain);
			
 
				+#else
			
 
				+static inline int arch_skip_callchain_idx(struct machine *machine __maybe_unused,
			
 
				+			struct thread *thread __maybe_unused,
			
 
				+			struct ip_callchain *chain __maybe_unused)
			
 
				+{
			
 
				+	return -1;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 #endif	/* __PERF_CALLCHAIN_H */
			
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -0,0 +1,57 @@
 
				+#include "util.h"
			
 
				+#include "../perf.h"
			
 
				+#include "cloexec.h"
			
 
				+#include "asm/bug.h"
			
 
				+
			
 
				+static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
			
 
				+
			
 
				+static int perf_flag_probe(void)
			
 
				+{
			
 
				+	/* use 'safest' configuration as used in perf_evsel__fallback() */
			
 
				+	struct perf_event_attr attr = {
			
 
				+		.type = PERF_COUNT_SW_CPU_CLOCK,
			
 
				+		.config = PERF_COUNT_SW_CPU_CLOCK,
			
 
				+	};
			
 
				+	int fd;
			
 
				+	int err;
			
 
				+
			
 
				+	/* check cloexec flag */
			
 
				+	fd = sys_perf_event_open(&attr, 0, -1, -1,
			
 
				+				 PERF_FLAG_FD_CLOEXEC);
			
 
				+	err = errno;
			
 
				+
			
 
				+	if (fd >= 0) {
			
 
				+		close(fd);
			
 
				+		return 1;
			
 
				+	}
			
 
				+
			
 
				+	WARN_ONCE(err != EINVAL,
			
 
				+		  "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n",
			
 
				+		  err, strerror(err));
			
 
				+
			
 
				+	/* not supported, confirm error related to PERF_FLAG_FD_CLOEXEC */
			
 
				+	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
			
 
				+	err = errno;
			
 
				+
			
 
				+	if (WARN_ONCE(fd < 0,
			
 
				+		      "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
			
 
				+		      err, strerror(err)))
			
 
				+		return -1;
			
 
				+
			
 
				+	close(fd);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+unsigned long perf_event_open_cloexec_flag(void)
			
 
				+{
			
 
				+	static bool probed;
			
 
				+
			
 
				+	if (!probed) {
			
 
				+		if (perf_flag_probe() <= 0)
			
 
				+			flag = 0;
			
 
				+		probed = true;
			
 
				+	}
			
 
				+
			
 
				+	return flag;
			
 
				+}