
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "The main changes in this cycle were:

  Kernel side changes:

   - Kprobes and uprobes changes:
      - Make their trampolines read-only while they are used
      - Make UPROBES_EVENTS default-y which is the distro practice
      - Apply misc fixes and robustness improvements to probe point insertion.

   - add support for AMD IOMMU events

   - extend hw events on Intel Goldmont CPUs

   - ... plus misc fixes and updates.

  Tooling side changes:

   - support s390 jump instructions in perf annotate (Christian
     Borntraeger)

   - vendor hardware events updates (Andi Kleen)

   - add argument support for SDT events in powerpc (Ravi Bangoria)

   - beautify the statx syscall arguments in 'perf trace' (Arnaldo
     Carvalho de Melo)

   - handle inline functions in callchains (Jin Yao)

   - enable sorting by srcline as key (Milian Wolff)

   - add 'brstackinsn' field in 'perf script' to reuse the x86
     instruction decoder used in the Intel PT code to study hot paths to
     samples (Andi Kleen)

   - add PERF_RECORD_NAMESPACES so that the kernel can record
     information required to associate samples with namespaces, helping
     in container problem characterization; a sketch of the record
     layout follows this list. (Hari Bathini)

   - allow sorting by symbol_size in 'perf report' and 'perf top'
     (Charles Baylis)

   - in perf stat, make system-wide mode (-a) the default option if no
     target was specified and one of the following conditions is met:
      - no workload specified (current behaviour)
      - a workload is specified but all requested events are system-wide
        ones, like uncore events. (Jiri Olsa)

   - ... plus lots of other updates, enhancements, cleanups and fixes"
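
For the PERF_RECORD_NAMESPACES item above, here is a rough C sketch of what such a
record carries. It is illustrative only: the field names and exact layout are
recalled from the uapi header this series adds, so treat them as assumptions and
check include/uapi/linux/perf_event.h in this tree for the authoritative definition.

	/* Illustrative sketch, not a verbatim copy of the UAPI header. */
	#include <linux/types.h>

	struct perf_ns_link_info {
		__u64	dev;	/* device of the task's /proc/<pid>/ns/<name> link */
		__u64	ino;	/* inode of that link, i.e. the namespace identity */
	};

	/*
	 * A PERF_RECORD_NAMESPACES event is roughly:
	 *
	 *	struct {
	 *		struct perf_event_header	header;
	 *		u32				pid, tid;
	 *		u64				nr_namespaces;
	 *		struct perf_ns_link_info	link_info[nr_namespaces];
	 *		struct sample_id		sample_id;
	 *	};
	 *
	 * Tools remember the latest such record per pid/tid and use the
	 * (dev, ino) pairs to tell which mount/pid/net/... namespaces --
	 * and hence which container -- later samples belong to.
	 */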

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (235 commits)
  perf tools: Fix the code to strip command name
  tools arch x86: Sync cpufeatures.h
  tools arch: Sync arch/x86/lib/memcpy_64.S with the kernel
  tools: Update asm-generic/mman-common.h copy from the kernel
  perf tools: Use just forward declarations for struct thread where possible
  perf tools: Add the right header to obtain PERF_ALIGN()
  perf tools: Remove poll.h and wait.h from util.h
  perf tools: Remove string.h, unistd.h and sys/stat.h from util.h
  perf tools: Remove stale prototypes from builtin.h
  perf tools: Remove string.h from util.h
  perf tools: Remove sys/ioctl.h from util.h
  perf tools: Remove a few more needless includes from util.h
  perf tools: Include sys/param.h where needed
  perf callchain: Move callchain specific routines from util.[ch]
  perf tools: Add compress.h for the *_decompress_to_file() headers
  perf mem: Fix display of data source snoop indication
  perf debug: Move dump_stack() and sighandler_dump_stack() to debug.h
  perf kvm: Make function only used by 'perf kvm' static
  perf tools: Move timestamp routines from util.h to time-utils.h
  perf tools: Move units conversion/formatting routines to separate object
  ...
Linus Torvalds, 8 years ago
parent
commit 7c8c03bfc7
100 changed files with 1974 additions and 520 deletions
  + 3 - 2      Documentation/trace/kprobetrace.txt
  + 155 - 170  arch/x86/events/amd/iommu.c
  + 12 - 6     arch/x86/events/amd/iommu.h
  + 31 - 46    arch/x86/events/amd/uncore.c
  + 7 - 9      arch/x86/events/intel/bts.c
  + 23 - 1     arch/x86/events/intel/core.c
  + 1 - 1      arch/x86/events/intel/ds.c
  + 102 - 27   arch/x86/events/intel/pt.c
  + 1 - 1      arch/x86/events/intel/pt.h
  + 1 - 0      arch/x86/events/perf_event.h
  + 3 - 4      arch/x86/include/asm/kprobes.h
  + 2 - 2      arch/x86/kernel/kprobes/common.h
  + 74 - 75    arch/x86/kernel/kprobes/core.c
  + 1 - 1      arch/x86/kernel/kprobes/ftrace.c
  + 10 - 3     arch/x86/kernel/kprobes/opt.c
  + 4 - 5      drivers/hwtracing/coresight/coresight-etb10.c
  + 4 - 5      drivers/hwtracing/coresight/coresight-etm-perf.c
  + 0 - 2      drivers/hwtracing/coresight/coresight-priv.h
  + 3 - 4      drivers/hwtracing/coresight/coresight-tmc-etf.c
  + 3 - 3      drivers/iommu/amd_iommu.c
  + 61 - 40    drivers/iommu/amd_iommu_init.c
  + 1 - 7      drivers/iommu/amd_iommu_proto.h
  + 0 - 3      drivers/iommu/amd_iommu_types.h
  + 1 - 1      include/linux/coresight.h
  + 2 - 0      include/linux/kprobes.h
  + 14 - 3     include/linux/perf_event.h
  + 32 - 1     include/uapi/linux/perf_event.h
  + 139 - 0    kernel/events/core.c
  + 23 - 11    kernel/events/ring_buffer.c
  + 2 - 0      kernel/fork.c
  + 36 - 10    kernel/kprobes.c
  + 3 - 0      kernel/nsproxy.c
  + 1 - 1      kernel/trace/Kconfig
  + 1 - 0      kernel/trace/trace.c
  + 3 - 6      kernel/trace/trace_kprobe.c
  + 13 - 0     tools/arch/arm/include/uapi/asm/kvm.h
  + 13 - 0     tools/arch/arm64/include/uapi/asm/kvm.h
  + 22 - 0     tools/arch/powerpc/include/uapi/asm/kvm.h
  + 7 - 0      tools/arch/x86/include/asm/atomic.h
  + 89 - 0     tools/arch/x86/include/asm/cmpxchg.h
  + 5 - 4      tools/arch/x86/include/asm/cpufeatures.h
  + 1 - 1      tools/arch/x86/lib/memcpy_64.S
  + 1 - 0      tools/build/Makefile.feature
  + 11 - 7     tools/build/feature/Makefile
  + 5 - 0      tools/build/feature/test-all.c
  + 7 - 0      tools/build/feature/test-sched_getcpu.c
  + 8 - 0      tools/include/asm-generic/atomic-gcc.h
  + 6 - 0      tools/include/linux/atomic.h
  + 10 - 0     tools/include/linux/bug.h
  + 7 - 0      tools/include/linux/compiler-gcc.h
  + 9 - 0      tools/include/linux/compiler.h
  + 0 - 4      tools/include/linux/hashtable.h
  + 7 - 0      tools/include/linux/kernel.h
  + 3 - 0      tools/include/linux/log2.h
  + 151 - 0    tools/include/linux/refcount.h
  + 1 - 0      tools/include/linux/types.h
  + 72 - 0     tools/include/uapi/linux/fcntl.h
  + 32 - 1     tools/include/uapi/linux/perf_event.h
  + 177 - 0    tools/include/uapi/linux/stat.h
  + 29 - 0     tools/lib/api/fs/fs.c
  + 1 - 0      tools/lib/api/fs/fs.h
  + 1 - 0      tools/lib/subcmd/help.h
  + 1 - 0      tools/lib/symbol/kallsyms.c
  + 1 - 2      tools/objtool/builtin-check.c
  + 1 - 2      tools/objtool/objtool.c
  + 2 - 0      tools/perf/.gitignore
  + 1 - 0      tools/perf/Build
  + 18 - 0     tools/perf/Documentation/perf-ftrace.txt
  + 7 - 1      tools/perf/Documentation/perf-list.txt
  + 3 - 0      tools/perf/Documentation/perf-record.txt
  + 12 - 1     tools/perf/Documentation/perf-report.txt
  + 4 - 0      tools/perf/Documentation/perf-sched.txt
  + 14 - 2     tools/perf/Documentation/perf-script.txt
  + 4 - 2      tools/perf/Documentation/perf-stat.txt
  + 2 - 1      tools/perf/Documentation/perf-trace.txt
  + 17 - 2     tools/perf/Documentation/perf.data-file-format.txt
  + 5 - 0      tools/perf/MANIFEST
  + 19 - 20    tools/perf/Makefile.config
  + 1 - 0      tools/perf/arch/arm/util/cs-etm.c
  + 2 - 2      tools/perf/arch/arm/util/dwarf-regs.c
  + 1 - 0      tools/perf/arch/arm/util/unwind-libdw.c
  + 4 - 1      tools/perf/arch/arm64/util/dwarf-regs.c
  + 1 - 1      tools/perf/arch/arm64/util/unwind-libunwind.c
  + 2 - 0      tools/perf/arch/common.c
  + 3 - 2      tools/perf/arch/powerpc/util/dwarf-regs.c
  + 1 - 0      tools/perf/arch/powerpc/util/kvm-stat.c
  + 112 - 0    tools/perf/arch/powerpc/util/perf_regs.c
  + 10 - 4     tools/perf/arch/powerpc/util/sym-handling.c
  + 30 - 0     tools/perf/arch/s390/annotate/instructions.c
  + 1 - 0      tools/perf/arch/s390/util/kvm-stat.c
  + 1 - 0      tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
  + 3 - 0      tools/perf/arch/x86/tests/intel-cqm.c
  + 2 - 0      tools/perf/arch/x86/tests/perf-time-to-tsc.c
  + 1 - 0      tools/perf/arch/x86/util/auxtrace.c
  + 1 - 0      tools/perf/arch/x86/util/intel-bts.c
  + 1 - 0      tools/perf/arch/x86/util/intel-pt.c
  + 1 - 0      tools/perf/arch/x86/util/kvm-stat.c
  + 227 - 0    tools/perf/arch/x86/util/perf_regs.c
  + 1 - 0      tools/perf/arch/x86/util/unwind-libdw.c
  + 10 - 10    tools/perf/bench/bench.h

+ 3 - 2
Documentation/trace/kprobetrace.txt

@@ -8,8 +8,9 @@ Overview
 --------
 These events are similar to tracepoint based events. Instead of Tracepoint,
 this is based on kprobes (kprobe and kretprobe). So it can probe wherever
-kprobes can probe (this means, all functions body except for __kprobes
-functions). Unlike the Tracepoint based event, this can be added and removed
+kprobes can probe (this means, all functions except those with
+__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
+Unlike the Tracepoint based event, this can be added and removed
 dynamically, on the fly.
 
 To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
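
As a quick illustration of the "added and removed dynamically" point above, the
sketch below defines a kprobe event through the tracefs control file and removes
it again. It is a minimal sketch, not part of this patch: the probe name
"myprobe", the probed symbol do_sys_open, and the mount point (which may be
/sys/kernel/tracing or /sys/kernel/debug/tracing depending on the system) are
assumptions.

	/* Minimal sketch: add, enable, disable and remove a kprobe event. */
	#include <stdio.h>

	static int write_str(const char *path, const char *s)
	{
		FILE *f = fopen(path, "a");	/* append, like "echo ... >>" */

		if (!f)
			return -1;
		fputs(s, f);
		return fclose(f);
	}

	int main(void)
	{
		const char *tracing = "/sys/kernel/debug/tracing";
		char path[256];

		snprintf(path, sizeof(path), "%s/kprobe_events", tracing);
		write_str(path, "p:myprobe do_sys_open\n");		/* add the probe  */

		snprintf(path, sizeof(path), "%s/events/kprobes/myprobe/enable", tracing);
		write_str(path, "1\n");					/* enable it      */
		/* ... run the workload, read the trace buffer ... */
		write_str(path, "0\n");					/* disable again  */

		snprintf(path, sizeof(path), "%s/kprobe_events", tracing);
		write_str(path, "-:myprobe\n");				/* remove it      */
		return 0;
	}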

+ 155 - 170
arch/x86/events/amd/iommu.c

@@ -11,6 +11,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt)	"perf/amd_iommu: " fmt
+
 #include <linux/perf_event.h>
 #include <linux/init.h>
 #include <linux/cpumask.h>
@@ -21,44 +23,42 @@
 
 #define COUNTER_SHIFT		16
 
-#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
-#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
+/* iommu pmu conf masks */
+#define GET_CSOURCE(x)     ((x)->conf & 0xFFULL)
+#define GET_DEVID(x)       (((x)->conf >> 8)  & 0xFFFFULL)
+#define GET_DOMID(x)       (((x)->conf >> 24) & 0xFFFFULL)
+#define GET_PASID(x)       (((x)->conf >> 40) & 0xFFFFFULL)
 
-/* iommu pmu config masks */
-#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
-#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
-#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
-#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
-#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)  & 0xFFFFULL)
-#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
-#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+/* iommu pmu conf1 masks */
+#define GET_DEVID_MASK(x)  ((x)->conf1  & 0xFFFFULL)
+#define GET_DOMID_MASK(x)  (((x)->conf1 >> 16) & 0xFFFFULL)
+#define GET_PASID_MASK(x)  (((x)->conf1 >> 32) & 0xFFFFFULL)
 
-static struct perf_amd_iommu __perf_iommu;
+#define IOMMU_NAME_SIZE 16
 
 struct perf_amd_iommu {
+	struct list_head list;
 	struct pmu pmu;
+	struct amd_iommu *iommu;
+	char name[IOMMU_NAME_SIZE];
 	u8 max_banks;
 	u8 max_counters;
 	u64 cntr_assign_mask;
 	raw_spinlock_t lock;
-	const struct attribute_group *attr_groups[4];
 };
 
-#define format_group	attr_groups[0]
-#define cpumask_group	attr_groups[1]
-#define events_group	attr_groups[2]
-#define null_group	attr_groups[3]
+static LIST_HEAD(perf_amd_iommu_list);
 
 /*---------------------------------------------
  * sysfs format attributes
  *---------------------------------------------*/
 PMU_FORMAT_ATTR(csource,    "config:0-7");
 PMU_FORMAT_ATTR(devid,      "config:8-23");
-PMU_FORMAT_ATTR(pasid,      "config:24-39");
-PMU_FORMAT_ATTR(domid,      "config:40-55");
+PMU_FORMAT_ATTR(domid,      "config:24-39");
+PMU_FORMAT_ATTR(pasid,      "config:40-59");
 PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
-PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
-PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+PMU_FORMAT_ATTR(domid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(pasid_mask, "config1:32-51");
 
 static struct attribute *iommu_format_attrs[] = {
 	&format_attr_csource.attr,
@@ -79,6 +79,10 @@ static struct attribute_group amd_iommu_format_group = {
 /*---------------------------------------------
  * sysfs events attributes
  *---------------------------------------------*/
+static struct attribute_group amd_iommu_events_group = {
+	.name = "events",
+};
+
 struct amd_iommu_event_desc {
 	struct kobj_attribute attr;
 	const char *event;
@@ -150,30 +154,34 @@ static struct attribute_group amd_iommu_cpumask_group = {
 
 /*---------------------------------------------*/
 
-static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+static int get_next_avail_iommu_bnk_cntr(struct perf_event *event)
 {
+	struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu);
+	int max_cntrs = piommu->max_counters;
+	int max_banks = piommu->max_banks;
+	u32 shift, bank, cntr;
 	unsigned long flags;
-	int shift, bank, cntr, retval;
-	int max_banks = perf_iommu->max_banks;
-	int max_cntrs = perf_iommu->max_counters;
+	int retval;
 
-	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+	raw_spin_lock_irqsave(&piommu->lock, flags);
 
 	for (bank = 0, shift = 0; bank < max_banks; bank++) {
 		for (cntr = 0; cntr < max_cntrs; cntr++) {
 			shift = bank + (bank*3) + cntr;
-			if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+			if (piommu->cntr_assign_mask & BIT_ULL(shift)) {
 				continue;
 			} else {
-				perf_iommu->cntr_assign_mask |= (1ULL<<shift);
-				retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+				piommu->cntr_assign_mask |= BIT_ULL(shift);
+				event->hw.iommu_bank = bank;
+				event->hw.iommu_cntr = cntr;
+				retval = 0;
 				goto out;
 			}
 		}
 	}
 	retval = -ENOSPC;
 out:
-	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+	raw_spin_unlock_irqrestore(&piommu->lock, flags);
 	return retval;
 }
 
@@ -202,8 +210,6 @@ static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
 static int perf_iommu_event_init(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	struct perf_amd_iommu *perf_iommu;
-	u64 config, config1;
 
 	/* test the event attr type check for PMU enumeration */
 	if (event->attr.type != event->pmu->type)
@@ -225,80 +231,62 @@ static int perf_iommu_event_init(struct perf_event *event)
 	if (event->cpu < 0)
 		return -EINVAL;
 
-	perf_iommu = &__perf_iommu;
-
-	if (event->pmu != &perf_iommu->pmu)
-		return -ENOENT;
-
-	if (perf_iommu) {
-		config = event->attr.config;
-		config1 = event->attr.config1;
-	} else {
-		return -EINVAL;
-	}
-
-	/* integrate with iommu base devid (0000), assume one iommu */
-	perf_iommu->max_banks =
-		amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
-	perf_iommu->max_counters =
-		amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
-	if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
-		return -EINVAL;
-
 	/* update the hw_perf_event struct with the iommu config data */
-	hwc->config = config;
-	hwc->extra_reg.config = config1;
+	hwc->conf  = event->attr.config;
+	hwc->conf1 = event->attr.config1;
 
 	return 0;
 }
 
+static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev)
+{
+	return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu;
+}
+
 static void perf_iommu_enable_event(struct perf_event *ev)
 {
-	u8 csource = _GET_CSOURCE(ev);
-	u16 devid = _GET_DEVID(ev);
+	struct amd_iommu *iommu = perf_event_2_iommu(ev);
+	struct hw_perf_event *hwc = &ev->hw;
+	u8 bank = hwc->iommu_bank;
+	u8 cntr = hwc->iommu_cntr;
 	u64 reg = 0ULL;
 
-	reg = csource;
-	amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev) ,
-			 IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+	reg = GET_CSOURCE(hwc);
+	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg);
 
-	reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+	reg = GET_DEVID_MASK(hwc);
+	reg = GET_DEVID(hwc) | (reg << 32);
 	if (reg)
-		reg |= (1UL << 31);
-	amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev) ,
-			 IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+		reg |= BIT(31);
+	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);
 
-	reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+	reg = GET_PASID_MASK(hwc);
+	reg = GET_PASID(hwc) | (reg << 32);
 	if (reg)
-		reg |= (1UL << 31);
-	amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev) ,
-			 IOMMU_PC_PASID_MATCH_REG, &reg, true);
+		reg |= BIT(31);
+	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);
 
-	reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+	reg = GET_DOMID_MASK(hwc);
+	reg = GET_DOMID(hwc) | (reg << 32);
 	if (reg)
-		reg |= (1UL << 31);
-	amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev) ,
-			 IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+		reg |= BIT(31);
+	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
 }
 
 static void perf_iommu_disable_event(struct perf_event *event)
 {
+	struct amd_iommu *iommu = perf_event_2_iommu(event);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 reg = 0ULL;
 
-	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-			_GET_BANK(event), _GET_CNTR(event),
-			IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+	amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
+			     IOMMU_PC_COUNTER_SRC_REG, &reg);
 }
 
 static void perf_iommu_start(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	pr_debug("perf: amd_iommu:perf_iommu_start\n");
 	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
 		return;
 
@@ -306,10 +294,11 @@ static void perf_iommu_start(struct perf_event *event, int flags)
 	hwc->state = 0;
 
 	if (flags & PERF_EF_RELOAD) {
-		u64 prev_raw_count =  local64_read(&hwc->prev_count);
-		amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-				_GET_BANK(event), _GET_CNTR(event),
-				IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+		u64 prev_raw_count = local64_read(&hwc->prev_count);
+		struct amd_iommu *iommu = perf_event_2_iommu(event);
+
+		amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
+				     IOMMU_PC_COUNTER_REG, &prev_raw_count);
 	}
 
 	perf_iommu_enable_event(event);
@@ -319,37 +308,30 @@ static void perf_iommu_start(struct perf_event *event, int flags)
 
 static void perf_iommu_read(struct perf_event *event)
 {
-	u64 count = 0ULL;
-	u64 prev_raw_count = 0ULL;
-	u64 delta = 0ULL;
+	u64 count, prev, delta;
 	struct hw_perf_event *hwc = &event->hw;
-	pr_debug("perf: amd_iommu:perf_iommu_read\n");
+	struct amd_iommu *iommu = perf_event_2_iommu(event);
 
-	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-				_GET_BANK(event), _GET_CNTR(event),
-				IOMMU_PC_COUNTER_REG, &count, false);
+	if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
+				 IOMMU_PC_COUNTER_REG, &count))
+		return;
 
 	/* IOMMU pc counter register is only 48 bits */
-	count &= 0xFFFFFFFFFFFFULL;
+	count &= GENMASK_ULL(47, 0);
 
-	prev_raw_count =  local64_read(&hwc->prev_count);
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-					count) != prev_raw_count)
+	prev = local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev)
 		return;
 
-	/* Handling 48-bit counter overflowing */
-	delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+	/* Handle 48-bit counter overflow */
+	delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
 	delta >>= COUNTER_SHIFT;
 	local64_add(delta, &event->count);
-
 }
 
 static void perf_iommu_stop(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	u64 config;
-
-	pr_debug("perf: amd_iommu:perf_iommu_stop\n");
 
 	if (hwc->state & PERF_HES_UPTODATE)
 		return;
@@ -361,7 +343,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
 	if (hwc->state & PERF_HES_UPTODATE)
 		return;
 
-	config = hwc->config;
 	perf_iommu_read(event);
 	hwc->state |= PERF_HES_UPTODATE;
 }
@@ -369,17 +350,12 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
 static int perf_iommu_add(struct perf_event *event, int flags)
 {
 	int retval;
-	struct perf_amd_iommu *perf_iommu =
-			container_of(event->pmu, struct perf_amd_iommu, pmu);
 
-	pr_debug("perf: amd_iommu:perf_iommu_add\n");
 	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 
 	/* request an iommu bank/counter */
-	retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
-	if (retval != -ENOSPC)
-		event->hw.extra_reg.reg = (u16)retval;
-	else
+	retval = get_next_avail_iommu_bnk_cntr(event);
+	if (retval)
 		return retval;
 
 	if (flags & PERF_EF_START)
@@ -390,115 +366,124 @@ static int perf_iommu_add(struct perf_event *event, int flags)
 
 static void perf_iommu_del(struct perf_event *event, int flags)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	struct perf_amd_iommu *perf_iommu =
 			container_of(event->pmu, struct perf_amd_iommu, pmu);
 
-	pr_debug("perf: amd_iommu:perf_iommu_del\n");
 	perf_iommu_stop(event, PERF_EF_UPDATE);
 
 	/* clear the assigned iommu bank/counter */
 	clear_avail_iommu_bnk_cntr(perf_iommu,
-				     _GET_BANK(event),
-				     _GET_CNTR(event));
+				   hwc->iommu_bank, hwc->iommu_cntr);
 
 	perf_event_update_userpage(event);
 }
 
-static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+static __init int _init_events_attrs(void)
 {
-	struct attribute **attrs;
-	struct attribute_group *attr_group;
 	int i = 0, j;
+	struct attribute **attrs;
 
 	while (amd_iommu_v2_event_descs[i].attr.attr.name)
 		i++;
 
-	attr_group = kzalloc(sizeof(struct attribute *)
-		* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
-	if (!attr_group)
+	attrs = kzalloc(sizeof(struct attribute **) * (i + 1), GFP_KERNEL);
+	if (!attrs)
 		return -ENOMEM;
 
-	attrs = (struct attribute **)(attr_group + 1);
 	for (j = 0; j < i; j++)
 		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
 
-	attr_group->name = "events";
-	attr_group->attrs = attrs;
-	perf_iommu->events_group = attr_group;
-
+	amd_iommu_events_group.attrs = attrs;
 	return 0;
 }
 
-static __init void amd_iommu_pc_exit(void)
-{
-	if (__perf_iommu.events_group != NULL) {
-		kfree(__perf_iommu.events_group);
-		__perf_iommu.events_group = NULL;
-	}
-}
+const struct attribute_group *amd_iommu_attr_groups[] = {
+	&amd_iommu_format_group,
+	&amd_iommu_cpumask_group,
+	&amd_iommu_events_group,
+	NULL,
+};
+
+static struct pmu iommu_pmu = {
+	.event_init	= perf_iommu_event_init,
+	.add		= perf_iommu_add,
+	.del		= perf_iommu_del,
+	.start		= perf_iommu_start,
+	.stop		= perf_iommu_stop,
+	.read		= perf_iommu_read,
+	.task_ctx_nr	= perf_invalid_context,
+	.attr_groups	= amd_iommu_attr_groups,
+};
 
-static __init int _init_perf_amd_iommu(
-	struct perf_amd_iommu *perf_iommu, char *name)
+static __init int init_one_iommu(unsigned int idx)
 {
+	struct perf_amd_iommu *perf_iommu;
 	int ret;
 
+	perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
+	if (!perf_iommu)
+		return -ENOMEM;
+
 	raw_spin_lock_init(&perf_iommu->lock);
 
-	/* Init format attributes */
-	perf_iommu->format_group = &amd_iommu_format_group;
+	perf_iommu->pmu          = iommu_pmu;
+	perf_iommu->iommu        = get_amd_iommu(idx);
+	perf_iommu->max_banks    = amd_iommu_pc_get_max_banks(idx);
+	perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);
 
-	/* Init cpumask attributes to only core 0 */
-	cpumask_set_cpu(0, &iommu_cpumask);
-	perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
-
-	/* Init events attributes */
-	if (_init_events_attrs(perf_iommu) != 0)
-		pr_err("perf: amd_iommu: Only support raw events.\n");
+	if (!perf_iommu->iommu ||
+	    !perf_iommu->max_banks ||
+	    !perf_iommu->max_counters) {
+		kfree(perf_iommu);
+		return -EINVAL;
+	}
 
-	/* Init null attributes */
-	perf_iommu->null_group = NULL;
-	perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+	snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx);
 
-	ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
-	if (ret) {
-		pr_err("perf: amd_iommu: Failed to initialized.\n");
-		amd_iommu_pc_exit();
+	ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
+	if (!ret) {
+		pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n",
+			idx, perf_iommu->max_banks, perf_iommu->max_counters);
+		list_add_tail(&perf_iommu->list, &perf_amd_iommu_list);
 	} else {
-		pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
-			amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
-			amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+		pr_warn("Error initializing IOMMU %d.\n", idx);
+		kfree(perf_iommu);
 	}
-
 	return ret;
 }
 
-static struct perf_amd_iommu __perf_iommu = {
-	.pmu = {
-		.task_ctx_nr    = perf_invalid_context,
-		.event_init	= perf_iommu_event_init,
-		.add		= perf_iommu_add,
-		.del		= perf_iommu_del,
-		.start		= perf_iommu_start,
-		.stop		= perf_iommu_stop,
-		.read		= perf_iommu_read,
-	},
-	.max_banks		= 0x00,
-	.max_counters		= 0x00,
-	.cntr_assign_mask	= 0ULL,
-	.format_group		= NULL,
-	.cpumask_group		= NULL,
-	.events_group		= NULL,
-	.null_group		= NULL,
-};
-
 static __init int amd_iommu_pc_init(void)
 {
+	unsigned int i, cnt = 0;
+	int ret;
+
 	/* Make sure the IOMMU PC resource is available */
 	if (!amd_iommu_pc_supported())
 		return -ENODEV;
 
-	_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+	ret = _init_events_attrs();
+	if (ret)
+		return ret;
+
+	/*
+	 * An IOMMU PMU is specific to an IOMMU, and can function independently.
+	 * So we go through all IOMMUs and ignore the one that fails init
+	 * unless all IOMMU are failing.
+	 */
+	for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
+		ret = init_one_iommu(i);
+		if (!ret)
+			cnt++;
+	}
+
+	if (!cnt) {
+		kfree(amd_iommu_events_group.attrs);
+		return -ENODEV;
+	}
 
+	/* Init cpumask attributes to only core 0 */
+	cpumask_set_cpu(0, &iommu_cpumask);
 	return 0;
 }
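
The format attributes above define the new raw-event layout: csource in
config:0-7, devid in config:8-23, domid in config:24-39, pasid in config:40-59,
with the corresponding match masks in config1. The helper below is a hedged
sketch of packing a perf_event_attr for this PMU from userspace; the sysfs path
for the PMU type and the idea of filtering on a device ID are assumptions made
for the example, not part of this commit.

	/* Sketch: build an amd_iommu raw event per the config/config1 layout above. */
	#include <linux/perf_event.h>
	#include <stdint.h>
	#include <string.h>

	static void amd_iommu_attr(struct perf_event_attr *attr, uint32_t pmu_type,
				   uint64_t csource, uint64_t devid, uint64_t devid_mask)
	{
		memset(attr, 0, sizeof(*attr));
		attr->size = sizeof(*attr);
		/* pmu_type: read from /sys/bus/event_source/devices/amd_iommu_0/type */
		attr->type = pmu_type;

		attr->config  = (csource & 0xffULL) |		/* config:0-7   csource    */
				((devid & 0xffffULL) << 8);	/* config:8-23  devid      */
		attr->config1 = devid_mask & 0xffffULL;		/* config1:0-15 devid_mask */
		/*
		 * domid and pasid (and their masks) pack the same way into
		 * config:24-39 / config:40-59 and config1:16-31 / config1:32-51,
		 * mirroring the GET_DOMID()/GET_PASID() macros above.
		 */
	}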
 

+ 12 - 6
arch/x86/events/amd/iommu.h

@@ -24,17 +24,23 @@
 #define PC_MAX_SPEC_BNKS			64
 #define PC_MAX_SPEC_CNTRS			16
 
-/* iommu pc reg masks*/
-#define IOMMU_BASE_DEVID			0x0000
+struct amd_iommu;
 
 /* amd_iommu_init.c external support functions */
+extern int amd_iommu_get_num_iommus(void);
+
 extern bool amd_iommu_pc_supported(void);
 
-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
+
+extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
+
+extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+				u8 fxn, u64 *value);
 
-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+				u8 fxn, u64 *value);
 
-extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
-			u8 fxn, u64 *value, bool is_write);
+extern struct amd_iommu *get_amd_iommu(int idx);
 
 #endif /*_PERF_EVENT_AMD_IOMMU_H_*/

+ 31 - 46
arch/x86/events/amd/uncore.c

@@ -30,6 +30,9 @@
 
 #define COUNTER_SHIFT		16
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"amd_uncore: " fmt
+
 static int num_counters_llc;
 static int num_counters_nb;
 
@@ -509,51 +512,34 @@ static int __init amd_uncore_init(void)
 	int ret = -ENODEV;
 
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
-		goto fail_nodev;
-
-	switch(boot_cpu_data.x86) {
-		case 23:
-			/* Family 17h: */
-			num_counters_nb = NUM_COUNTERS_NB;
-			num_counters_llc = NUM_COUNTERS_L3;
-			/*
-			 * For Family17h, the NorthBridge counters are
-			 * re-purposed as Data Fabric counters. Also, support is
-			 * added for L3 counters. The pmus are exported based on
-			 * family as either L2 or L3 and NB or DF.
-			 */
-			amd_nb_pmu.name = "amd_df";
-			amd_llc_pmu.name = "amd_l3";
-			format_attr_event_df.show = &event_show_df;
-			format_attr_event_l3.show = &event_show_l3;
-			break;
-		case 22:
-			/* Family 16h - may change: */
-			num_counters_nb = NUM_COUNTERS_NB;
-			num_counters_llc = NUM_COUNTERS_L2;
-			amd_nb_pmu.name = "amd_nb";
-			amd_llc_pmu.name = "amd_l2";
-			format_attr_event_df = format_attr_event;
-			format_attr_event_l3 = format_attr_event;
-			break;
-		default:
-			/*
-			 * All prior families have the same number of
-			 * NorthBridge and Last Level Cache counters
-			 */
-			num_counters_nb = NUM_COUNTERS_NB;
-			num_counters_llc = NUM_COUNTERS_L2;
-			amd_nb_pmu.name = "amd_nb";
-			amd_llc_pmu.name = "amd_l2";
-			format_attr_event_df = format_attr_event;
-			format_attr_event_l3 = format_attr_event;
-			break;
-	}
-	amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
-	amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
+		return -ENODEV;
 
 	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
-		goto fail_nodev;
+		return -ENODEV;
+
+	if (boot_cpu_data.x86 == 0x17) {
+		/*
+		 * For F17h, the Northbridge counters are repurposed as Data
+		 * Fabric counters. Also, L3 counters are supported too. The PMUs
+		 * are exported based on  family as either L2 or L3 and NB or DF.
+		 */
+		num_counters_nb		  = NUM_COUNTERS_NB;
+		num_counters_llc	  = NUM_COUNTERS_L3;
+		amd_nb_pmu.name		  = "amd_df";
+		amd_llc_pmu.name	  = "amd_l3";
+		format_attr_event_df.show = &event_show_df;
+		format_attr_event_l3.show = &event_show_l3;
+	} else {
+		num_counters_nb		  = NUM_COUNTERS_NB;
+		num_counters_llc	  = NUM_COUNTERS_L2;
+		amd_nb_pmu.name		  = "amd_nb";
+		amd_llc_pmu.name	  = "amd_l2";
+		format_attr_event_df	  = format_attr_event;
+		format_attr_event_l3	  = format_attr_event;
+	}
+
+	amd_nb_pmu.attr_groups	= amd_uncore_attr_groups_df;
+	amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
 
 	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
 		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
@@ -565,7 +551,7 @@ static int __init amd_uncore_init(void)
 		if (ret)
 			goto fail_nb;
 
-		pr_info("perf: AMD NB counters detected\n");
+		pr_info("AMD NB counters detected\n");
 		ret = 0;
 	}
 
@@ -579,7 +565,7 @@ static int __init amd_uncore_init(void)
 		if (ret)
 			goto fail_llc;
 
-		pr_info("perf: AMD LLC counters detected\n");
+		pr_info("AMD LLC counters detected\n");
 		ret = 0;
 	}
 
@@ -615,7 +601,6 @@ fail_nb:
 	if (amd_uncore_nb)
 		free_percpu(amd_uncore_nb);
 
-fail_nodev:
 	return ret;
 }
 device_initcall(amd_uncore_init);

+ 7 - 9
arch/x86/events/intel/bts.c

@@ -63,7 +63,6 @@ struct bts_buffer {
 	unsigned int	cur_buf;
 	bool		snapshot;
 	local_t		data_size;
-	local_t		lost;
 	local_t		head;
 	unsigned long	end;
 	void		**data_pages;
@@ -199,7 +198,8 @@ static void bts_update(struct bts_ctx *bts)
 			return;
 
 		if (ds->bts_index >= ds->bts_absolute_maximum)
-			local_inc(&buf->lost);
+			perf_aux_output_flag(&bts->handle,
+			                     PERF_AUX_FLAG_TRUNCATED);
 
 		/*
 		 * old and head are always in the same physical buffer, so we
@@ -276,7 +276,7 @@ static void bts_event_start(struct perf_event *event, int flags)
 	return;
 
 fail_end_stop:
-	perf_aux_output_end(&bts->handle, 0, false);
+	perf_aux_output_end(&bts->handle, 0);
 
 fail_stop:
 	event->hw.state = PERF_HES_STOPPED;
@@ -319,9 +319,8 @@ static void bts_event_stop(struct perf_event *event, int flags)
 				bts->handle.head =
 					local_xchg(&buf->data_size,
 						   buf->nr_pages << PAGE_SHIFT);
-
-			perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-					    !!local_xchg(&buf->lost, 0));
+			perf_aux_output_end(&bts->handle,
+			                    local_xchg(&buf->data_size, 0));
 		}
 
 		cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
@@ -484,8 +483,7 @@ int intel_bts_interrupt(void)
 	if (old_head == local_read(&buf->head))
 		return handled;
 
-	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-			    !!local_xchg(&buf->lost, 0));
+	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0));
 
 	buf = perf_aux_output_begin(&bts->handle, event);
 	if (buf)
@@ -500,7 +498,7 @@ int intel_bts_interrupt(void)
 			 * cleared handle::event
 			 */
 			barrier();
-			perf_aux_output_end(&bts->handle, 0, false);
+			perf_aux_output_end(&bts->handle, 0);
 		}
 	}
 

+ 23 - 1
arch/x86/events/intel/core.c

@@ -1553,6 +1553,27 @@ static __initconst const u64 slm_hw_cache_event_ids
  },
 };
 
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3");
+/* UOPS_NOT_DELIVERED.ANY */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c");
+/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
+EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02");
+/* UOPS_RETIRED.ANY */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2");
+/* UOPS_ISSUED.ANY */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e");
+
+static struct attribute *glm_events_attrs[] = {
+	EVENT_PTR(td_total_slots_glm),
+	EVENT_PTR(td_total_slots_scale_glm),
+	EVENT_PTR(td_fetch_bubbles_glm),
+	EVENT_PTR(td_recovery_bubbles_glm),
+	EVENT_PTR(td_slots_issued_glm),
+	EVENT_PTR(td_slots_retired_glm),
+	NULL
+};
+
 static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
 	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
@@ -2130,7 +2151,7 @@ again:
 	 * counters from the GLOBAL_STATUS mask and we always process PEBS
 	 * events via drain_pebs().
 	 */
-	status &= ~cpuc->pebs_enabled;
+	status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
 
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
@@ -3750,6 +3771,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.lbr_pt_coexist = true;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.cpu_events = glm_events_attrs;
 		pr_cont("Goldmont events, ");
 		break;
 

+ 1 - 1
arch/x86/events/intel/ds.c

@@ -1222,7 +1222,7 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
 
 			/* clear non-PEBS bit and re-check */
 			pebs_status = p->status & cpuc->pebs_enabled;
-			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+			pebs_status &= PEBS_COUNTER_MASK;
 			if (pebs_status == (1 << bit))
 				return at;
 		}

+ 102 - 27
arch/x86/events/intel/pt.c

@@ -28,6 +28,7 @@
 #include <asm/insn.h>
 #include <asm/io.h>
 #include <asm/intel_pt.h>
+#include <asm/intel-family.h>
 
 #include "../perf_event.h"
 #include "pt.h"
@@ -98,6 +99,7 @@ static struct attribute_group pt_cap_group = {
 	.name	= "caps",
 };
 
+PMU_FORMAT_ATTR(pt,		"config:0"	);
 PMU_FORMAT_ATTR(cyc,		"config:1"	);
 PMU_FORMAT_ATTR(pwr_evt,	"config:4"	);
 PMU_FORMAT_ATTR(fup_on_ptw,	"config:5"	);
@@ -105,11 +107,13 @@ PMU_FORMAT_ATTR(mtc,		"config:9"	);
 PMU_FORMAT_ATTR(tsc,		"config:10"	);
 PMU_FORMAT_ATTR(noretcomp,	"config:11"	);
 PMU_FORMAT_ATTR(ptw,		"config:12"	);
+PMU_FORMAT_ATTR(branch,		"config:13"	);
 PMU_FORMAT_ATTR(mtc_period,	"config:14-17"	);
 PMU_FORMAT_ATTR(cyc_thresh,	"config:19-22"	);
 PMU_FORMAT_ATTR(psb_period,	"config:24-27"	);
 
 static struct attribute *pt_formats_attr[] = {
+	&format_attr_pt.attr,
 	&format_attr_cyc.attr,
 	&format_attr_pwr_evt.attr,
 	&format_attr_fup_on_ptw.attr,
@@ -117,6 +121,7 @@ static struct attribute *pt_formats_attr[] = {
 	&format_attr_tsc.attr,
 	&format_attr_noretcomp.attr,
 	&format_attr_ptw.attr,
+	&format_attr_branch.attr,
 	&format_attr_mtc_period.attr,
 	&format_attr_cyc_thresh.attr,
 	&format_attr_psb_period.attr,
@@ -197,6 +202,19 @@ static int __init pt_pmu_hw_init(void)
 		pt_pmu.tsc_art_den = eax;
 	}
 
+	/* model-specific quirks */
+	switch (boot_cpu_data.x86_model) {
+	case INTEL_FAM6_BROADWELL_CORE:
+	case INTEL_FAM6_BROADWELL_XEON_D:
+	case INTEL_FAM6_BROADWELL_GT3E:
+	case INTEL_FAM6_BROADWELL_X:
+		/* not setting BRANCH_EN will #GP, erratum BDM106 */
+		pt_pmu.branch_en_always_on = true;
+		break;
+	default:
+		break;
+	}
+
 	if (boot_cpu_has(X86_FEATURE_VMX)) {
 		/*
 		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
@@ -263,8 +281,20 @@ fail:
 #define RTIT_CTL_PTW	(RTIT_CTL_PTW_EN	| \
 			 RTIT_CTL_FUP_ON_PTW)
 
-#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN		| \
+/*
+ * Bit 0 (TraceEn) in the attr.config is meaningless as the
+ * corresponding bit in the RTIT_CTL can only be controlled
+ * by the driver; therefore, repurpose it to mean: pass
+ * through the bit that was previously assumed to be always
+ * on for PT, thereby allowing the user to *not* set it if
+ * they so wish. See also pt_event_valid() and pt_config().
+ */
+#define RTIT_CTL_PASSTHROUGH RTIT_CTL_TRACEEN
+
+#define PT_CONFIG_MASK (RTIT_CTL_TRACEEN	| \
+			RTIT_CTL_TSC_EN		| \
 			RTIT_CTL_DISRETC	| \
+			RTIT_CTL_BRANCH_EN	| \
 			RTIT_CTL_CYC_PSB	| \
 			RTIT_CTL_MTC		| \
 			RTIT_CTL_PWR_EVT_EN	| \
@@ -332,6 +362,33 @@ static bool pt_event_valid(struct perf_event *event)
 			return false;
 	}
 
+	/*
+	 * Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config
+	 * clears the assomption that BranchEn must always be enabled,
+	 * as was the case with the first implementation of PT.
+	 * If this bit is not set, the legacy behavior is preserved
+	 * for compatibility with the older userspace.
+	 *
+	 * Re-using bit 0 for this purpose is fine because it is never
+	 * directly set by the user; previous attempts at setting it in
+	 * the attr.config resulted in -EINVAL.
+	 */
+	if (config & RTIT_CTL_PASSTHROUGH) {
+		/*
+		 * Disallow not setting BRANCH_EN where BRANCH_EN is
+		 * always required.
+		 */
+		if (pt_pmu.branch_en_always_on &&
+		    !(config & RTIT_CTL_BRANCH_EN))
+			return false;
+	} else {
+		/*
+		 * Disallow BRANCH_EN without the PASSTHROUGH.
+		 */
+		if (config & RTIT_CTL_BRANCH_EN)
+			return false;
+	}
+
 	return true;
 }
 
@@ -411,6 +468,7 @@ static u64 pt_config_filters(struct perf_event *event)
 
 static void pt_config(struct perf_event *event)
 {
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	u64 reg;
 
 	if (!event->hw.itrace_started) {
@@ -419,7 +477,20 @@ static void pt_config(struct perf_event *event)
 	}
 
 	reg = pt_config_filters(event);
-	reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+	reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
+
+	/*
+	 * Previously, we had BRANCH_EN on by default, but now that PT has
+	 * grown features outside of branch tracing, it is useful to allow
+	 * the user to disable it. Setting bit 0 in the event's attr.config
+	 * allows BRANCH_EN to pass through instead of being always on. See
+	 * also the comment in pt_event_valid().
+	 */
+	if (event->attr.config & BIT(0)) {
+		reg |= event->attr.config & RTIT_CTL_BRANCH_EN;
+	} else {
+		reg |= RTIT_CTL_BRANCH_EN;
+	}
 
 	if (!event->attr.exclude_kernel)
 		reg |= RTIT_CTL_OS;
@@ -429,11 +500,15 @@ static void pt_config(struct perf_event *event)
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
 	event->hw.config = reg;
-	wrmsrl(MSR_IA32_RTIT_CTL, reg);
+	if (READ_ONCE(pt->vmx_on))
+		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
+	else
+		wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
 static void pt_config_stop(struct perf_event *event)
 {
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	u64 ctl = READ_ONCE(event->hw.config);
 
 	/* may be already stopped by a PMI */
@@ -441,7 +516,8 @@ static void pt_config_stop(struct perf_event *event)
 		return;
 
 	ctl &= ~RTIT_CTL_TRACEEN;
-	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+	if (!READ_ONCE(pt->vmx_on))
+		wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
 	WRITE_ONCE(event->hw.config, ctl);
 
@@ -753,7 +829,8 @@ static void pt_handle_status(struct pt *pt)
 		 */
 		if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
 		    buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
-			local_inc(&buf->lost);
+			perf_aux_output_flag(&pt->handle,
+			                     PERF_AUX_FLAG_TRUNCATED);
 			advance++;
 		}
 	}
@@ -846,8 +923,10 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
 
 	/* can't stop in the middle of an output region */
 	if (buf->output_off + handle->size + 1 <
-	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
+	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 		return -EINVAL;
+	}
 
 
 	/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
@@ -1171,12 +1250,6 @@ void intel_pt_interrupt(void)
 	if (!READ_ONCE(pt->handle_nmi))
 		return;
 
-	/*
-	 * If VMX is on and PT does not support it, don't touch anything.
-	 */
-	if (READ_ONCE(pt->vmx_on))
-		return;
-
 	if (!event)
 		return;
 
@@ -1192,8 +1265,7 @@ void intel_pt_interrupt(void)
 
 	pt_update_head(pt);
 
-	perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
-			    local_xchg(&buf->lost, 0));
+	perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
 
 	if (!event->hw.state) {
 		int ret;
@@ -1208,7 +1280,7 @@ void intel_pt_interrupt(void)
 		/* snapshot counters don't use PMI, so it's safe */
 		ret = pt_buffer_reset_markers(buf, &pt->handle);
 		if (ret) {
-			perf_aux_output_end(&pt->handle, 0, true);
+			perf_aux_output_end(&pt->handle, 0);
 			return;
 		}
 
@@ -1237,12 +1309,19 @@ void intel_pt_handle_vmx(int on)
 	local_irq_save(flags);
 	WRITE_ONCE(pt->vmx_on, on);
 
-	if (on) {
-		/* prevent pt_config_stop() from writing RTIT_CTL */
-		event = pt->handle.event;
-		if (event)
-			event->hw.config = 0;
-	}
+	/*
+	 * If an AUX transaction is in progress, it will contain
+	 * gap(s), so flag it PARTIAL to inform the user.
+	 */
+	event = pt->handle.event;
+	if (event)
+		perf_aux_output_flag(&pt->handle,
+		                     PERF_AUX_FLAG_PARTIAL);
+
+	/* Turn PTs back on */
+	if (!on && event)
+		wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config);
+
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
@@ -1257,9 +1336,6 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf;
 
-	if (READ_ONCE(pt->vmx_on))
-		return;
-
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf)
 		goto fail_stop;
@@ -1280,7 +1356,7 @@ static void pt_event_start(struct perf_event *event, int mode)
 	return;
 
 fail_end_stop:
-	perf_aux_output_end(&pt->handle, 0, true);
+	perf_aux_output_end(&pt->handle, 0);
 fail_stop:
 	hwc->state = PERF_HES_STOPPED;
 }
@@ -1321,8 +1397,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
 			pt->handle.head =
 				local_xchg(&buf->data_size,
 					   buf->nr_pages << PAGE_SHIFT);
-		perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
-				    local_xchg(&buf->lost, 0));
+		perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
 	}
 }
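
The new "pt" (config:0) and "branch" (config:13) bits above work together: if
bit 0 is clear the legacy behaviour is kept and BRANCH_EN is forced on; if bit 0
is set, bit 13 is passed through, so a user can run PT without branch tracing
(except on the Broadwell parts where branch_en_always_on forces it). The
fragment below is an illustrative sketch of building such a config from
userspace; the sysfs path for the PMU type is an assumption.

	/* Sketch: request Intel PT with branch tracing disabled (pass-through mode). */
	#include <linux/perf_event.h>
	#include <stdint.h>
	#include <string.h>

	static void pt_attr_no_branches(struct perf_event_attr *attr, uint32_t pt_type)
	{
		memset(attr, 0, sizeof(*attr));
		attr->size   = sizeof(*attr);
		/* pt_type: read from /sys/bus/event_source/devices/intel_pt/type */
		attr->type   = pt_type;
		attr->config = 1ULL << 0;	/* "pt" bit: pass BRANCH_EN through...     */
						/* ...and leave "branch" (bit 13) clear,   */
						/* so RTIT_CTL.BranchEn stays off.         */
	}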
 

+ 1 - 1
arch/x86/events/intel/pt.h

@@ -110,6 +110,7 @@ struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 	bool			vmx;
+	bool			branch_en_always_on;
 	unsigned long		max_nonturbo_ratio;
 	unsigned int		tsc_art_num;
 	unsigned int		tsc_art_den;
@@ -143,7 +144,6 @@ struct pt_buffer {
 	size_t			output_off;
 	unsigned long		nr_pages;
 	local_t			data_size;
-	local_t			lost;
 	local64_t		head;
 	bool			snapshot;
 	unsigned long		stop_pos, intr_pos;

+ 1 - 0
arch/x86/events/perf_event.h

@@ -79,6 +79,7 @@ struct amd_nb {
 
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS		8
+#define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
 
 /*
  * Flags PEBS can handle without an PMI.

+ 3 - 4
arch/x86/include/asm/kprobes.h

@@ -72,14 +72,13 @@ struct arch_specific_insn {
 	/* copy of the original instruction */
 	kprobe_opcode_t *insn;
 	/*
-	 * boostable = -1: This instruction type is not boostable.
-	 * boostable = 0: This instruction type is boostable.
-	 * boostable = 1: This instruction has been boosted: we have
+	 * boostable = false: This instruction type is not boostable.
+	 * boostable = true: This instruction has been boosted: we have
 	 * added a relative jump after the instruction copy in insn,
 	 * so no single-step and fixup are needed (unless there's
 	 * a post_handler or break_handler).
 	 */
-	int boostable;
+	bool boostable;
 	bool if_modifier;
 };
 

+ 2 - 2
arch/x86/kernel/kprobes/common.h

@@ -67,7 +67,7 @@
 #endif
 
 /* Ensure if the instruction can be boostable */
-extern int can_boost(kprobe_opcode_t *instruction, void *addr);
+extern int can_boost(struct insn *insn, void *orig_addr);
 /* Recover instruction if given address is probed */
 extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
 					 unsigned long addr);
@@ -75,7 +75,7 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
  * Copy an instruction and adjust the displacement if the instruction
  * uses the %rip-relative addressing mode.
  */
-extern int __copy_instruction(u8 *dest, u8 *src);
+extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn);
 
 /* Generate a relative-jump/call instruction */
 extern void synthesize_reljump(void *from, void *to);

+ 74 - 75
arch/x86/kernel/kprobes/core.c

@@ -164,42 +164,38 @@ static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
 NOKPROBE_SYMBOL(skip_prefixes);
 
 /*
- * Returns non-zero if opcode is boostable.
+ * Returns non-zero if INSN is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-int can_boost(kprobe_opcode_t *opcodes, void *addr)
+int can_boost(struct insn *insn, void *addr)
 {
 	kprobe_opcode_t opcode;
-	kprobe_opcode_t *orig_opcodes = opcodes;
 
 	if (search_exception_tables((unsigned long)addr))
 		return 0;	/* Page fault may occur on this address. */
 
-retry:
-	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
-		return 0;
-	opcode = *(opcodes++);
-
 	/* 2nd-byte opcode */
-	if (opcode == 0x0f) {
-		if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
-			return 0;
-		return test_bit(*opcodes,
+	if (insn->opcode.nbytes == 2)
+		return test_bit(insn->opcode.bytes[1],
 				(unsigned long *)twobyte_is_boostable);
-	}
+
+	if (insn->opcode.nbytes != 1)
+		return 0;
+
+	/* Can't boost Address-size override prefix */
+	if (unlikely(inat_is_address_size_prefix(insn->attr)))
+		return 0;
+
+	opcode = insn->opcode.bytes[0];
 
 	switch (opcode & 0xf0) {
-#ifdef CONFIG_X86_64
-	case 0x40:
-		goto retry; /* REX prefix is boostable */
-#endif
 	case 0x60:
-		if (0x63 < opcode && opcode < 0x67)
-			goto retry; /* prefixes */
-		/* can't boost Address-size override and bound */
-		return (opcode != 0x62 && opcode != 0x67);
+		/* can't boost "bound" */
+		return (opcode != 0x62);
 	case 0x70:
 		return 0; /* can't boost conditional jump */
+	case 0x90:
+		return opcode != 0x9a;	/* can't boost call far */
 	case 0xc0:
 		/* can't boost software-interruptions */
 		return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
@@ -210,14 +206,9 @@ retry:
 		/* can boost in/out and absolute jmps */
 		return ((opcode & 0x04) || opcode == 0xea);
 	case 0xf0:
-		if ((opcode & 0x0c) == 0 && opcode != 0xf1)
-			goto retry; /* lock/rep(ne) prefix */
 		/* clear and set flags are boostable */
 		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
 	default:
-		/* segment override prefixes are boostable */
-		if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
-			goto retry; /* prefixes */
 		/* CS override prefix and call are not boostable */
 		return (opcode != 0x2e && opcode != 0x9a);
 	}
@@ -264,7 +255,10 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
 	 * Fortunately, we know that the original code is the ideal 5-byte
 	 * long NOP.
 	 */
-	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	if (probe_kernel_read(buf, (void *)addr,
+		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+		return 0UL;
+
 	if (faddr)
 		memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
 	else
@@ -276,7 +270,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
  * Recover the probed instruction at addr for further analysis.
  * Caller must lock kprobes by kprobe_mutex, or disable preemption
  * for preventing to release referencing kprobes.
- * Returns zero if the instruction can not get recovered.
+ * Returns zero if the instruction can not get recovered (or access failed).
  */
 unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
 {
@@ -348,37 +342,36 @@ static int is_IF_modifier(kprobe_opcode_t *insn)
 }
 
 /*
- * Copy an instruction and adjust the displacement if the instruction
- * uses the %rip-relative addressing mode.
- * If it does, Return the address of the 32-bit displacement word.
- * If not, return null.
- * Only applicable to 64-bit x86.
+ * Copy an instruction with recovering modified instruction by kprobes
+ * and adjust the displacement if the instruction uses the %rip-relative
+ * addressing mode.
+ * This returns the length of copied instruction, or 0 if it has an error.
  */
-int __copy_instruction(u8 *dest, u8 *src)
+int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
 {
-	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
-	int length;
 	unsigned long recovered_insn =
 		recover_probed_instruction(buf, (unsigned long)src);
 
-	if (!recovered_insn)
+	if (!recovered_insn || !insn)
+		return 0;
+
+	/* This can access kernel text if given address is not recovered */
+	if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE))
 		return 0;
-	kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
-	insn_get_length(&insn);
-	length = insn.length;
+
+	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
+	insn_get_length(insn);
 
 	/* Another subsystem puts a breakpoint, failed to recover */
-	if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+	if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
 		return 0;
-	memcpy(dest, insn.kaddr, length);
 
 #ifdef CONFIG_X86_64
-	if (insn_rip_relative(&insn)) {
+	/* Only x86_64 has RIP relative instructions */
+	if (insn_rip_relative(insn)) {
 		s64 newdisp;
 		u8 *disp;
-		kernel_insn_init(&insn, dest, length);
-		insn_get_displacement(&insn);
 		/*
 		 * The copied instruction uses the %rip-relative addressing
 		 * mode.  Adjust the displacement for the difference between
@@ -391,36 +384,57 @@ int __copy_instruction(u8 *dest, u8 *src)
 		 * extension of the original signed 32-bit displacement would
 		 * have given.
 		 */
-		newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
+		newdisp = (u8 *) src + (s64) insn->displacement.value
+			  - (u8 *) dest;
 		if ((s64) (s32) newdisp != newdisp) {
 			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
-			pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value);
+			pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
+				src, dest, insn->displacement.value);
 			return 0;
 		}
-		disp = (u8 *) dest + insn_offset_displacement(&insn);
+		disp = (u8 *) dest + insn_offset_displacement(insn);
 		*(s32 *) disp = (s32) newdisp;
 	}
 #endif
-	return length;
+	return insn->length;
+}
+
+/* Prepare reljump right after instruction to boost */
+static void prepare_boost(struct kprobe *p, struct insn *insn)
+{
+	if (can_boost(insn, p->addr) &&
+	    MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) {
+		/*
+		 * These instructions can be executed directly if it
+		 * jumps back to correct address.
+		 */
+		synthesize_reljump(p->ainsn.insn + insn->length,
+				   p->addr + insn->length);
+		p->ainsn.boostable = true;
+	} else {
+		p->ainsn.boostable = false;
+	}
 }
 
 static int arch_copy_kprobe(struct kprobe *p)
 {
-	int ret;
+	struct insn insn;
+	int len;
+
+	set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
 
 	/* Copy an instruction with recovering if other optprobe modifies it.*/
-	ret = __copy_instruction(p->ainsn.insn, p->addr);
-	if (!ret)
+	len = __copy_instruction(p->ainsn.insn, p->addr, &insn);
+	if (!len)
 		return -EINVAL;
 
 	/*
 	 * __copy_instruction can modify the displacement of the instruction,
 	 * but it doesn't affect boostable check.
 	 */
-	if (can_boost(p->ainsn.insn, p->addr))
-		p->ainsn.boostable = 0;
-	else
-		p->ainsn.boostable = -1;
+	prepare_boost(p, &insn);
+
+	set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
 
 	/* Check whether the instruction modifies Interrupt Flag or not */
 	p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
@@ -459,7 +473,7 @@ void arch_disarm_kprobe(struct kprobe *p)
 void arch_remove_kprobe(struct kprobe *p)
 {
 	if (p->ainsn.insn) {
-		free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
+		free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
 		p->ainsn.insn = NULL;
 	}
 }
@@ -531,7 +545,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
 		return;
 
 #if !defined(CONFIG_PREEMPT)
-	if (p->ainsn.boostable == 1 && !p->post_handler) {
+	if (p->ainsn.boostable && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
 		if (!reenter)
 			reset_current_kprobe();
@@ -851,7 +865,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
 	case 0xcf:
 	case 0xea:	/* jmp absolute -- ip is correct */
 		/* ip is already adjusted, no more changes required */
-		p->ainsn.boostable = 1;
+		p->ainsn.boostable = true;
 		goto no_change;
 	case 0xe8:	/* call relative - Fix return addr */
 		*tos = orig_ip + (*tos - copy_ip);
@@ -876,28 +890,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
 			 * jmp near and far, absolute indirect
 			 * ip is correct. And this is boostable
 			 */
-			p->ainsn.boostable = 1;
+			p->ainsn.boostable = true;
 			goto no_change;
 		}
 	default:
 		break;
 	}
 
-	if (p->ainsn.boostable == 0) {
-		if ((regs->ip > copy_ip) &&
-		    (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
-			/*
-			 * These instructions can be executed directly if it
-			 * jumps back to correct address.
-			 */
-			synthesize_reljump((void *)regs->ip,
-				(void *)orig_ip + (regs->ip - copy_ip));
-			p->ainsn.boostable = 1;
-		} else {
-			p->ainsn.boostable = -1;
-		}
-	}
-
 	regs->ip += orig_ip - copy_ip;
 
 no_change:

+ 1 - 1
arch/x86/kernel/kprobes/ftrace.c

@@ -94,6 +94,6 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler);
 int arch_prepare_kprobe_ftrace(struct kprobe *p)
 {
 	p->ainsn.insn = NULL;
-	p->ainsn.boostable = -1;
+	p->ainsn.boostable = false;
 	return 0;
 }

+ 10 - 3
arch/x86/kernel/kprobes/opt.c

@@ -65,7 +65,10 @@ found:
 	 * overwritten by jump destination address. In this case, original
 	 * bytes must be recovered from op->optinsn.copied_insn buffer.
 	 */
-	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	if (probe_kernel_read(buf, (void *)addr,
+		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+		return 0UL;
+
 	if (addr == (unsigned long)kp->addr) {
 		buf[0] = kp->opcode;
 		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
@@ -174,11 +177,12 @@ NOKPROBE_SYMBOL(optimized_callback);
 
 static int copy_optimized_instructions(u8 *dest, u8 *src)
 {
+	struct insn insn;
 	int len = 0, ret;
 
 	while (len < RELATIVEJUMP_SIZE) {
-		ret = __copy_instruction(dest + len, src + len);
-		if (!ret || !can_boost(dest + len, src + len))
+		ret = __copy_instruction(dest + len, src + len, &insn);
+		if (!ret || !can_boost(&insn, src + len))
 			return -EINVAL;
 		len += ret;
 	}
@@ -350,6 +354,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 	}
 
 	buf = (u8 *)op->optinsn.insn;
+	set_memory_rw((unsigned long)buf & PAGE_MASK, 1);
 
 	/* Copy instructions into the out-of-line buffer */
 	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
@@ -372,6 +377,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
 			   (u8 *)op->kp.addr + op->optinsn.size);
 
+	set_memory_ro((unsigned long)buf & PAGE_MASK, 1);
+
 	flush_icache_range((unsigned long) buf,
 			   (unsigned long) buf + TMPL_END_IDX +
 			   op->optinsn.size + RELATIVEJUMP_SIZE);

+ 4 - 5
drivers/hwtracing/coresight/coresight-etb10.c

@@ -321,7 +321,7 @@ static int etb_set_buffer(struct coresight_device *csdev,
 
 static unsigned long etb_reset_buffer(struct coresight_device *csdev,
 				      struct perf_output_handle *handle,
-				      void *sink_config, bool *lost)
+				      void *sink_config)
 {
 	unsigned long size = 0;
 	struct cs_buffers *buf = sink_config;
@@ -343,7 +343,6 @@ static unsigned long etb_reset_buffer(struct coresight_device *csdev,
 		 * resetting parameters here and squaring off with the ring
 		 * buffer API in the tracer PMU is fine.
 		 */
-		*lost = !!local_xchg(&buf->lost, 0);
 		size = local_xchg(&buf->data_size, 0);
 	}
 
@@ -385,7 +384,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
 			(unsigned long)write_ptr);
 
 		write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1);
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 	}
 
 	/*
@@ -396,7 +395,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
 	 */
 	status = readl_relaxed(drvdata->base + ETB_STATUS_REG);
 	if (status & ETB_STATUS_RAM_FULL) {
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 		to_read = capacity;
 		read_ptr = write_ptr;
 	} else {
@@ -429,7 +428,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
 		if (read_ptr > (drvdata->buffer_depth - 1))
 			read_ptr -= drvdata->buffer_depth;
 		/* let the decoder know we've skipped ahead */
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 	}
 
 	/* finally tell HW where we want to start reading from */

+ 4 - 5
drivers/hwtracing/coresight/coresight-etm-perf.c

@@ -302,7 +302,8 @@ out:
 	return;
 
 fail_end_stop:
-	perf_aux_output_end(handle, 0, true);
+	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+	perf_aux_output_end(handle, 0);
 fail:
 	event->hw.state = PERF_HES_STOPPED;
 	goto out;
@@ -310,7 +311,6 @@ fail:
 
 static void etm_event_stop(struct perf_event *event, int mode)
 {
-	bool lost;
 	int cpu = smp_processor_id();
 	unsigned long size;
 	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
@@ -348,10 +348,9 @@ static void etm_event_stop(struct perf_event *event, int mode)
 			return;
 
 		size = sink_ops(sink)->reset_buffer(sink, handle,
-						    event_data->snk_config,
-						    &lost);
+						    event_data->snk_config);
 
-		perf_aux_output_end(handle, size, lost);
+		perf_aux_output_end(handle, size);
 	}
 
 	/* Disabling the path make its elements available to other sessions */
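
Note: with this series a sink driver no longer reports data loss through a *lost out-parameter; it raises PERF_AUX_FLAG_TRUNCATED on the output handle and the core folds the flag into the RECORD_AUX it emits. A minimal sketch of the resulting driver-side pattern, with an invented callback around the two real perf_aux_* calls:

#include <linux/perf_event.h>

/* hypothetical sink callback: only the perf_aux_* calls are the real API */
static void example_sink_update(struct perf_output_handle *handle,
				bool hw_buffer_wrapped)
{
	if (hw_buffer_wrapped)
		/* replaces the old local_inc(&buf->lost) bookkeeping */
		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
}

static void example_sink_stop(struct perf_output_handle *handle,
			      unsigned long size)
{
	/* any flag set above travels with the handle; no 'lost' argument */
	perf_aux_output_end(handle, size);
}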

+ 0 - 2
drivers/hwtracing/coresight/coresight-priv.h

@@ -76,7 +76,6 @@ enum cs_mode {
  * @nr_pages:	max number of pages granted to us
  * @offset:	offset within the current buffer
  * @data_size:	how much we collected in this run
- * @lost:	other than zero if we had a HW buffer wrap around
  * @snapshot:	is this run in snapshot mode
  * @data_pages:	a handle the ring buffer
  */
@@ -85,7 +84,6 @@ struct cs_buffers {
 	unsigned int		nr_pages;
 	unsigned long		offset;
 	local_t			data_size;
-	local_t			lost;
 	bool			snapshot;
 	void			**data_pages;
 };

+ 3 - 4
drivers/hwtracing/coresight/coresight-tmc-etf.c

@@ -329,7 +329,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev,
 
 static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
 					  struct perf_output_handle *handle,
-					  void *sink_config, bool *lost)
+					  void *sink_config)
 {
 	long size = 0;
 	struct cs_buffers *buf = sink_config;
@@ -350,7 +350,6 @@ static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
 		 * resetting parameters here and squaring off with the ring
 		 * buffer API in the tracer PMU is fine.
 		 */
-		*lost = !!local_xchg(&buf->lost, 0);
 		size = local_xchg(&buf->data_size, 0);
 	}
 
@@ -389,7 +388,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 	 */
 	status = readl_relaxed(drvdata->base + TMC_STS);
 	if (status & TMC_STS_FULL) {
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 		to_read = drvdata->size;
 	} else {
 		to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size);
@@ -434,7 +433,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 			read_ptr -= drvdata->size;
 		/* Tell the HW */
 		writel_relaxed(read_ptr, drvdata->base + TMC_RRP);
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 	}
 
 	cur = buf->cur;

+ 3 - 3
drivers/iommu/amd_iommu.c

@@ -1234,7 +1234,7 @@ static void __domain_flush_pages(struct protection_domain *domain,
 
 	build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
 
-	for (i = 0; i < amd_iommus_present; ++i) {
+	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
 		if (!domain->dev_iommu[i])
 			continue;
 
@@ -1278,7 +1278,7 @@ static void domain_flush_complete(struct protection_domain *domain)
 {
 	int i;
 
-	for (i = 0; i < amd_iommus_present; ++i) {
+	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
 		if (domain && !domain->dev_iommu[i])
 			continue;
 
@@ -3363,7 +3363,7 @@ static int __flush_pasid(struct protection_domain *domain, int pasid,
 	 * IOMMU TLB needs to be flushed before Device TLB to
 	 * prevent device TLB refill from IOMMU TLB
 	 */
-	for (i = 0; i < amd_iommus_present; ++i) {
+	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
 		if (domain->dev_iommu[i] == 0)
 			continue;
 

+ 61 - 40
drivers/iommu/amd_iommu_init.c

@@ -167,7 +167,9 @@ LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
 
 /* Array to assign indices to IOMMUs*/
 struct amd_iommu *amd_iommus[MAX_IOMMUS];
-int amd_iommus_present;
+
+/* Number of IOMMUs present in the system */
+static int amd_iommus_present;
 
 /* IOMMUs have a non-present cache? */
 bool amd_iommu_np_cache __read_mostly;
@@ -254,10 +256,6 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
-static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
-				    u8 bank, u8 cntr, u8 fxn,
-				    u64 *value, bool is_write);
-
 static inline void update_last_devid(u16 devid)
 {
 	if (devid > amd_iommu_last_bdf)
@@ -272,6 +270,11 @@ static inline unsigned long tbl_size(int entry_size)
 	return 1UL << shift;
 }
 
+int amd_iommu_get_num_iommus(void)
+{
+	return amd_iommus_present;
+}
+
 /* Access to l1 and l2 indexed register spaces */
 
 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
@@ -1336,7 +1339,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 
 	/* Add IOMMU to internal data structures */
 	list_add_tail(&iommu->list, &amd_iommu_list);
-	iommu->index             = amd_iommus_present++;
+	iommu->index = amd_iommus_present++;
 
 	if (unlikely(iommu->index >= MAX_IOMMUS)) {
 		WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
@@ -1477,6 +1480,8 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 	return 0;
 }
 
+static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+				u8 fxn, u64 *value, bool is_write);
 
 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
 {
@@ -1488,8 +1493,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
 	amd_iommu_pc_present = true;
 
 	/* Check if the performance counters can be written to */
-	if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
-	    (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
+	if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
+	    (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
 	    (val != val2)) {
 		pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
 		amd_iommu_pc_present = false;
@@ -2711,6 +2716,18 @@ bool amd_iommu_v2_supported(void)
 }
 EXPORT_SYMBOL(amd_iommu_v2_supported);
 
+struct amd_iommu *get_amd_iommu(unsigned int idx)
+{
+	unsigned int i = 0;
+	struct amd_iommu *iommu;
+
+	for_each_iommu(iommu)
+		if (i++ == idx)
+			return iommu;
+	return NULL;
+}
+EXPORT_SYMBOL(get_amd_iommu);
+
 /****************************************************************************
  *
  * IOMMU EFR Performance Counter support functionality. This code allows
@@ -2718,17 +2735,14 @@ EXPORT_SYMBOL(amd_iommu_v2_supported);
  *
  ****************************************************************************/
 
-u8 amd_iommu_pc_get_max_banks(u16 devid)
+u8 amd_iommu_pc_get_max_banks(unsigned int idx)
 {
-	struct amd_iommu *iommu;
-	u8 ret = 0;
+	struct amd_iommu *iommu = get_amd_iommu(idx);
 
-	/* locate the iommu governing the devid */
-	iommu = amd_iommu_rlookup_table[devid];
 	if (iommu)
-		ret = iommu->max_banks;
+		return iommu->max_banks;
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
 
@@ -2738,62 +2752,69 @@ bool amd_iommu_pc_supported(void)
 }
 EXPORT_SYMBOL(amd_iommu_pc_supported);
 
-u8 amd_iommu_pc_get_max_counters(u16 devid)
+u8 amd_iommu_pc_get_max_counters(unsigned int idx)
 {
-	struct amd_iommu *iommu;
-	u8 ret = 0;
+	struct amd_iommu *iommu = get_amd_iommu(idx);
 
-	/* locate the iommu governing the devid */
-	iommu = amd_iommu_rlookup_table[devid];
 	if (iommu)
-		ret = iommu->max_counters;
+		return iommu->max_counters;
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
 
-static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
-				    u8 bank, u8 cntr, u8 fxn,
-				    u64 *value, bool is_write)
+static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+				u8 fxn, u64 *value, bool is_write)
 {
 	u32 offset;
 	u32 max_offset_lim;
 
+	/* Make sure the IOMMU PC resource is available */
+	if (!amd_iommu_pc_present)
+		return -ENODEV;
+
 	/* Check for valid iommu and pc register indexing */
-	if (WARN_ON((fxn > 0x28) || (fxn & 7)))
+	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
 		return -ENODEV;
 
-	offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
+	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
 
 	/* Limit the offset to the hw defined mmio region aperture */
-	max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) |
+	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
 				(iommu->max_counters << 8) | 0x28);
 	if ((offset < MMIO_CNTR_REG_OFFSET) ||
 	    (offset > max_offset_lim))
 		return -EINVAL;
 
 	if (is_write) {
-		writel((u32)*value, iommu->mmio_base + offset);
-		writel((*value >> 32), iommu->mmio_base + offset + 4);
+		u64 val = *value & GENMASK_ULL(47, 0);
+
+		writel((u32)val, iommu->mmio_base + offset);
+		writel((val >> 32), iommu->mmio_base + offset + 4);
 	} else {
 		*value = readl(iommu->mmio_base + offset + 4);
 		*value <<= 32;
-		*value = readl(iommu->mmio_base + offset);
+		*value |= readl(iommu->mmio_base + offset);
+		*value &= GENMASK_ULL(47, 0);
 	}
 
 	return 0;
 }
-EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
 
-int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
-				    u64 *value, bool is_write)
+int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
 {
-	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return -EINVAL;
 
-	/* Make sure the IOMMU PC resource is available */
-	if (!amd_iommu_pc_present || iommu == NULL)
-		return -ENODEV;
+	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
+}
+EXPORT_SYMBOL(amd_iommu_pc_get_reg);
+
+int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
+{
+	if (!iommu)
+		return -EINVAL;
 
-	return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn,
-					value, is_write);
+	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
 }
+EXPORT_SYMBOL(amd_iommu_pc_set_reg);

+ 1 - 7
drivers/iommu/amd_iommu_proto.h

@@ -21,6 +21,7 @@
 
 #include "amd_iommu_types.h"
 
+extern int amd_iommu_get_num_iommus(void);
 extern int amd_iommu_init_dma_ops(void);
 extern int amd_iommu_init_passthrough(void);
 extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
@@ -56,13 +57,6 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
 extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
 
-/* IOMMU Performance Counter functions */
-extern bool amd_iommu_pc_supported(void);
-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
-extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
-				    u64 *value, bool is_write);
-
 #ifdef CONFIG_IRQ_REMAP
 extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
 #else

+ 0 - 3
drivers/iommu/amd_iommu_types.h

@@ -611,9 +611,6 @@ extern struct list_head amd_iommu_list;
  */
 extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
 
-/* Number of IOMMUs present in the system */
-extern int amd_iommus_present;
-
 /*
  * Declarations for the global list of all protection domains
  */

+ 1 - 1
include/linux/coresight.h

@@ -201,7 +201,7 @@ struct coresight_ops_sink {
 			  void *sink_config);
 	unsigned long (*reset_buffer)(struct coresight_device *csdev,
 				      struct perf_output_handle *handle,
-				      void *sink_config, bool *lost);
+				      void *sink_config);
 	void (*update_buffer)(struct coresight_device *csdev,
 			      struct perf_output_handle *handle,
 			      void *sink_config);

+ 2 - 0
include/linux/kprobes.h

@@ -267,6 +267,8 @@ extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 extern bool arch_within_kprobe_blacklist(unsigned long addr);
+extern bool arch_function_offset_within_entry(unsigned long offset);
+extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
 
 extern bool within_kprobe_blacklist(unsigned long addr);
 

+ 14 - 3
include/linux/perf_event.h

@@ -165,6 +165,13 @@ struct hw_perf_event {
 			struct list_head		bp_list;
 		};
 #endif
+		struct { /* amd_iommu */
+			u8	iommu_bank;
+			u8	iommu_cntr;
+			u16	padding;
+			u64	conf;
+			u64	conf1;
+		};
 	};
 	/*
 	 * If the event is a per task event, this will point to the task in
@@ -801,6 +808,7 @@ struct perf_output_handle {
 	struct ring_buffer		*rb;
 	unsigned long			wakeup;
 	unsigned long			size;
+	u64				aux_flags;
 	union {
 		void			*addr;
 		unsigned long		head;
@@ -849,10 +857,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
 extern void *perf_aux_output_begin(struct perf_output_handle *handle,
 				   struct perf_event *event);
 extern void perf_aux_output_end(struct perf_output_handle *handle,
-				unsigned long size, bool truncated);
+				unsigned long size);
 extern int perf_aux_output_skip(struct perf_output_handle *handle,
 				unsigned long size);
 extern void *perf_get_aux(struct perf_output_handle *handle);
+extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
 
 extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
@@ -1112,6 +1121,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
 
 extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
+extern void perf_event_namespaces(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
@@ -1267,8 +1277,8 @@ static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event)				{ return NULL; }
 static inline void
-perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
-		    bool truncated)					{ }
+perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
+									{ }
 static inline int
 perf_aux_output_skip(struct perf_output_handle *handle,
 		     unsigned long size)				{ return -EINVAL; }
@@ -1315,6 +1325,7 @@ static inline int perf_unregister_guest_info_callbacks
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
+static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }

+ 32 - 1
include/uapi/linux/perf_event.h

@@ -344,7 +344,8 @@ struct perf_event_attr {
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
-				__reserved_1   : 36;
+				namespaces     :  1, /* include namespaces data */
+				__reserved_1   : 35;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
 	__u16	size;
 };
 
+struct perf_ns_link_info {
+	__u64	dev;
+	__u64	ino;
+};
+
+enum {
+	NET_NS_INDEX		= 0,
+	UTS_NS_INDEX		= 1,
+	IPC_NS_INDEX		= 2,
+	PID_NS_INDEX		= 3,
+	USER_NS_INDEX		= 4,
+	MNT_NS_INDEX		= 5,
+	CGROUP_NS_INDEX		= 6,
+
+	NR_NAMESPACES,		/* number of available namespaces */
+};
+
 enum perf_event_type {
 
 	/*
@@ -862,6 +880,18 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SWITCH_CPU_WIDE		= 15,
 
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid;
+	 *	u32				tid;
+	 *	u64				nr_namespaces;
+	 *	{ u64				dev, inode; } [nr_namespaces];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_NAMESPACES			= 16,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -885,6 +915,7 @@ enum perf_callchain_context {
  */
 #define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
 #define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL		0x04	/* record contains gaps */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
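
Note: from user space the feature is opted into via perf_event_attr.namespaces (CAP_SYS_ADMIN is required, see the perf_event_open() hunk below), and PERF_RECORD_NAMESPACES carries one perf_ns_link_info per namespace. A hedged sketch, assuming headers updated to this ABI; the dummy-event choice is just one convenient way to receive the records:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

struct namespaces_record {		/* matches the layout documented above */
	struct perf_event_header header;
	__u32 pid, tid;
	__u64 nr_namespaces;
	struct perf_ns_link_info link_info[NR_NAMESPACES];
	/* followed by struct sample_id */
};

static int open_namespaces_event(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size	= sizeof(attr);
	attr.type	= PERF_TYPE_SOFTWARE;
	attr.config	= PERF_COUNT_SW_DUMMY;
	attr.namespaces	= 1;		/* request PERF_RECORD_NAMESPACES */
	attr.sample_id_all = 1;

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}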

+ 139 - 0
kernel/events/core.c

@@ -48,6 +48,8 @@
 #include <linux/parser.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/mm.h>
+#include <linux/proc_ns.h>
+#include <linux/mount.h>
 
 #include "internal.h"
 
@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
+static atomic_t nr_namespaces_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_dec(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_dec(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
 void perf_event_fork(struct task_struct *task)
 {
 	perf_event_task(task, NULL, 1);
+	perf_event_namespaces(task);
 }
 
 /*
@@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
 	perf_event_comm_event(&comm_event);
 }
 
+/*
+ * namespaces tracking
+ */
+
+struct perf_namespaces_event {
+	struct task_struct		*task;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+		u64				nr_namespaces;
+		struct perf_ns_link_info	link_info[NR_NAMESPACES];
+	} event_id;
+};
+
+static int perf_event_namespaces_match(struct perf_event *event)
+{
+	return event->attr.namespaces;
+}
+
+static void perf_event_namespaces_output(struct perf_event *event,
+					 void *data)
+{
+	struct perf_namespaces_event *namespaces_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	if (!perf_event_namespaces_match(event))
+		return;
+
+	perf_event_header__init_id(&namespaces_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				namespaces_event->event_id.header.size);
+	if (ret)
+		return;
+
+	namespaces_event->event_id.pid = perf_event_pid(event,
+							namespaces_event->task);
+	namespaces_event->event_id.tid = perf_event_tid(event,
+							namespaces_event->task);
+
+	perf_output_put(&handle, namespaces_event->event_id);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
+				   struct task_struct *task,
+				   const struct proc_ns_operations *ns_ops)
+{
+	struct path ns_path;
+	struct inode *ns_inode;
+	void *error;
+
+	error = ns_get_path(&ns_path, task, ns_ops);
+	if (!error) {
+		ns_inode = ns_path.dentry->d_inode;
+		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
+		ns_link_info->ino = ns_inode->i_ino;
+	}
+}
+
+void perf_event_namespaces(struct task_struct *task)
+{
+	struct perf_namespaces_event namespaces_event;
+	struct perf_ns_link_info *ns_link_info;
+
+	if (!atomic_read(&nr_namespaces_events))
+		return;
+
+	namespaces_event = (struct perf_namespaces_event){
+		.task	= task,
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_NAMESPACES,
+				.misc = 0,
+				.size = sizeof(namespaces_event.event_id),
+			},
+			/* .pid */
+			/* .tid */
+			.nr_namespaces = NR_NAMESPACES,
+			/* .link_info[NR_NAMESPACES] */
+		},
+	};
+
+	ns_link_info = namespaces_event.event_id.link_info;
+
+	perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
+			       task, &mntns_operations);
+
+#ifdef CONFIG_USER_NS
+	perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
+			       task, &userns_operations);
+#endif
+#ifdef CONFIG_NET_NS
+	perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
+			       task, &netns_operations);
+#endif
+#ifdef CONFIG_UTS_NS
+	perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
+			       task, &utsns_operations);
+#endif
+#ifdef CONFIG_IPC_NS
+	perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
+			       task, &ipcns_operations);
+#endif
+#ifdef CONFIG_PID_NS
+	perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
+			       task, &pidns_operations);
+#endif
+#ifdef CONFIG_CGROUPS
+	perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
+			       task, &cgroupns_operations);
+#endif
+
+	perf_iterate_sb(perf_event_namespaces_output,
+			&namespaces_event,
+			NULL);
+}
+
 /*
  * mmap tracking
  */
@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_inc(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_inc(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
 	if (event->attr.freq)
@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
 			return -EACCES;
 	}
 
+	if (attr.namespaces) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
+
 	if (attr.freq) {
 		if (attr.sample_freq > sysctl_perf_event_sample_rate)
 			return -EINVAL;

+ 23 - 11
kernel/events/ring_buffer.c

@@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 		rb->paused = 1;
 }
 
+void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
+{
+	/*
+	 * OVERWRITE is determined by perf_aux_output_end() and can't
+	 * be passed in directly.
+	 */
+	if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE))
+		return;
+
+	handle->aux_flags |= flags;
+}
+EXPORT_SYMBOL_GPL(perf_aux_output_flag);
+
 /*
  * This is called before hardware starts writing to the AUX area to
  * obtain an output handle and make sure there's room in the buffer.
@@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	handle->event = event;
 	handle->head = aux_head;
 	handle->size = 0;
+	handle->aux_flags = 0;
 
 	/*
 	 * In overwrite mode, AUX data stores do not depend on aux_tail,
@@ -408,34 +422,32 @@ err:
  * of the AUX buffer management code is that after pmu::stop(), the AUX
  * transaction must be stopped and therefore drop the AUX reference count.
  */
-void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
-			 bool truncated)
+void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 {
+	bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
 	struct ring_buffer *rb = handle->rb;
-	bool wakeup = truncated;
 	unsigned long aux_head;
-	u64 flags = 0;
-
-	if (truncated)
-		flags |= PERF_AUX_FLAG_TRUNCATED;
 
 	/* in overwrite mode, driver provides aux_head via handle */
 	if (rb->aux_overwrite) {
-		flags |= PERF_AUX_FLAG_OVERWRITE;
+		handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE;
 
 		aux_head = handle->head;
 		local_set(&rb->aux_head, aux_head);
 	} else {
+		handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE;
+
 		aux_head = local_read(&rb->aux_head);
 		local_add(size, &rb->aux_head);
 	}
 
-	if (size || flags) {
+	if (size || handle->aux_flags) {
 		/*
 		 * Only send RECORD_AUX if we have something useful to communicate
 		 */
 
-		perf_event_aux_event(handle->event, aux_head, size, flags);
+		perf_event_aux_event(handle->event, aux_head, size,
+		                     handle->aux_flags);
 	}
 
 	aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
@@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 	}
 
 	if (wakeup) {
-		if (truncated)
+		if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
 			handle->event->pending_disable = 1;
 		perf_output_wakeup(handle);
 	}

+ 2 - 0
kernel/fork.c

@@ -2353,6 +2353,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		}
 	}
 
+	perf_event_namespaces(current);
+
 bad_unshare_cleanup_cred:
 	if (new_cred)
 		put_cred(new_cred);

+ 36 - 10
kernel/kprobes.c

@@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr)
  * This returns encoded errors if it fails to look up symbol or invalid
  * combination of parameters.
  */
-static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
+static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
+			const char *symbol_name, unsigned int offset)
 {
-	kprobe_opcode_t *addr = p->addr;
-
-	if ((p->symbol_name && p->addr) ||
-	    (!p->symbol_name && !p->addr))
+	if ((symbol_name && addr) || (!symbol_name && !addr))
 		goto invalid;
 
-	if (p->symbol_name) {
-		kprobe_lookup_name(p->symbol_name, addr);
+	if (symbol_name) {
+		kprobe_lookup_name(symbol_name, addr);
 		if (!addr)
 			return ERR_PTR(-ENOENT);
 	}
 
-	addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
+	addr = (kprobe_opcode_t *)(((char *)addr) + offset);
 	if (addr)
 		return addr;
 
@@ -1413,6 +1411,11 @@ invalid:
 	return ERR_PTR(-EINVAL);
 }
 
+static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
+{
+	return _kprobe_addr(p->addr, p->symbol_name, p->offset);
+}
+
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
 static struct kprobe *__get_valid_kprobe(struct kprobe *p)
 {
@@ -1740,11 +1743,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
 }
 EXPORT_SYMBOL_GPL(unregister_kprobes);
 
-int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-					      unsigned long val, void *data)
+int __weak kprobe_exceptions_notify(struct notifier_block *self,
+					unsigned long val, void *data)
 {
 	return NOTIFY_DONE;
 }
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
 
 static struct notifier_block kprobe_exceptions_nb = {
 	.notifier_call = kprobe_exceptions_notify,
@@ -1875,6 +1879,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(pre_handler_kretprobe);
 
+bool __weak arch_function_offset_within_entry(unsigned long offset)
+{
+	return !offset;
+}
+
+bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
+{
+	kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
+
+	if (IS_ERR(kp_addr))
+		return false;
+
+	if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
+						!arch_function_offset_within_entry(offset))
+		return false;
+
+	return true;
+}
+
 int register_kretprobe(struct kretprobe *rp)
 {
 	int ret = 0;
@@ -1882,6 +1905,9 @@ int register_kretprobe(struct kretprobe *rp)
 	int i;
 	void *addr;
 
+	if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
+		return -EINVAL;
+
 	if (kretprobe_blacklist_size) {
 		addr = kprobe_addr(&rp->kp);
 		if (IS_ERR(addr))

+ 3 - 0
kernel/nsproxy.c

@@ -26,6 +26,7 @@
 #include <linux/file.h>
 #include <linux/syscalls.h>
 #include <linux/cgroup.h>
+#include <linux/perf_event.h>
 
 static struct kmem_cache *nsproxy_cachep;
 
@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 		goto out;
 	}
 	switch_task_namespaces(tsk, new_nsproxy);
+
+	perf_event_namespaces(tsk);
 out:
 	fput(file);
 	return err;

+ 1 - 1
kernel/trace/Kconfig

@@ -455,7 +455,7 @@ config UPROBE_EVENTS
 	select UPROBES
 	select PROBE_EVENTS
 	select TRACING
-	default n
+	default y
 	help
 	  This allows the user to add tracing events on top of userspace
 	  dynamic events (similar to tracepoints) on the fly via the trace

+ 1 - 0
kernel/trace/trace.c

@@ -4355,6 +4355,7 @@ static const char readme_msg[] =
 	"\t           -:[<group>/]<event>\n"
 #ifdef CONFIG_KPROBE_EVENTS
 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+  "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
 	"\t    place: <path>:<offset>\n"

+ 3 - 6
kernel/trace/trace_kprobe.c

@@ -681,10 +681,6 @@ static int create_trace_kprobe(int argc, char **argv)
 		return -EINVAL;
 	}
 	if (isdigit(argv[1][0])) {
-		if (is_return) {
-			pr_info("Return probe point must be a symbol.\n");
-			return -EINVAL;
-		}
 		/* an address specified */
 		ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
 		if (ret) {
@@ -700,8 +696,9 @@ static int create_trace_kprobe(int argc, char **argv)
 			pr_info("Failed to parse symbol.\n");
 			return ret;
 		}
-		if (offset && is_return) {
-			pr_info("Return probe must be used without offset.\n");
+		if (offset && is_return &&
+		    !function_offset_within_entry(NULL, symbol, offset)) {
+			pr_info("Given offset is not valid for return probe.\n");
 			return -EINVAL;
 		}
 	}
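
Note: kretprobes may now be placed at a non-zero offset as long as the offset still falls within the function entry as defined by the architecture (arch_function_offset_within_entry()). A sketch of a module using the kernel API directly; symbol, offset and handler names are illustrative:

#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/ptrace.h>

static int example_ret_handler(struct kretprobe_instance *ri,
			       struct pt_regs *regs)
{
	pr_info("return value: %lx\n", regs_return_value(regs));
	return 0;
}

static struct kretprobe example_rp = {
	.kp.symbol_name	= "do_sys_open",	/* illustrative symbol */
	.kp.offset	= 0,	/* non-zero only where the arch allows it */
	.handler	= example_ret_handler,
	.maxactive	= 16,
};

static int __init example_init(void)
{
	/* now fails with -EINVAL if the offset is not a valid entry offset */
	return register_kretprobe(&example_rp);
}

static void __exit example_exit(void)
{
	unregister_kretprobe(&example_rp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");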

+ 13 - 0
tools/arch/arm/include/uapi/asm/kvm.h

@@ -181,10 +181,23 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+			(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL       4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL	0
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT    0
 
 /* KVM_IRQ_LINE irq field index values */

+ 13 - 0
tools/arch/arm64/include/uapi/asm/kvm.h

@@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+			(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK	0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL	0
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
 
 /* Device Control API on vcpu fd */

+ 22 - 0
tools/arch/powerpc/include/uapi/asm/kvm.h

@@ -413,6 +413,26 @@ struct kvm_get_htab_header {
 	__u16	n_invalid;
 };
 
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+	__u64	flags;
+	__u64	process_table;	/* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX	1	/* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE	2	/* global translation shootdown enb. */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+	struct kvm_ppc_radix_geom {
+		__u8	page_shift;
+		__u8	level_bits[4];
+		__u8	pad[3];
+	}	geometries[8];
+	__u32	ap_encodings[8];
+};
+
 /* Per-vcpu XICS interrupt controller state */
 #define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
 
@@ -613,5 +633,7 @@ struct kvm_get_htab_header {
 #define  KVM_XICS_LEVEL_SENSITIVE	(1ULL << 40)
 #define  KVM_XICS_MASKED		(1ULL << 41)
 #define  KVM_XICS_PENDING		(1ULL << 42)
+#define  KVM_XICS_PRESENTED		(1ULL << 43)
+#define  KVM_XICS_QUEUED		(1ULL << 44)
 
 #endif /* __LINUX_KVM_POWERPC_H */

+ 7 - 0
tools/arch/x86/include/asm/atomic.h

@@ -7,6 +7,8 @@
 
 #define LOCK_PREFIX "\n\tlock; "
 
+#include <asm/cmpxchg.h>
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
@@ -62,4 +64,9 @@ static inline int atomic_dec_and_test(atomic_t *v)
 	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
 }
 
+static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
 #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */

+ 89 - 0
tools/arch/x86/include/asm/cmpxchg.h

@@ -0,0 +1,89 @@
+#ifndef TOOLS_ASM_X86_CMPXCHG_H
+#define TOOLS_ASM_X86_CMPXCHG_H
+
+#include <linux/compiler.h>
+
+/*
+ * Non-existant functions to indicate usage errors at link time
+ * (or compile-time if the compiler implements __compiletime_error().
+ */
+extern void __cmpxchg_wrong_size(void)
+	__compiletime_error("Bad argument size for cmpxchg");
+
+/*
+ * Constants for operation sizes. On 32-bit, the 64-bit size it set to
+ * -1 because sizeof will never return -1, thereby making those switch
+ * case statements guaranteeed dead code which the compiler will
+ * eliminate, and allowing the "missing symbol in the default case" to
+ * indicate a usage error.
+ */
+#define __X86_CASE_B	1
+#define __X86_CASE_W	2
+#define __X86_CASE_L	4
+#ifdef __x86_64__
+#define __X86_CASE_Q	8
+#else
+#define	__X86_CASE_Q	-1		/* sizeof will never return -1 */
+#endif
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+#define __raw_cmpxchg(ptr, old, new, size, lock)			\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__typeof__(*(ptr)) __old = (old);				\
+	__typeof__(*(ptr)) __new = (new);				\
+	switch (size) {							\
+	case __X86_CASE_B:						\
+	{								\
+		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
+		asm volatile(lock "cmpxchgb %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "q" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_W:						\
+	{								\
+		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
+		asm volatile(lock "cmpxchgw %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "r" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_L:						\
+	{								\
+		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
+		asm volatile(lock "cmpxchgl %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "r" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_Q:						\
+	{								\
+		volatile u64 *__ptr = (volatile u64 *)(ptr);		\
+		asm volatile(lock "cmpxchgq %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "r" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	default:							\
+		__cmpxchg_wrong_size();					\
+	}								\
+	__ret;								\
+})
+
+#define __cmpxchg(ptr, old, new, size)					\
+	__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
+
+#define cmpxchg(ptr, old, new)						\
+	__cmpxchg(ptr, old, new, sizeof(*(ptr)))
+
+
+#endif	/* TOOLS_ASM_X86_CMPXCHG_H */
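
Note: this tools-only cmpxchg() mainly backs the atomic_cmpxchg() additions in this series and the refcount_t port further down. A quick illustration of the kind of lock-free update it enables in tools code, assuming the usual tools include paths:

#include <linux/atomic.h>

/* keep the maximum value seen so far without taking a lock */
static void atomic_set_max(atomic_t *v, int val)
{
	int old = atomic_read(v);

	while (old < val) {
		int prev = atomic_cmpxchg(v, old, val);

		if (prev == old)
			break;
		old = prev;
	}
}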

+ 5 - 4
tools/arch/x86/include/asm/cpufeatures.h

@@ -100,7 +100,7 @@
 #define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
 #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
-/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_CPUID	( 3*32+25) /* CPU has CPUID instruction itself */
 #define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
@@ -186,7 +186,8 @@
  *
  * Reuse free bits when adding new feature flags!
  */
-
+#define X86_FEATURE_RING3MWAIT	( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
 #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
 #define X86_FEATURE_CAT_L3	( 7*32+ 4) /* Cache Allocation Technology L3 */
@@ -289,7 +290,8 @@
 #define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */
 #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_RDPID	(16*32+ 22) /* RDPID instruction */
+#define X86_FEATURE_LA57	(16*32+16) /* 5-level page tables */
+#define X86_FEATURE_RDPID	(16*32+22) /* RDPID instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
@@ -321,5 +323,4 @@
 #define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400	X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-
 #endif /* _ASM_X86_CPUFEATURES_H */

+ 1 - 1
tools/arch/x86/lib/memcpy_64.S

@@ -286,7 +286,7 @@ ENDPROC(memcpy_mcsafe_unrolled)
 	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)

+ 1 - 0
tools/build/Makefile.feature

@@ -63,6 +63,7 @@ FEATURE_TESTS_BASIC :=                  \
         lzma                            \
         get_cpuid                       \
         bpf                             \
+        sched_getcpu			\
         sdt
 
 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list

+ 11 - 7
tools/build/feature/Makefile

@@ -48,21 +48,22 @@ FILES=                                          \
          test-get_cpuid.bin                     \
          test-sdt.bin                           \
          test-cxx.bin                           \
-         test-jvmti.bin
+         test-jvmti.bin				\
+         test-sched_getcpu.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
-CC := $(CROSS_COMPILE)gcc -MD
-CXX := $(CROSS_COMPILE)g++ -MD
-PKG_CONFIG := $(CROSS_COMPILE)pkg-config
+CC ?= $(CROSS_COMPILE)gcc
+CXX ?= $(CROSS_COMPILE)g++
+PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config
 
 all: $(FILES)
 
-__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
+__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
   BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
 
-__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
+__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
   BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
 
 ###############################
@@ -91,6 +92,9 @@ $(OUTPUT)test-libelf.bin:
 $(OUTPUT)test-glibc.bin:
 	$(BUILD)
 
+$(OUTPUT)test-sched_getcpu.bin:
+	$(BUILD)
+
 DWARFLIBS := -ldw
 ifeq ($(findstring -static,${LDFLAGS}),-static)
 DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
@@ -171,7 +175,7 @@ $(OUTPUT)test-libperl.bin:
 	$(BUILD) $(FLAGS_PERL_EMBED)
 
 $(OUTPUT)test-libpython.bin:
-	$(BUILD)
+	$(BUILD) $(FLAGS_PYTHON_EMBED)
 
 $(OUTPUT)test-libpython-version.bin:
 	$(BUILD)

+ 5 - 0
tools/build/feature/test-all.c

@@ -117,6 +117,10 @@
 # include "test-pthread-attr-setaffinity-np.c"
 #undef main
 
+#define main main_test_sched_getcpu
+# include "test-sched_getcpu.c"
+#undef main
+
 # if 0
 /*
  * Disable libbabeltrace check for test-all, because the requested
@@ -182,6 +186,7 @@ int main(int argc, char *argv[])
 	main_test_get_cpuid();
 	main_test_bpf();
 	main_test_libcrypto();
+	main_test_sched_getcpu();
 	main_test_sdt();
 
 	return 0;

+ 7 - 0
tools/build/feature/test-sched_getcpu.c

@@ -0,0 +1,7 @@
+#define _GNU_SOURCE
+#include <sched.h>
+
+int main(void)
+{
+	return sched_getcpu();
+}

+ 8 - 0
tools/include/asm-generic/atomic-gcc.h

@@ -60,4 +60,12 @@ static inline int atomic_dec_and_test(atomic_t *v)
 	return __sync_sub_and_fetch(&v->counter, 1) == 0;
 }
 
+#define cmpxchg(ptr, oldval, newval) \
+	__sync_val_compare_and_swap(ptr, oldval, newval)
+
+static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval)
+{
+	return cmpxchg(&(v)->counter, oldval, newval);
+}
+
 #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */

+ 6 - 0
tools/include/linux/atomic.h

@@ -3,4 +3,10 @@
 
 #include <asm/atomic.h>
 
+/* atomic_cmpxchg_relaxed */
+#ifndef atomic_cmpxchg_relaxed
+#define  atomic_cmpxchg_relaxed		atomic_cmpxchg
+#define  atomic_cmpxchg_release         atomic_cmpxchg
+#endif /* atomic_cmpxchg_relaxed */
+
 #endif /* __TOOLS_LINUX_ATOMIC_H */

+ 10 - 0
tools/include/linux/bug.h

@@ -0,0 +1,10 @@
+#ifndef _TOOLS_PERF_LINUX_BUG_H
+#define _TOOLS_PERF_LINUX_BUG_H
+
+/* Force a compilation error if condition is true, but also produce a
+   result (of value 0 and type size_t), so the expression can be used
+   e.g. in a structure initializer (or where-ever else comma expressions
+   aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+
+#endif	/* _TOOLS_PERF_LINUX_BUG_H */

+ 7 - 0
tools/include/linux/compiler-gcc.h

@@ -12,3 +12,10 @@
 #if GCC_VERSION >= 70000 && !defined(__CHECKER__)
 # define __fallthrough __attribute__ ((fallthrough))
 #endif
+
+#if GCC_VERSION >= 40300
+# define __compiletime_error(message) __attribute__((error(message)))
+#endif /* GCC_VERSION >= 40300 */
+
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
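
Note: __compiletime_error() and __must_be_array() feed the stricter ARRAY_SIZE() added to tools/include/linux/kernel.h further down, which now rejects pointers at compile time. A small illustration, assuming the usual tools include paths:

#include <linux/kernel.h>
#include <stdio.h>

int main(void)
{
	int squares[16];
	int *p = squares;

	printf("%zu\n", ARRAY_SIZE(squares));	/* prints 16 */
	/* ARRAY_SIZE(p) is now a build error: p is a pointer, not an array */
	return (int)(p - squares);
}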

+ 9 - 0
tools/include/linux/compiler.h

@@ -5,6 +5,10 @@
 #include <linux/compiler-gcc.h>
 #endif
 
+#ifndef __compiletime_error
+# define __compiletime_error(message)
+#endif
+
 /* Optimization barrier */
 /* The "volatile" is due to gcc bugs */
 #define barrier() __asm__ __volatile__("": : :"memory")
@@ -13,6 +17,11 @@
 # define __always_inline	inline __attribute__((always_inline))
 #endif
 
+/* Are two types/vars the same type (ignoring qualifiers)? */
+#ifndef __same_type
+# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
+#endif
+
 #ifdef __ANDROID__
 /*
  * FIXME: Big hammer to get rid of tons of:

+ 0 - 4
tools/include/linux/hashtable.h

@@ -13,10 +13,6 @@
 #include <linux/hash.h>
 #include <linux/log2.h>
 
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
 #define DEFINE_HASHTABLE(name, bits)						\
 	struct hlist_head name[1 << (bits)] =					\
 			{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }

+ 7 - 0
tools/include/linux/kernel.h

@@ -4,6 +4,11 @@
 #include <stdarg.h>
 #include <stddef.h>
 #include <assert.h>
+#include <linux/compiler.h>
+
+#ifndef UINT_MAX
+#define UINT_MAX	(~0U)
+#endif
 
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 
@@ -72,6 +77,8 @@
 int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
 int scnprintf(char * buf, size_t size, const char * fmt, ...);
 
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
 /*
  * This looks more complex than it should be. But we need to
  * get the type for the ~ right in round_down (it needs to be

+ 3 - 0
tools/include/linux/log2.h

@@ -12,6 +12,9 @@
 #ifndef _TOOLS_LINUX_LOG2_H
 #define _TOOLS_LINUX_LOG2_H
 
+#include <linux/bitops.h>
+#include <linux/types.h>
+
 /*
  * non-constant log of base 2 calculators
  * - the arch may override these in asm/bitops.h if they can be implemented

+ 151 - 0
tools/include/linux/refcount.h

@@ -0,0 +1,151 @@
+#ifndef _TOOLS_LINUX_REFCOUNT_H
+#define _TOOLS_LINUX_REFCOUNT_H
+
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * It differs in that the counter saturates at UINT_MAX and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free issues.
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * its the lock acquire, for RCU/lockless data structures its the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the inc, this ensures we'll never modify the object
+ * if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before, it also provides a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+
+#ifdef NDEBUG
+#define REFCOUNT_WARN(cond, str) (void)(cond)
+#define __refcount_check
+#else
+#define REFCOUNT_WARN(cond, str) BUG_ON(cond)
+#define __refcount_check	__must_check
+#endif
+
+typedef struct refcount_struct {
+	atomic_t refs;
+} refcount_t;
+
+#define REFCOUNT_INIT(n)	{ .refs = ATOMIC_INIT(n), }
+
+static inline void refcount_set(refcount_t *r, unsigned int n)
+{
+	atomic_set(&r->refs, n);
+}
+
+static inline unsigned int refcount_read(const refcount_t *r)
+{
+	return atomic_read(&r->refs);
+}
+
+/*
+ * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_inc_not_zero(refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		new = val + 1;
+
+		if (!val)
+			return false;
+
+		if (unlikely(!new))
+			return true;
+
+		old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+	return true;
+}
+
+/*
+ * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller already has a
+ * reference on the object, will WARN when this is not so.
+ */
+static inline void refcount_inc(refcount_t *r)
+{
+	REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+}
+
+/*
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		if (unlikely(val == UINT_MAX))
+			return false;
+
+		new = val - i;
+		if (new > val) {
+			REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+			return false;
+		}
+
+		old = atomic_cmpxchg_release(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	return !new;
+}
+
+static inline __refcount_check
+bool refcount_dec_and_test(refcount_t *r)
+{
+	return refcount_sub_and_test(1, r);
+}
+
+
+#endif /* _ATOMIC_LINUX_REFCOUNT_H */
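
Note: this is the tools-side counterpart of the kernel's refcount_t, intended to let perf objects (thread, map, and friends) move off raw atomic_t reference counts. A usage sketch with an invented object type; the refcount_* calls are the real API:

#include <linux/refcount.h>
#include <stdlib.h>

struct object {
	refcount_t refcnt;
	/* ... payload ... */
};

static struct object *object__new(void)
{
	struct object *obj = calloc(1, sizeof(*obj));

	if (obj)
		refcount_set(&obj->refcnt, 1);
	return obj;
}

static struct object *object__get(struct object *obj)
{
	if (obj)
		refcount_inc(&obj->refcnt);
	return obj;
}

static void object__put(struct object *obj)
{
	if (obj && refcount_dec_and_test(&obj->refcnt))
		free(obj);
}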

+ 1 - 0
tools/include/linux/types.h

@@ -7,6 +7,7 @@
 
 #define __SANE_USERSPACE_TYPES__	/* For PPC64, to get LL64 types */
 #include <asm/types.h>
+#include <asm/posix_types.h>
 
 struct page;
 struct kmem_cache;

+ 72 - 0
tools/include/uapi/linux/fcntl.h

@@ -0,0 +1,72 @@
+#ifndef _UAPI_LINUX_FCNTL_H
+#define _UAPI_LINUX_FCNTL_H
+
+#include <asm/fcntl.h>
+
+#define F_SETLEASE	(F_LINUX_SPECIFIC_BASE + 0)
+#define F_GETLEASE	(F_LINUX_SPECIFIC_BASE + 1)
+
+/*
+ * Cancel a blocking posix lock; internal use only until we expose an
+ * asynchronous lock api to userspace:
+ */
+#define F_CANCELLK	(F_LINUX_SPECIFIC_BASE + 5)
+
+/* Create a file descriptor with FD_CLOEXEC set. */
+#define F_DUPFD_CLOEXEC	(F_LINUX_SPECIFIC_BASE + 6)
+
+/*
+ * Request nofications on a directory.
+ * See below for events that may be notified.
+ */
+#define F_NOTIFY	(F_LINUX_SPECIFIC_BASE+2)
+
+/*
+ * Set and get of pipe page size array
+ */
+#define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
+#define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
+
+/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS	(F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS	(F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW	0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE	0x0008	/* prevent writes */
+/* (1U << 31) is reserved for signed error codes */
+
+/*
+ * Types of directory notifications that may be requested.
+ */
+#define DN_ACCESS	0x00000001	/* File accessed */
+#define DN_MODIFY	0x00000002	/* File modified */
+#define DN_CREATE	0x00000004	/* File created */
+#define DN_DELETE	0x00000008	/* File removed */
+#define DN_RENAME	0x00000010	/* File renamed */
+#define DN_ATTRIB	0x00000020	/* File changed attibutes */
+#define DN_MULTISHOT	0x80000000	/* Don't remove notifier */
+
+#define AT_FDCWD		-100    /* Special value used to indicate
+                                           openat should use the current
+                                           working directory. */
+#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
+#define AT_REMOVEDIR		0x200   /* Remove directory instead of
+                                           unlinking file.  */
+#define AT_SYMLINK_FOLLOW	0x400   /* Follow symbolic links.  */
+#define AT_NO_AUTOMOUNT		0x800	/* Suppress terminal automount traversal */
+#define AT_EMPTY_PATH		0x1000	/* Allow empty relative pathname */
+
+#define AT_STATX_SYNC_TYPE	0x6000	/* Type of synchronisation required from statx() */
+#define AT_STATX_SYNC_AS_STAT	0x0000	/* - Do whatever stat() does */
+#define AT_STATX_FORCE_SYNC	0x2000	/* - Force the attributes to be sync'd with the server */
+#define AT_STATX_DONT_SYNC	0x4000	/* - Don't sync attributes with the server */
+
+
+#endif /* _UAPI_LINUX_FCNTL_H */

+ 32 - 1
tools/include/uapi/linux/perf_event.h

@@ -344,7 +344,8 @@ struct perf_event_attr {
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
-				__reserved_1   : 36;
+				namespaces     :  1, /* include namespaces data */
+				__reserved_1   : 35;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
 	__u16	size;
 };
 
+struct perf_ns_link_info {
+	__u64	dev;
+	__u64	ino;
+};
+
+enum {
+	NET_NS_INDEX		= 0,
+	UTS_NS_INDEX		= 1,
+	IPC_NS_INDEX		= 2,
+	PID_NS_INDEX		= 3,
+	USER_NS_INDEX		= 4,
+	MNT_NS_INDEX		= 5,
+	CGROUP_NS_INDEX		= 6,
+
+	NR_NAMESPACES,		/* number of available namespaces */
+};
+
 enum perf_event_type {
 
 	/*
@@ -862,6 +880,18 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SWITCH_CPU_WIDE		= 15,
 
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid;
+	 *	u32				tid;
+	 *	u64				nr_namespaces;
+	 *	{ u64				dev, inode; } [nr_namespaces];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_NAMESPACES			= 16,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -885,6 +915,7 @@ enum perf_callchain_context {
  */
 #define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
 #define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL		0x04	/* record contains gaps */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)

+ 177 - 0
tools/include/uapi/linux/stat.h

@@ -0,0 +1,177 @@
+#ifndef _UAPI_LINUX_STAT_H
+#define _UAPI_LINUX_STAT_H
+
+#include <linux/types.h>
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+#define S_IFMT  00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK	 0120000
+#define S_IFREG  0100000
+#define S_IFBLK  0060000
+#define S_IFDIR  0040000
+#define S_IFCHR  0020000
+#define S_IFIFO  0010000
+#define S_ISUID  0004000
+#define S_ISGID  0002000
+#define S_ISVTX  0001000
+
+#define S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)
+#define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m)	(((m) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(m)	(((m) & S_IFMT) == S_IFCHR)
+#define S_ISBLK(m)	(((m) & S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m)	(((m) & S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m)	(((m) & S_IFMT) == S_IFSOCK)
+
+#define S_IRWXU 00700
+#define S_IRUSR 00400
+#define S_IWUSR 00200
+#define S_IXUSR 00100
+
+#define S_IRWXG 00070
+#define S_IRGRP 00040
+#define S_IWGRP 00020
+#define S_IXGRP 00010
+
+#define S_IRWXO 00007
+#define S_IROTH 00004
+#define S_IWOTH 00002
+#define S_IXOTH 00001
+
+#endif
+
+/*
+ * Timestamp structure for the timestamps in struct statx.
+ *
+ * tv_sec holds the number of seconds before (negative) or after (positive)
+ * 00:00:00 1st January 1970 UTC.
+ *
+ * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
+ * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
+ *
+ * Note that if both tv_sec and tv_nsec are non-zero, then the two values must
+ * either be both positive or both negative.
+ *
+ * __reserved is held in case we need a yet finer resolution.
+ */
+struct statx_timestamp {
+	__s64	tv_sec;
+	__s32	tv_nsec;
+	__s32	__reserved;
+};
+
+/*
+ * Structures for the extended file attribute retrieval system call
+ * (statx()).
+ *
+ * The caller passes a mask of what they're specifically interested in as a
+ * parameter to statx().  What statx() actually got will be indicated in
+ * st_mask upon return.
+ *
+ * For each bit in the mask argument:
+ *
+ * - if the datum is not supported:
+ *
+ *   - the bit will be cleared, and
+ *
+ *   - the datum will be set to an appropriate fabricated value if one is
+ *     available (eg. CIFS can take a default uid and gid), otherwise
+ *
+ *   - the field will be cleared;
+ *
+ * - otherwise, if explicitly requested:
+ *
+ *   - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
+ *     set or if the datum is considered out of date, and
+ *
+ *   - the field will be filled in and the bit will be set;
+ *
+ * - otherwise, if not requested, but available in approximate form without any
+ *   effort, it will be filled in anyway, and the bit will be set upon return
+ *   (it might not be up to date, however, and no attempt will be made to
+ *   synchronise the internal state first);
+ *
+ * - otherwise the field and the bit will be cleared before returning.
+ *
+ * Items in STATX_BASIC_STATS may be marked unavailable on return, but they
+ * will have values installed for compatibility purposes so that stat() and
+ * co. can be emulated in userspace.
+ */
+struct statx {
+	/* 0x00 */
+	__u32	stx_mask;	/* What results were written [uncond] */
+	__u32	stx_blksize;	/* Preferred general I/O size [uncond] */
+	__u64	stx_attributes;	/* Flags conveying information about the file [uncond] */
+	/* 0x10 */
+	__u32	stx_nlink;	/* Number of hard links */
+	__u32	stx_uid;	/* User ID of owner */
+	__u32	stx_gid;	/* Group ID of owner */
+	__u16	stx_mode;	/* File mode */
+	__u16	__spare0[1];
+	/* 0x20 */
+	__u64	stx_ino;	/* Inode number */
+	__u64	stx_size;	/* File size */
+	__u64	stx_blocks;	/* Number of 512-byte blocks allocated */
+	__u64	stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+	/* 0x40 */
+	struct statx_timestamp	stx_atime;	/* Last access time */
+	struct statx_timestamp	stx_btime;	/* File creation time */
+	struct statx_timestamp	stx_ctime;	/* Last attribute change time */
+	struct statx_timestamp	stx_mtime;	/* Last data modification time */
+	/* 0x80 */
+	__u32	stx_rdev_major;	/* Device ID of special file [if bdev/cdev] */
+	__u32	stx_rdev_minor;
+	__u32	stx_dev_major;	/* ID of device containing file [uncond] */
+	__u32	stx_dev_minor;
+	/* 0x90 */
+	__u64	__spare2[14];	/* Spare space for future expansion */
+	/* 0x100 */
+};
+
+/*
+ * Flags to be stx_mask
+ *
+ * Query request/result mask for statx() and struct statx::stx_mask.
+ *
+ * These bits should be set in the mask argument of statx() to request
+ * particular items when calling statx().
+ */
+#define STATX_TYPE		0x00000001U	/* Want/got stx_mode & S_IFMT */
+#define STATX_MODE		0x00000002U	/* Want/got stx_mode & ~S_IFMT */
+#define STATX_NLINK		0x00000004U	/* Want/got stx_nlink */
+#define STATX_UID		0x00000008U	/* Want/got stx_uid */
+#define STATX_GID		0x00000010U	/* Want/got stx_gid */
+#define STATX_ATIME		0x00000020U	/* Want/got stx_atime */
+#define STATX_MTIME		0x00000040U	/* Want/got stx_mtime */
+#define STATX_CTIME		0x00000080U	/* Want/got stx_ctime */
+#define STATX_INO		0x00000100U	/* Want/got stx_ino */
+#define STATX_SIZE		0x00000200U	/* Want/got stx_size */
+#define STATX_BLOCKS		0x00000400U	/* Want/got stx_blocks */
+#define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
+#define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
+#define STATX_ALL		0x00000fffU	/* All currently supported flags */
+#define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
+
+/*
+ * Attributes to be found in stx_attributes and masked in stx_attributes_mask.
+ *
+ * These give information about the features or the state of a file that might
+ * be of use to ordinary userspace programs such as GUIs or ls rather than
+ * specialised tools.
+ *
+ * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
+ * semantically.  Where possible, the numerical value is picked to correspond
+ * also.
+ */
+#define STATX_ATTR_COMPRESSED		0x00000004 /* [I] File is compressed by the fs */
+#define STATX_ATTR_IMMUTABLE		0x00000010 /* [I] File is marked immutable */
+#define STATX_ATTR_APPEND		0x00000020 /* [I] File is append-only */
+#define STATX_ATTR_NODUMP		0x00000040 /* [I] File is not to be dumped */
+#define STATX_ATTR_ENCRYPTED		0x00000800 /* [I] File requires key to decrypt in fs */
+
+#define STATX_ATTR_AUTOMOUNT		0x00001000 /* Dir: Automount trigger */
+
+
+#endif /* _UAPI_LINUX_STAT_H */
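To illustrate the want/got handshake documented above, here is a minimal user-space sketch, assuming a toolchain with headers new enough to provide __NR_statx and this uapi header; the path, the use of the raw syscall instead of a libc wrapper, and the printed fields are illustrative only:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>    /* __NR_statx (assumes updated kernel headers) */
#include <fcntl.h>          /* AT_FDCWD */
#include <linux/stat.h>     /* struct statx, STATX_* from the header above */

int main(void)
{
	struct statx stx;

	/* Request the basic stat fields plus the birth time. */
	if (syscall(__NR_statx, AT_FDCWD, "/etc/passwd", 0,
		    STATX_BASIC_STATS | STATX_BTIME, &stx) < 0) {
		perror("statx");
		return 1;
	}

	/* stx_mask reports which of the requested items were actually filled in. */
	if (stx.stx_mask & STATX_BTIME)
		printf("btime: %lld s\n", (long long)stx.stx_btime.tv_sec);
	printf("size : %llu bytes\n", (unsigned long long)stx.stx_size);
	return 0;
}
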

+ 29 - 0
tools/lib/api/fs/fs.c

@@ -439,6 +439,35 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
 	return filename__read_str(path, buf, sizep);
 }
 
+int sysfs__read_bool(const char *entry, bool *value)
+{
+	char *buf;
+	size_t size;
+	int ret;
+
+	ret = sysfs__read_str(entry, &buf, &size);
+	if (ret < 0)
+		return ret;
+
+	switch (buf[0]) {
+	case '1':
+	case 'y':
+	case 'Y':
+		*value = true;
+		break;
+	case '0':
+	case 'n':
+	case 'N':
+		*value = false;
+		break;
+	default:
+		ret = -1;
+	}
+
+	free(buf);
+
+	return ret;
+}
 int sysctl__read_int(const char *sysctl, int *value)
 {
 	char path[PATH_MAX];

+ 1 - 0
tools/lib/api/fs/fs.h

@@ -37,4 +37,5 @@ int sysctl__read_int(const char *sysctl, int *value);
 int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
 int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
+int sysfs__read_bool(const char *entry, bool *value);
 #endif /* __API_FS__ */
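A minimal sketch of how a tools/ user might consume the new helper; the wrapper function and the sysfs entry named in the trailing comment are illustrative assumptions, not part of this series:

#include <stdbool.h>
#include <api/fs/fs.h>    /* sysfs__read_bool(), with -Itools/lib as in the perf build */

/* Returns false on read error or when the attribute reads as 0/n/N. */
static bool sysfs_flag_enabled(const char *entry)
{
	bool value = false;

	/* sysfs__read_bool() accepts 1/y/Y as true and 0/n/N as false. */
	if (sysfs__read_bool(entry, &value) < 0)
		return false;

	return value;
}

/* e.g. sysfs_flag_enabled("devices/system/cpu/intel_pstate/no_turbo"); */
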

+ 1 - 0
tools/lib/subcmd/help.h

@@ -2,6 +2,7 @@
 #define __SUBCMD_HELP_H
 
 #include <sys/types.h>
+#include <stdio.h>
 
 struct cmdnames {
 	size_t alloc;

+ 1 - 0
tools/lib/symbol/kallsyms.c

@@ -1,3 +1,4 @@
+#include <ctype.h>
 #include "symbol/kallsyms.h"
 #include <stdio.h>
 #include <stdlib.h>

+ 1 - 2
tools/objtool/builtin-check.c

@@ -36,8 +36,7 @@
 #include "warn.h"
 
 #include <linux/hashtable.h>
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#include <linux/kernel.h>
 
 #define STATE_FP_SAVED		0x1
 #define STATE_FP_SETUP		0x2

+ 1 - 2
tools/objtool/objtool.c

@@ -31,11 +31,10 @@
 #include <stdlib.h>
 #include <subcmd/exec-cmd.h>
 #include <subcmd/pager.h>
+#include <linux/kernel.h>
 
 #include "builtin.h"
 
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-
 struct cmd_struct {
 	const char *name;
 	int (*fn)(int, const char **);

+ 2 - 0
tools/perf/.gitignore

@@ -31,3 +31,5 @@ config.mak.autogen
 .config-detected
 util/intel-pt-decoder/inat-tables.c
 arch/*/include/generated/
+pmu-events/pmu-events.c
+pmu-events/jevents

+ 1 - 0
tools/perf/Build

@@ -50,5 +50,6 @@ libperf-y += util/
 libperf-y += arch/
 libperf-y += ui/
 libperf-y += scripts/
+libperf-y += trace/beauty/
 
 gtk-y += ui/gtk/

+ 18 - 0
tools/perf/Documentation/perf-ftrace.txt

@@ -30,6 +30,24 @@ OPTIONS
 --verbose=::
         Verbosity level.
 
+-p::
+--pid=::
+	Trace on existing process id (comma separated list).
+
+-a::
+--all-cpus::
+	Force system-wide collection.  Scripts run without a <command>
+	normally use -a by default, while scripts run with a <command>
+	normally don't - this option allows the latter to be run in
+	system-wide mode.
+
+-C::
+--cpu=::
+	Only trace for the list of CPUs provided.  Multiple CPUs can
+	be provided as a comma separated list with no space like: 0,1.
+	Ranges of CPUs are specified with -: 0-2.
+	Default is to trace on all online CPUs.
+
 
 SEE ALSO
 --------

+ 7 - 1
tools/perf/Documentation/perf-list.txt

@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
 SYNOPSIS
 --------
 [verse]
-'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob]
+'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]
 
 DESCRIPTION
 -----------
@@ -24,6 +24,10 @@ Don't print descriptions.
 --long-desc::
 Print longer event descriptions.
 
+--details::
+Print how named events are resolved internally into perf events, and also
+any extra expressions computed by perf stat.
+
 
 [[EVENT_MODIFIERS]]
 EVENT MODIFIERS
@@ -240,6 +244,8 @@ To limit the list use:
 
 . 'pmu' to print the kernel supplied PMU events.
 
+. 'sdt' to list all Statically Defined Tracepoint events.
+
 . If none of the above is matched, it will apply the supplied glob to all
   events, printing the ones that match.
 

+ 3 - 0
tools/perf/Documentation/perf-record.txt

 Enable weighted sampling. An additional weight is recorded per sample and can be
 displayed with the weight and local_weight sort keys.  This currently works for TSX
 abort events and some memory events in precise mode on modern Intel CPUs.
 
+--namespaces::
+Record events of type PERF_RECORD_NAMESPACES.
+
 --transaction::
 Record transaction flags for transaction related events.
 

+ 12 - 1
tools/perf/Documentation/perf-report.txt

@@ -72,7 +72,8 @@ OPTIONS
 --sort=::
 	Sort histogram entries by given key(s) - multiple keys can be specified
 	in CSV format.  Following sort keys are available:
-	pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
+	pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
+	local_weight, cgroup_id.
 
 	Each key has following meaning:
 
@@ -80,6 +81,7 @@ OPTIONS
 	- pid: command and tid of the task
 	- dso: name of library or module executed at the time of sample
 	- symbol: name of function executed at the time of sample
+	- symbol_size: size of function executed at the time of sample
 	- parent: name of function matched to the parent regex filter. Unmatched
 	entries are displayed as "[other]".
 	- cpu: cpu number the task ran at the time of sample
@@ -91,6 +93,7 @@ OPTIONS
 	- weight: Event specific weight, e.g. memory latency or transaction
 	abort cost. This is the global weight.
 	- local_weight: Local weight version of the weight above.
+	- cgroup_id: ID derived from cgroup namespace device and inode numbers.
 	- transaction: Transaction abort flags.
 	- overhead: Overhead percentage of sample
 	- overhead_sys: Overhead percentage of sample running in system mode
@@ -172,6 +175,9 @@ OPTIONS
 	By default, every sort keys not specified in -F will be appended
 	automatically.
 
+	If the keys start with a prefix '+', the specified field(s) will be
+	appended to the default field order. For example: perf report -F +period,sample.
+
 -p::
 --parent=<regex>::
         A regex filter to identify parent. The parent is a caller of this
@@ -229,6 +235,7 @@ OPTIONS
 	sort_key can be:
 	- function: compare on functions (default)
 	- address: compare on individual code addresses
+	- srcline: compare on source filename and line number
 
 	branch can be:
 	- branch: include last branch information in callgraph when available.
@@ -424,6 +431,10 @@ include::itrace.txt[]
 --hierarchy::
 	Enable hierarchical output.
 
+--inline::
+	If a callgraph address belongs to an inlined function, the inline stack
+	will be printed. Each entry is function name or file/line.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO

+ 4 - 0
tools/perf/Documentation/perf-sched.txt

@@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist'
 --migrations::
 	Show migration events.
 
+-n::
+--next::
+	Show next task.
+
 -I::
 --idle-hist::
 	Show idle-related events only.

+ 14 - 2
tools/perf/Documentation/perf-script.txt

@@ -116,7 +116,7 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
         callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
@@ -189,15 +189,20 @@ OPTIONS
 	i.e., -F "" is not allowed.
 
 	The brstack output includes branch related information with raw addresses using the
-	/v/v/v/v/ syntax in the following order:
+	/v/v/v/v/cycles syntax in the following order:
 	FROM: branch source instruction
 	TO  : branch target instruction
         M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
 	X/- : X=branch inside a transactional region, -=not in transaction region or not supported
 	A/- : A=TSX abort entry, -=not aborted region or not supported
+	cycles
 
 	The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
 
+	When brstackinsn is specified, the full assembler sequence of every branch sequence for each sample
+	is printed. This is the full execution path leading to the sample. This is only supported when the
+	sample was recorded with perf record -b or -j any.
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
@@ -248,6 +253,9 @@ OPTIONS
 --show-mmap-events
 	Display mmap related events (e.g. MMAP, MMAP2).
 
+--show-namespace-events
+	Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
+
 --show-switch-events
 	Display context switch events i.e. events of type PERF_RECORD_SWITCH or
 	PERF_RECORD_SWITCH_CPU_WIDE.
@@ -299,6 +307,10 @@ include::itrace.txt[]
 	stop time is not given (i.e, time string is 'x.y,') then analysis goes
 	to end of file.
 
+--max-blocks::
+	Set the maximum number of program blocks to print with brstackinsn for
+	each sample.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],

+ 4 - 2
tools/perf/Documentation/perf-stat.txt

@@ -94,8 +94,7 @@ to activate system-wide monitoring. Default is to count on all CPUs.
 
 -A::
 --no-aggr::
-Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
-This option is only valid in system-wide mode.
+Do not aggregate counts across all monitored CPUs.
 
 -n::
 --null::
@@ -237,6 +236,9 @@ To interpret the results it is usually needed to know on which
 CPUs the workload runs on. If needed the CPUs can be forced using
 taskset.
 
+--no-merge::
+Do not merge results from the same PMUs.
+
 EXAMPLES
 --------
 

+ 2 - 1
tools/perf/Documentation/perf-trace.txt

@@ -123,7 +123,8 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 	major or all pagefaults. Default value is maj.
 
 --syscalls::
-	Trace system calls. This options is enabled by default.
+	Trace system calls. This option is enabled by default; disable it with
+	--no-syscalls.
 
 --call-graph [mode,type,min[,limit],order[,key][,branch]]::
         Setup and enable call-graph (stack chain/backtrace) recording.

+ 17 - 2
tools/perf/Documentation/perf.data-file-format.txt

@@ -11,8 +11,8 @@ All fields are in native-endian of the machine that generated the perf.data.
 
 When perf is writing to a pipe it uses a special version of the file
 format that does not rely on seeking to adjust data offsets.  This
-format is not described here. The pipe version can be converted to
-normal perf.data with perf inject.
+format is described in the "Pipe-mode data" section. The pipe data version can be
+augmented with additional events using perf inject.
 
 The file starts with a perf_header:
 
@@ -411,6 +411,21 @@ An array bound by the perf_file_section size.
 
 ids points to a array of uint64_t defining the ids for event attr attr.
 
+Pipe-mode data
+
+Pipe-mode avoids seeks in the file by removing the perf_file_section and flags
+from the struct perf_header. The trimmed header is:
+
+struct perf_pipe_file_header {
+	u64				magic;
+	u64				size;
+};
+
+The information about attrs, data, and event_types is instead in the
+synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA and
+PERF_RECORD_HEADER_EVENT_TYPE that are generated by perf record in pipe-mode.
+
+
 References:
 
 include/uapi/linux/perf_event.h
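A rough sketch of consuming the trimmed pipe-mode header described above, e.g. from a perf record -o - stream on stdin; the check against the "PERFILE2" magic reflects the current perf.data version and is an assumption, not something added by this patch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

struct perf_pipe_file_header {
	uint64_t magic;
	uint64_t size;
};

int main(void)
{
	struct perf_pipe_file_header hdr;

	/* Read the trimmed header from stdin (perf record -o - | this-tool). */
	if (read(STDIN_FILENO, &hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr))
		return 1;

	/* Pipe mode keeps the same magic bytes as the on-disk format. */
	if (memcmp(&hdr.magic, "PERFILE2", 8) != 0) {
		fprintf(stderr, "not a perf.data stream\n");
		return 1;
	}

	/* Everything after hdr.size bytes of header is a stream of event records. */
	return 0;
}
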

+ 5 - 0
tools/perf/MANIFEST

@@ -12,6 +12,7 @@ tools/arch/sparc/include/asm/barrier_32.h
 tools/arch/sparc/include/asm/barrier_64.h
 tools/arch/tile/include/asm/barrier.h
 tools/arch/x86/include/asm/barrier.h
+tools/arch/x86/include/asm/cmpxchg.h
 tools/arch/x86/include/asm/cpufeatures.h
 tools/arch/x86/include/asm/disabled-features.h
 tools/arch/x86/include/asm/required-features.h
@@ -63,6 +64,7 @@ tools/include/linux/bitops.h
 tools/include/linux/compiler.h
 tools/include/linux/compiler-gcc.h
 tools/include/linux/coresight-pmu.h
+tools/include/linux/bug.h
 tools/include/linux/filter.h
 tools/include/linux/hash.h
 tools/include/linux/kernel.h
@@ -72,12 +74,15 @@ tools/include/uapi/asm-generic/mman-common.h
 tools/include/uapi/asm-generic/mman.h
 tools/include/uapi/linux/bpf.h
 tools/include/uapi/linux/bpf_common.h
+tools/include/uapi/linux/fcntl.h
 tools/include/uapi/linux/hw_breakpoint.h
 tools/include/uapi/linux/mman.h
 tools/include/uapi/linux/perf_event.h
+tools/include/uapi/linux/stat.h
 tools/include/linux/poison.h
 tools/include/linux/rbtree.h
 tools/include/linux/rbtree_augmented.h
+tools/include/linux/refcount.h
 tools/include/linux/string.h
 tools/include/linux/stringify.h
 tools/include/linux/types.h

+ 19 - 20
tools/perf/Makefile.config

@@ -170,13 +170,20 @@ PYTHON2_CONFIG := \
 override PYTHON_CONFIG := \
   $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
 
-PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+grep-libs  = $(filter -l%,$(1))
+strip-libs  = $(filter-out -l%,$(1))
 
-PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
-PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
 
-ifeq ($(CC), clang)
-  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+ifdef PYTHON_CONFIG
+  PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+  PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
+  PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
+  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+  ifeq ($(CC), clang)
+    PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+  endif
+  FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 endif
 
 FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
@@ -267,6 +274,7 @@ ifdef NO_LIBELF
   NO_LIBUNWIND := 1
   NO_LIBDW_DWARF_UNWIND := 1
   NO_LIBBPF := 1
+  NO_JVMTI := 1
 else
   ifeq ($(feature-libelf), 0)
     ifeq ($(feature-glibc), 1)
@@ -276,7 +284,7 @@ else
       LIBC_SUPPORT := 1
     endif
     ifeq ($(LIBC_SUPPORT),1)
-      msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install libelf-dev, libelf-devel or elfutils-libelf-devel);
+      msg := $(warning No libelf found. Disables 'probe' tool, jvmti and BPF support in 'perf record'. Please install libelf-dev, libelf-devel or elfutils-libelf-devel);
 
       NO_LIBELF := 1
       NO_DWARF := 1
@@ -284,6 +292,7 @@ else
       NO_LIBUNWIND := 1
       NO_LIBDW_DWARF_UNWIND := 1
       NO_LIBBPF := 1
+      NO_JVMTI := 1
     else
       ifneq ($(filter s% -static%,$(LDFLAGS),),)
         msg := $(error No static glibc found, please install glibc-static);
@@ -317,6 +326,10 @@ ifdef NO_DWARF
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
+ifeq ($(feature-sched_getcpu), 1)
+  CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT
+endif
+
 ifndef NO_LIBELF
   CFLAGS += -DHAVE_LIBELF_SUPPORT
   EXTLIBS += -lelf
@@ -550,8 +563,6 @@ ifndef NO_GTK2
   endif
 endif
 
-grep-libs  = $(filter -l%,$(1))
-strip-libs = $(filter-out -l%,$(1))
 
 ifdef NO_LIBPERL
   CFLAGS += -DNO_LIBPERL
@@ -599,21 +610,9 @@ else
       $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev)
     else
 
-      PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
-
-      PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
-      PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
-      PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
-      PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
-      ifeq ($(CC), clang)
-        PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
-      endif
-      FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
-
       ifneq ($(feature-libpython), 1)
         $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev)
       else
-
         ifneq ($(feature-libpython-version), 1)
           $(warning Python 3 is not yet supported; please set)
           $(warning PYTHON and/or PYTHON_CONFIG appropriately.)

+ 1 - 0
tools/perf/arch/arm/util/cs-etm.c

@@ -33,6 +33,7 @@
 #include "../../util/cs-etm.h"
 
 #include <stdlib.h>
+#include <sys/stat.h>
 
 #define ENABLE_SINK_MAX	128
 #define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"

+ 2 - 2
tools/perf/arch/arm/util/dwarf-regs.c

@@ -9,6 +9,7 @@
  */
 
 #include <stddef.h>
+#include <linux/stringify.h>
 #include <dwarf-regs.h>
 
 struct pt_regs_dwarfnum {
@@ -16,10 +17,9 @@ struct pt_regs_dwarfnum {
 	unsigned int dwarfnum;
 };
 
-#define STR(s) #s
 #define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
 #define GPR_DWARFNUM_NAME(num) \
-	{.name = STR(%r##num), .dwarfnum = num}
+	{.name = __stringify(%r##num), .dwarfnum = num}
 #define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
 
 /*

+ 1 - 0
tools/perf/arch/arm/util/unwind-libdw.c

@@ -1,6 +1,7 @@
 #include <elfutils/libdwfl.h>
 #include "../../util/unwind-libdw.h"
 #include "../../util/perf_regs.h"
+#include "../../util/event.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {

+ 4 - 1
tools/perf/arch/arm64/util/dwarf-regs.c

@@ -8,9 +8,12 @@
  * published by the Free Software Foundation.
  */
 
+#include <errno.h>
 #include <stddef.h>
+#include <string.h>
 #include <dwarf-regs.h>
 #include <linux/ptrace.h> /* for struct user_pt_regs */
+#include <linux/stringify.h>
 #include "util.h"
 
 struct pt_regs_dwarfnum {
@@ -20,7 +23,7 @@ struct pt_regs_dwarfnum {
 
 #define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
 #define GPR_DWARFNUM_NAME(num) \
-	{.name = STR(%x##num), .dwarfnum = num}
+	{.name = __stringify(%x##num), .dwarfnum = num}
 #define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
 #define DWARFNUM2OFFSET(index) \
 	(index * sizeof((struct user_pt_regs *)0)->regs[0])

+ 1 - 1
tools/perf/arch/arm64/util/unwind-libunwind.c

@@ -1,6 +1,6 @@
+#include <errno.h>
 
 #ifndef REMOTE_UNWIND_LIBUNWIND
-#include <errno.h>
 #include <libunwind.h>
 #include "perf_regs.h"
 #include "../../util/unwind.h"

+ 2 - 0
tools/perf/arch/common.c

@@ -4,6 +4,8 @@
 #include "../util/util.h"
 #include "../util/debug.h"
 
+#include "sane_ctype.h"
+
 const char *const arm_triplets[] = {
 	"arm-eabi-",
 	"arm-linux-androideabi-",

+ 3 - 2
tools/perf/arch/powerpc/util/dwarf-regs.c

@@ -15,6 +15,7 @@
 #include <dwarf-regs.h>
 #include <linux/ptrace.h>
 #include <linux/kernel.h>
+#include <linux/stringify.h>
 #include "util.h"
 
 struct pt_regs_dwarfnum {
@@ -24,10 +25,10 @@ struct pt_regs_dwarfnum {
 };
 
 #define REG_DWARFNUM_NAME(r, num)					\
-		{.name = STR(%)STR(r), .dwarfnum = num,			\
+		{.name = __stringify(%)__stringify(r), .dwarfnum = num,			\
 		.ptregs_offset = offsetof(struct pt_regs, r)}
 #define GPR_DWARFNUM_NAME(num)						\
-		{.name = STR(%gpr##num), .dwarfnum = num,		\
+		{.name = __stringify(%gpr##num), .dwarfnum = num,		\
 		.ptregs_offset = offsetof(struct pt_regs, gpr[num])}
 #define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
 

+ 1 - 0
tools/perf/arch/powerpc/util/kvm-stat.c

@@ -1,3 +1,4 @@
+#include <errno.h>
 #include "util/kvm-stat.h"
 #include "util/parse-events.h"
 #include "util/debug.h"

+ 112 - 0
tools/perf/arch/powerpc/util/perf_regs.c

@@ -1,5 +1,11 @@
+#include <errno.h>
+#include <string.h>
+#include <regex.h>
+
 #include "../../perf.h"
+#include "../../util/util.h"
 #include "../../util/perf_regs.h"
+#include "../../util/debug.h"
 
 const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG(r0, PERF_REG_POWERPC_R0),
@@ -47,3 +53,109 @@ const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
 	SMPL_REG_END
 };
+
+/* REG or %rREG */
+#define SDT_OP_REGEX1  "^(%r)?([1-2]?[0-9]|3[0-1])$"
+
+/* -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) */
+#define SDT_OP_REGEX2  "^(\\-)?([0-9]+)\\((%r)?([1-2]?[0-9]|3[0-1])\\)$"
+
+static regex_t sdt_op_regex1, sdt_op_regex2;
+
+static int sdt_init_op_regex(void)
+{
+	static int initialized;
+	int ret = 0;
+
+	if (initialized)
+		return 0;
+
+	ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
+	if (ret)
+		goto error;
+
+	ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
+	if (ret)
+		goto free_regex1;
+
+	initialized = 1;
+	return 0;
+
+free_regex1:
+	regfree(&sdt_op_regex1);
+error:
+	pr_debug4("Regex compilation error.\n");
+	return ret;
+}
+
+/*
+ * Parse OP and convert it into the uprobe format, which is +/-NUM(%gprREG).
+ * Possible variants of OP are:
+ *	Format		Example
+ *	-------------------------
+ *	NUM(REG)	48(18)
+ *	-NUM(REG)	-48(18)
+ *	NUM(%rREG)	48(%r18)
+ *	-NUM(%rREG)	-48(%r18)
+ *	REG		18
+ *	%rREG		%r18
+ *	iNUM		i0
+ *	i-NUM		i-1
+ *
+ * SDT marker arguments on PowerPC use the %rREG form with the -mregnames flag
+ * and the REG form with -mno-regnames. Here REG is a general purpose register
+ * number in the 0 to 31 range.
+ */
+int arch_sdt_arg_parse_op(char *old_op, char **new_op)
+{
+	int ret, new_len;
+	regmatch_t rm[5];
+	char prefix;
+
+	/* Constant argument. Uprobe does not support it */
+	if (old_op[0] == 'i') {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	ret = sdt_init_op_regex();
+	if (ret < 0)
+		return ret;
+
+	if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) {
+		/* REG or %rREG --> %gprREG */
+
+		new_len = 5;	/* % g p r NULL */
+		new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+
+		*new_op = zalloc(new_len);
+		if (!*new_op)
+			return -ENOMEM;
+
+		scnprintf(*new_op, new_len, "%%gpr%.*s",
+			(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so);
+	} else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) {
+		/*
+		 * -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) -->
+		 *	+/-NUM(%gprREG)
+		 */
+		prefix = (rm[1].rm_so == -1) ? '+' : '-';
+
+		new_len = 8;	/* +/- ( % g p r ) NULL */
+		new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+		new_len += (int)(rm[4].rm_eo - rm[4].rm_so);
+
+		*new_op = zalloc(new_len);
+		if (!*new_op)
+			return -ENOMEM;
+
+		scnprintf(*new_op, new_len, "%c%.*s(%%gpr%.*s)", prefix,
+			(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
+			(int)(rm[4].rm_eo - rm[4].rm_so), old_op + rm[4].rm_so);
+	} else {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	return SDT_ARG_VALID;
+}

+ 10 - 4
tools/perf/arch/powerpc/util/sym-handling.c

@@ -10,6 +10,7 @@
 #include "symbol.h"
 #include "map.h"
 #include "probe-event.h"
+#include "probe-file.h"
 
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
@@ -79,13 +80,18 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 	 * However, if the user specifies an offset, we fall back to using the
 	 * GEP since all userspace applications (objdump/readelf) show function
 	 * disassembly with offsets from the GEP.
-	 *
-	 * In addition, we shouldn't specify an offset for kretprobes.
 	 */
-	if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
-	    !map || !sym)
+	if (pev->point.offset || !map || !sym)
 		return;
 
+	/* For kretprobes, add an offset only if the kernel supports it */
+	if (!pev->uprobes && pev->point.retprobe) {
+#ifdef HAVE_LIBELF_SUPPORT
+		if (!kretprobe_offset_is_supported())
+#endif
+			return;
+	}
+
 	lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
 
 	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)

+ 30 - 0
tools/perf/arch/s390/annotate/instructions.c

@@ -0,0 +1,30 @@
+static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
+{
+	struct ins_ops *ops = NULL;
+
+	/* catch all kind of jumps */
+	if (strchr(name, 'j') ||
+	    !strncmp(name, "bct", 3) ||
+	    !strncmp(name, "br", 2))
+		ops = &jump_ops;
+	/* override call/returns */
+	if (!strcmp(name, "bras") ||
+	    !strcmp(name, "brasl") ||
+	    !strcmp(name, "basr"))
+		ops = &call_ops;
+	if (!strcmp(name, "br"))
+		ops = &ret_ops;
+
+	arch__associate_ins_ops(arch, name, ops);
+	return ops;
+}
+
+static int s390__annotate_init(struct arch *arch)
+{
+	if (!arch->initialized) {
+		arch->initialized = true;
+		arch->associate_instruction_ops = s390__associate_ins_ops;
+	}
+
+	return 0;
+}

+ 1 - 0
tools/perf/arch/s390/util/kvm-stat.c

@@ -9,6 +9,7 @@
  * as published by the Free Software Foundation.
  */
 
+#include <errno.h>
 #include "../../util/kvm-stat.h"
 #include <asm/sie.h>
 

+ 1 - 0
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl

@@ -338,6 +338,7 @@
 329	common	pkey_mprotect		sys_pkey_mprotect
 330	common	pkey_alloc		sys_pkey_alloc
 331	common	pkey_free		sys_pkey_free
+332	common	statx			sys_statx
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact

+ 3 - 0
tools/perf/arch/x86/tests/intel-cqm.c

@@ -6,7 +6,10 @@
 #include "evsel.h"
 #include "arch-tests.h"
 
+#include <signal.h>
 #include <sys/mman.h>
+#include <sys/wait.h>
+#include <errno.h>
 #include <string.h>
 
 static pid_t spawn(void)

+ 2 - 0
tools/perf/arch/x86/tests/perf-time-to-tsc.c

@@ -1,3 +1,5 @@
+#include <errno.h>
+#include <inttypes.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <linux/types.h>

+ 1 - 0
tools/perf/arch/x86/util/auxtrace.c

@@ -13,6 +13,7 @@
  *
  */
 
+#include <errno.h>
 #include <stdbool.h>
 
 #include "../../util/header.h"

+ 1 - 0
tools/perf/arch/x86/util/intel-bts.c

@@ -13,6 +13,7 @@
  *
  */
 
+#include <errno.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/bitops.h>

+ 1 - 0
tools/perf/arch/x86/util/intel-pt.c

@@ -13,6 +13,7 @@
  *
  */
 
+#include <errno.h>
 #include <stdbool.h>
 #include <linux/kernel.h>
 #include <linux/types.h>

+ 1 - 0
tools/perf/arch/x86/util/kvm-stat.c

@@ -1,3 +1,4 @@
+#include <errno.h>
 #include "../../util/kvm-stat.h"
 #include <asm/svm.h>
 #include <asm/vmx.h>

+ 227 - 0
tools/perf/arch/x86/util/perf_regs.c

@@ -1,5 +1,11 @@
+#include <errno.h>
+#include <string.h>
+#include <regex.h>
+
 #include "../../perf.h"
+#include "../../util/util.h"
 #include "../../util/perf_regs.h"
+#include "../../util/debug.h"
 
 const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG(AX, PERF_REG_X86_AX),
@@ -26,3 +32,224 @@ const struct sample_reg sample_reg_masks[] = {
 #endif
 	SMPL_REG_END
 };
+
+struct sdt_name_reg {
+	const char *sdt_name;
+	const char *uprobe_name;
+};
+#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m}
+#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL}
+
+static const struct sdt_name_reg sdt_reg_tbl[] = {
+	SDT_NAME_REG(eax, ax),
+	SDT_NAME_REG(rax, ax),
+	SDT_NAME_REG(al,  ax),
+	SDT_NAME_REG(ah,  ax),
+	SDT_NAME_REG(ebx, bx),
+	SDT_NAME_REG(rbx, bx),
+	SDT_NAME_REG(bl,  bx),
+	SDT_NAME_REG(bh,  bx),
+	SDT_NAME_REG(ecx, cx),
+	SDT_NAME_REG(rcx, cx),
+	SDT_NAME_REG(cl,  cx),
+	SDT_NAME_REG(ch,  cx),
+	SDT_NAME_REG(edx, dx),
+	SDT_NAME_REG(rdx, dx),
+	SDT_NAME_REG(dl,  dx),
+	SDT_NAME_REG(dh,  dx),
+	SDT_NAME_REG(esi, si),
+	SDT_NAME_REG(rsi, si),
+	SDT_NAME_REG(sil, si),
+	SDT_NAME_REG(edi, di),
+	SDT_NAME_REG(rdi, di),
+	SDT_NAME_REG(dil, di),
+	SDT_NAME_REG(ebp, bp),
+	SDT_NAME_REG(rbp, bp),
+	SDT_NAME_REG(bpl, bp),
+	SDT_NAME_REG(rsp, sp),
+	SDT_NAME_REG(esp, sp),
+	SDT_NAME_REG(spl, sp),
+
+	/* rNN registers */
+	SDT_NAME_REG(r8b,  r8),
+	SDT_NAME_REG(r8w,  r8),
+	SDT_NAME_REG(r8d,  r8),
+	SDT_NAME_REG(r9b,  r9),
+	SDT_NAME_REG(r9w,  r9),
+	SDT_NAME_REG(r9d,  r9),
+	SDT_NAME_REG(r10b, r10),
+	SDT_NAME_REG(r10w, r10),
+	SDT_NAME_REG(r10d, r10),
+	SDT_NAME_REG(r11b, r11),
+	SDT_NAME_REG(r11w, r11),
+	SDT_NAME_REG(r11d, r11),
+	SDT_NAME_REG(r12b, r12),
+	SDT_NAME_REG(r12w, r12),
+	SDT_NAME_REG(r12d, r12),
+	SDT_NAME_REG(r13b, r13),
+	SDT_NAME_REG(r13w, r13),
+	SDT_NAME_REG(r13d, r13),
+	SDT_NAME_REG(r14b, r14),
+	SDT_NAME_REG(r14w, r14),
+	SDT_NAME_REG(r14d, r14),
+	SDT_NAME_REG(r15b, r15),
+	SDT_NAME_REG(r15w, r15),
+	SDT_NAME_REG(r15d, r15),
+	SDT_NAME_REG_END,
+};
+
+/*
+ * Perf only supports an OP in the  +/-NUM(REG)  form.
+ * Here the plus/minus sign, NUM and parentheses are optional;
+ * only REG is mandatory.
+ *
+ * SDT events also support indirect addressing mode with a
+ * symbol as the offset, scaled mode and constants in OP. But
+ * perf does not support them yet. Below are a few examples.
+ *
+ * OP with scaled mode:
+ *     (%rax,%rsi,8)
+ *     10(%rax,%rsi,8)
+ *
+ * OP with indirect addressing mode:
+ *     check_action(%rip)
+ *     mp_+52(%rip)
+ *     44+mp_(%rip)
+ *
+ * OP with constant values:
+ *     $0
+ *     $123
+ *     $-1
+ */
+#define SDT_OP_REGEX  "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$"
+
+static regex_t sdt_op_regex;
+
+static int sdt_init_op_regex(void)
+{
+	static int initialized;
+	int ret = 0;
+
+	if (initialized)
+		return 0;
+
+	ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED);
+	if (ret < 0) {
+		pr_debug4("Regex compilation error.\n");
+		return ret;
+	}
+
+	initialized = 1;
+	return 0;
+}
+
+/*
+ * Max x86 register name length is 5 (e.g. %r15d), so the 6th char
+ * should always contain NULL. This helps to find the register name
+ * length using strlen, instead of maintaining one more variable.
+ */
+#define SDT_REG_NAME_SIZE  6
+
+/*
+ * The uprobe parser does not support all gas register names;
+ * so we have to replace them (e.g. for x86_64: %rax -> %ax).
+ * Note: if a register does not require renaming, just copy
+ * it as it is; don't leave it empty.
+ */
+static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg)
+{
+	int i = 0;
+
+	for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) {
+		if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) {
+			strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name);
+			return;
+		}
+	}
+
+	strncpy(uprobe_reg, sdt_reg, sdt_len);
+}
+
+int arch_sdt_arg_parse_op(char *old_op, char **new_op)
+{
+	char new_reg[SDT_REG_NAME_SIZE] = {0};
+	int new_len = 0, ret;
+	/*
+	 * rm[0]:  +/-NUM(REG)
+	 * rm[1]:  +/-
+	 * rm[2]:  NUM
+	 * rm[3]:  (
+	 * rm[4]:  REG
+	 * rm[5]:  )
+	 */
+	regmatch_t rm[6];
+	/*
+	 * Max prefix length is 2 as it may contain a sign (+/-)
+	 * and displacement 0 (both sign and displacement 0 are
+	 * optional, so it may be empty). Use one more character
+	 * to hold the trailing NULL so that strlen can be used to
+	 * find the prefix length, instead of maintaining one more variable.
+	 */
+	char prefix[3] = {0};
+
+	ret = sdt_init_op_regex();
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * If the OP is unsupported, does not match the regex, or the
+	 * register name is too long, skip it.
+	 */
+	if (strchr(old_op, ',') || strchr(old_op, '$') ||
+	    regexec(&sdt_op_regex, old_op, 6, rm, 0)   ||
+	    rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	/*
+	 * Prepare prefix.
+	 * If SDT OP has parenthesis but does not provide
+	 * displacement, add 0 for displacement.
+	 *     SDT         Uprobe     Prefix
+	 *     -----------------------------
+	 *     +24(%rdi)   +24(%di)   +
+	 *     24(%rdi)    +24(%di)   +
+	 *     %rdi        %di
+	 *     (%rdi)      +0(%di)    +0
+	 *     -80(%rbx)   -80(%bx)   -
+	 */
+	if (rm[3].rm_so != rm[3].rm_eo) {
+		if (rm[1].rm_so != rm[1].rm_eo)
+			prefix[0] = *(old_op + rm[1].rm_so);
+		else if (rm[2].rm_so != rm[2].rm_eo)
+			prefix[0] = '+';
+		else
+			strncpy(prefix, "+0", 2);
+	}
+
+	/* Rename register */
+	sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so,
+			    new_reg);
+
+	/* Prepare final OP which should be valid for uprobe_events */
+	new_len = strlen(prefix)              +
+		  (rm[2].rm_eo - rm[2].rm_so) +
+		  (rm[3].rm_eo - rm[3].rm_so) +
+		  strlen(new_reg)             +
+		  (rm[5].rm_eo - rm[5].rm_so) +
+		  1;					/* NULL */
+
+	*new_op = zalloc(new_len);
+	if (!*new_op)
+		return -ENOMEM;
+
+	scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s",
+		  strlen(prefix), prefix,
+		  (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
+		  (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so,
+		  strlen(new_reg), new_reg,
+		  (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so);
+
+	return SDT_ARG_VALID;
+}

+ 1 - 0
tools/perf/arch/x86/util/unwind-libdw.c

@@ -1,6 +1,7 @@
 #include <elfutils/libdwfl.h>
 #include "../../util/unwind-libdw.h"
 #include "../../util/perf_regs.h"
+#include "../../util/event.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {

+ 10 - 10
tools/perf/bench/bench.h

@@ -25,17 +25,17 @@
 # endif
 #endif
 
-int bench_numa(int argc, const char **argv, const char *prefix);
-int bench_sched_messaging(int argc, const char **argv, const char *prefix);
-int bench_sched_pipe(int argc, const char **argv, const char *prefix);
-int bench_mem_memcpy(int argc, const char **argv, const char *prefix);
-int bench_mem_memset(int argc, const char **argv, const char *prefix);
-int bench_futex_hash(int argc, const char **argv, const char *prefix);
-int bench_futex_wake(int argc, const char **argv, const char *prefix);
-int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix);
-int bench_futex_requeue(int argc, const char **argv, const char *prefix);
+int bench_numa(int argc, const char **argv);
+int bench_sched_messaging(int argc, const char **argv);
+int bench_sched_pipe(int argc, const char **argv);
+int bench_mem_memcpy(int argc, const char **argv);
+int bench_mem_memset(int argc, const char **argv);
+int bench_futex_hash(int argc, const char **argv);
+int bench_futex_wake(int argc, const char **argv);
+int bench_futex_wake_parallel(int argc, const char **argv);
+int bench_futex_requeue(int argc, const char **argv);
 /* pi futexes */
-int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
+int bench_futex_lock_pi(int argc, const char **argv);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
 #define BENCH_FORMAT_DEFAULT		0

Some files were not shown because too many files have changed