
Merge tag 'perf-core-for-mingo-4.12-20170503' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

Fixes:

- Support setting probes on versioned user space symbols, such as
  pthread_create@@GLIBC_2.1, picking the default version; more work is
  needed to make it possible to set probes on the other versions, as
  the 'perf probe' syntax already uses @ for other purposes
  (Paul Clarke)

- Do not special-case address zero as an error in routines that
  return addresses (symbol lookup); instead, use the return value as
  the success/error indication and pass a pointer through which the
  address is returned (see the sketch below). This fixes 'perf test
  vmlinux' (the test that compares addresses between vmlinux and
  kallsyms) on s/390, where the '_text' address is equal to zero
  (Arnaldo Carvalho de Melo)
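
  A minimal sketch of that pattern, with illustrative names rather
  than the actual tools/perf API:

	#include <errno.h>

	/*
	 * Old style: the address doubles as the error indication, so a
	 * symbol that genuinely resolves to 0 (like '_text' on s/390)
	 * is indistinguishable from "not found".
	 */
	unsigned long lookup_addr_old(const char *name);

	/*
	 * New style: the return value reports success/error, and the
	 * address, which may legitimately be 0, comes back through an
	 * out parameter.
	 */
	static int lookup_addr(const char *name, unsigned long *addr)
	{
		if (!name || !addr)
			return -EINVAL;
		/* ... perform the actual symbol lookup here ... */
		*addr = 0;	/* 0 is now a valid result */
		return 0;	/* 0 == success, < 0 == error */
	}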

Infrastructure changes:

- More header sanitization, moving code out of util.h into more
  appropriate headers and objects, sometimes creating new ones
  (Arnaldo Carvalho de Melo)

- Refactor duplicated code for obtaining the config file name (Taeung Song)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar
commit 12c1c2fd78

100 changed files with 1782 additions and 572 deletions
  1. Documentation/trace/kprobetrace.txt (+3 -2)
  2. arch/um/include/shared/os.h (+2 -2)
  3. arch/x86/entry/syscalls/syscall_32.tbl (+1 -0)
  4. arch/x86/events/amd/iommu.c (+155 -170)
  5. arch/x86/events/amd/iommu.h (+12 -6)
  6. arch/x86/events/amd/uncore.c (+31 -46)
  7. arch/x86/events/intel/bts.c (+7 -9)
  8. arch/x86/events/intel/core.c (+23 -1)
  9. arch/x86/events/intel/ds.c (+1 -1)
  10. arch/x86/events/intel/pt.c (+102 -27)
  11. arch/x86/events/intel/pt.h (+1 -1)
  12. arch/x86/events/perf_event.h (+1 -0)
  13. arch/x86/include/asm/cpufeatures.h (+1 -0)
  14. arch/x86/include/asm/kprobes.h (+3 -4)
  15. arch/x86/include/asm/msr-index.h (+8 -3)
  16. arch/x86/include/asm/processor.h (+2 -0)
  17. arch/x86/include/asm/proto.h (+3 -1)
  18. arch/x86/include/asm/thread_info.h (+5 -1)
  19. arch/x86/include/asm/tlbflush.h (+10 -0)
  20. arch/x86/include/uapi/asm/prctl.h (+7 -4)
  21. arch/x86/kernel/cpu/intel.c (+32 -8)
  22. arch/x86/kernel/kprobes/common.h (+2 -2)
  23. arch/x86/kernel/kprobes/core.c (+74 -75)
  24. arch/x86/kernel/kprobes/ftrace.c (+1 -1)
  25. arch/x86/kernel/kprobes/opt.c (+10 -3)
  26. arch/x86/kernel/process.c (+115 -36)
  27. arch/x86/kernel/process_32.c (+7 -0)
  28. arch/x86/kernel/process_64.c (+32 -16)
  29. arch/x86/kernel/ptrace.c (+4 -4)
  30. arch/x86/um/Makefile (+1 -1)
  31. arch/x86/um/asm/ptrace.h (+1 -1)
  32. arch/x86/um/os-Linux/prctl.c (+2 -2)
  33. arch/x86/um/syscalls_32.c (+7 -0)
  34. arch/x86/um/syscalls_64.c (+11 -9)
  35. drivers/hwtracing/coresight/coresight-etb10.c (+4 -5)
  36. drivers/hwtracing/coresight/coresight-etm-perf.c (+4 -5)
  37. drivers/hwtracing/coresight/coresight-priv.h (+0 -2)
  38. drivers/hwtracing/coresight/coresight-tmc-etf.c (+3 -4)
  39. drivers/iommu/amd_iommu.c (+3 -3)
  40. drivers/iommu/amd_iommu_init.c (+61 -40)
  41. drivers/iommu/amd_iommu_proto.h (+1 -7)
  42. drivers/iommu/amd_iommu_types.h (+0 -3)
  43. fs/exec.c (+1 -0)
  44. include/linux/compat.h (+2 -0)
  45. include/linux/coresight.h (+1 -1)
  46. include/linux/kprobes.h (+2 -0)
  47. include/linux/perf_event.h (+14 -3)
  48. include/linux/thread_info.h (+4 -0)
  49. include/uapi/linux/perf_event.h (+32 -1)
  50. kernel/events/core.c (+139 -0)
  51. kernel/events/ring_buffer.c (+23 -11)
  52. kernel/fork.c (+2 -0)
  53. kernel/kprobes.c (+36 -10)
  54. kernel/nsproxy.c (+3 -0)
  55. kernel/trace/Kconfig (+1 -1)
  56. kernel/trace/trace.c (+1 -0)
  57. kernel/trace/trace_kprobe.c (+3 -6)
  58. tools/arch/arm/include/uapi/asm/kvm.h (+13 -0)
  59. tools/arch/arm64/include/uapi/asm/kvm.h (+13 -0)
  60. tools/arch/powerpc/include/uapi/asm/kvm.h (+22 -0)
  61. tools/arch/x86/include/asm/atomic.h (+7 -0)
  62. tools/arch/x86/include/asm/cmpxchg.h (+89 -0)
  63. tools/arch/x86/include/asm/cpufeatures.h (+5 -4)
  64. tools/arch/x86/lib/memcpy_64.S (+1 -1)
  65. tools/build/Makefile.feature (+1 -0)
  66. tools/build/feature/Makefile (+11 -7)
  67. tools/build/feature/test-all.c (+5 -0)
  68. tools/build/feature/test-sched_getcpu.c (+7 -0)
  69. tools/include/asm-generic/atomic-gcc.h (+8 -0)
  70. tools/include/linux/atomic.h (+6 -0)
  71. tools/include/linux/bug.h (+10 -0)
  72. tools/include/linux/compiler-gcc.h (+7 -0)
  73. tools/include/linux/compiler.h (+9 -0)
  74. tools/include/linux/hashtable.h (+0 -4)
  75. tools/include/linux/kernel.h (+7 -0)
  76. tools/include/linux/log2.h (+3 -0)
  77. tools/include/linux/refcount.h (+151 -0)
  78. tools/include/linux/string.h (+2 -0)
  79. tools/include/linux/types.h (+1 -0)
  80. tools/include/uapi/linux/fcntl.h (+72 -0)
  81. tools/include/uapi/linux/perf_event.h (+32 -1)
  82. tools/include/uapi/linux/stat.h (+177 -0)
  83. tools/lib/api/fs/fs.c (+29 -0)
  84. tools/lib/api/fs/fs.h (+1 -0)
  85. tools/lib/string.c (+9 -0)
  86. tools/lib/subcmd/help.c (+1 -0)
  87. tools/lib/subcmd/help.h (+1 -0)
  88. tools/lib/subcmd/parse-options.c (+1 -0)
  89. tools/lib/subcmd/subcmd-util.h (+0 -9)
  90. tools/lib/symbol/kallsyms.c (+1 -0)
  91. tools/objtool/builtin-check.c (+1 -2)
  92. tools/objtool/objtool.c (+1 -2)
  93. tools/perf/.gitignore (+2 -0)
  94. tools/perf/Build (+1 -0)
  95. tools/perf/Documentation/perf-ftrace.txt (+18 -0)
  96. tools/perf/Documentation/perf-list.txt (+7 -1)
  97. tools/perf/Documentation/perf-record.txt (+3 -0)
  98. tools/perf/Documentation/perf-report.txt (+12 -1)
  99. tools/perf/Documentation/perf-sched.txt (+4 -0)
  100. tools/perf/Documentation/perf-script.txt (+14 -2)

+ 3 - 2
Documentation/trace/kprobetrace.txt

@@ -8,8 +8,9 @@ Overview
 --------
 These events are similar to tracepoint based events. Instead of Tracepoint,
 this is based on kprobes (kprobe and kretprobe). So it can probe wherever
-kprobes can probe (this means, all functions body except for __kprobes
-functions). Unlike the Tracepoint based event, this can be added and removed
+kprobes can probe (this means, all functions except those with
+__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
+Unlike the Tracepoint based event, this can be added and removed
 dynamically, on the fly.
 
 To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
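
As a hedged illustration of the dynamic interface described above, a
user could add such an event from C by appending to the kprobe_events
tracefs file (the path and the probed function are examples; on newer
kernels the file may live under /sys/kernel/tracing instead):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "a");

		if (!f) {
			perror("kprobe_events");
			return EXIT_FAILURE;
		}
		/* Define a kprobe event named 'myprobe' on do_sys_open;
		 * it can then be enabled and disabled on the fly under
		 * events/kprobes/myprobe/. */
		fprintf(f, "p:myprobe do_sys_open\n");
		return fclose(f) ? EXIT_FAILURE : EXIT_SUCCESS;
	}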

+ 2 - 2
arch/um/include/shared/os.h

@@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd);
 extern void maybe_sigio_broken(int fd, int read);
 extern void sigio_broken(int fd, int read);
 
-/* sys-x86_64/prctl.c */
-extern int os_arch_prctl(int pid, int code, unsigned long *addr);
+/* prctl.c */
+extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
 
 /* tty.c */
 extern int get_pty(void);

+ 1 - 0
arch/x86/entry/syscalls/syscall_32.tbl

@@ -390,3 +390,4 @@
 381	i386	pkey_alloc		sys_pkey_alloc
 382	i386	pkey_free		sys_pkey_free
 383	i386	statx			sys_statx
+384	i386	arch_prctl		sys_arch_prctl			compat_sys_arch_prctl

+ 155 - 170
arch/x86/events/amd/iommu.c

@@ -11,6 +11,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt)	"perf/amd_iommu: " fmt
+
 #include <linux/perf_event.h>
 #include <linux/init.h>
 #include <linux/cpumask.h>
@@ -21,44 +23,42 @@
 
 #define COUNTER_SHIFT		16
 
-#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
-#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
+/* iommu pmu conf masks */
+#define GET_CSOURCE(x)     ((x)->conf & 0xFFULL)
+#define GET_DEVID(x)       (((x)->conf >> 8)  & 0xFFFFULL)
+#define GET_DOMID(x)       (((x)->conf >> 24) & 0xFFFFULL)
+#define GET_PASID(x)       (((x)->conf >> 40) & 0xFFFFFULL)
 
-/* iommu pmu config masks */
-#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
-#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
-#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
-#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
-#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)  & 0xFFFFULL)
-#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
-#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+/* iommu pmu conf1 masks */
+#define GET_DEVID_MASK(x)  ((x)->conf1  & 0xFFFFULL)
+#define GET_DOMID_MASK(x)  (((x)->conf1 >> 16) & 0xFFFFULL)
+#define GET_PASID_MASK(x)  (((x)->conf1 >> 32) & 0xFFFFFULL)
 
-static struct perf_amd_iommu __perf_iommu;
+#define IOMMU_NAME_SIZE 16
 
 struct perf_amd_iommu {
+	struct list_head list;
 	struct pmu pmu;
+	struct amd_iommu *iommu;
+	char name[IOMMU_NAME_SIZE];
 	u8 max_banks;
 	u8 max_counters;
 	u64 cntr_assign_mask;
 	raw_spinlock_t lock;
-	const struct attribute_group *attr_groups[4];
 };
 
-#define format_group	attr_groups[0]
-#define cpumask_group	attr_groups[1]
-#define events_group	attr_groups[2]
-#define null_group	attr_groups[3]
+static LIST_HEAD(perf_amd_iommu_list);
 
 /*---------------------------------------------
  * sysfs format attributes
  *---------------------------------------------*/
 PMU_FORMAT_ATTR(csource,    "config:0-7");
 PMU_FORMAT_ATTR(devid,      "config:8-23");
-PMU_FORMAT_ATTR(pasid,      "config:24-39");
-PMU_FORMAT_ATTR(domid,      "config:40-55");
+PMU_FORMAT_ATTR(domid,      "config:24-39");
+PMU_FORMAT_ATTR(pasid,      "config:40-59");
 PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
-PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
-PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+PMU_FORMAT_ATTR(domid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(pasid_mask, "config1:32-51");
 
 static struct attribute *iommu_format_attrs[] = {
 	&format_attr_csource.attr,
@@ -79,6 +79,10 @@ static struct attribute_group amd_iommu_format_group = {
 /*---------------------------------------------
  * sysfs events attributes
  *---------------------------------------------*/
+static struct attribute_group amd_iommu_events_group = {
+	.name = "events",
+};
+
 struct amd_iommu_event_desc {
 	struct kobj_attribute attr;
 	const char *event;
@@ -150,30 +154,34 @@ static struct attribute_group amd_iommu_cpumask_group = {
 
 /*---------------------------------------------*/
 
-static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+static int get_next_avail_iommu_bnk_cntr(struct perf_event *event)
 {
+	struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu);
+	int max_cntrs = piommu->max_counters;
+	int max_banks = piommu->max_banks;
+	u32 shift, bank, cntr;
 	unsigned long flags;
-	int shift, bank, cntr, retval;
-	int max_banks = perf_iommu->max_banks;
-	int max_cntrs = perf_iommu->max_counters;
+	int retval;
 
-	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+	raw_spin_lock_irqsave(&piommu->lock, flags);
 
 	for (bank = 0, shift = 0; bank < max_banks; bank++) {
 		for (cntr = 0; cntr < max_cntrs; cntr++) {
 			shift = bank + (bank*3) + cntr;
-			if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+			if (piommu->cntr_assign_mask & BIT_ULL(shift)) {
 				continue;
 			} else {
-				perf_iommu->cntr_assign_mask |= (1ULL<<shift);
-				retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+				piommu->cntr_assign_mask |= BIT_ULL(shift);
+				event->hw.iommu_bank = bank;
+				event->hw.iommu_cntr = cntr;
+				retval = 0;
 				goto out;
 			}
 		}
 	}
 	retval = -ENOSPC;
 out:
-	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+	raw_spin_unlock_irqrestore(&piommu->lock, flags);
 	return retval;
 }
 
@@ -202,8 +210,6 @@ static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
 static int perf_iommu_event_init(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	struct perf_amd_iommu *perf_iommu;
-	u64 config, config1;
 
 	/* test the event attr type check for PMU enumeration */
 	if (event->attr.type != event->pmu->type)
@@ -225,80 +231,62 @@ static int perf_iommu_event_init(struct perf_event *event)
 	if (event->cpu < 0)
 		return -EINVAL;
 
-	perf_iommu = &__perf_iommu;
-
-	if (event->pmu != &perf_iommu->pmu)
-		return -ENOENT;
-
-	if (perf_iommu) {
-		config = event->attr.config;
-		config1 = event->attr.config1;
-	} else {
-		return -EINVAL;
-	}
-
-	/* integrate with iommu base devid (0000), assume one iommu */
-	perf_iommu->max_banks =
-		amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
-	perf_iommu->max_counters =
-		amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
-	if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
-		return -EINVAL;
-
 	/* update the hw_perf_event struct with the iommu config data */
-	hwc->config = config;
-	hwc->extra_reg.config = config1;
+	hwc->conf  = event->attr.config;
+	hwc->conf1 = event->attr.config1;
 
 	return 0;
 }
 
+static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev)
+{
+	return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu;
+}
+
 static void perf_iommu_enable_event(struct perf_event *ev)
 {
-	u8 csource = _GET_CSOURCE(ev);
-	u16 devid = _GET_DEVID(ev);
+	struct amd_iommu *iommu = perf_event_2_iommu(ev);
+	struct hw_perf_event *hwc = &ev->hw;
+	u8 bank = hwc->iommu_bank;
+	u8 cntr = hwc->iommu_cntr;
 	u64 reg = 0ULL;
 
-	reg = csource;
-	amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev) ,
-			 IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+	reg = GET_CSOURCE(hwc);
+	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg);
 
-	reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+	reg = GET_DEVID_MASK(hwc);
+	reg = GET_DEVID(hwc) | (reg << 32);
 	if (reg)
-		reg |= (1UL << 31);
-	amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev) ,
-			 IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+		reg |= BIT(31);
+	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);
 
-	reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+	reg = GET_PASID_MASK(hwc);
+	reg = GET_PASID(hwc) | (reg << 32);
 	if (reg)
-		reg |= (1UL << 31);
-	amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev) ,
-			 IOMMU_PC_PASID_MATCH_REG, &reg, true);
+		reg |= BIT(31);
+	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);
 
-	reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+	reg = GET_DOMID_MASK(hwc);
+	reg = GET_DOMID(hwc) | (reg << 32);
 	if (reg)
-		reg |= (1UL << 31);
-	amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev) ,
-			 IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+		reg |= BIT(31);
+	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
 }
 
 static void perf_iommu_disable_event(struct perf_event *event)
 {
+	struct amd_iommu *iommu = perf_event_2_iommu(event);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 reg = 0ULL;
 
-	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-			_GET_BANK(event), _GET_CNTR(event),
-			IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+	amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
+			     IOMMU_PC_COUNTER_SRC_REG, &reg);
 }
 
 static void perf_iommu_start(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	pr_debug("perf: amd_iommu:perf_iommu_start\n");
 	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
 		return;
 
@@ -306,10 +294,11 @@ static void perf_iommu_start(struct perf_event *event, int flags)
 	hwc->state = 0;
 
 	if (flags & PERF_EF_RELOAD) {
-		u64 prev_raw_count =  local64_read(&hwc->prev_count);
-		amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-				_GET_BANK(event), _GET_CNTR(event),
-				IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+		u64 prev_raw_count = local64_read(&hwc->prev_count);
+		struct amd_iommu *iommu = perf_event_2_iommu(event);
+
+		amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
+				     IOMMU_PC_COUNTER_REG, &prev_raw_count);
 	}
 
 	perf_iommu_enable_event(event);
@@ -319,37 +308,30 @@ static void perf_iommu_start(struct perf_event *event, int flags)
 
 static void perf_iommu_read(struct perf_event *event)
 {
-	u64 count = 0ULL;
-	u64 prev_raw_count = 0ULL;
-	u64 delta = 0ULL;
+	u64 count, prev, delta;
 	struct hw_perf_event *hwc = &event->hw;
-	pr_debug("perf: amd_iommu:perf_iommu_read\n");
+	struct amd_iommu *iommu = perf_event_2_iommu(event);
 
-	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-				_GET_BANK(event), _GET_CNTR(event),
-				IOMMU_PC_COUNTER_REG, &count, false);
+	if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
+				 IOMMU_PC_COUNTER_REG, &count))
+		return;
 
 	/* IOMMU pc counter register is only 48 bits */
-	count &= 0xFFFFFFFFFFFFULL;
+	count &= GENMASK_ULL(47, 0);
 
-	prev_raw_count =  local64_read(&hwc->prev_count);
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-					count) != prev_raw_count)
+	prev = local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev)
 		return;
 
-	/* Handling 48-bit counter overflowing */
-	delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+	/* Handle 48-bit counter overflow */
+	delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
 	delta >>= COUNTER_SHIFT;
 	local64_add(delta, &event->count);
-
 }
 
 static void perf_iommu_stop(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	u64 config;
-
-	pr_debug("perf: amd_iommu:perf_iommu_stop\n");
 
 	if (hwc->state & PERF_HES_UPTODATE)
 		return;
@@ -361,7 +343,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
 	if (hwc->state & PERF_HES_UPTODATE)
 		return;
 
-	config = hwc->config;
 	perf_iommu_read(event);
 	hwc->state |= PERF_HES_UPTODATE;
 }
@@ -369,17 +350,12 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
 static int perf_iommu_add(struct perf_event *event, int flags)
 {
 	int retval;
-	struct perf_amd_iommu *perf_iommu =
-			container_of(event->pmu, struct perf_amd_iommu, pmu);
 
-	pr_debug("perf: amd_iommu:perf_iommu_add\n");
 	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 
 	/* request an iommu bank/counter */
-	retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
-	if (retval != -ENOSPC)
-		event->hw.extra_reg.reg = (u16)retval;
-	else
+	retval = get_next_avail_iommu_bnk_cntr(event);
+	if (retval)
 		return retval;
 
 	if (flags & PERF_EF_START)
@@ -390,115 +366,124 @@ static int perf_iommu_add(struct perf_event *event, int flags)
 
 static void perf_iommu_del(struct perf_event *event, int flags)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	struct perf_amd_iommu *perf_iommu =
 			container_of(event->pmu, struct perf_amd_iommu, pmu);
 
-	pr_debug("perf: amd_iommu:perf_iommu_del\n");
 	perf_iommu_stop(event, PERF_EF_UPDATE);
 
 	/* clear the assigned iommu bank/counter */
 	clear_avail_iommu_bnk_cntr(perf_iommu,
-				     _GET_BANK(event),
-				     _GET_CNTR(event));
+				   hwc->iommu_bank, hwc->iommu_cntr);
 
 	perf_event_update_userpage(event);
 }
 
-static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+static __init int _init_events_attrs(void)
 {
-	struct attribute **attrs;
-	struct attribute_group *attr_group;
 	int i = 0, j;
+	struct attribute **attrs;
 
 	while (amd_iommu_v2_event_descs[i].attr.attr.name)
 		i++;
 
-	attr_group = kzalloc(sizeof(struct attribute *)
-		* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
-	if (!attr_group)
+	attrs = kzalloc(sizeof(struct attribute **) * (i + 1), GFP_KERNEL);
+	if (!attrs)
 		return -ENOMEM;
 
-	attrs = (struct attribute **)(attr_group + 1);
 	for (j = 0; j < i; j++)
 		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
 
-	attr_group->name = "events";
-	attr_group->attrs = attrs;
-	perf_iommu->events_group = attr_group;
-
+	amd_iommu_events_group.attrs = attrs;
 	return 0;
 }
 
-static __init void amd_iommu_pc_exit(void)
-{
-	if (__perf_iommu.events_group != NULL) {
-		kfree(__perf_iommu.events_group);
-		__perf_iommu.events_group = NULL;
-	}
-}
+const struct attribute_group *amd_iommu_attr_groups[] = {
+	&amd_iommu_format_group,
+	&amd_iommu_cpumask_group,
+	&amd_iommu_events_group,
+	NULL,
+};
+
+static struct pmu iommu_pmu = {
+	.event_init	= perf_iommu_event_init,
+	.add		= perf_iommu_add,
+	.del		= perf_iommu_del,
+	.start		= perf_iommu_start,
+	.stop		= perf_iommu_stop,
+	.read		= perf_iommu_read,
+	.task_ctx_nr	= perf_invalid_context,
+	.attr_groups	= amd_iommu_attr_groups,
+};
 
-static __init int _init_perf_amd_iommu(
-	struct perf_amd_iommu *perf_iommu, char *name)
+static __init int init_one_iommu(unsigned int idx)
 {
+	struct perf_amd_iommu *perf_iommu;
 	int ret;
 
+	perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
+	if (!perf_iommu)
+		return -ENOMEM;
+
 	raw_spin_lock_init(&perf_iommu->lock);
 
-	/* Init format attributes */
-	perf_iommu->format_group = &amd_iommu_format_group;
+	perf_iommu->pmu          = iommu_pmu;
+	perf_iommu->iommu        = get_amd_iommu(idx);
+	perf_iommu->max_banks    = amd_iommu_pc_get_max_banks(idx);
+	perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);
 
-	/* Init cpumask attributes to only core 0 */
-	cpumask_set_cpu(0, &iommu_cpumask);
-	perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
-
-	/* Init events attributes */
-	if (_init_events_attrs(perf_iommu) != 0)
-		pr_err("perf: amd_iommu: Only support raw events.\n");
+	if (!perf_iommu->iommu ||
+	    !perf_iommu->max_banks ||
+	    !perf_iommu->max_counters) {
+		kfree(perf_iommu);
+		return -EINVAL;
+	}
 
-	/* Init null attributes */
-	perf_iommu->null_group = NULL;
-	perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+	snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx);
 
-	ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
-	if (ret) {
-		pr_err("perf: amd_iommu: Failed to initialized.\n");
-		amd_iommu_pc_exit();
+	ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
+	if (!ret) {
+		pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n",
+			idx, perf_iommu->max_banks, perf_iommu->max_counters);
+		list_add_tail(&perf_iommu->list, &perf_amd_iommu_list);
 	} else {
-		pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
-			amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
-			amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+		pr_warn("Error initializing IOMMU %d.\n", idx);
+		kfree(perf_iommu);
 	}
-
 	return ret;
 }
 
-static struct perf_amd_iommu __perf_iommu = {
-	.pmu = {
-		.task_ctx_nr    = perf_invalid_context,
-		.event_init	= perf_iommu_event_init,
-		.add		= perf_iommu_add,
-		.del		= perf_iommu_del,
-		.start		= perf_iommu_start,
-		.stop		= perf_iommu_stop,
-		.read		= perf_iommu_read,
-	},
-	.max_banks		= 0x00,
-	.max_counters		= 0x00,
-	.cntr_assign_mask	= 0ULL,
-	.format_group		= NULL,
-	.cpumask_group		= NULL,
-	.events_group		= NULL,
-	.null_group		= NULL,
-};
-
 static __init int amd_iommu_pc_init(void)
 {
+	unsigned int i, cnt = 0;
+	int ret;
+
 	/* Make sure the IOMMU PC resource is available */
 	if (!amd_iommu_pc_supported())
 		return -ENODEV;
 
-	_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+	ret = _init_events_attrs();
+	if (ret)
+		return ret;
+
+	/*
+	 * An IOMMU PMU is specific to an IOMMU, and can function independently.
+	 * So we go through all IOMMUs and ignore the one that fails init
+	 * unless all IOMMU are failing.
+	 */
+	for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
+		ret = init_one_iommu(i);
+		if (!ret)
+			cnt++;
+	}
+
+	if (!cnt) {
+		kfree(amd_iommu_events_group.attrs);
+		return -ENODEV;
+	}
 
+	/* Init cpumask attributes to only core 0 */
+	cpumask_set_cpu(0, &iommu_cpumask);
 	return 0;
 }
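
The allocator in this diff packs (bank, counter) pairs into the single
u64 cntr_assign_mask with shift = bank + bank*3 + cntr, i.e. four bits
per bank, which holds as long as a bank has at most four counters. A
standalone sketch of the scheme, with illustrative names:

	#include <stdint.h>
	#include <stdio.h>

	#define BIT_ULL(n)	(1ULL << (n))

	static int alloc_bank_cntr(uint64_t *mask, int max_banks,
				   int max_cntrs, int *bank, int *cntr)
	{
		for (*bank = 0; *bank < max_banks; (*bank)++) {
			for (*cntr = 0; *cntr < max_cntrs; (*cntr)++) {
				int shift = *bank + (*bank * 3) + *cntr;

				if (*mask & BIT_ULL(shift))
					continue;	/* already taken */
				*mask |= BIT_ULL(shift);
				return 0;
			}
		}
		return -1;	/* -ENOSPC in the kernel proper */
	}

	int main(void)
	{
		uint64_t mask = 0;
		int b, c;

		/* Hand out all 2 banks x 4 counters, then fail. */
		while (!alloc_bank_cntr(&mask, 2, 4, &b, &c))
			printf("bank %d cntr %d -> mask %#llx\n",
			       b, c, (unsigned long long)mask);
		return 0;
	}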
 

+ 12 - 6
arch/x86/events/amd/iommu.h

@@ -24,17 +24,23 @@
 #define PC_MAX_SPEC_BNKS			64
 #define PC_MAX_SPEC_CNTRS			16
 
-/* iommu pc reg masks*/
-#define IOMMU_BASE_DEVID			0x0000
+struct amd_iommu;
 
 /* amd_iommu_init.c external support functions */
+extern int amd_iommu_get_num_iommus(void);
+
 extern bool amd_iommu_pc_supported(void);
 
-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
+
+extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
+
+extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+				u8 fxn, u64 *value);
 
-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+				u8 fxn, u64 *value);
 
-extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
-			u8 fxn, u64 *value, bool is_write);
+extern struct amd_iommu *get_amd_iommu(int idx);
 
 #endif /*_PERF_EVENT_AMD_IOMMU_H_*/

+ 31 - 46
arch/x86/events/amd/uncore.c

@@ -30,6 +30,9 @@
 
 #define COUNTER_SHIFT		16
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"amd_uncore: " fmt
+
 static int num_counters_llc;
 static int num_counters_nb;
 
@@ -509,51 +512,34 @@ static int __init amd_uncore_init(void)
 	int ret = -ENODEV;
 
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
-		goto fail_nodev;
-
-	switch(boot_cpu_data.x86) {
-		case 23:
-			/* Family 17h: */
-			num_counters_nb = NUM_COUNTERS_NB;
-			num_counters_llc = NUM_COUNTERS_L3;
-			/*
-			 * For Family17h, the NorthBridge counters are
-			 * re-purposed as Data Fabric counters. Also, support is
-			 * added for L3 counters. The pmus are exported based on
-			 * family as either L2 or L3 and NB or DF.
-			 */
-			amd_nb_pmu.name = "amd_df";
-			amd_llc_pmu.name = "amd_l3";
-			format_attr_event_df.show = &event_show_df;
-			format_attr_event_l3.show = &event_show_l3;
-			break;
-		case 22:
-			/* Family 16h - may change: */
-			num_counters_nb = NUM_COUNTERS_NB;
-			num_counters_llc = NUM_COUNTERS_L2;
-			amd_nb_pmu.name = "amd_nb";
-			amd_llc_pmu.name = "amd_l2";
-			format_attr_event_df = format_attr_event;
-			format_attr_event_l3 = format_attr_event;
-			break;
-		default:
-			/*
-			 * All prior families have the same number of
-			 * NorthBridge and Last Level Cache counters
-			 */
-			num_counters_nb = NUM_COUNTERS_NB;
-			num_counters_llc = NUM_COUNTERS_L2;
-			amd_nb_pmu.name = "amd_nb";
-			amd_llc_pmu.name = "amd_l2";
-			format_attr_event_df = format_attr_event;
-			format_attr_event_l3 = format_attr_event;
-			break;
-	}
-	amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
-	amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
+		return -ENODEV;
 
 	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
-		goto fail_nodev;
+		return -ENODEV;
+
+	if (boot_cpu_data.x86 == 0x17) {
+		/*
+		 * For F17h, the Northbridge counters are repurposed as Data
+		 * Fabric counters. Also, L3 counters are supported too. The PMUs
+		 * are exported based on  family as either L2 or L3 and NB or DF.
+		 */
+		num_counters_nb		  = NUM_COUNTERS_NB;
+		num_counters_llc	  = NUM_COUNTERS_L3;
+		amd_nb_pmu.name		  = "amd_df";
+		amd_llc_pmu.name	  = "amd_l3";
+		format_attr_event_df.show = &event_show_df;
+		format_attr_event_l3.show = &event_show_l3;
+	} else {
+		num_counters_nb		  = NUM_COUNTERS_NB;
+		num_counters_llc	  = NUM_COUNTERS_L2;
+		amd_nb_pmu.name		  = "amd_nb";
+		amd_llc_pmu.name	  = "amd_l2";
+		format_attr_event_df	  = format_attr_event;
+		format_attr_event_l3	  = format_attr_event;
+	}
+
+	amd_nb_pmu.attr_groups	= amd_uncore_attr_groups_df;
+	amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
 
 	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
 		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
@@ -565,7 +551,7 @@ static int __init amd_uncore_init(void)
 		if (ret)
 			goto fail_nb;
 
-		pr_info("perf: AMD NB counters detected\n");
+		pr_info("AMD NB counters detected\n");
 		ret = 0;
 	}
 
@@ -579,7 +565,7 @@ static int __init amd_uncore_init(void)
 		if (ret)
 			goto fail_llc;
 
-		pr_info("perf: AMD LLC counters detected\n");
+		pr_info("AMD LLC counters detected\n");
 		ret = 0;
 	}
 
@@ -615,7 +601,6 @@ fail_nb:
 	if (amd_uncore_nb)
 		free_percpu(amd_uncore_nb);
 
-fail_nodev:
 	return ret;
 }
 device_initcall(amd_uncore_init);

+ 7 - 9
arch/x86/events/intel/bts.c

@@ -63,7 +63,6 @@ struct bts_buffer {
 	unsigned int	cur_buf;
 	bool		snapshot;
 	local_t		data_size;
-	local_t		lost;
 	local_t		head;
 	unsigned long	end;
 	void		**data_pages;
@@ -199,7 +198,8 @@ static void bts_update(struct bts_ctx *bts)
 			return;
 
 		if (ds->bts_index >= ds->bts_absolute_maximum)
-			local_inc(&buf->lost);
+			perf_aux_output_flag(&bts->handle,
+			                     PERF_AUX_FLAG_TRUNCATED);
 
 		/*
 		 * old and head are always in the same physical buffer, so we
@@ -276,7 +276,7 @@ static void bts_event_start(struct perf_event *event, int flags)
 	return;
 
 fail_end_stop:
-	perf_aux_output_end(&bts->handle, 0, false);
+	perf_aux_output_end(&bts->handle, 0);
 
 fail_stop:
 	event->hw.state = PERF_HES_STOPPED;
@@ -319,9 +319,8 @@ static void bts_event_stop(struct perf_event *event, int flags)
 				bts->handle.head =
 					local_xchg(&buf->data_size,
 						   buf->nr_pages << PAGE_SHIFT);
-
-			perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-					    !!local_xchg(&buf->lost, 0));
+			perf_aux_output_end(&bts->handle,
+			                    local_xchg(&buf->data_size, 0));
 		}
 
 		cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
@@ -484,8 +483,7 @@ int intel_bts_interrupt(void)
 	if (old_head == local_read(&buf->head))
 		return handled;
 
-	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-			    !!local_xchg(&buf->lost, 0));
+	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0));
 
 	buf = perf_aux_output_begin(&bts->handle, event);
 	if (buf)
@@ -500,7 +498,7 @@ int intel_bts_interrupt(void)
 			 * cleared handle::event
 			 */
 			barrier();
-			perf_aux_output_end(&bts->handle, 0, false);
+			perf_aux_output_end(&bts->handle, 0);
 		}
 	}
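
The same conversion recurs in pt.c further down: instead of a
driver-private 'lost' counter handed to perf_aux_output_end() as a
bool, the driver now raises a flag on the AUX handle and the core
carries it into the final record. A sketch of the two calling
conventions ('size' stands for the data_size xchg seen above):

	/* Old: private accounting plus a 'truncated' bool at the end. */
	local_inc(&buf->lost);
	perf_aux_output_end(&bts->handle, size, !!local_xchg(&buf->lost, 0));

	/* New: flag the handle when the condition is detected ... */
	perf_aux_output_flag(&bts->handle, PERF_AUX_FLAG_TRUNCATED);
	/* ... and the flags travel with the handle from then on. */
	perf_aux_output_end(&bts->handle, size);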
 

+ 23 - 1
arch/x86/events/intel/core.c

@@ -1553,6 +1553,27 @@ static __initconst const u64 slm_hw_cache_event_ids
  },
 };
 
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3");
+/* UOPS_NOT_DELIVERED.ANY */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c");
+/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
+EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02");
+/* UOPS_RETIRED.ANY */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2");
+/* UOPS_ISSUED.ANY */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e");
+
+static struct attribute *glm_events_attrs[] = {
+	EVENT_PTR(td_total_slots_glm),
+	EVENT_PTR(td_total_slots_scale_glm),
+	EVENT_PTR(td_fetch_bubbles_glm),
+	EVENT_PTR(td_recovery_bubbles_glm),
+	EVENT_PTR(td_slots_issued_glm),
+	EVENT_PTR(td_slots_retired_glm),
+	NULL
+};
+
 static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
 	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
@@ -2130,7 +2151,7 @@ again:
 	 * counters from the GLOBAL_STATUS mask and we always process PEBS
 	 * events via drain_pebs().
 	 */
-	status &= ~cpuc->pebs_enabled;
+	status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
 
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
@@ -3750,6 +3771,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.lbr_pt_coexist = true;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.cpu_events = glm_events_attrs;
 		pr_cont("Goldmont events, ");
 		break;
 

+ 1 - 1
arch/x86/events/intel/ds.c

@@ -1222,7 +1222,7 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
 
 			/* clear non-PEBS bit and re-check */
 			pebs_status = p->status & cpuc->pebs_enabled;
-			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+			pebs_status &= PEBS_COUNTER_MASK;
 			if (pebs_status == (1 << bit))
 				return at;
 		}
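
For context on this hunk and the matching one in core.c above:
cpuc->pebs_enabled carries more than just counter-enable bits, so only
its low MAX_PEBS_EVENTS bits may be used when subtracting PEBS counters
from the global status. A short illustration of the mask arithmetic
(the note about the upper bits is an assumption):

	#define MAX_PEBS_EVENTS		8
	#define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)	/* 0xff */

	/* Bits 0-7 select PEBS counters; the upper bits serve other
	 * PEBS features and must not clear status bits of unrelated
	 * counters: */
	status &= ~(pebs_enabled & PEBS_COUNTER_MASK);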

+ 102 - 27
arch/x86/events/intel/pt.c

@@ -28,6 +28,7 @@
 #include <asm/insn.h>
 #include <asm/io.h>
 #include <asm/intel_pt.h>
+#include <asm/intel-family.h>
 
 #include "../perf_event.h"
 #include "pt.h"
@@ -98,6 +99,7 @@ static struct attribute_group pt_cap_group = {
 	.name	= "caps",
 };
 
+PMU_FORMAT_ATTR(pt,		"config:0"	);
 PMU_FORMAT_ATTR(cyc,		"config:1"	);
 PMU_FORMAT_ATTR(pwr_evt,	"config:4"	);
 PMU_FORMAT_ATTR(fup_on_ptw,	"config:5"	);
@@ -105,11 +107,13 @@ PMU_FORMAT_ATTR(mtc,		"config:9"	);
 PMU_FORMAT_ATTR(tsc,		"config:10"	);
 PMU_FORMAT_ATTR(noretcomp,	"config:11"	);
 PMU_FORMAT_ATTR(ptw,		"config:12"	);
+PMU_FORMAT_ATTR(branch,		"config:13"	);
 PMU_FORMAT_ATTR(mtc_period,	"config:14-17"	);
 PMU_FORMAT_ATTR(cyc_thresh,	"config:19-22"	);
 PMU_FORMAT_ATTR(psb_period,	"config:24-27"	);
 
 static struct attribute *pt_formats_attr[] = {
+	&format_attr_pt.attr,
 	&format_attr_cyc.attr,
 	&format_attr_pwr_evt.attr,
 	&format_attr_fup_on_ptw.attr,
@@ -117,6 +121,7 @@ static struct attribute *pt_formats_attr[] = {
 	&format_attr_tsc.attr,
 	&format_attr_noretcomp.attr,
 	&format_attr_ptw.attr,
+	&format_attr_branch.attr,
 	&format_attr_mtc_period.attr,
 	&format_attr_cyc_thresh.attr,
 	&format_attr_psb_period.attr,
@@ -197,6 +202,19 @@ static int __init pt_pmu_hw_init(void)
 		pt_pmu.tsc_art_den = eax;
 	}
 
+	/* model-specific quirks */
+	switch (boot_cpu_data.x86_model) {
+	case INTEL_FAM6_BROADWELL_CORE:
+	case INTEL_FAM6_BROADWELL_XEON_D:
+	case INTEL_FAM6_BROADWELL_GT3E:
+	case INTEL_FAM6_BROADWELL_X:
+		/* not setting BRANCH_EN will #GP, erratum BDM106 */
+		pt_pmu.branch_en_always_on = true;
+		break;
+	default:
+		break;
+	}
+
 	if (boot_cpu_has(X86_FEATURE_VMX)) {
 		/*
 		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
@@ -263,8 +281,20 @@ fail:
 #define RTIT_CTL_PTW	(RTIT_CTL_PTW_EN	| \
 			 RTIT_CTL_FUP_ON_PTW)
 
-#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN		| \
+/*
+ * Bit 0 (TraceEn) in the attr.config is meaningless as the
+ * corresponding bit in the RTIT_CTL can only be controlled
+ * by the driver; therefore, repurpose it to mean: pass
+ * through the bit that was previously assumed to be always
+ * on for PT, thereby allowing the user to *not* set it if
+ * they so wish. See also pt_event_valid() and pt_config().
+ */
+#define RTIT_CTL_PASSTHROUGH RTIT_CTL_TRACEEN
+
+#define PT_CONFIG_MASK (RTIT_CTL_TRACEEN	| \
+			RTIT_CTL_TSC_EN		| \
 			RTIT_CTL_DISRETC	| \
+			RTIT_CTL_BRANCH_EN	| \
 			RTIT_CTL_CYC_PSB	| \
 			RTIT_CTL_MTC		| \
 			RTIT_CTL_PWR_EVT_EN	| \
@@ -332,6 +362,33 @@ static bool pt_event_valid(struct perf_event *event)
 			return false;
 	}
 
+	/*
+	 * Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config
+	 * clears the assumption that BranchEn must always be enabled,
+	 * as was the case with the first implementation of PT.
+	 * If this bit is not set, the legacy behavior is preserved
+	 * for compatibility with the older userspace.
+	 *
+	 * Re-using bit 0 for this purpose is fine because it is never
+	 * directly set by the user; previous attempts at setting it in
+	 * the attr.config resulted in -EINVAL.
+	 */
+	if (config & RTIT_CTL_PASSTHROUGH) {
+		/*
+		 * Disallow not setting BRANCH_EN where BRANCH_EN is
+		 * always required.
+		 */
+		if (pt_pmu.branch_en_always_on &&
+		    !(config & RTIT_CTL_BRANCH_EN))
+			return false;
+	} else {
+		/*
+		 * Disallow BRANCH_EN without the PASSTHROUGH.
+		 */
+		if (config & RTIT_CTL_BRANCH_EN)
+			return false;
+	}
+
 	return true;
 }
 
@@ -411,6 +468,7 @@ static u64 pt_config_filters(struct perf_event *event)
 
 static void pt_config(struct perf_event *event)
 {
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	u64 reg;
 
 	if (!event->hw.itrace_started) {
@@ -419,7 +477,20 @@ static void pt_config(struct perf_event *event)
 	}
 
 	reg = pt_config_filters(event);
-	reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+	reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
+
+	/*
+	 * Previously, we had BRANCH_EN on by default, but now that PT has
+	 * grown features outside of branch tracing, it is useful to allow
+	 * the user to disable it. Setting bit 0 in the event's attr.config
+	 * allows BRANCH_EN to pass through instead of being always on. See
+	 * also the comment in pt_event_valid().
+	 */
+	if (event->attr.config & BIT(0)) {
+		reg |= event->attr.config & RTIT_CTL_BRANCH_EN;
+	} else {
+		reg |= RTIT_CTL_BRANCH_EN;
+	}
 
 	if (!event->attr.exclude_kernel)
 		reg |= RTIT_CTL_OS;
@@ -429,11 +500,15 @@ static void pt_config(struct perf_event *event)
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
 	event->hw.config = reg;
-	wrmsrl(MSR_IA32_RTIT_CTL, reg);
+	if (READ_ONCE(pt->vmx_on))
+		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
+	else
+		wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
 static void pt_config_stop(struct perf_event *event)
 {
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	u64 ctl = READ_ONCE(event->hw.config);
 
 	/* may be already stopped by a PMI */
@@ -441,7 +516,8 @@ static void pt_config_stop(struct perf_event *event)
 		return;
 
 	ctl &= ~RTIT_CTL_TRACEEN;
-	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+	if (!READ_ONCE(pt->vmx_on))
+		wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
 	WRITE_ONCE(event->hw.config, ctl);
 
@@ -753,7 +829,8 @@ static void pt_handle_status(struct pt *pt)
 		 */
 		if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
 		    buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
-			local_inc(&buf->lost);
+			perf_aux_output_flag(&pt->handle,
+			                     PERF_AUX_FLAG_TRUNCATED);
 			advance++;
 		}
 	}
@@ -846,8 +923,10 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
 
 	/* can't stop in the middle of an output region */
 	if (buf->output_off + handle->size + 1 <
-	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
+	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 		return -EINVAL;
+	}
 
 
 	/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
@@ -1171,12 +1250,6 @@ void intel_pt_interrupt(void)
 	if (!READ_ONCE(pt->handle_nmi))
 		return;
 
-	/*
-	 * If VMX is on and PT does not support it, don't touch anything.
-	 */
-	if (READ_ONCE(pt->vmx_on))
-		return;
-
 	if (!event)
 		return;
 
@@ -1192,8 +1265,7 @@ void intel_pt_interrupt(void)
 
 	pt_update_head(pt);
 
-	perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
-			    local_xchg(&buf->lost, 0));
+	perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
 
 	if (!event->hw.state) {
 		int ret;
@@ -1208,7 +1280,7 @@ void intel_pt_interrupt(void)
 		/* snapshot counters don't use PMI, so it's safe */
 		ret = pt_buffer_reset_markers(buf, &pt->handle);
 		if (ret) {
-			perf_aux_output_end(&pt->handle, 0, true);
+			perf_aux_output_end(&pt->handle, 0);
 			return;
 		}
 
@@ -1237,12 +1309,19 @@ void intel_pt_handle_vmx(int on)
 	local_irq_save(flags);
 	WRITE_ONCE(pt->vmx_on, on);
 
-	if (on) {
-		/* prevent pt_config_stop() from writing RTIT_CTL */
-		event = pt->handle.event;
-		if (event)
-			event->hw.config = 0;
-	}
+	/*
+	 * If an AUX transaction is in progress, it will contain
+	 * gap(s), so flag it PARTIAL to inform the user.
+	 */
+	event = pt->handle.event;
+	if (event)
+		perf_aux_output_flag(&pt->handle,
+		                     PERF_AUX_FLAG_PARTIAL);
+
+	/* Turn PTs back on */
+	if (!on && event)
+		wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config);
+
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
@@ -1257,9 +1336,6 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf;
 
-	if (READ_ONCE(pt->vmx_on))
-		return;
-
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf)
 		goto fail_stop;
@@ -1280,7 +1356,7 @@ static void pt_event_start(struct perf_event *event, int mode)
 	return;
 
 fail_end_stop:
-	perf_aux_output_end(&pt->handle, 0, true);
+	perf_aux_output_end(&pt->handle, 0);
 fail_stop:
 	hwc->state = PERF_HES_STOPPED;
 }
@@ -1321,8 +1397,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
 			pt->handle.head =
 				local_xchg(&buf->data_size,
 					   buf->nr_pages << PAGE_SHIFT);
-		perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
-				    local_xchg(&buf->lost, 0));
+		perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
 	}
 }
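
Seen from user space, the repurposed bit 0 described in the comments
above would be set through perf_event_attr.config. A hedged sketch
(the PMU type value is read from sysfs; the bit positions follow the
format attributes added in this diff):

	struct perf_event_attr attr = {
		/* from /sys/bus/event_source/devices/intel_pt/type */
		.type	= intel_pt_type,
		.size	= sizeof(attr),
		/*
		 * Bit 0 ("pt") = pass-through: honor BRANCH_EN as given
		 * instead of forcing it on. With bit 13 ("branch") left
		 * clear, branch tracing is disabled -- permitted only on
		 * parts without the BDM106 erratum.
		 */
		.config	= 1ULL << 0,
	};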
 

+ 1 - 1
arch/x86/events/intel/pt.h

@@ -110,6 +110,7 @@ struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 	bool			vmx;
+	bool			branch_en_always_on;
 	unsigned long		max_nonturbo_ratio;
 	unsigned int		tsc_art_num;
 	unsigned int		tsc_art_den;
@@ -143,7 +144,6 @@ struct pt_buffer {
 	size_t			output_off;
 	unsigned long		nr_pages;
 	local_t			data_size;
-	local_t			lost;
 	local64_t		head;
 	bool			snapshot;
 	unsigned long		stop_pos, intr_pos;

+ 1 - 0
arch/x86/events/perf_event.h

@@ -79,6 +79,7 @@ struct amd_nb {
 
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS		8
+#define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
 
 /*
 * Flags PEBS can handle without a PMI.

+ 1 - 0
arch/x86/include/asm/cpufeatures.h

@@ -187,6 +187,7 @@
  * Reuse free bits when adding new feature flags!
  */
 #define X86_FEATURE_RING3MWAIT	( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
 #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
 #define X86_FEATURE_CAT_L3	( 7*32+ 4) /* Cache Allocation Technology L3 */

+ 3 - 4
arch/x86/include/asm/kprobes.h

@@ -72,14 +72,13 @@ struct arch_specific_insn {
 	/* copy of the original instruction */
 	kprobe_opcode_t *insn;
 	/*
-	 * boostable = -1: This instruction type is not boostable.
-	 * boostable = 0: This instruction type is boostable.
-	 * boostable = 1: This instruction has been boosted: we have
+	 * boostable = false: This instruction type is not boostable.
+	 * boostable = true: This instruction has been boosted: we have
 	 * added a relative jump after the instruction copy in insn,
 	 * so no single-step and fixup are needed (unless there's
 	 * a post_handler or break_handler).
 	 */
-	int boostable;
+	bool boostable;
 	bool if_modifier;
 };
 

+ 8 - 3
arch/x86/include/asm/msr-index.h

@@ -45,6 +45,8 @@
 #define MSR_IA32_PERFCTR1		0x000000c2
 #define MSR_FSB_FREQ			0x000000cd
 #define MSR_PLATFORM_INFO		0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT	31
+#define MSR_PLATFORM_INFO_CPUID_FAULT		BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
 
 #define MSR_PKG_CST_CONFIG_CONTROL	0x000000e2
 #define NHM_C3_AUTO_DEMOTE		(1UL << 25)
@@ -127,6 +129,7 @@
 
 /* DEBUGCTLMSR bits (others vary by model): */
 #define DEBUGCTLMSR_LBR			(1UL <<  0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT		1
 #define DEBUGCTLMSR_BTF			(1UL <<  1) /* single-step on branches */
 #define DEBUGCTLMSR_TR			(1UL <<  6)
 #define DEBUGCTLMSR_BTS			(1UL <<  7)
@@ -552,10 +555,12 @@
 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT	39
 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
 
-/* MISC_FEATURE_ENABLES non-architectural features */
-#define MSR_MISC_FEATURE_ENABLES	0x00000140
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES	0x00000140
 
-#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT		1
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT	0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT		BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT	1
 
 #define MSR_IA32_TSC_DEADLINE		0x000006E0
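
The new DEBUGCTLMSR_BTF_SHIFT presumably exists so a task flag can be
propagated into DEBUGCTL without a conditional branch; roughly (an
illustration, not necessarily the exact code in this merge):

	debugctl &= ~DEBUGCTLMSR_BTF;
	msk = next_tif & _TIF_BLOCKSTEP;
	debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;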
 

+ 2 - 0
arch/x86/include/asm/processor.h

@@ -884,6 +884,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
+DECLARE_PER_CPU(u64, msr_misc_features_shadow);
+
 /* Register/unregister a process' MPX related resource */
 #define MPX_ENABLE_MANAGEMENT()	mpx_enable_management()
 #define MPX_DISABLE_MANAGEMENT()	mpx_disable_management()

+ 3 - 1
arch/x86/include/asm/proto.h

@@ -9,6 +9,7 @@ void syscall_init(void);
 
 #ifdef CONFIG_X86_64
 void entry_SYSCALL_64(void);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
 #endif
 
 #ifdef CONFIG_X86_32
@@ -30,6 +31,7 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+long do_arch_prctl_common(struct task_struct *task, int option,
+			  unsigned long cpuid_enabled);
 
 #endif /* _ASM_X86_PROTO_H */

+ 5 - 1
arch/x86/include/asm/thread_info.h

@@ -87,6 +87,7 @@ struct thread_info {
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
+#define TIF_NOCPUID		15	/* CPUID is not accessible in userland */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_NOHZ		19	/* in adaptive nohz mode */
@@ -110,6 +111,7 @@ struct thread_info {
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
+#define _TIF_NOCPUID		(1 << TIF_NOCPUID)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_NOHZ		(1 << TIF_NOHZ)
@@ -138,7 +140,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack,
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_THREAD_INFO_H */

+ 10 - 0
arch/x86/include/asm/tlbflush.h

@@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask)
 	}
 }
 
+static inline void cr4_toggle_bits(unsigned long mask)
+{
+	unsigned long cr4;
+
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	cr4 ^= mask;
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+	__write_cr4(cr4);
+}
+
 /* Read the CR4 shadow. */
 static inline unsigned long cr4_read_shadow(void)
 {
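
A hypothetical caller of the new helper, in the spirit of the
NOTSC/NOCPUID work elsewhere in this merge: flip CR4.TSD only when the
outgoing and incoming tasks disagree, rather than unconditionally
clearing and setting it.

	if ((prev_tif ^ next_tif) & _TIF_NOTSC)
		cr4_toggle_bits(X86_CR4_TSD);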

+ 7 - 4
arch/x86/include/uapi/asm/prctl.h

@@ -1,10 +1,13 @@
 #ifndef _ASM_X86_PRCTL_H
 #define _ASM_X86_PRCTL_H
 
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS		0x1001
+#define ARCH_SET_FS		0x1002
+#define ARCH_GET_FS		0x1003
+#define ARCH_GET_GS		0x1004
+
+#define ARCH_GET_CPUID		0x1011
+#define ARCH_SET_CPUID		0x1012
 
 #define ARCH_MAP_VDSO_X32	0x2001
 #define ARCH_MAP_VDSO_32	0x2002
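
A small, hedged user-space sketch of the new pair (the exact semantics
are assumed: arg2 == 0 makes CPUID fault with SIGSEGV, arg2 == 1
re-enables it, and ARCH_GET_CPUID returns the current state):

	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#ifndef ARCH_GET_CPUID
	#define ARCH_GET_CPUID	0x1011
	#define ARCH_SET_CPUID	0x1012
	#endif

	/* glibc has no wrapper for arch_prctl(2); go through syscall(2). */
	static long xarch_prctl(int option, unsigned long arg2)
	{
		return syscall(SYS_arch_prctl, option, arg2);
	}

	int main(void)
	{
		if (xarch_prctl(ARCH_SET_CPUID, 0))
			perror("ARCH_SET_CPUID (needs CPUID-faulting support)");
		printf("CPUID is %s\n",
		       xarch_prctl(ARCH_GET_CPUID, 0) == 1 ? "enabled" : "faulting");
		return 0;
	}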

+ 32 - 8
arch/x86/kernel/cpu/intel.c

@@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
 		return;
 	}
 
-	if (ring3mwait_disabled) {
-		msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
-			      MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+	if (ring3mwait_disabled)
 		return;
-	}
-
-	msr_set_bit(MSR_MISC_FEATURE_ENABLES,
-		    MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
 
 	set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
+	this_cpu_or(msr_misc_features_shadow,
+		    1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
 
 	if (c == &boot_cpu_data)
 		ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
@@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c)
 	init_intel_energy_perf(c);
 }
 
+static void init_cpuid_fault(struct cpuinfo_x86 *c)
+{
+	u64 msr;
+
+	if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
+		if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
+			set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
+	}
+}
+
+static void init_intel_misc_features(struct cpuinfo_x86 *c)
+{
+	u64 msr;
+
+	if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
+		return;
+
+	/* Clear all MISC features */
+	this_cpu_write(msr_misc_features_shadow, 0);
+
+	/* Check features and update capabilities and shadow control bits */
+	init_cpuid_fault(c);
+	probe_xeon_phi_r3mwait(c);
+
+	msr = this_cpu_read(msr_misc_features_shadow);
+	wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
+}
+
 static void init_intel(struct cpuinfo_x86 *c)
 {
 	unsigned int l2 = 0;
@@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c)
 
 	init_intel_energy_perf(c);
 
-	probe_xeon_phi_r3mwait(c);
+	init_intel_misc_features(c);
 }
 
 #ifdef CONFIG_X86_32

+ 2 - 2
arch/x86/kernel/kprobes/common.h

@@ -67,7 +67,7 @@
 #endif
 
 /* Ensure if the instruction can be boostable */
-extern int can_boost(kprobe_opcode_t *instruction, void *addr);
+extern int can_boost(struct insn *insn, void *orig_addr);
 /* Recover instruction if given address is probed */
 extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
 					 unsigned long addr);
@@ -75,7 +75,7 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
  * Copy an instruction and adjust the displacement if the instruction
  * uses the %rip-relative addressing mode.
  */
-extern int __copy_instruction(u8 *dest, u8 *src);
+extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn);
 
 /* Generate a relative-jump/call instruction */
 extern void synthesize_reljump(void *from, void *to);

+ 74 - 75
arch/x86/kernel/kprobes/core.c

@@ -164,42 +164,38 @@ static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
 NOKPROBE_SYMBOL(skip_prefixes);
 
 /*
- * Returns non-zero if opcode is boostable.
+ * Returns non-zero if INSN is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-int can_boost(kprobe_opcode_t *opcodes, void *addr)
+int can_boost(struct insn *insn, void *addr)
 {
 	kprobe_opcode_t opcode;
-	kprobe_opcode_t *orig_opcodes = opcodes;
 
 	if (search_exception_tables((unsigned long)addr))
 		return 0;	/* Page fault may occur on this address. */
 
-retry:
-	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
-		return 0;
-	opcode = *(opcodes++);
-
 	/* 2nd-byte opcode */
-	if (opcode == 0x0f) {
-		if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
-			return 0;
-		return test_bit(*opcodes,
+	if (insn->opcode.nbytes == 2)
+		return test_bit(insn->opcode.bytes[1],
 				(unsigned long *)twobyte_is_boostable);
-	}
+
+	if (insn->opcode.nbytes != 1)
+		return 0;
+
+	/* Can't boost Address-size override prefix */
+	if (unlikely(inat_is_address_size_prefix(insn->attr)))
+		return 0;
+
+	opcode = insn->opcode.bytes[0];
 
 	switch (opcode & 0xf0) {
-#ifdef CONFIG_X86_64
-	case 0x40:
-		goto retry; /* REX prefix is boostable */
-#endif
 	case 0x60:
-		if (0x63 < opcode && opcode < 0x67)
-			goto retry; /* prefixes */
-		/* can't boost Address-size override and bound */
-		return (opcode != 0x62 && opcode != 0x67);
+		/* can't boost "bound" */
+		return (opcode != 0x62);
 	case 0x70:
 		return 0; /* can't boost conditional jump */
+	case 0x90:
+		return opcode != 0x9a;	/* can't boost call far */
 	case 0xc0:
 		/* can't boost software-interruptions */
 		return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
@@ -210,14 +206,9 @@ retry:
 		/* can boost in/out and absolute jmps */
 		return ((opcode & 0x04) || opcode == 0xea);
 	case 0xf0:
-		if ((opcode & 0x0c) == 0 && opcode != 0xf1)
-			goto retry; /* lock/rep(ne) prefix */
 		/* clear and set flags are boostable */
 		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
 	default:
-		/* segment override prefixes are boostable */
-		if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
-			goto retry; /* prefixes */
 		/* CS override prefix and call are not boostable */
 		return (opcode != 0x2e && opcode != 0x9a);
 	}
@@ -264,7 +255,10 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
 	 * Fortunately, we know that the original code is the ideal 5-byte
 	 * long NOP.
 	 */
-	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	if (probe_kernel_read(buf, (void *)addr,
+		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+		return 0UL;
+
 	if (faddr)
 		memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
 	else
@@ -276,7 +270,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
  * Recover the probed instruction at addr for further analysis.
  * Caller must lock kprobes by kprobe_mutex, or disable preemption
  * for preventing to release referencing kprobes.
- * Returns zero if the instruction can not get recovered.
+ * Returns zero if the instruction can not get recovered (or access failed).
  */
 unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
 {
@@ -348,37 +342,36 @@ static int is_IF_modifier(kprobe_opcode_t *insn)
 }
 
 /*
- * Copy an instruction and adjust the displacement if the instruction
- * uses the %rip-relative addressing mode.
- * If it does, Return the address of the 32-bit displacement word.
- * If not, return null.
- * Only applicable to 64-bit x86.
+ * Copy an instruction with recovering modified instruction by kprobes
+ * and adjust the displacement if the instruction uses the %rip-relative
+ * addressing mode.
+ * This returns the length of copied instruction, or 0 if it has an error.
  */
-int __copy_instruction(u8 *dest, u8 *src)
+int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
 {
-	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
-	int length;
 	unsigned long recovered_insn =
 		recover_probed_instruction(buf, (unsigned long)src);
 
-	if (!recovered_insn)
+	if (!recovered_insn || !insn)
+		return 0;
+
+	/* This can access kernel text if given address is not recovered */
+	if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE))
 		return 0;
-	kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
-	insn_get_length(&insn);
-	length = insn.length;
+
+	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
+	insn_get_length(insn);
 
 	/* Another subsystem puts a breakpoint, failed to recover */
-	if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+	if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
 		return 0;
-	memcpy(dest, insn.kaddr, length);
 
 #ifdef CONFIG_X86_64
-	if (insn_rip_relative(&insn)) {
+	/* Only x86_64 has RIP relative instructions */
+	if (insn_rip_relative(insn)) {
 		s64 newdisp;
 		u8 *disp;
-		kernel_insn_init(&insn, dest, length);
-		insn_get_displacement(&insn);
 		/*
 		 * The copied instruction uses the %rip-relative addressing
 		 * mode.  Adjust the displacement for the difference between
@@ -391,36 +384,57 @@ int __copy_instruction(u8 *dest, u8 *src)
 		 * extension of the original signed 32-bit displacement would
 		 * have given.
 		 */
-		newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
+		newdisp = (u8 *) src + (s64) insn->displacement.value
+			  - (u8 *) dest;
 		if ((s64) (s32) newdisp != newdisp) {
 			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
-			pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value);
+			pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
+				src, dest, insn->displacement.value);
 			return 0;
 		}
-		disp = (u8 *) dest + insn_offset_displacement(&insn);
+		disp = (u8 *) dest + insn_offset_displacement(insn);
 		*(s32 *) disp = (s32) newdisp;
 	}
 #endif
-	return length;
+	return insn->length;
+}
+
+/* Prepare reljump right after instruction to boost */
+static void prepare_boost(struct kprobe *p, struct insn *insn)
+{
+	if (can_boost(insn, p->addr) &&
+	    MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) {
+		/*
+		 * These instructions can be executed directly if it
+		 * jumps back to correct address.
+		 */
+		synthesize_reljump(p->ainsn.insn + insn->length,
+				   p->addr + insn->length);
+		p->ainsn.boostable = true;
+	} else {
+		p->ainsn.boostable = false;
+	}
 }
 
 static int arch_copy_kprobe(struct kprobe *p)
 {
-	int ret;
+	struct insn insn;
+	int len;
+
+	set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
 
 	/* Copy an instruction with recovering if other optprobe modifies it.*/
-	ret = __copy_instruction(p->ainsn.insn, p->addr);
-	if (!ret)
+	len = __copy_instruction(p->ainsn.insn, p->addr, &insn);
+	if (!len)
 		return -EINVAL;
 
 	/*
 	 * __copy_instruction can modify the displacement of the instruction,
 	 * but it doesn't affect boostable check.
 	 */
-	if (can_boost(p->ainsn.insn, p->addr))
-		p->ainsn.boostable = 0;
-	else
-		p->ainsn.boostable = -1;
+	prepare_boost(p, &insn);
+
+	set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
 
 	/* Check whether the instruction modifies Interrupt Flag or not */
 	p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
@@ -459,7 +473,7 @@ void arch_disarm_kprobe(struct kprobe *p)
 void arch_remove_kprobe(struct kprobe *p)
 {
 	if (p->ainsn.insn) {
-		free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
+		free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
 		p->ainsn.insn = NULL;
 	}
 }
@@ -531,7 +545,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
 		return;
 
 #if !defined(CONFIG_PREEMPT)
-	if (p->ainsn.boostable == 1 && !p->post_handler) {
+	if (p->ainsn.boostable && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
 		if (!reenter)
 			reset_current_kprobe();
@@ -851,7 +865,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
 	case 0xcf:
 	case 0xea:	/* jmp absolute -- ip is correct */
 		/* ip is already adjusted, no more changes required */
-		p->ainsn.boostable = 1;
+		p->ainsn.boostable = true;
 		goto no_change;
 	case 0xe8:	/* call relative - Fix return addr */
 		*tos = orig_ip + (*tos - copy_ip);
@@ -876,28 +890,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
 			 * jmp near and far, absolute indirect
 			 * ip is correct. And this is boostable
 			 */
-			p->ainsn.boostable = 1;
+			p->ainsn.boostable = true;
 			goto no_change;
 		}
 	default:
 		break;
 	}
 
-	if (p->ainsn.boostable == 0) {
-		if ((regs->ip > copy_ip) &&
-		    (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
-			/*
-			 * These instructions can be executed directly if it
-			 * jumps back to correct address.
-			 */
-			synthesize_reljump((void *)regs->ip,
-				(void *)orig_ip + (regs->ip - copy_ip));
-			p->ainsn.boostable = 1;
-		} else {
-			p->ainsn.boostable = -1;
-		}
-	}
-
 	regs->ip += orig_ip - copy_ip;
 
 no_change:
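For readers tracing the displacement fixup above: a standalone sketch (plain userspace C, hypothetical values, not kernel code) of the arithmetic. The absolute target of a %rip-relative access must survive the move from src to dest, and the instruction length cancels out of the equation, which is why the hunk computes src + disp - dest.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t text[64];	/* stand-in for the probed kernel text (src) */
	uint8_t slot[64];	/* stand-in for the out-of-line slot (dest) */
	int32_t disp = 0x10;	/* hypothetical original displacement */

	/* target = src + len + disp = dest + len + newdisp, so len cancels: */
	int64_t newdisp = (text + disp) - slot;

	if ((int64_t)(int32_t)newdisp != newdisp)
		puts("new displacement does not fit in s32; copy refused");
	else
		printf("newdisp = %lld\n", (long long)newdisp);
	return 0;
}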

+ 1 - 1
arch/x86/kernel/kprobes/ftrace.c

@@ -94,6 +94,6 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler);
 int arch_prepare_kprobe_ftrace(struct kprobe *p)
 {
 	p->ainsn.insn = NULL;
-	p->ainsn.boostable = -1;
+	p->ainsn.boostable = false;
 	return 0;
 }

+ 10 - 3
arch/x86/kernel/kprobes/opt.c

@@ -65,7 +65,10 @@ found:
 	 * overwritten by jump destination address. In this case, original
 	 * bytes must be recovered from op->optinsn.copied_insn buffer.
 	 */
-	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	if (probe_kernel_read(buf, (void *)addr,
+		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+		return 0UL;
+
 	if (addr == (unsigned long)kp->addr) {
 		buf[0] = kp->opcode;
 		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
@@ -174,11 +177,12 @@ NOKPROBE_SYMBOL(optimized_callback);
 
 static int copy_optimized_instructions(u8 *dest, u8 *src)
 {
+	struct insn insn;
 	int len = 0, ret;
 
 	while (len < RELATIVEJUMP_SIZE) {
-		ret = __copy_instruction(dest + len, src + len);
-		if (!ret || !can_boost(dest + len, src + len))
+		ret = __copy_instruction(dest + len, src + len, &insn);
+		if (!ret || !can_boost(&insn, src + len))
 			return -EINVAL;
 		len += ret;
 	}
@@ -350,6 +354,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 	}
 
 	buf = (u8 *)op->optinsn.insn;
+	set_memory_rw((unsigned long)buf & PAGE_MASK, 1);
 
 	/* Copy instructions into the out-of-line buffer */
 	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
@@ -372,6 +377,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
 			   (u8 *)op->kp.addr + op->optinsn.size);
 
+	set_memory_ro((unsigned long)buf & PAGE_MASK, 1);
+
 	flush_icache_range((unsigned long) buf,
 			   (unsigned long) buf + TMPL_END_IDX +
 			   op->optinsn.size + RELATIVEJUMP_SIZE);

+ 115 - 36
arch/x86/kernel/process.c

@@ -37,6 +37,7 @@
 #include <asm/vm86.h>
 #include <asm/switch_to.h>
 #include <asm/desc.h>
+#include <asm/prctl.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -124,11 +125,6 @@ void flush_thread(void)
 	fpu__clear(&tsk->thread.fpu);
 }
 
-static void hard_disable_TSC(void)
-{
-	cr4_set_bits(X86_CR4_TSD);
-}
-
 void disable_TSC(void)
 {
 	preempt_disable();
@@ -137,15 +133,10 @@ void disable_TSC(void)
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOTSC in the current running context.
 		 */
-		hard_disable_TSC();
+		cr4_set_bits(X86_CR4_TSD);
 	preempt_enable();
 }
 
-static void hard_enable_TSC(void)
-{
-	cr4_clear_bits(X86_CR4_TSD);
-}
-
 static void enable_TSC(void)
 {
 	preempt_disable();
@@ -154,7 +145,7 @@ static void enable_TSC(void)
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOTSC in the current running context.
 		 */
-		hard_enable_TSC();
+		cr4_clear_bits(X86_CR4_TSD);
 	preempt_enable();
 }
 
@@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val)
 	return 0;
 }
 
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
-		      struct tss_struct *tss)
-{
-	struct thread_struct *prev, *next;
-
-	prev = &prev_p->thread;
-	next = &next_p->thread;
+DEFINE_PER_CPU(u64, msr_misc_features_shadow);
 
-	if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
-	    test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
-		unsigned long debugctl = get_debugctlmsr();
+static void set_cpuid_faulting(bool on)
+{
+	u64 msrval;
 
-		debugctl &= ~DEBUGCTLMSR_BTF;
-		if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
-			debugctl |= DEBUGCTLMSR_BTF;
+	msrval = this_cpu_read(msr_misc_features_shadow);
+	msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+	msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
+	this_cpu_write(msr_misc_features_shadow, msrval);
+	wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
+}
 
-		update_debugctlmsr(debugctl);
+static void disable_cpuid(void)
+{
+	preempt_disable();
+	if (!test_and_set_thread_flag(TIF_NOCPUID)) {
+		/*
+		 * Must flip the CPU state synchronously with
+		 * TIF_NOCPUID in the current running context.
+		 */
+		set_cpuid_faulting(true);
 	}
+	preempt_enable();
+}
 
-	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-		/* prev and next are different */
-		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
-			hard_disable_TSC();
-		else
-			hard_enable_TSC();
+static void enable_cpuid(void)
+{
+	preempt_disable();
+	if (test_and_clear_thread_flag(TIF_NOCPUID)) {
+		/*
+		 * Must flip the CPU state synchronously with
+		 * TIF_NOCPUID in the current running context.
+		 */
+		set_cpuid_faulting(false);
 	}
+	preempt_enable();
+}
+
+static int get_cpuid_mode(void)
+{
+	return !test_thread_flag(TIF_NOCPUID);
+}
+
+static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+{
+	if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+		return -ENODEV;
+
+	if (cpuid_enabled)
+		enable_cpuid();
+	else
+		disable_cpuid();
+
+	return 0;
+}
+
+/*
+ * Called immediately after a successful exec.
+ */
+void arch_setup_new_exec(void)
+{
+	/* If cpuid was previously disabled for this task, re-enable it. */
+	if (test_thread_flag(TIF_NOCPUID))
+		enable_cpuid();
+}
 
-	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+static inline void switch_to_bitmap(struct tss_struct *tss,
+				    struct thread_struct *prev,
+				    struct thread_struct *next,
+				    unsigned long tifp, unsigned long tifn)
+{
+	if (tifn & _TIF_IO_BITMAP) {
 		/*
 		 * Copy the relevant range of the IO bitmap.
 		 * Normally this is 128 bytes or less:
 		 */
 		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
 		       max(prev->io_bitmap_max, next->io_bitmap_max));
-
 		/*
 		 * Make sure that the TSS limit is correct for the CPU
 		 * to notice the IO bitmap.
 		 */
 		refresh_tss_limit();
-	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+	} else if (tifp & _TIF_IO_BITMAP) {
 		/*
 		 * Clear any possible leftover bits:
 		 */
 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 	}
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+		      struct tss_struct *tss)
+{
+	struct thread_struct *prev, *next;
+	unsigned long tifp, tifn;
+
+	prev = &prev_p->thread;
+	next = &next_p->thread;
+
+	tifn = READ_ONCE(task_thread_info(next_p)->flags);
+	tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+	switch_to_bitmap(tss, prev, next, tifp, tifn);
+
 	propagate_user_return_notify(prev_p, next_p);
+
+	if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+	    arch_has_block_step()) {
+		unsigned long debugctl, msk;
+
+		rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+		debugctl &= ~DEBUGCTLMSR_BTF;
+		msk = tifn & _TIF_BLOCKSTEP;
+		debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	}
+
+	if ((tifp ^ tifn) & _TIF_NOTSC)
+		cr4_toggle_bits(X86_CR4_TSD);
+
+	if ((tifp ^ tifn) & _TIF_NOCPUID)
+		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
 }
 
 /*
@@ -550,3 +616,16 @@ out:
 	put_task_stack(p);
 	return ret;
 }
+
+long do_arch_prctl_common(struct task_struct *task, int option,
+			  unsigned long cpuid_enabled)
+{
+	switch (option) {
+	case ARCH_GET_CPUID:
+		return get_cpuid_mode();
+	case ARCH_SET_CPUID:
+		return set_cpuid_mode(task, cpuid_enabled);
+	}
+
+	return -EINVAL;
+}
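As a user-visible illustration of the CPUID-faulting plumbing above, a hypothetical userspace probe of the new arch_prctl() options (ARCH_* values as added to asm/prctl.h in this series; -ENODEV indicates a CPU without X86_FEATURE_CPUID_FAULT):

#define _GNU_SOURCE
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

#ifndef ARCH_GET_CPUID
#define ARCH_GET_CPUID	0x1011
#define ARCH_SET_CPUID	0x1012
#endif

int main(void)
{
	/* 1 while CPUID is allowed, 0 once it would fault */
	printf("cpuid enabled: %ld\n",
	       (long)syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0));

	/* after this, a CPUID instruction raises SIGSEGV in this task */
	if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) == 0)
		printf("cpuid faulting armed\n");
	return 0;
}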

+ 7 - 0
arch/x86/kernel/process_32.c

@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/ldt.h>
@@ -56,6 +57,7 @@
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
 #include <asm/intel_rdt.h>
+#include <asm/proto.h>
 
 void __show_regs(struct pt_regs *regs, int all)
 {
@@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	return prev_p;
 }
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+	return do_arch_prctl_common(current, option, arg2);
+}

+ 32 - 16
arch/x86/kernel/process_64.c

@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
+#include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 				(struct user_desc __user *)tls, 0);
 		else
 #endif
-			err = do_arch_prctl(p, ARCH_SET_FS, tls);
+			err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
 		if (err)
 			goto out;
 	}
@@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
 }
 #endif
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 {
 	int ret = 0;
 	int doit = task == current;
 	int cpu;
 
-	switch (code) {
+	switch (option) {
 	case ARCH_SET_GS:
-		if (addr >= TASK_SIZE_MAX)
+		if (arg2 >= TASK_SIZE_MAX)
 			return -EPERM;
 		cpu = get_cpu();
 		task->thread.gsindex = 0;
-		task->thread.gsbase = addr;
+		task->thread.gsbase = arg2;
 		if (doit) {
 			load_gs_index(0);
-			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
 		}
 		put_cpu();
 		break;
 	case ARCH_SET_FS:
 		/* Not strictly needed for fs, but do it for symmetry
 		   with gs */
-		if (addr >= TASK_SIZE_MAX)
+		if (arg2 >= TASK_SIZE_MAX)
 			return -EPERM;
 		cpu = get_cpu();
 		task->thread.fsindex = 0;
-		task->thread.fsbase = addr;
+		task->thread.fsbase = arg2;
 		if (doit) {
 			/* set the selector to 0 to not confuse __switch_to */
 			loadsegment(fs, 0);
-			ret = wrmsrl_safe(MSR_FS_BASE, addr);
+			ret = wrmsrl_safe(MSR_FS_BASE, arg2);
 		}
 		put_cpu();
 		break;
 	case ARCH_GET_FS: {
 		unsigned long base;
+
 		if (doit)
 			rdmsrl(MSR_FS_BASE, base);
 		else
 			base = task->thread.fsbase;
-		ret = put_user(base, (unsigned long __user *)addr);
+		ret = put_user(base, (unsigned long __user *)arg2);
 		break;
 	}
 	case ARCH_GET_GS: {
 		unsigned long base;
+
 		if (doit)
 			rdmsrl(MSR_KERNEL_GS_BASE, base);
 		else
 			base = task->thread.gsbase;
-		ret = put_user(base, (unsigned long __user *)addr);
+		ret = put_user(base, (unsigned long __user *)arg2);
 		break;
 	}
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
 # ifdef CONFIG_X86_X32_ABI
 	case ARCH_MAP_VDSO_X32:
-		return prctl_map_vdso(&vdso_image_x32, addr);
+		return prctl_map_vdso(&vdso_image_x32, arg2);
 # endif
 # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 	case ARCH_MAP_VDSO_32:
-		return prctl_map_vdso(&vdso_image_32, addr);
+		return prctl_map_vdso(&vdso_image_32, arg2);
 # endif
 	case ARCH_MAP_VDSO_64:
-		return prctl_map_vdso(&vdso_image_64, addr);
+		return prctl_map_vdso(&vdso_image_64, arg2);
 #endif
 
 	default:
@@ -621,10 +624,23 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 	return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+	long ret;
+
+	ret = do_arch_prctl_64(current, option, arg2);
+	if (ret == -EINVAL)
+		ret = do_arch_prctl_common(current, option, arg2);
+
+	return ret;
+}
+
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-	return do_arch_prctl(current, code, addr);
+	return do_arch_prctl_common(current, option, arg2);
 }
+#endif
 
 unsigned long KSTK_ESP(struct task_struct *task)
 {
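A minimal sketch of the renamed (behaviorally unchanged) 64-bit path from userspace — ARCH_GET_FS still returns the segment base through arg2; the fallback define covers headers that predate the option/arg2 naming:

#define _GNU_SOURCE
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

#ifndef ARCH_GET_FS
#define ARCH_GET_FS 0x1003	/* from asm/prctl.h */
#endif

int main(void)
{
	unsigned long fsbase = 0;

	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) == 0)
		printf("fs base: %#lx\n", fsbase);
	return 0;
}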

+ 4 - 4
arch/x86/kernel/ptrace.c

@@ -396,12 +396,12 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		/*
-		 * When changing the segment base, use do_arch_prctl
+		 * When changing the segment base, use do_arch_prctl_64
 		 * to set either thread.fs or thread.fsindex and the
 		 * corresponding GDT slot.
 		 */
 		if (child->thread.fsbase != value)
-			return do_arch_prctl(child, ARCH_SET_FS, value);
+			return do_arch_prctl_64(child, ARCH_SET_FS, value);
 		return 0;
 	case offsetof(struct user_regs_struct,gs_base):
 		/*
@@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		if (child->thread.gsbase != value)
-			return do_arch_prctl(child, ARCH_SET_GS, value);
+			return do_arch_prctl_64(child, ARCH_SET_GS, value);
 		return 0;
 #endif
 	}
@@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request,
 		   Works just like arch_prctl, except that the arguments
 		   are reversed. */
 	case PTRACE_ARCH_PRCTL:
-		ret = do_arch_prctl(child, data, addr);
+		ret = do_arch_prctl_64(child, data, addr);
 		break;
 #endif
 

+ 1 - 1
arch/x86/um/Makefile

@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
 
 ifeq ($(CONFIG_X86_32),y)
 
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o

+ 1 - 1
arch/x86/um/asm/ptrace.h

@@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
         return -ENOSYS;
 }
 
-extern long arch_prctl(struct task_struct *task, int code,
+extern long arch_prctl(struct task_struct *task, int option,
 		       unsigned long __user *addr);
 
 #endif

+ 2 - 2
arch/x86/um/os-Linux/prctl.c

@@ -6,7 +6,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
-int os_arch_prctl(int pid, int code, unsigned long *addr)
+int os_arch_prctl(int pid, int option, unsigned long *arg2)
 {
-        return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code);
+	return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
 }

+ 7 - 0
arch/x86/um/syscalls_32.c

@@ -0,0 +1,7 @@
+#include <linux/syscalls.h>
+#include <os.h>
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+	return -EINVAL;
+}

+ 11 - 9
arch/x86/um/syscalls_64.c

@@ -7,13 +7,15 @@
 
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
+#include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <asm/prctl.h> /* XXX This should get the constants from libc */
 #include <os.h>
 
-long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
+long arch_prctl(struct task_struct *task, int option,
+		unsigned long __user *arg2)
 {
-	unsigned long *ptr = addr, tmp;
+	unsigned long *ptr = arg2, tmp;
 	long ret;
 	int pid = task->mm->context.id.u.pid;
 
@@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
 	 * arch_prctl is run on the host, then the registers are read
 	 * back.
 	 */
-	switch (code) {
+	switch (option) {
 	case ARCH_SET_FS:
 	case ARCH_SET_GS:
 		ret = restore_registers(pid, &current->thread.regs.regs);
@@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
 		ptr = &tmp;
 	}
 
-	ret = os_arch_prctl(pid, code, ptr);
+	ret = os_arch_prctl(pid, option, ptr);
 	if (ret)
 		return ret;
 
-	switch (code) {
+	switch (option) {
 	case ARCH_SET_FS:
 		current->thread.arch.fs = (unsigned long) ptr;
 		ret = save_registers(pid, &current->thread.regs.regs);
@@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
 		ret = save_registers(pid, &current->thread.regs.regs);
 		break;
 	case ARCH_GET_FS:
-		ret = put_user(tmp, addr);
+		ret = put_user(tmp, arg2);
 		break;
 	case ARCH_GET_GS:
-		ret = put_user(tmp, addr);
+		ret = put_user(tmp, arg2);
 		break;
 	}
 
 	return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-	return arch_prctl(current, code, (unsigned long __user *) addr);
+	return arch_prctl(current, option, (unsigned long __user *) arg2);
 }
 
 void arch_switch_to(struct task_struct *to)

+ 4 - 5
drivers/hwtracing/coresight/coresight-etb10.c

@@ -321,7 +321,7 @@ static int etb_set_buffer(struct coresight_device *csdev,
 
 static unsigned long etb_reset_buffer(struct coresight_device *csdev,
 				      struct perf_output_handle *handle,
-				      void *sink_config, bool *lost)
+				      void *sink_config)
 {
 	unsigned long size = 0;
 	struct cs_buffers *buf = sink_config;
@@ -343,7 +343,6 @@ static unsigned long etb_reset_buffer(struct coresight_device *csdev,
 		 * resetting parameters here and squaring off with the ring
 		 * buffer API in the tracer PMU is fine.
 		 */
-		*lost = !!local_xchg(&buf->lost, 0);
 		size = local_xchg(&buf->data_size, 0);
 	}
 
@@ -385,7 +384,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
 			(unsigned long)write_ptr);
 
 		write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1);
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 	}
 
 	/*
@@ -396,7 +395,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
 	 */
 	status = readl_relaxed(drvdata->base + ETB_STATUS_REG);
 	if (status & ETB_STATUS_RAM_FULL) {
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 		to_read = capacity;
 		read_ptr = write_ptr;
 	} else {
@@ -429,7 +428,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
 		if (read_ptr > (drvdata->buffer_depth - 1))
 			read_ptr -= drvdata->buffer_depth;
 		/* let the decoder know we've skipped ahead */
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 	}
 
 	/* finally tell HW where we want to start reading from */

+ 4 - 5
drivers/hwtracing/coresight/coresight-etm-perf.c

@@ -302,7 +302,8 @@ out:
 	return;
 
 fail_end_stop:
-	perf_aux_output_end(handle, 0, true);
+	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+	perf_aux_output_end(handle, 0);
 fail:
 	event->hw.state = PERF_HES_STOPPED;
 	goto out;
@@ -310,7 +311,6 @@ fail:
 
 static void etm_event_stop(struct perf_event *event, int mode)
 {
-	bool lost;
 	int cpu = smp_processor_id();
 	unsigned long size;
 	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
@@ -348,10 +348,9 @@ static void etm_event_stop(struct perf_event *event, int mode)
 			return;
 
 		size = sink_ops(sink)->reset_buffer(sink, handle,
-						    event_data->snk_config,
-						    &lost);
+						    event_data->snk_config);
 
-		perf_aux_output_end(handle, size, lost);
+		perf_aux_output_end(handle, size);
 	}
 
 	/* Disabling the path makes its elements available to other sessions */
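To make the new contract concrete, a hypothetical sink-driver fragment (function and parameter names invented; only perf_aux_output_flag() and perf_aux_output_end() come from this series). Truncation is flagged on the handle as soon as the hardware wraps, so reset_buffer() no longer needs a bool *lost out-parameter:

static void example_sink_stop(struct perf_output_handle *handle,
			      unsigned long size, bool hw_wrapped)
{
	/* e.g. on ETB_STATUS_RAM_FULL or TMC_STS_FULL, as in the hunks above */
	if (hw_wrapped)
		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);

	/* the flag now travels in handle->aux_flags */
	perf_aux_output_end(handle, size);
}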

+ 0 - 2
drivers/hwtracing/coresight/coresight-priv.h

@@ -76,7 +76,6 @@ enum cs_mode {
  * @nr_pages:	max number of pages granted to us
  * @offset:	offset within the current buffer
  * @data_size:	how much we collected in this run
- * @lost:	other than zero if we had a HW buffer wrap around
  * @snapshot:	is this run in snapshot mode
  * @data_pages:	a handle to the ring buffer
  */
@@ -85,7 +84,6 @@ struct cs_buffers {
 	unsigned int		nr_pages;
 	unsigned long		offset;
 	local_t			data_size;
-	local_t			lost;
 	bool			snapshot;
 	void			**data_pages;
 };

+ 3 - 4
drivers/hwtracing/coresight/coresight-tmc-etf.c

@@ -329,7 +329,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev,
 
 static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
 					  struct perf_output_handle *handle,
-					  void *sink_config, bool *lost)
+					  void *sink_config)
 {
 	long size = 0;
 	struct cs_buffers *buf = sink_config;
@@ -350,7 +350,6 @@ static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
 		 * resetting parameters here and squaring off with the ring
 		 * buffer API in the tracer PMU is fine.
 		 */
-		*lost = !!local_xchg(&buf->lost, 0);
 		size = local_xchg(&buf->data_size, 0);
 	}
 
@@ -389,7 +388,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 	 */
 	status = readl_relaxed(drvdata->base + TMC_STS);
 	if (status & TMC_STS_FULL) {
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 		to_read = drvdata->size;
 	} else {
 		to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size);
@@ -434,7 +433,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 			read_ptr -= drvdata->size;
 		/* Tell the HW */
 		writel_relaxed(read_ptr, drvdata->base + TMC_RRP);
-		local_inc(&buf->lost);
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 	}
 
 	cur = buf->cur;

+ 3 - 3
drivers/iommu/amd_iommu.c

@@ -1234,7 +1234,7 @@ static void __domain_flush_pages(struct protection_domain *domain,
 
 	build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
 
-	for (i = 0; i < amd_iommus_present; ++i) {
+	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
 		if (!domain->dev_iommu[i])
 			continue;
 
@@ -1278,7 +1278,7 @@ static void domain_flush_complete(struct protection_domain *domain)
 {
 	int i;
 
-	for (i = 0; i < amd_iommus_present; ++i) {
+	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
 		if (domain && !domain->dev_iommu[i])
 			continue;
 
@@ -3363,7 +3363,7 @@ static int __flush_pasid(struct protection_domain *domain, int pasid,
 	 * IOMMU TLB needs to be flushed before Device TLB to
 	 * prevent device TLB refill from IOMMU TLB
 	 */
-	for (i = 0; i < amd_iommus_present; ++i) {
+	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
 		if (domain->dev_iommu[i] == 0)
 			continue;
 

+ 61 - 40
drivers/iommu/amd_iommu_init.c

@@ -167,7 +167,9 @@ LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
 
 /* Array to assign indices to IOMMUs*/
 struct amd_iommu *amd_iommus[MAX_IOMMUS];
-int amd_iommus_present;
+
+/* Number of IOMMUs present in the system */
+static int amd_iommus_present;
 
 /* IOMMUs have a non-present cache? */
 bool amd_iommu_np_cache __read_mostly;
@@ -254,10 +256,6 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
-static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
-				    u8 bank, u8 cntr, u8 fxn,
-				    u64 *value, bool is_write);
-
 static inline void update_last_devid(u16 devid)
 {
 	if (devid > amd_iommu_last_bdf)
@@ -272,6 +270,11 @@ static inline unsigned long tbl_size(int entry_size)
 	return 1UL << shift;
 }
 
+int amd_iommu_get_num_iommus(void)
+{
+	return amd_iommus_present;
+}
+
 /* Access to l1 and l2 indexed register spaces */
 
 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
@@ -1336,7 +1339,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 
 	/* Add IOMMU to internal data structures */
 	list_add_tail(&iommu->list, &amd_iommu_list);
-	iommu->index             = amd_iommus_present++;
+	iommu->index = amd_iommus_present++;
 
 	if (unlikely(iommu->index >= MAX_IOMMUS)) {
 		WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
@@ -1477,6 +1480,8 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 	return 0;
 }
 
+static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+				u8 fxn, u64 *value, bool is_write);
 
 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
 {
@@ -1488,8 +1493,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
 	amd_iommu_pc_present = true;
 
 	/* Check if the performance counters can be written to */
-	if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
-	    (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
+	if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
+	    (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
 	    (val != val2)) {
 		pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
 		amd_iommu_pc_present = false;
@@ -2711,6 +2716,18 @@ bool amd_iommu_v2_supported(void)
 }
 EXPORT_SYMBOL(amd_iommu_v2_supported);
 
+struct amd_iommu *get_amd_iommu(unsigned int idx)
+{
+	unsigned int i = 0;
+	struct amd_iommu *iommu;
+
+	for_each_iommu(iommu)
+		if (i++ == idx)
+			return iommu;
+	return NULL;
+}
+EXPORT_SYMBOL(get_amd_iommu);
+
 /****************************************************************************
  *
  * IOMMU EFR Performance Counter support functionality. This code allows
@@ -2718,17 +2735,14 @@ EXPORT_SYMBOL(amd_iommu_v2_supported);
  *
  ****************************************************************************/
 
-u8 amd_iommu_pc_get_max_banks(u16 devid)
+u8 amd_iommu_pc_get_max_banks(unsigned int idx)
 {
-	struct amd_iommu *iommu;
-	u8 ret = 0;
+	struct amd_iommu *iommu = get_amd_iommu(idx);
 
-	/* locate the iommu governing the devid */
-	iommu = amd_iommu_rlookup_table[devid];
 	if (iommu)
-		ret = iommu->max_banks;
+		return iommu->max_banks;
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
 
@@ -2738,62 +2752,69 @@ bool amd_iommu_pc_supported(void)
 }
 EXPORT_SYMBOL(amd_iommu_pc_supported);
 
-u8 amd_iommu_pc_get_max_counters(u16 devid)
+u8 amd_iommu_pc_get_max_counters(unsigned int idx)
 {
-	struct amd_iommu *iommu;
-	u8 ret = 0;
+	struct amd_iommu *iommu = get_amd_iommu(idx);
 
-	/* locate the iommu governing the devid */
-	iommu = amd_iommu_rlookup_table[devid];
 	if (iommu)
-		ret = iommu->max_counters;
+		return iommu->max_counters;
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
 
-static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
-				    u8 bank, u8 cntr, u8 fxn,
-				    u64 *value, bool is_write)
+static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+				u8 fxn, u64 *value, bool is_write)
 {
 	u32 offset;
 	u32 max_offset_lim;
 
+	/* Make sure the IOMMU PC resource is available */
+	if (!amd_iommu_pc_present)
+		return -ENODEV;
+
 	/* Check for valid iommu and pc register indexing */
-	if (WARN_ON((fxn > 0x28) || (fxn & 7)))
+	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
 		return -ENODEV;
 
-	offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
+	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
 
 	/* Limit the offset to the hw defined mmio region aperture */
-	max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) |
+	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
 				(iommu->max_counters << 8) | 0x28);
 	if ((offset < MMIO_CNTR_REG_OFFSET) ||
 	    (offset > max_offset_lim))
 		return -EINVAL;
 
 	if (is_write) {
-		writel((u32)*value, iommu->mmio_base + offset);
-		writel((*value >> 32), iommu->mmio_base + offset + 4);
+		u64 val = *value & GENMASK_ULL(47, 0);
+
+		writel((u32)val, iommu->mmio_base + offset);
+		writel((val >> 32), iommu->mmio_base + offset + 4);
 	} else {
 		*value = readl(iommu->mmio_base + offset + 4);
 		*value <<= 32;
-		*value = readl(iommu->mmio_base + offset);
+		*value |= readl(iommu->mmio_base + offset);
+		*value &= GENMASK_ULL(47, 0);
 	}
 
 	return 0;
 }
-EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
 
-int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
-				    u64 *value, bool is_write)
+int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
 {
-	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return -EINVAL;
 
-	/* Make sure the IOMMU PC resource is available */
-	if (!amd_iommu_pc_present || iommu == NULL)
-		return -ENODEV;
+	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
+}
+EXPORT_SYMBOL(amd_iommu_pc_get_reg);
+
+int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
+{
+	if (!iommu)
+		return -EINVAL;
 
-	return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn,
-					value, is_write);
+	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
 }
+EXPORT_SYMBOL(amd_iommu_pc_set_reg);
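A worked example of the counter addressing and 48-bit masking in iommu_pc_get_set_reg() above, with hypothetical indices (standalone C; GENMASK_ULL re-derived for illustration):

#include <stdint.h>

#define GENMASK_ULL(h, l) \
	((~0ULL << (l)) & (~0ULL >> (63 - (h))))

/* bank 1, counter 2, function 0x08  ->  (0x41 << 12) | 0x200 | 0x08 = 0x41208 */
static uint32_t pc_reg_offset(uint8_t bank, uint8_t cntr, uint8_t fxn)
{
	return (uint32_t)(((0x40u | bank) << 12) | (cntr << 8) | fxn);
}

/* counters are 48 bits wide, hence the mask on both the read and write paths */
static uint64_t pc_read_value(uint32_t hi, uint32_t lo)
{
	return (((uint64_t)hi << 32) | lo) & GENMASK_ULL(47, 0);
}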

+ 1 - 7
drivers/iommu/amd_iommu_proto.h

@@ -21,6 +21,7 @@
 
 #include "amd_iommu_types.h"
 
+extern int amd_iommu_get_num_iommus(void);
 extern int amd_iommu_init_dma_ops(void);
 extern int amd_iommu_init_passthrough(void);
 extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
@@ -56,13 +57,6 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
 extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
 
-/* IOMMU Performance Counter functions */
-extern bool amd_iommu_pc_supported(void);
-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
-extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
-				    u64 *value, bool is_write);
-
 #ifdef CONFIG_IRQ_REMAP
 extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
 #else

+ 0 - 3
drivers/iommu/amd_iommu_types.h

@@ -611,9 +611,6 @@ extern struct list_head amd_iommu_list;
  */
 extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
 
-/* Number of IOMMUs present in the system */
-extern int amd_iommus_present;
-
 /*
  * Declarations for the global list of all protection domains
  */

+ 1 - 0
fs/exec.c

@@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
+	arch_setup_new_exec();
 	perf_event_exec();
 	__set_task_comm(current, kbasename(bprm->filename), true);
 

+ 2 - 0
include/linux/compat.h

@@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
 					    int, const char __user *);
 
+asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2);
+
 /*
  * For most but not all architectures, "am I in a compat syscall?" and
  * "am I a compat task?" are the same question.  For architectures on which

+ 1 - 1
include/linux/coresight.h

@@ -201,7 +201,7 @@ struct coresight_ops_sink {
 			  void *sink_config);
 	unsigned long (*reset_buffer)(struct coresight_device *csdev,
 				      struct perf_output_handle *handle,
-				      void *sink_config, bool *lost);
+				      void *sink_config);
 	void (*update_buffer)(struct coresight_device *csdev,
 			      struct perf_output_handle *handle,
 			      void *sink_config);

+ 2 - 0
include/linux/kprobes.h

@@ -267,6 +267,8 @@ extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 extern bool arch_within_kprobe_blacklist(unsigned long addr);
+extern bool arch_function_offset_within_entry(unsigned long offset);
+extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
 
 extern bool within_kprobe_blacklist(unsigned long addr);
 

+ 14 - 3
include/linux/perf_event.h

@@ -165,6 +165,13 @@ struct hw_perf_event {
 			struct list_head		bp_list;
 		};
 #endif
+		struct { /* amd_iommu */
+			u8	iommu_bank;
+			u8	iommu_cntr;
+			u16	padding;
+			u64	conf;
+			u64	conf1;
+		};
 	};
 	/*
 	 * If the event is a per task event, this will point to the task in
@@ -801,6 +808,7 @@ struct perf_output_handle {
 	struct ring_buffer		*rb;
 	unsigned long			wakeup;
 	unsigned long			size;
+	u64				aux_flags;
 	union {
 		void			*addr;
 		unsigned long		head;
@@ -849,10 +857,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
 extern void *perf_aux_output_begin(struct perf_output_handle *handle,
 				   struct perf_event *event);
 extern void perf_aux_output_end(struct perf_output_handle *handle,
-				unsigned long size, bool truncated);
+				unsigned long size);
 extern int perf_aux_output_skip(struct perf_output_handle *handle,
 				unsigned long size);
 extern void *perf_get_aux(struct perf_output_handle *handle);
+extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
 
 extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
@@ -1112,6 +1121,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
 
 extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
+extern void perf_event_namespaces(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
@@ -1267,8 +1277,8 @@ static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event)				{ return NULL; }
 static inline void
-perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
-		    bool truncated)					{ }
+perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
+									{ }
 static inline int
 perf_aux_output_skip(struct perf_output_handle *handle,
 		     unsigned long size)				{ return -EINVAL; }
@@ -1315,6 +1325,7 @@ static inline int perf_unregister_guest_info_callbacks
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
+static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }

+ 4 - 0
include/linux/thread_info.h

@@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n,
 { }
 #endif /* CONFIG_HARDENED_USERCOPY */
 
+#ifndef arch_setup_new_exec
+static inline void arch_setup_new_exec(void) { }
+#endif
+
 #endif	/* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
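The #ifndef above lets an architecture override the hook by defining the macro before the generic header is evaluated; a minimal sketch of the opt-in (x86 wires up the equivalent in this series to re-enable CPUID across exec):

/* in an arch's asm/thread_info.h (sketch) */
#define arch_setup_new_exec arch_setup_new_exec
void arch_setup_new_exec(void);		/* arch-provided implementation */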

+ 32 - 1
include/uapi/linux/perf_event.h

@@ -344,7 +344,8 @@ struct perf_event_attr {
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
-				__reserved_1   : 36;
+				namespaces     :  1, /* include namespaces data */
+				__reserved_1   : 35;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
 	__u16	size;
 };
 
+struct perf_ns_link_info {
+	__u64	dev;
+	__u64	ino;
+};
+
+enum {
+	NET_NS_INDEX		= 0,
+	UTS_NS_INDEX		= 1,
+	IPC_NS_INDEX		= 2,
+	PID_NS_INDEX		= 3,
+	USER_NS_INDEX		= 4,
+	MNT_NS_INDEX		= 5,
+	CGROUP_NS_INDEX		= 6,
+
+	NR_NAMESPACES,		/* number of available namespaces */
+};
+
 enum perf_event_type {
 
 	/*
@@ -862,6 +880,18 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SWITCH_CPU_WIDE		= 15,
 
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid;
+	 *	u32				tid;
+	 *	u64				nr_namespaces;
+	 *	{ u64				dev, inode; } [nr_namespaces];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_NAMESPACES			= 16,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -885,6 +915,7 @@ enum perf_callchain_context {
  */
 #define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
 #define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL		0x04	/* record contains gaps */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
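Taken together with the PERF_RECORD_NAMESPACES layout comment above, a consumer would declare the record roughly as follows (hypothetical struct name; NR_NAMESPACES is 7 in this revision):

struct namespaces_record {
	struct perf_event_header	header;	/* .type == PERF_RECORD_NAMESPACES */
	__u32				pid;
	__u32				tid;
	__u64				nr_namespaces;
	struct perf_ns_link_info	link_info[NR_NAMESPACES];
	/* followed by struct sample_id when sample_id_all is set */
};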

+ 139 - 0
kernel/events/core.c

@@ -48,6 +48,8 @@
 #include <linux/parser.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/mm.h>
+#include <linux/proc_ns.h>
+#include <linux/mount.h>
 
 #include "internal.h"
 
@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
+static atomic_t nr_namespaces_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_dec(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_dec(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
 void perf_event_fork(struct task_struct *task)
 {
 	perf_event_task(task, NULL, 1);
+	perf_event_namespaces(task);
 }
 
 /*
@@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
 	perf_event_comm_event(&comm_event);
 }
 
+/*
+ * namespaces tracking
+ */
+
+struct perf_namespaces_event {
+	struct task_struct		*task;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+		u64				nr_namespaces;
+		struct perf_ns_link_info	link_info[NR_NAMESPACES];
+	} event_id;
+};
+
+static int perf_event_namespaces_match(struct perf_event *event)
+{
+	return event->attr.namespaces;
+}
+
+static void perf_event_namespaces_output(struct perf_event *event,
+					 void *data)
+{
+	struct perf_namespaces_event *namespaces_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	if (!perf_event_namespaces_match(event))
+		return;
+
+	perf_event_header__init_id(&namespaces_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				namespaces_event->event_id.header.size);
+	if (ret)
+		return;
+
+	namespaces_event->event_id.pid = perf_event_pid(event,
+							namespaces_event->task);
+	namespaces_event->event_id.tid = perf_event_tid(event,
+							namespaces_event->task);
+
+	perf_output_put(&handle, namespaces_event->event_id);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
+				   struct task_struct *task,
+				   const struct proc_ns_operations *ns_ops)
+{
+	struct path ns_path;
+	struct inode *ns_inode;
+	void *error;
+
+	error = ns_get_path(&ns_path, task, ns_ops);
+	if (!error) {
+		ns_inode = ns_path.dentry->d_inode;
+		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
+		ns_link_info->ino = ns_inode->i_ino;
+	}
+}
+
+void perf_event_namespaces(struct task_struct *task)
+{
+	struct perf_namespaces_event namespaces_event;
+	struct perf_ns_link_info *ns_link_info;
+
+	if (!atomic_read(&nr_namespaces_events))
+		return;
+
+	namespaces_event = (struct perf_namespaces_event){
+		.task	= task,
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_NAMESPACES,
+				.misc = 0,
+				.size = sizeof(namespaces_event.event_id),
+			},
+			/* .pid */
+			/* .tid */
+			.nr_namespaces = NR_NAMESPACES,
+			/* .link_info[NR_NAMESPACES] */
+		},
+	};
+
+	ns_link_info = namespaces_event.event_id.link_info;
+
+	perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
+			       task, &mntns_operations);
+
+#ifdef CONFIG_USER_NS
+	perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
+			       task, &userns_operations);
+#endif
+#ifdef CONFIG_NET_NS
+	perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
+			       task, &netns_operations);
+#endif
+#ifdef CONFIG_UTS_NS
+	perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
+			       task, &utsns_operations);
+#endif
+#ifdef CONFIG_IPC_NS
+	perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
+			       task, &ipcns_operations);
+#endif
+#ifdef CONFIG_PID_NS
+	perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
+			       task, &pidns_operations);
+#endif
+#ifdef CONFIG_CGROUPS
+	perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
+			       task, &cgroupns_operations);
+#endif
+
+	perf_iterate_sb(perf_event_namespaces_output,
+			&namespaces_event,
+			NULL);
+}
+
 /*
  * mmap tracking
  */
@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_inc(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_inc(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
 	if (event->attr.freq)
@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
 			return -EACCES;
 	}
 
+	if (attr.namespaces) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
+
 	if (attr.freq) {
 		if (attr.sample_freq > sysctl_perf_event_sample_rate)
 			return -EINVAL;
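A hypothetical userspace sketch of requesting the new records — note the perf_event_open() hunk above gates attr.namespaces on CAP_SYS_ADMIN, and a software dummy event is enough to receive them:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <string.h>
#include <unistd.h>

int open_namespaces_fd(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type	   = PERF_TYPE_SOFTWARE;
	attr.config	   = PERF_COUNT_SW_DUMMY;
	attr.size	   = sizeof(attr);
	attr.namespaces	   = 1;	/* emit PERF_RECORD_NAMESPACES */
	attr.sample_id_all = 1;

	return syscall(SYS_perf_event_open, &attr, pid, -1, -1, 0);
}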

+ 23 - 11
kernel/events/ring_buffer.c

@@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 		rb->paused = 1;
 }
 
+void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
+{
+	/*
+	 * OVERWRITE is determined by perf_aux_output_end() and can't
+	 * be passed in directly.
+	 */
+	if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE))
+		return;
+
+	handle->aux_flags |= flags;
+}
+EXPORT_SYMBOL_GPL(perf_aux_output_flag);
+
 /*
  * This is called before hardware starts writing to the AUX area to
  * obtain an output handle and make sure there's room in the buffer.
@@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	handle->event = event;
 	handle->head = aux_head;
 	handle->size = 0;
+	handle->aux_flags = 0;
 
 	/*
 	 * In overwrite mode, AUX data stores do not depend on aux_tail,
@@ -408,34 +422,32 @@ err:
  * of the AUX buffer management code is that after pmu::stop(), the AUX
  * transaction must be stopped and therefore drop the AUX reference count.
  */
-void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
-			 bool truncated)
+void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 {
+	bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
 	struct ring_buffer *rb = handle->rb;
-	bool wakeup = truncated;
 	unsigned long aux_head;
-	u64 flags = 0;
-
-	if (truncated)
-		flags |= PERF_AUX_FLAG_TRUNCATED;
 
 	/* in overwrite mode, driver provides aux_head via handle */
 	if (rb->aux_overwrite) {
-		flags |= PERF_AUX_FLAG_OVERWRITE;
+		handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE;
 
 		aux_head = handle->head;
 		local_set(&rb->aux_head, aux_head);
 	} else {
+		handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE;
+
 		aux_head = local_read(&rb->aux_head);
 		local_add(size, &rb->aux_head);
 	}
 
-	if (size || flags) {
+	if (size || handle->aux_flags) {
 		/*
 		 * Only send RECORD_AUX if we have something useful to communicate
 		 */
 
-		perf_event_aux_event(handle->event, aux_head, size, flags);
+		perf_event_aux_event(handle->event, aux_head, size,
+		                     handle->aux_flags);
 	}
 
 	aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
@@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 	}
 
 	if (wakeup) {
-		if (truncated)
+		if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
 			handle->event->pending_disable = 1;
 		perf_output_wakeup(handle);
 	}

+ 2 - 0
kernel/fork.c

@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		}
 	}
 
+	perf_event_namespaces(current);
+
 bad_unshare_cleanup_cred:
 	if (new_cred)
 		put_cred(new_cred);

+ 36 - 10
kernel/kprobes.c

@@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr)
 * This returns encoded errors if it fails to look up the symbol or the
 * combination of parameters is invalid.
  */
-static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
+static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
+			const char *symbol_name, unsigned int offset)
 {
-	kprobe_opcode_t *addr = p->addr;
-
-	if ((p->symbol_name && p->addr) ||
-	    (!p->symbol_name && !p->addr))
+	if ((symbol_name && addr) || (!symbol_name && !addr))
 		goto invalid;
 
-	if (p->symbol_name) {
-		kprobe_lookup_name(p->symbol_name, addr);
+	if (symbol_name) {
+		kprobe_lookup_name(symbol_name, addr);
 		if (!addr)
 			return ERR_PTR(-ENOENT);
 	}
 
-	addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
+	addr = (kprobe_opcode_t *)(((char *)addr) + offset);
 	if (addr)
 		return addr;
 
@@ -1413,6 +1411,11 @@ invalid:
 	return ERR_PTR(-EINVAL);
 }
 
+static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
+{
+	return _kprobe_addr(p->addr, p->symbol_name, p->offset);
+}
+
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
 static struct kprobe *__get_valid_kprobe(struct kprobe *p)
 {
@@ -1740,11 +1743,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
 }
 EXPORT_SYMBOL_GPL(unregister_kprobes);
 
-int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-					      unsigned long val, void *data)
+int __weak kprobe_exceptions_notify(struct notifier_block *self,
+					unsigned long val, void *data)
 {
 	return NOTIFY_DONE;
 }
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
 
 static struct notifier_block kprobe_exceptions_nb = {
 	.notifier_call = kprobe_exceptions_notify,
@@ -1875,6 +1879,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(pre_handler_kretprobe);
 
+bool __weak arch_function_offset_within_entry(unsigned long offset)
+{
+	return !offset;
+}
+
+bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
+{
+	kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
+
+	if (IS_ERR(kp_addr))
+		return false;
+
+	if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
+						!arch_function_offset_within_entry(offset))
+		return false;
+
+	return true;
+}
+
 int register_kretprobe(struct kretprobe *rp)
 {
 	int ret = 0;
@@ -1882,6 +1905,9 @@ int register_kretprobe(struct kretprobe *rp)
 	int i;
 	void *addr;
 
+	if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
+		return -EINVAL;
+
 	if (kretprobe_blacklist_size) {
 		addr = kprobe_addr(&rp->kp);
 		if (IS_ERR(addr))
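A sketch of what the new entry check rejects, with a hypothetical target and handler (architectures can widen the notion of 'entry' via arch_function_offset_within_entry):

static int example_ret_handler(struct kretprobe_instance *ri,
			       struct pt_regs *regs)
{
	return 0;
}

static struct kretprobe example_rp = {
	.kp.symbol_name	= "vfs_read",	/* hypothetical target */
	.kp.offset	= 4,		/* mid-function: now -EINVAL */
	.handler	= example_ret_handler,
};
/* register_kretprobe(&example_rp) now fails up front instead of arming
 * a return probe at an address that is not a function entry. */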

+ 3 - 0
kernel/nsproxy.c

@@ -26,6 +26,7 @@
 #include <linux/file.h>
 #include <linux/syscalls.h>
 #include <linux/cgroup.h>
+#include <linux/perf_event.h>
 
 static struct kmem_cache *nsproxy_cachep;
 
@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 		goto out;
 	}
 	switch_task_namespaces(tsk, new_nsproxy);
+
+	perf_event_namespaces(tsk);
 out:
 	fput(file);
 	return err;

+ 1 - 1
kernel/trace/Kconfig

@@ -455,7 +455,7 @@ config UPROBE_EVENTS
 	select UPROBES
 	select PROBE_EVENTS
 	select TRACING
-	default n
+	default y
 	help
 	  This allows the user to add tracing events on top of userspace
 	  dynamic events (similar to tracepoints) on the fly via the trace

+ 1 - 0
kernel/trace/trace.c

@@ -4355,6 +4355,7 @@ static const char readme_msg[] =
 	"\t           -:[<group>/]<event>\n"
 #ifdef CONFIG_KPROBE_EVENTS
 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
 	"\t    place: <path>:<offset>\n"

+ 3 - 6
kernel/trace/trace_kprobe.c

@@ -681,10 +681,6 @@ static int create_trace_kprobe(int argc, char **argv)
 		return -EINVAL;
 	}
 	if (isdigit(argv[1][0])) {
-		if (is_return) {
-			pr_info("Return probe point must be a symbol.\n");
-			return -EINVAL;
-		}
 		/* an address specified */
 		ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
 		if (ret) {
@@ -700,8 +696,9 @@ static int create_trace_kprobe(int argc, char **argv)
 			pr_info("Failed to parse symbol.\n");
 			return ret;
 		}
-		if (offset && is_return) {
-			pr_info("Return probe must be used without offset.\n");
+		if (offset && is_return &&
+		    !function_offset_within_entry(NULL, symbol, offset)) {
+			pr_info("Given offset is not valid for return probe.\n");
 			return -EINVAL;
 		}
 	}

+ 13 - 0
tools/arch/arm/include/uapi/asm/kvm.h

@@ -181,10 +181,23 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+			(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL       4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL	0
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT    0
 
 /* KVM_IRQ_LINE irq field index values */

+ 13 - 0
tools/arch/arm64/include/uapi/asm/kvm.h

@@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+			(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK	0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL	0
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
 
 /* Device Control API on vcpu fd */

+ 22 - 0
tools/arch/powerpc/include/uapi/asm/kvm.h

@@ -413,6 +413,26 @@ struct kvm_get_htab_header {
 	__u16	n_invalid;
 };
 
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+	__u64	flags;
+	__u64	process_table;	/* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX	1	/* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE	2	/* global translation shootdown enb. */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+	struct kvm_ppc_radix_geom {
+		__u8	page_shift;
+		__u8	level_bits[4];
+		__u8	pad[3];
+	}	geometries[8];
+	__u32	ap_encodings[8];
+};
+
 /* Per-vcpu XICS interrupt controller state */
 #define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
 
@@ -613,5 +633,7 @@ struct kvm_get_htab_header {
 #define  KVM_XICS_LEVEL_SENSITIVE	(1ULL << 40)
 #define  KVM_XICS_MASKED		(1ULL << 41)
 #define  KVM_XICS_PENDING		(1ULL << 42)
+#define  KVM_XICS_PRESENTED		(1ULL << 43)
+#define  KVM_XICS_QUEUED		(1ULL << 44)
 
 #endif /* __LINUX_KVM_POWERPC_H */

+ 7 - 0
tools/arch/x86/include/asm/atomic.h

@@ -7,6 +7,8 @@
 
 #define LOCK_PREFIX "\n\tlock; "
 
+#include <asm/cmpxchg.h>
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
@@ -62,4 +64,9 @@ static inline int atomic_dec_and_test(atomic_t *v)
 	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
 }
 
+static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
 #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */

+ 89 - 0
tools/arch/x86/include/asm/cmpxchg.h

@@ -0,0 +1,89 @@
+#ifndef TOOLS_ASM_X86_CMPXCHG_H
+#define TOOLS_ASM_X86_CMPXCHG_H
+
+#include <linux/compiler.h>
+
+/*
+ * Non-existent functions to indicate usage errors at link time
+ * (or compile-time if the compiler implements __compiletime_error()).
+ */
+extern void __cmpxchg_wrong_size(void)
+	__compiletime_error("Bad argument size for cmpxchg");
+
+/*
+ * Constants for operation sizes. On 32-bit, the 64-bit size is set to
+ * -1 because sizeof will never return -1, thereby making those switch
+ * case statements guaranteed dead code which the compiler will
+ * eliminate, and allowing the "missing symbol in the default case" to
+ * indicate a usage error.
+ */
+#define __X86_CASE_B	1
+#define __X86_CASE_W	2
+#define __X86_CASE_L	4
+#ifdef __x86_64__
+#define __X86_CASE_Q	8
+#else
+#define	__X86_CASE_Q	-1		/* sizeof will never return -1 */
+#endif
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+#define __raw_cmpxchg(ptr, old, new, size, lock)			\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__typeof__(*(ptr)) __old = (old);				\
+	__typeof__(*(ptr)) __new = (new);				\
+	switch (size) {							\
+	case __X86_CASE_B:						\
+	{								\
+		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
+		asm volatile(lock "cmpxchgb %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "q" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_W:						\
+	{								\
+		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
+		asm volatile(lock "cmpxchgw %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "r" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_L:						\
+	{								\
+		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
+		asm volatile(lock "cmpxchgl %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "r" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_Q:						\
+	{								\
+		volatile u64 *__ptr = (volatile u64 *)(ptr);		\
+		asm volatile(lock "cmpxchgq %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "r" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	default:							\
+		__cmpxchg_wrong_size();					\
+	}								\
+	__ret;								\
+})
+
+#define __cmpxchg(ptr, old, new, size)					\
+	__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
+
+#define cmpxchg(ptr, old, new)						\
+	__cmpxchg(ptr, old, new, sizeof(*(ptr)))
+
+
+#endif	/* TOOLS_ASM_X86_CMPXCHG_H */
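A minimal usage sketch, assuming the tools-side atomic_t and atomic_read() from the sibling asm/atomic.h: a lock-free increment expressed with the new atomic_cmpxchg():

static inline void atomic_inc_cas(atomic_t *v)
{
	int old;

	do {
		old = atomic_read(v);		/* racy snapshot */
	} while (atomic_cmpxchg(v, old, old + 1) != old);
}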

+ 5 - 4
tools/arch/x86/include/asm/cpufeatures.h

@@ -100,7 +100,7 @@
 #define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
 #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
-/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_CPUID	( 3*32+25) /* CPU has CPUID instruction itself */
 #define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
@@ -186,7 +186,8 @@
  *
  * Reuse free bits when adding new feature flags!
  */
-
+#define X86_FEATURE_RING3MWAIT	( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
 #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
 #define X86_FEATURE_CAT_L3	( 7*32+ 4) /* Cache Allocation Technology L3 */
@@ -289,7 +290,8 @@
 #define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */
 #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_RDPID	(16*32+ 22) /* RDPID instruction */
+#define X86_FEATURE_LA57	(16*32+16) /* 5-level page tables */
+#define X86_FEATURE_RDPID	(16*32+22) /* RDPID instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
@@ -321,5 +323,4 @@
 #define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400	X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-
 #endif /* _ASM_X86_CPUFEATURES_H */

+ 1 - 1
tools/arch/x86/lib/memcpy_64.S

@@ -286,7 +286,7 @@ ENDPROC(memcpy_mcsafe_unrolled)
 	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)

+ 1 - 0
tools/build/Makefile.feature

@@ -63,6 +63,7 @@ FEATURE_TESTS_BASIC :=                  \
         lzma                            \
         get_cpuid                       \
         bpf                             \
+        sched_getcpu			\
         sdt
 
 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list

+ 11 - 7
tools/build/feature/Makefile

@@ -48,21 +48,22 @@ FILES=                                          \
          test-get_cpuid.bin                     \
          test-sdt.bin                           \
          test-cxx.bin                           \
-         test-jvmti.bin
+         test-jvmti.bin				\
+         test-sched_getcpu.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
-CC := $(CROSS_COMPILE)gcc -MD
-CXX := $(CROSS_COMPILE)g++ -MD
-PKG_CONFIG := $(CROSS_COMPILE)pkg-config
+CC ?= $(CROSS_COMPILE)gcc
+CXX ?= $(CROSS_COMPILE)g++
+PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config
 
 all: $(FILES)
 
-__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
+__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
   BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
 
-__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
+__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
   BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
 
 ###############################
@@ -91,6 +92,9 @@ $(OUTPUT)test-libelf.bin:
 $(OUTPUT)test-glibc.bin:
 	$(BUILD)
 
+$(OUTPUT)test-sched_getcpu.bin:
+	$(BUILD)
+
 DWARFLIBS := -ldw
 ifeq ($(findstring -static,${LDFLAGS}),-static)
 DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
@@ -171,7 +175,7 @@ $(OUTPUT)test-libperl.bin:
 	$(BUILD) $(FLAGS_PERL_EMBED)
 
 $(OUTPUT)test-libpython.bin:
-	$(BUILD)
+	$(BUILD) $(FLAGS_PYTHON_EMBED)
 
 $(OUTPUT)test-libpython-version.bin:
 	$(BUILD)

+ 5 - 0
tools/build/feature/test-all.c

@@ -117,6 +117,10 @@
 # include "test-pthread-attr-setaffinity-np.c"
 #undef main
 
+#define main main_test_sched_getcpu
+# include "test-sched_getcpu.c"
+#undef main
+
 # if 0
 /*
  * Disable libbabeltrace check for test-all, because the requested
@@ -182,6 +186,7 @@ int main(int argc, char *argv[])
 	main_test_get_cpuid();
 	main_test_bpf();
 	main_test_libcrypto();
+	main_test_sched_getcpu();
 	main_test_sdt();
 
 	return 0;

+ 7 - 0
tools/build/feature/test-sched_getcpu.c

@@ -0,0 +1,7 @@
+#define _GNU_SOURCE
+#include <sched.h>
+
+int main(void)
+{
+	return sched_getcpu();
+}

+ 8 - 0
tools/include/asm-generic/atomic-gcc.h

@@ -60,4 +60,12 @@ static inline int atomic_dec_and_test(atomic_t *v)
 	return __sync_sub_and_fetch(&v->counter, 1) == 0;
 }
 
+#define cmpxchg(ptr, oldval, newval) \
+	__sync_val_compare_and_swap(ptr, oldval, newval)
+
+static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval)
+{
+	return cmpxchg(&(v)->counter, oldval, newval);
+}
+
 #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
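
This fallback enables the usual compare-and-swap retry loop in tools code; a minimal sketch, with illustrative names:

	/* Hypothetical: bump the counter, but never past 'limit'. */
	static int atomic_inc_below(atomic_t *v, int limit)
	{
		int old, cur = atomic_read(v);

		while (cur < limit) {
			old = atomic_cmpxchg(v, cur, cur + 1);
			if (old == cur)
				return 1;	/* our increment won the race */
			cur = old;		/* lost the race; retry with the new value */
		}
		return 0;			/* already at the limit */
	}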

+ 6 - 0
tools/include/linux/atomic.h

@@ -3,4 +3,10 @@
 
 #include <asm/atomic.h>
 
+/* atomic_cmpxchg_relaxed */
+#ifndef atomic_cmpxchg_relaxed
+#define  atomic_cmpxchg_relaxed		atomic_cmpxchg
+#define  atomic_cmpxchg_release         atomic_cmpxchg
+#endif /* atomic_cmpxchg_relaxed */
+
 #endif /* __TOOLS_LINUX_ATOMIC_H */

+ 10 - 0
tools/include/linux/bug.h

@@ -0,0 +1,10 @@
+#ifndef _TOOLS_PERF_LINUX_BUG_H
+#define _TOOLS_PERF_LINUX_BUG_H
+
+/* Force a compilation error if condition is true, but also produce a
+   result (of value 0 and type size_t), so the expression can be used
+   e.g. in a structure initializer (or wherever else comma expressions
+   aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+
+#endif	/* _TOOLS_PERF_LINUX_BUG_H */

+ 7 - 0
tools/include/linux/compiler-gcc.h

@@ -12,3 +12,10 @@
 #if GCC_VERSION >= 70000 && !defined(__CHECKER__)
 # define __fallthrough __attribute__ ((fallthrough))
 #endif
+
+#if GCC_VERSION >= 40300
+# define __compiletime_error(message) __attribute__((error(message)))
+#endif /* GCC_VERSION >= 40300 */
+
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))

+ 9 - 0
tools/include/linux/compiler.h

@@ -5,6 +5,10 @@
 #include <linux/compiler-gcc.h>
 #endif
 
+#ifndef __compiletime_error
+# define __compiletime_error(message)
+#endif
+
 /* Optimization barrier */
 /* The "volatile" is due to gcc bugs */
 #define barrier() __asm__ __volatile__("": : :"memory")
@@ -13,6 +17,11 @@
 # define __always_inline	inline __attribute__((always_inline))
 #endif
 
+/* Are two types/vars the same type (ignoring qualifiers)? */
+#ifndef __same_type
+# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
+#endif
+
 #ifdef __ANDROID__
 /*
  * FIXME: Big hammer to get rid of tons of:

+ 0 - 4
tools/include/linux/hashtable.h

@@ -13,10 +13,6 @@
 #include <linux/hash.h>
 #include <linux/log2.h>
 
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
 #define DEFINE_HASHTABLE(name, bits)						\
 	struct hlist_head name[1 << (bits)] =					\
 			{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }

+ 7 - 0
tools/include/linux/kernel.h

@@ -4,6 +4,11 @@
 #include <stdarg.h>
 #include <stddef.h>
 #include <assert.h>
+#include <linux/compiler.h>
+
+#ifndef UINT_MAX
+#define UINT_MAX	(~0U)
+#endif
 
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 
@@ -72,6 +77,8 @@
 int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
 int scnprintf(char * buf, size_t size, const char * fmt, ...);
 
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
 /*
  * This looks more complex than it should be. But we need to
  * get the type for the ~ right in round_down (it needs to be
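
With __must_be_array() wired into it, the shared ARRAY_SIZE() now rejects pointers at compile time; a sketch of what this means for callers:

	#include <linux/kernel.h>

	static const char *levels[] = { "quiet", "verbose", "debug" };

	static size_t nr_levels(void)
	{
		return ARRAY_SIZE(levels);	/* 3, computed at compile time */
	}

	/* Given 'const char **p', ARRAY_SIZE(p) no longer compiles:
	 * __same_type(p, &p[0]) is true, so BUILD_BUG_ON_ZERO() expands
	 * to a negative bit-field width and the build fails. */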

+ 3 - 0
tools/include/linux/log2.h

@@ -12,6 +12,9 @@
 #ifndef _TOOLS_LINUX_LOG2_H
 #define _TOOLS_LINUX_LOG2_H
 
+#include <linux/bitops.h>
+#include <linux/types.h>
+
 /*
  * non-constant log of base 2 calculators
  * - the arch may override these in asm/bitops.h if they can be implemented

+ 151 - 0
tools/include/linux/refcount.h

@@ -0,0 +1,151 @@
+#ifndef _TOOLS_LINUX_REFCOUNT_H
+#define _TOOLS_LINUX_REFCOUNT_H
+
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * It differs in that the counter saturates at UINT_MAX and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free issues.
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * it's the lock acquire; for RCU/lockless data structures it's the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the inc, this ensures we'll never modify the object
+ * if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before; it also provides a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+
+#ifdef NDEBUG
+#define REFCOUNT_WARN(cond, str) (void)(cond)
+#define __refcount_check
+#else
+#define REFCOUNT_WARN(cond, str) BUG_ON(cond)
+#define __refcount_check	__must_check
+#endif
+
+typedef struct refcount_struct {
+	atomic_t refs;
+} refcount_t;
+
+#define REFCOUNT_INIT(n)	{ .refs = ATOMIC_INIT(n), }
+
+static inline void refcount_set(refcount_t *r, unsigned int n)
+{
+	atomic_set(&r->refs, n);
+}
+
+static inline unsigned int refcount_read(const refcount_t *r)
+{
+	return atomic_read(&r->refs);
+}
+
+/*
+ * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_inc_not_zero(refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		new = val + 1;
+
+		if (!val)
+			return false;
+
+		if (unlikely(!new))
+			return true;
+
+		old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+	return true;
+}
+
+/*
+ * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller already has a
+ * reference on the object, will WARN when this is not so.
+ */
+static inline void refcount_inc(refcount_t *r)
+{
+	REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+}
+
+/*
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		if (unlikely(val == UINT_MAX))
+			return false;
+
+		new = val - i;
+		if (new > val) {
+			REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+			return false;
+		}
+
+		old = atomic_cmpxchg_release(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	return !new;
+}
+
+static inline __refcount_check
+bool refcount_dec_and_test(refcount_t *r)
+{
+	return refcount_sub_and_test(1, r);
+}
+
+
+#endif /* _TOOLS_LINUX_REFCOUNT_H */
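
The intended usage pattern, sketched with hypothetical names:

	#include <stdlib.h>
	#include <linux/refcount.h>

	struct object {
		refcount_t refcnt;
		/* ... payload ... */
	};

	static struct object *object_new(void)
	{
		struct object *obj = calloc(1, sizeof(*obj));

		if (obj)
			refcount_set(&obj->refcnt, 1);	/* creator holds one reference */
		return obj;
	}

	static void object_put(struct object *obj)
	{
		if (refcount_dec_and_test(&obj->refcnt))
			free(obj);			/* last reference dropped */
	}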

+ 2 - 0
tools/include/linux/string.h

@@ -18,4 +18,6 @@ extern size_t strlcpy(char *dest, const char *src, size_t size);
 
 char *str_error_r(int errnum, char *buf, size_t buflen);
 
+int prefixcmp(const char *str, const char *prefix);
+
 #endif /* _LINUX_STRING_H_ */

+ 1 - 0
tools/include/linux/types.h

@@ -7,6 +7,7 @@
 
 #define __SANE_USERSPACE_TYPES__	/* For PPC64, to get LL64 types */
 #include <asm/types.h>
+#include <asm/posix_types.h>
 
 struct page;
 struct kmem_cache;

+ 72 - 0
tools/include/uapi/linux/fcntl.h

@@ -0,0 +1,72 @@
+#ifndef _UAPI_LINUX_FCNTL_H
+#define _UAPI_LINUX_FCNTL_H
+
+#include <asm/fcntl.h>
+
+#define F_SETLEASE	(F_LINUX_SPECIFIC_BASE + 0)
+#define F_GETLEASE	(F_LINUX_SPECIFIC_BASE + 1)
+
+/*
+ * Cancel a blocking posix lock; internal use only until we expose an
+ * asynchronous lock api to userspace:
+ */
+#define F_CANCELLK	(F_LINUX_SPECIFIC_BASE + 5)
+
+/* Create a file descriptor with FD_CLOEXEC set. */
+#define F_DUPFD_CLOEXEC	(F_LINUX_SPECIFIC_BASE + 6)
+
+/*
+ * Request notifications on a directory.
+ * See below for events that may be notified.
+ */
+#define F_NOTIFY	(F_LINUX_SPECIFIC_BASE+2)
+
+/*
+ * Set and get of pipe page size array
+ */
+#define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
+#define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
+
+/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS	(F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS	(F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW	0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE	0x0008	/* prevent writes */
+/* (1U << 31) is reserved for signed error codes */
+
+/*
+ * Types of directory notifications that may be requested.
+ */
+#define DN_ACCESS	0x00000001	/* File accessed */
+#define DN_MODIFY	0x00000002	/* File modified */
+#define DN_CREATE	0x00000004	/* File created */
+#define DN_DELETE	0x00000008	/* File removed */
+#define DN_RENAME	0x00000010	/* File renamed */
+#define DN_ATTRIB	0x00000020	/* File changed attributes */
+#define DN_MULTISHOT	0x80000000	/* Don't remove notifier */
+
+#define AT_FDCWD		-100    /* Special value used to indicate
+                                           openat should use the current
+                                           working directory. */
+#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
+#define AT_REMOVEDIR		0x200   /* Remove directory instead of
+                                           unlinking file.  */
+#define AT_SYMLINK_FOLLOW	0x400   /* Follow symbolic links.  */
+#define AT_NO_AUTOMOUNT		0x800	/* Suppress terminal automount traversal */
+#define AT_EMPTY_PATH		0x1000	/* Allow empty relative pathname */
+
+#define AT_STATX_SYNC_TYPE	0x6000	/* Type of synchronisation required from statx() */
+#define AT_STATX_SYNC_AS_STAT	0x0000	/* - Do whatever stat() does */
+#define AT_STATX_FORCE_SYNC	0x2000	/* - Force the attributes to be sync'd with the server */
+#define AT_STATX_DONT_SYNC	0x4000	/* - Don't sync attributes with the server */
+
+
+#endif /* _UAPI_LINUX_FCNTL_H */
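
As one example of these definitions in use, the F_SETPIPE_SZ/F_GETPIPE_SZ pair resizes a pipe's buffer; a minimal sketch (error handling trimmed, and the kernel may round the requested size up):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fds[2];

		if (pipe(fds))
			return 1;
		fcntl(fds[1], F_SETPIPE_SZ, 1 << 20);	/* request a 1 MiB buffer */
		printf("pipe buffer: %d bytes\n", fcntl(fds[1], F_GETPIPE_SZ));
		return 0;
	}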

+ 32 - 1
tools/include/uapi/linux/perf_event.h

@@ -344,7 +344,8 @@ struct perf_event_attr {
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
-				__reserved_1   : 36;
+				namespaces     :  1, /* include namespaces data */
+				__reserved_1   : 35;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
 	__u16	size;
 };
 
+struct perf_ns_link_info {
+	__u64	dev;
+	__u64	ino;
+};
+
+enum {
+	NET_NS_INDEX		= 0,
+	UTS_NS_INDEX		= 1,
+	IPC_NS_INDEX		= 2,
+	PID_NS_INDEX		= 3,
+	USER_NS_INDEX		= 4,
+	MNT_NS_INDEX		= 5,
+	CGROUP_NS_INDEX		= 6,
+
+	NR_NAMESPACES,		/* number of available namespaces */
+};
+
 enum perf_event_type {
 
 	/*
@@ -862,6 +880,18 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SWITCH_CPU_WIDE		= 15,
 
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid;
+	 *	u32				tid;
+	 *	u64				nr_namespaces;
+	 *	{ u64				dev, inode; } [nr_namespaces];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_NAMESPACES			= 16,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -885,6 +915,7 @@ enum perf_callchain_context {
  */
 #define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
 #define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL		0x04	/* record contains gaps */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
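
On the tool side, the PERF_RECORD_NAMESPACES layout described in the comment above could be modelled roughly as follows (a sketch based on that comment; perf's own struct may differ in naming):

	#include <linux/types.h>
	#include <linux/perf_event.h>

	struct namespaces_event {
		struct perf_event_header	header;
		__u32				pid;
		__u32				tid;
		__u64				nr_namespaces;
		struct perf_ns_link_info	link_info[];	/* nr_namespaces entries */
		/* ... followed by struct sample_id ... */
	};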

+ 177 - 0
tools/include/uapi/linux/stat.h

@@ -0,0 +1,177 @@
+#ifndef _UAPI_LINUX_STAT_H
+#define _UAPI_LINUX_STAT_H
+
+#include <linux/types.h>
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+#define S_IFMT  00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK	 0120000
+#define S_IFREG  0100000
+#define S_IFBLK  0060000
+#define S_IFDIR  0040000
+#define S_IFCHR  0020000
+#define S_IFIFO  0010000
+#define S_ISUID  0004000
+#define S_ISGID  0002000
+#define S_ISVTX  0001000
+
+#define S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)
+#define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m)	(((m) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(m)	(((m) & S_IFMT) == S_IFCHR)
+#define S_ISBLK(m)	(((m) & S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m)	(((m) & S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m)	(((m) & S_IFMT) == S_IFSOCK)
+
+#define S_IRWXU 00700
+#define S_IRUSR 00400
+#define S_IWUSR 00200
+#define S_IXUSR 00100
+
+#define S_IRWXG 00070
+#define S_IRGRP 00040
+#define S_IWGRP 00020
+#define S_IXGRP 00010
+
+#define S_IRWXO 00007
+#define S_IROTH 00004
+#define S_IWOTH 00002
+#define S_IXOTH 00001
+
+#endif
+
+/*
+ * Timestamp structure for the timestamps in struct statx.
+ *
+ * tv_sec holds the number of seconds before (negative) or after (positive)
+ * 00:00:00 1st January 1970 UTC.
+ *
+ * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
+ * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
+ *
+ * Note that if both tv_sec and tv_nsec are non-zero, then the two values must
+ * either be both positive or both negative.
+ *
+ * __reserved is held in case we need a yet finer resolution.
+ */
+struct statx_timestamp {
+	__s64	tv_sec;
+	__s32	tv_nsec;
+	__s32	__reserved;
+};
+
+/*
+ * Structures for the extended file attribute retrieval system call
+ * (statx()).
+ *
+ * The caller passes a mask of what they're specifically interested in as a
+ * parameter to statx().  What statx() actually got will be indicated in
+ * st_mask upon return.
+ *
+ * For each bit in the mask argument:
+ *
+ * - if the datum is not supported:
+ *
+ *   - the bit will be cleared, and
+ *
+ *   - the datum will be set to an appropriate fabricated value if one is
+ *     available (eg. CIFS can take a default uid and gid), otherwise
+ *
+ *   - the field will be cleared;
+ *
+ * - otherwise, if explicitly requested:
+ *
+ *   - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
+ *     set or if the datum is considered out of date, and
+ *
+ *   - the field will be filled in and the bit will be set;
+ *
+ * - otherwise, if not requested, but available in approximate form without any
+ *   effort, it will be filled in anyway, and the bit will be set upon return
+ *   (it might not be up to date, however, and no attempt will be made to
+ *   synchronise the internal state first);
+ *
+ * - otherwise the field and the bit will be cleared before returning.
+ *
+ * Items in STATX_BASIC_STATS may be marked unavailable on return, but they
+ * will have values installed for compatibility purposes so that stat() and
+ * co. can be emulated in userspace.
+ */
+struct statx {
+	/* 0x00 */
+	__u32	stx_mask;	/* What results were written [uncond] */
+	__u32	stx_blksize;	/* Preferred general I/O size [uncond] */
+	__u64	stx_attributes;	/* Flags conveying information about the file [uncond] */
+	/* 0x10 */
+	__u32	stx_nlink;	/* Number of hard links */
+	__u32	stx_uid;	/* User ID of owner */
+	__u32	stx_gid;	/* Group ID of owner */
+	__u16	stx_mode;	/* File mode */
+	__u16	__spare0[1];
+	/* 0x20 */
+	__u64	stx_ino;	/* Inode number */
+	__u64	stx_size;	/* File size */
+	__u64	stx_blocks;	/* Number of 512-byte blocks allocated */
+	__u64	stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+	/* 0x40 */
+	struct statx_timestamp	stx_atime;	/* Last access time */
+	struct statx_timestamp	stx_btime;	/* File creation time */
+	struct statx_timestamp	stx_ctime;	/* Last attribute change time */
+	struct statx_timestamp	stx_mtime;	/* Last data modification time */
+	/* 0x80 */
+	__u32	stx_rdev_major;	/* Device ID of special file [if bdev/cdev] */
+	__u32	stx_rdev_minor;
+	__u32	stx_dev_major;	/* ID of device containing file [uncond] */
+	__u32	stx_dev_minor;
+	/* 0x90 */
+	__u64	__spare2[14];	/* Spare space for future expansion */
+	/* 0x100 */
+};
+
+/*
+ * Flags to be found in stx_mask
+ *
+ * Query request/result mask for statx() and struct statx::stx_mask.
+ *
+ * These bits should be set in the mask argument of statx() to request
+ * particular items when calling statx().
+ */
+#define STATX_TYPE		0x00000001U	/* Want/got stx_mode & S_IFMT */
+#define STATX_MODE		0x00000002U	/* Want/got stx_mode & ~S_IFMT */
+#define STATX_NLINK		0x00000004U	/* Want/got stx_nlink */
+#define STATX_UID		0x00000008U	/* Want/got stx_uid */
+#define STATX_GID		0x00000010U	/* Want/got stx_gid */
+#define STATX_ATIME		0x00000020U	/* Want/got stx_atime */
+#define STATX_MTIME		0x00000040U	/* Want/got stx_mtime */
+#define STATX_CTIME		0x00000080U	/* Want/got stx_ctime */
+#define STATX_INO		0x00000100U	/* Want/got stx_ino */
+#define STATX_SIZE		0x00000200U	/* Want/got stx_size */
+#define STATX_BLOCKS		0x00000400U	/* Want/got stx_blocks */
+#define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
+#define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
+#define STATX_ALL		0x00000fffU	/* All currently supported flags */
+#define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
+
+/*
+ * Attributes to be found in stx_attributes and masked in stx_attributes_mask.
+ *
+ * These give information about the features or the state of a file that might
+ * be of use to ordinary userspace programs such as GUIs or ls rather than
+ * specialised tools.
+ *
+ * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
+ * semantically.  Where possible, the numerical value is picked to correspond
+ * also.
+ */
+#define STATX_ATTR_COMPRESSED		0x00000004 /* [I] File is compressed by the fs */
+#define STATX_ATTR_IMMUTABLE		0x00000010 /* [I] File is marked immutable */
+#define STATX_ATTR_APPEND		0x00000020 /* [I] File is append-only */
+#define STATX_ATTR_NODUMP		0x00000040 /* [I] File is not to be dumped */
+#define STATX_ATTR_ENCRYPTED		0x00000800 /* [I] File requires key to decrypt in fs */
+
+#define STATX_ATTR_AUTOMOUNT		0x00001000 /* Dir: Automount trigger */
+
+
+#endif /* _UAPI_LINUX_STAT_H */
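
glibc wrappers for statx() were not yet common at this point, so a caller would typically go through syscall(2); a sketch, assuming headers new enough to provide __NR_statx and struct statx:

	#define _GNU_SOURCE
	#include <fcntl.h>		/* AT_FDCWD */
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/stat.h>		/* struct statx, STATX_* */

	int main(void)
	{
		struct statx stx;

		if (syscall(__NR_statx, AT_FDCWD, "/etc/hostname", 0,
			    STATX_BASIC_STATS | STATX_BTIME, &stx))
			return 1;
		printf("size=%llu mode=%o\n",
		       (unsigned long long)stx.stx_size, (unsigned)stx.stx_mode);
		return 0;
	}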

+ 29 - 0
tools/lib/api/fs/fs.c

@@ -439,6 +439,35 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
 	return filename__read_str(path, buf, sizep);
 }
 
+int sysfs__read_bool(const char *entry, bool *value)
+{
+	char *buf;
+	size_t size;
+	int ret;
+
+	ret = sysfs__read_str(entry, &buf, &size);
+	if (ret < 0)
+		return ret;
+
+	switch (buf[0]) {
+	case '1':
+	case 'y':
+	case 'Y':
+		*value = true;
+		break;
+	case '0':
+	case 'n':
+	case 'N':
+		*value = false;
+		break;
+	default:
+		ret = -1;
+	}
+
+	free(buf);
+
+	return ret;
+}
 int sysctl__read_int(const char *sysctl, int *value)
 {
 	char path[PATH_MAX];
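
A sketch of a caller, built with tools/lib/api on the include path (the sysfs attribute named here is illustrative):

	#include <stdbool.h>
	#include <stdio.h>
	#include <api/fs/fs.h>

	static void report_profiling(void)
	{
		bool on;

		/* reads e.g. /sys/kernel/profiling and maps 0/1/n/y to a bool */
		if (sysfs__read_bool("kernel/profiling", &on) >= 0)
			printf("profiling %s\n", on ? "enabled" : "disabled");
	}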

+ 1 - 0
tools/lib/api/fs/fs.h

@@ -37,4 +37,5 @@ int sysctl__read_int(const char *sysctl, int *value);
 int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
 int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
+int sysfs__read_bool(const char *entry, bool *value);
 #endif /* __API_FS__ */

+ 9 - 0
tools/lib/string.c

@@ -87,3 +87,12 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size)
 	}
 	return ret;
 }
+
+int prefixcmp(const char *str, const char *prefix)
+{
+	for (; ; str++, prefix++)
+		if (!*prefix)
+			return 0;
+		else if (*str != *prefix)
+			return (unsigned char)*prefix - (unsigned char)*str;
+}
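
Its typical option-parsing use, sketched with a hypothetical helper:

	#include <string.h>
	#include <linux/string.h>

	/* Return the value part of "--output=FILE", or NULL if 'arg' does
	 * not carry that prefix. */
	static const char *parse_output_opt(const char *arg)
	{
		if (prefixcmp(arg, "--output="))
			return NULL;
		return arg + strlen("--output=");
	}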

+ 1 - 0
tools/lib/subcmd/help.c

@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <linux/string.h>
 #include <termios.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>

+ 1 - 0
tools/lib/subcmd/help.h

@@ -2,6 +2,7 @@
 #define __SUBCMD_HELP_H
 
 #include <sys/types.h>
+#include <stdio.h>
 
 struct cmdnames {
 	size_t alloc;

+ 1 - 0
tools/lib/subcmd/parse-options.c

@@ -1,4 +1,5 @@
 #include <linux/compiler.h>
+#include <linux/string.h>
 #include <linux/types.h>
 #include <stdio.h>
 #include <stdlib.h>

+ 0 - 9
tools/lib/subcmd/subcmd-util.h

@@ -79,13 +79,4 @@ static inline void astrcat(char **out, const char *add)
 	free(tmp);
 }
 
-static inline int prefixcmp(const char *str, const char *prefix)
-{
-	for (; ; str++, prefix++)
-		if (!*prefix)
-			return 0;
-		else if (*str != *prefix)
-			return (unsigned char)*prefix - (unsigned char)*str;
-}
-
 #endif /* __SUBCMD_UTIL_H */

+ 1 - 0
tools/lib/symbol/kallsyms.c

@@ -1,3 +1,4 @@
+#include <ctype.h>
 #include "symbol/kallsyms.h"
 #include <stdio.h>
 #include <stdlib.h>

+ 1 - 2
tools/objtool/builtin-check.c

@@ -36,8 +36,7 @@
 #include "warn.h"
 
 #include <linux/hashtable.h>
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#include <linux/kernel.h>
 
 #define STATE_FP_SAVED		0x1
 #define STATE_FP_SETUP		0x2

+ 1 - 2
tools/objtool/objtool.c

@@ -31,11 +31,10 @@
 #include <stdlib.h>
 #include <subcmd/exec-cmd.h>
 #include <subcmd/pager.h>
+#include <linux/kernel.h>
 
 #include "builtin.h"
 
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-
 struct cmd_struct {
 	const char *name;
 	int (*fn)(int, const char **);

+ 2 - 0
tools/perf/.gitignore

@@ -31,3 +31,5 @@ config.mak.autogen
 .config-detected
 util/intel-pt-decoder/inat-tables.c
 arch/*/include/generated/
+pmu-events/pmu-events.c
+pmu-events/jevents

+ 1 - 0
tools/perf/Build

@@ -50,5 +50,6 @@ libperf-y += util/
 libperf-y += arch/
 libperf-y += ui/
 libperf-y += scripts/
+libperf-y += trace/beauty/
 
 gtk-y += ui/gtk/

+ 18 - 0
tools/perf/Documentation/perf-ftrace.txt

@@ -30,6 +30,24 @@ OPTIONS
 --verbose=::
         Verbosity level.
 
+-p::
+--pid=::
+	Trace on existing process id (comma separated list).
+
+-a::
+--all-cpus::
+	Force system-wide collection.  Scripts run without a <command>
+	normally use -a by default, while scripts run with a <command>
+	normally don't - this option allows the latter to be run in
+	system-wide mode.
+
+-C::
+--cpu=::
+	Only trace for the list of CPUs provided.  Multiple CPUs can
+	be provided as a comma separated list with no space like: 0,1.
+	Ranges of CPUs are specified with -: 0-2.
+	Default is to trace on all online CPUs.
+
 
 SEE ALSO
 --------

+ 7 - 1
tools/perf/Documentation/perf-list.txt

@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
 SYNOPSIS
 --------
 [verse]
-'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob]
+'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]
 
 DESCRIPTION
 -----------
@@ -24,6 +24,10 @@ Don't print descriptions.
 --long-desc::
 Print longer event descriptions.
 
+--details::
+Print how named events are resolved internally into perf events, and also
+any extra expressions computed by perf stat.
+
 
 [[EVENT_MODIFIERS]]
 EVENT MODIFIERS
@@ -240,6 +244,8 @@ To limit the list use:
 
 . 'pmu' to print the kernel supplied PMU events.
 
+. 'sdt' to list all Statically Defined Tracepoint events.
+
 . If none of the above is matched, it will apply the supplied glob to all
   events, printing the ones that match.
 

+ 3 - 0
tools/perf/Documentation/perf-record.txt

@@ -347,6 +347,9 @@ Enable weighted sampling. An additional weight is recorded per sample and can be
 displayed with the weight and local_weight sort keys.  This currently works for TSX
 abort events and some memory events in precise mode on modern Intel CPUs.
 
+--namespaces::
+Record events of type PERF_RECORD_NAMESPACES.
+
 --transaction::
 Record transaction flags for transaction related events.
 

+ 12 - 1
tools/perf/Documentation/perf-report.txt

@@ -72,7 +72,8 @@ OPTIONS
 --sort=::
 	Sort histogram entries by given key(s) - multiple keys can be specified
 	in CSV format.  Following sort keys are available:
-	pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
+	pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
+	local_weight, cgroup_id.
 
 	Each key has following meaning:
 
@@ -80,6 +81,7 @@ OPTIONS
 	- pid: command and tid of the task
 	- dso: name of library or module executed at the time of sample
 	- symbol: name of function executed at the time of sample
+	- symbol_size: size of function executed at the time of sample
 	- parent: name of function matched to the parent regex filter. Unmatched
 	entries are displayed as "[other]".
 	- cpu: cpu number the task ran at the time of sample
@@ -91,6 +93,7 @@ OPTIONS
 	- weight: Event specific weight, e.g. memory latency or transaction
 	abort cost. This is the global weight.
 	- local_weight: Local weight version of the weight above.
+	- cgroup_id: ID derived from cgroup namespace device and inode numbers.
 	- transaction: Transaction abort flags.
 	- overhead: Overhead percentage of sample
 	- overhead_sys: Overhead percentage of sample running in system mode
@@ -172,6 +175,9 @@ OPTIONS
 	By default, every sort key not specified in -F will be appended
 	automatically.
 
+	If a key starts with the prefix '+', the specified field(s) will be
+	appended to the default field order. For example: perf report -F +period,sample.
+
 -p::
 --parent=<regex>::
         A regex filter to identify parent. The parent is a caller of this
@@ -229,6 +235,7 @@ OPTIONS
 	sort_key can be:
 	- function: compare on functions (default)
 	- address: compare on individual code addresses
+	- srcline: compare on source filename and line number
 
 	branch can be:
 	- branch: include last branch information in callgraph when available.
@@ -424,6 +431,10 @@ include::itrace.txt[]
 --hierarchy::
 	Enable hierarchical output.
 
+--inline::
+	If a callgraph address belongs to an inlined function, the inline stack
+	will be printed. Each entry is function name or file/line.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO

+ 4 - 0
tools/perf/Documentation/perf-sched.txt

@@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist'
 --migrations::
 	Show migration events.
 
+-n::
+--next::
+	Show next task.
+
 -I::
 --idle-hist::
 	Show idle-related events only.

+ 14 - 2
tools/perf/Documentation/perf-script.txt

@@ -116,7 +116,7 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
         callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
@@ -189,15 +189,20 @@ OPTIONS
 	i.e., -F "" is not allowed.
 
 	The brstack output includes branch related information with raw addresses using the
-	/v/v/v/v/ syntax in the following order:
+	/v/v/v/v/cycles syntax in the following order:
 	FROM: branch source instruction
 	TO  : branch target instruction
         M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
 	X/- : X=branch inside a transactional region, -=not in transaction region or not supported
 	A/- : A=TSX abort entry, -=not aborted region or not supported
+	cycles
 
 	The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
 
+	When brstackinsn is specified, the full assembler sequence of branches for
+	each sample is printed. This is the full execution path leading to the
+	sample. It is only supported when the sample was recorded with perf record -b or -j any.
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
@@ -248,6 +253,9 @@ OPTIONS
 --show-mmap-events
 	Display mmap related events (e.g. MMAP, MMAP2).
 
+--show-namespace-events
+	Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
+
 --show-switch-events
 	Display context switch events i.e. events of type PERF_RECORD_SWITCH or
 	PERF_RECORD_SWITCH_CPU_WIDE.
@@ -299,6 +307,10 @@ include::itrace.txt[]
 	stop time is not given (i.e, time string is 'x.y,') then analysis goes
 	to end of file.
 
+--max-blocks::
+	Set the maximum number of program blocks to print with brstackinsn for
+	each sample.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],

Some files were not shown because too many files changed in this diff