@@ -265,6 +265,75 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+	unsigned long	msr_a;
+	unsigned long	msr_b;
+	unsigned int	reg_off;
+} pt_address_ranges[] = {
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR0_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR0_B,
+		.reg_off = RTIT_CTL_ADDR0_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR1_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR1_B,
+		.reg_off = RTIT_CTL_ADDR1_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR2_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR2_B,
+		.reg_off = RTIT_CTL_ADDR2_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR3_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR3_B,
+		.reg_off = RTIT_CTL_ADDR3_OFFSET,
+	}
+};
+
+static u64 pt_config_filters(struct perf_event *event)
+{
+	struct pt_filters *filters = event->hw.addr_filters;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	unsigned int range = 0;
+	u64 rtit_ctl = 0;
+
+	if (!filters)
+		return 0;
+
+	perf_event_addr_filters_sync(event);
+
+	for (range = 0; range < filters->nr_filters; range++) {
+		struct pt_filter *filter = &filters->filter[range];
+
+		/*
+		 * Note, if the range has zero start/end addresses due
+		 * to its dynamic object not being loaded yet, we just
+		 * go ahead and program zeroed range, which will simply
+		 * produce no data. Note^2: if executable code at 0x0
+		 * is a concern, we can set up an "invalid" configuration
+		 * such as msr_b < msr_a.
+		 */
+
+		/* avoid redundant msr writes */
+		if (pt->filters.filter[range].msr_a != filter->msr_a) {
+			wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+			pt->filters.filter[range].msr_a = filter->msr_a;
+		}
+
+		if (pt->filters.filter[range].msr_b != filter->msr_b) {
+			wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+			pt->filters.filter[range].msr_b = filter->msr_b;
+		}
+
+		rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+	}
+
+	return rtit_ctl;
+}
+
 static void pt_config(struct perf_event *event)
 {
 	u64 reg;
@@ -274,7 +343,8 @@ static void pt_config(struct perf_event *event)
 		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
 	}
 
-	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+	reg = pt_config_filters(event);
+	reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
 
 	if (!event->attr.exclude_kernel)
 		reg |= RTIT_CTL_OS;
@@ -921,6 +991,82 @@ static void pt_buffer_free_aux(void *data)
 	kfree(buf);
 }
 
+static int pt_addr_filters_init(struct perf_event *event)
+{
+	struct pt_filters *filters;
+	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+	if (!pt_cap_get(PT_CAP_num_address_ranges))
+		return 0;
+
+	filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
+	if (!filters)
+		return -ENOMEM;
+
+	if (event->parent)
+		memcpy(filters, event->parent->hw.addr_filters,
+		       sizeof(*filters));
+
+	event->hw.addr_filters = filters;
+
+	return 0;
+}
+
+static void pt_addr_filters_fini(struct perf_event *event)
+{
+	kfree(event->hw.addr_filters);
+	event->hw.addr_filters = NULL;
+}
+
+static int pt_event_addr_filters_validate(struct list_head *filters)
+{
+	struct perf_addr_filter *filter;
+	int range = 0;
+
+	list_for_each_entry(filter, filters, entry) {
+		/* PT doesn't support single address triggers */
+		if (!filter->range)
+			return -EOPNOTSUPP;
+
+		if (!filter->inode && !kernel_ip(filter->offset))
+			return -EINVAL;
+
+		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static void pt_event_addr_filters_sync(struct perf_event *event)
+{
+	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+	unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+	struct pt_filters *filters = event->hw.addr_filters;
+	struct perf_addr_filter *filter;
+	int range = 0;
+
+	if (!filters)
+		return;
+
+	list_for_each_entry(filter, &head->list, entry) {
+		if (filter->inode && !offs[range]) {
+			msr_a = msr_b = 0;
+		} else {
+			/* apply the offset */
+			msr_a = filter->offset + offs[range];
+			msr_b = filter->size + msr_a;
+		}
+
+		filters->filter[range].msr_a  = msr_a;
+		filters->filter[range].msr_b  = msr_b;
+		filters->filter[range].config = filter->filter ? 1 : 2;
+		range++;
+	}
+
+	filters->nr_filters = range;
+}
+
 /**
  * intel_pt_interrupt() - PT PMI handler
  */
@@ -1128,6 +1274,7 @@ static void pt_event_read(struct perf_event *event)
 
 static void pt_event_destroy(struct perf_event *event)
 {
+	pt_addr_filters_fini(event);
 	x86_del_exclusive(x86_lbr_exclusive_pt);
 }
 
@@ -1142,6 +1289,11 @@ static int pt_event_init(struct perf_event *event)
 	if (x86_add_exclusive(x86_lbr_exclusive_pt))
 		return -EBUSY;
 
+	if (pt_addr_filters_init(event)) {
+		x86_del_exclusive(x86_lbr_exclusive_pt);
+		return -ENOMEM;
+	}
+
 	event->destroy = pt_event_destroy;
 
 	return 0;
@@ -1195,16 +1347,21 @@ static __init int pt_init(void)
 		PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
 
 	pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
-	pt_pmu.pmu.attr_groups	= pt_attr_groups;
-	pt_pmu.pmu.task_ctx_nr	= perf_sw_context;
-	pt_pmu.pmu.event_init	= pt_event_init;
-	pt_pmu.pmu.add		= pt_event_add;
-	pt_pmu.pmu.del		= pt_event_del;
-	pt_pmu.pmu.start	= pt_event_start;
-	pt_pmu.pmu.stop		= pt_event_stop;
-	pt_pmu.pmu.read		= pt_event_read;
-	pt_pmu.pmu.setup_aux	= pt_buffer_setup_aux;
-	pt_pmu.pmu.free_aux	= pt_buffer_free_aux;
+	pt_pmu.pmu.attr_groups		 = pt_attr_groups;
+	pt_pmu.pmu.task_ctx_nr		 = perf_sw_context;
+	pt_pmu.pmu.event_init		 = pt_event_init;
+	pt_pmu.pmu.add			 = pt_event_add;
+	pt_pmu.pmu.del			 = pt_event_del;
+	pt_pmu.pmu.start		 = pt_event_start;
+	pt_pmu.pmu.stop			 = pt_event_stop;
+	pt_pmu.pmu.read			 = pt_event_read;
+	pt_pmu.pmu.setup_aux		 = pt_buffer_setup_aux;
+	pt_pmu.pmu.free_aux		 = pt_buffer_free_aux;
+	pt_pmu.pmu.addr_filters_sync	 = pt_event_addr_filters_sync;
+	pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
+	pt_pmu.pmu.nr_addr_filters	 =
+		pt_cap_get(PT_CAP_num_address_ranges);
+
 	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
 
 	return ret;
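
Note on the supporting data structures: these hunks only touch pt.c, so the
layout of struct pt_filter / struct pt_filters and the filters member of the
per-CPU struct pt is not visible here. Below is a minimal sketch inferred from
how pt_config_filters() and pt_event_addr_filters_sync() index those objects;
the PT_FILTERS_NUM constant, field types and comments are assumptions added
for illustration, not the companion header change verbatim.

/* Assumed sketch: one slot per supported address range, shadowing the
 * RTIT_ADDRn_A/B MSRs plus the per-range ADDRn_CFG value that
 * pt_config_filters() shifts into RTIT_CTL.
 */
#define PT_FILTERS_NUM	4	/* ADDR0..ADDR3; runtime limit is PT_CAP_num_address_ranges */

struct pt_filter {
	unsigned long	msr_a;	/* range start, written to MSR_IA32_RTIT_ADDRn_A */
	unsigned long	msr_b;	/* range end, written to MSR_IA32_RTIT_ADDRn_B */
	unsigned long	config;	/* ADDRn_CFG: 1 == trace the range, 2 == stop tracing on it */
};

struct pt_filters {
	struct pt_filter	filter[PT_FILTERS_NUM];
	unsigned int		nr_filters;	/* ranges currently in use */
};

The per-CPU struct pt is assumed to carry a struct pt_filters filters member
as the shadow copy that pt_config_filters() compares against before calling
wrmsrl(), so re-scheduling the same event does not rewrite MSRs whose values
have not changed.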