@@ -11,8 +11,14 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/cacheinfo.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kthread.h>
 #include <linux/slab.h>
+#include <asm/cacheflush.h>
 #include <asm/intel-family.h>
+#include <asm/intel_rdt_sched.h>
 #include "intel_rdt.h"
 
 /*
@@ -79,6 +85,53 @@ static u64 get_prefetch_disable_bits(void)
 	return 0;
 }
 
+/**
+ * pseudo_lock_region_init - Initialize pseudo-lock region information
+ * @plr: pseudo-lock region
+ *
+ * Called after the user provided a schemata to be pseudo-locked. On entry
+ * the &struct pseudo_lock_region has already been initialized from the
+ * schemata with the resource, domain, and capacity bitmask. Here the
+ * information required for pseudo-locking is deduced from this data and
+ * the &struct pseudo_lock_region is initialized further. This includes:
+ * - size in bytes of the region to be pseudo-locked
+ * - cache line size to know the stride with which data needs to be accessed
+ *   to be pseudo-locked
+ * - a cpu associated with the cache instance on which the pseudo-locking
+ *   flow can be executed
+ *
+ * Return: 0 on success, <0 on failure. Descriptive error will be written
+ * to last_cmd_status buffer.
+ */
+static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
+{
+	struct cpu_cacheinfo *ci;
+	int i;
+
+	/* Pick the first cpu we find that is associated with the cache. */
+	plr->cpu = cpumask_first(&plr->d->cpu_mask);
+
+	if (!cpu_online(plr->cpu)) {
+		rdt_last_cmd_printf("cpu %u associated with cache not online\n",
+				    plr->cpu);
+		return -ENODEV;
+	}
+
+	ci = get_cpu_cacheinfo(plr->cpu);
+
+	plr->size = rdtgroup_cbm_to_size(plr->r, plr->d, plr->cbm);
+
+	for (i = 0; i < ci->num_leaves; i++) {
+		if (ci->info_list[i].level == plr->r->cache_level) {
+			plr->line_size = ci->info_list[i].coherency_line_size;
+			return 0;
+		}
+	}
+
+	rdt_last_cmd_puts("unable to determine cache line size\n");
+	return -1;
+}
+
 /**
  * pseudo_lock_init - Initialize a pseudo-lock region
  * @rdtgrp: resource group to which new pseudo-locked region will belong
@@ -98,10 +151,69 @@ static int pseudo_lock_init(struct rdtgroup *rdtgrp)
 	if (!plr)
 		return -ENOMEM;
 
+	init_waitqueue_head(&plr->lock_thread_wq);
 	rdtgrp->plr = plr;
 	return 0;
 }
 
+/**
+ * pseudo_lock_region_clear - Reset pseudo-lock region data
+ * @plr: pseudo-lock region
+ *
+ * All content of the pseudo-locked region is reset - any memory allocated
+ * for it is freed.
+ *
+ * Return: void
+ */
+static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
+{
+	plr->size = 0;
+	plr->line_size = 0;
+	kfree(plr->kmem);
+	plr->kmem = NULL;
+	plr->r = NULL;
+	if (plr->d)
+		plr->d->plr = NULL;
+	plr->d = NULL;
+	plr->cbm = 0;
+}
+
+/**
+ * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
+ * @plr: pseudo-lock region
+ *
+ * Initialize the details required to set up the pseudo-locked region and
+ * allocate the contiguous memory that will be pseudo-locked to the cache.
+ *
+ * Return: 0 on success, <0 on failure. Descriptive error will be written
+ * to last_cmd_status buffer.
+ */
+static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
+{
+	int ret;
+
+	ret = pseudo_lock_region_init(plr);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * We do not yet support contiguous regions larger than
+	 * KMALLOC_MAX_SIZE.
+	 */
+	if (plr->size > KMALLOC_MAX_SIZE) {
+		rdt_last_cmd_puts("requested region exceeds maximum size\n");
+		return -E2BIG;
+	}
+
+	plr->kmem = kzalloc(plr->size, GFP_KERNEL);
+	if (!plr->kmem) {
+		rdt_last_cmd_puts("unable to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /**
  * pseudo_lock_free - Free a pseudo-locked region
  * @rdtgrp: resource group to which pseudo-locked region belonged
@@ -114,10 +226,142 @@ static int pseudo_lock_init(struct rdtgroup *rdtgrp)
  */
 static void pseudo_lock_free(struct rdtgroup *rdtgrp)
 {
+	pseudo_lock_region_clear(rdtgrp->plr);
 	kfree(rdtgrp->plr);
 	rdtgrp->plr = NULL;
 }
 
+/**
+ * pseudo_lock_fn - Load kernel memory into cache
+ * @_rdtgrp: resource group to which pseudo-lock region belongs
+ *
+ * This is the core pseudo-locking flow.
+ *
+ * First we ensure that the kernel memory cannot be found in the cache.
+ * Then, while taking care that there will be as little interference as
+ * possible, the memory to be loaded is accessed while the core is running
+ * with the class of service set to the bitmask of the pseudo-locked
+ * region. After this is complete no future CAT allocations will be
+ * allowed to overlap with this bitmask.
+ *
+ * Local register variables are utilized to ensure that the memory region
+ * to be locked is the only memory access made during the critical locking
+ * loop.
+ *
+ * Return: 0. Waiter on waitqueue will be woken on completion.
+ */
+static int pseudo_lock_fn(void *_rdtgrp)
+{
+	struct rdtgroup *rdtgrp = _rdtgrp;
+	struct pseudo_lock_region *plr = rdtgrp->plr;
+	u32 rmid_p, closid_p;
+	unsigned long i;
+#ifdef CONFIG_KASAN
+	/*
+	 * The registers used for local register variables are also used
+	 * when KASAN is active. When KASAN is active we use a regular
+	 * variable to ensure we always use a valid pointer, but the cost
+	 * is that this variable will enter the cache through evicting the
+	 * memory we are trying to lock into the cache. Thus expect lower
+	 * pseudo-locking success rate when KASAN is active.
+	 */
+	unsigned int line_size;
+	unsigned int size;
+	void *mem_r;
+#else
+	register unsigned int line_size asm("esi");
+	register unsigned int size asm("edi");
+#ifdef CONFIG_X86_64
+	register void *mem_r asm("rbx");
+#else
+	register void *mem_r asm("ebx");
+#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_KASAN */
+
+	/*
+	 * Make sure none of the allocated memory is cached. If it is, the
+	 * loop below would get a cache hit from outside of the
+	 * pseudo-locked region.
+	 * wbinvd (as opposed to clflush/clflushopt) is required to
+	 * increase the likelihood that the allocated cache portion will
+	 * be filled with the associated memory.
+	 */
+	native_wbinvd();
+
+	/*
+	 * Always called with interrupts enabled. By disabling interrupts
+	 * we ensure that we are not preempted during this critical section.
+	 */
+	local_irq_disable();
+
+	/*
+	 * Call wrmsr and rdmsr as directly as possible to avoid tracing
+	 * clobbering local register variables or affecting cache accesses.
+	 *
+	 * Disable the hardware prefetcher so that when the end of the memory
+	 * being pseudo-locked is reached the hardware will not read beyond
+	 * the buffer and evict pseudo-locked memory read earlier from the
+	 * cache.
+	 */
+	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+	closid_p = this_cpu_read(pqr_state.cur_closid);
+	rmid_p = this_cpu_read(pqr_state.cur_rmid);
+	mem_r = plr->kmem;
+	size = plr->size;
+	line_size = plr->line_size;
+	/*
+	 * Critical section begin: start by writing the closid associated
+	 * with the capacity bitmask of the cache region being
+	 * pseudo-locked followed by reading of kernel memory to load it
+	 * into the cache.
+	 */
+	__wrmsr(IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
+	/*
+	 * Cache was flushed earlier. Now access kernel memory to read it
+	 * into cache region associated with just activated plr->closid.
+	 * Loop over data twice:
+	 * - In first loop the cache region is shared with the page walker
+	 *   as it populates the paging structure caches (including TLB).
+	 * - In the second loop the paging structure caches are used and
+	 *   cache region is populated with the memory being referenced.
+	 */
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		/*
+		 * Add a barrier to prevent speculative execution of this
+		 * loop reading beyond the end of the buffer.
+		 */
+		rmb();
+		asm volatile("mov (%0,%1,1), %%eax\n\t"
+			     :
+			     : "r" (mem_r), "r" (i)
+			     : "%eax", "memory");
+	}
+	for (i = 0; i < size; i += line_size) {
+		/*
+		 * Add a barrier to prevent speculative execution of this
+		 * loop reading beyond the end of the buffer.
+		 */
+		rmb();
+		asm volatile("mov (%0,%1,1), %%eax\n\t"
+			     :
+			     : "r" (mem_r), "r" (i)
+			     : "%eax", "memory");
+	}
+	/*
+	 * Critical section end: restore closid with capacity bitmask that
+	 * does not overlap with pseudo-locked region.
+	 */
+	__wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p);
+
+	/* Re-enable the hardware prefetcher(s) */
+	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
+	local_irq_enable();
+
+	plr->thread_done = 1;
+	wake_up_interruptible(&plr->lock_thread_wq);
+	return 0;
+}
+
 /**
  * rdtgroup_monitor_in_progress - Test if monitoring in progress
  * @r: resource group being queried
@@ -399,7 +643,6 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
 		if (bitmap_intersects(cbm, cbm_b, cbm_len))
 			return true;
 	}
-
 	return false;
 }
 
@@ -448,3 +691,95 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
 	free_cpumask_var(cpu_with_psl);
 	return ret;
 }
+
+/**
+ * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
+ * @rdtgrp: resource group to which pseudo-lock region belongs
+ *
+ * Called when a resource group in the pseudo-locksetup mode receives a
+ * valid schemata that should be pseudo-locked. Since the resource group is
+ * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
+ * allocated and initialized with the essential information. If a failure
+ * occurs the resource group remains in the pseudo-locksetup mode with the
+ * &struct pseudo_lock_region associated with it, but cleared of all
+ * information and ready for the user to re-attempt pseudo-locking by
+ * writing the schemata again.
+ *
+ * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
+ * on failure. Descriptive error will be written to last_cmd_status buffer.
+ */
+int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
+{
+	struct pseudo_lock_region *plr = rdtgrp->plr;
+	struct task_struct *thread;
+	int ret;
+
+	ret = pseudo_lock_region_alloc(plr);
+	if (ret < 0)
+		return ret;
+
+	plr->thread_done = 0;
+
+	thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
+					cpu_to_node(plr->cpu),
+					"pseudo_lock/%u", plr->cpu);
+	if (IS_ERR(thread)) {
+		ret = PTR_ERR(thread);
+		rdt_last_cmd_printf("locking thread returned error %d\n", ret);
+		goto out_region;
+	}
+
+	kthread_bind(thread, plr->cpu);
+	wake_up_process(thread);
+
+	ret = wait_event_interruptible(plr->lock_thread_wq,
+				       plr->thread_done == 1);
+	if (ret < 0) {
+		/*
+		 * If the thread does not get on the CPU for whatever
+		 * reason and the process which sets up the region is
+		 * interrupted then this will leave the thread in runnable
+		 * state and once it gets on the CPU it will dereference
+		 * the cleared, but not freed, plr struct resulting in an
+		 * empty pseudo-locking loop.
+		 */
+		rdt_last_cmd_puts("locking thread interrupted\n");
+		goto out_region;
+	}
+
+	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
+	closid_free(rdtgrp->closid);
+	ret = 0;
+	goto out;
+
+out_region:
+	pseudo_lock_region_clear(plr);
+out:
+	return ret;
+}
+
+/**
+ * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
+ * @rdtgrp: resource group to which the pseudo-locked region belongs
+ *
+ * The removal of a pseudo-locked region can be initiated when the resource
+ * group is removed from user space via a "rmdir", or by the unmount of
+ * the resctrl filesystem. On removal the resource group does not go back
+ * to pseudo-locksetup mode before it is removed, instead it is removed
+ * directly. There is thus asymmetry with the creation: the &struct
+ * pseudo_lock_region is freed here while it was not allocated in
+ * rdtgroup_pseudo_lock_create().
+ *
+ * Return: void
+ */
+void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
+{
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP)
+		/*
+		 * Default group cannot be a pseudo-locked region so we can
+		 * free closid here.
+		 */
+		closid_free(rdtgrp->closid);
+
+	pseudo_lock_free(rdtgrp);
+}
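
For reference, below is a minimal user-space sketch (not part of this patch) of how the flow above is expected to be exercised through resctrl, assuming the "mode" and "schemata" interfaces introduced by the rest of this series; the group name "demo_lock", the cache id (1), and the capacity bitmask (0x3) are illustrative values only.

/*
 * Hypothetical example only: create a resource group, switch it to
 * pseudo-locksetup mode, then write a schemata, which is what ends up
 * in rdtgroup_pseudo_lock_create() above. On success the group's mode
 * reads back as "pseudo-locked". Assumes resctrl is mounted at
 * /sys/fs/resctrl.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, val, strlen(val)) < 0) {
		perror(path);
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* Create the resource group directory. */
	if (mkdir("/sys/fs/resctrl/demo_lock", 0755) && errno != EEXIST) {
		perror("mkdir");
		return 1;
	}
	/* Request pseudo-locksetup mode for the new resource group. */
	if (write_str("/sys/fs/resctrl/demo_lock/mode", "pseudo-locksetup"))
		return 1;
	/* Writing the schemata triggers the pseudo-locking of the region. */
	if (write_str("/sys/fs/resctrl/demo_lock/schemata", "L2:1=0x3\n"))
		return 1;
	return 0;
}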