@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
 	return NULL;		/* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
 
 void load_percpu_segment(int cpu)
 {
@@ -490,27 +490,116 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+#endif
+
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+	  [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+	  [DEBUG_STACK - 1]              = DEBUG_STKSZ
+};
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
+				   SYSENTER_stack_storage);
+
+static void __init
+set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+{
+	for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+		__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+/* Setup the fixmap mappings only once per-processor */
+static void __init setup_cpu_entry_area(int cpu)
 {
 #ifdef CONFIG_X86_64
-	/* On 64-bit systems, we use a read-only fixmap GDT. */
-	pgprot_t prot = PAGE_KERNEL_RO;
+	extern char _entry_trampoline[];
+
+	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+	pgprot_t gdt_prot = PAGE_KERNEL_RO;
+	pgprot_t tss_prot = PAGE_KERNEL_RO;
 #else
 	/*
 	 * On native 32-bit systems, the GDT cannot be read-only because
 	 * our double fault handler uses a task gate, and entering through
-	 * a task gate needs to change an available TSS to busy. If the GDT
-	 * is read-only, that will triple fault.
+	 * a task gate needs to change an available TSS to busy. If the
+	 * GDT is read-only, that will triple fault. The TSS cannot be
+	 * read-only because the CPU writes to it on task switches.
 	 *
-	 * On Xen PV, the GDT must be read-only because the hypervisor requires
-	 * it.
+	 * On Xen PV, the GDT must be read-only because the hypervisor
+	 * requires it.
 	 */
-	pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
 		PAGE_KERNEL_RO : PAGE_KERNEL;
+	pgprot_t tss_prot = PAGE_KERNEL;
 #endif
 
-	__set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
+	__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
+				per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
+				PAGE_KERNEL);
+
+	/*
+	 * The Intel SDM says (Volume 3, 7.2.1):
+	 *
+	 *  Avoid placing a page boundary in the part of the TSS that the
+	 *  processor reads during a task switch (the first 104 bytes). The
+	 *  processor may not correctly perform address translations if a
+	 *  boundary occurs in this area. During a task switch, the processor
+	 *  reads and writes into the first 104 bytes of each TSS (using
+	 *  contiguous physical addresses beginning with the physical address
+	 *  of the first byte of the TSS). So, after TSS access begins, if
+	 *  part of the 104 bytes is not physically contiguous, the processor
+	 *  will access incorrect information without generating a page-fault
+	 *  exception.
+	 *
+	 * There are also a lot of errata involving the TSS spanning a page
+	 * boundary. Assert that we're not doing that.
+	 */
+	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+				&per_cpu(cpu_tss_rw, cpu),
+				sizeof(struct tss_struct) / PAGE_SIZE,
+				tss_prot);
+
+#ifdef CONFIG_X86_32
+	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+	BUILD_BUG_ON(sizeof(exception_stacks) !=
+		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+				&per_cpu(exception_stacks, cpu),
+				sizeof(exception_stacks) / PAGE_SIZE,
+				PAGE_KERNEL);
+
+	__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		setup_cpu_entry_area(cpu);
 }
 
 /* Load the original GDT from the per-cpu structure */
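/*
 * A standalone user-space sketch (not kernel code) of the same-page check
 * behind the first BUILD_BUG_ON() in setup_cpu_entry_area() above: if the
 * XOR of the start and end offsets of the hardware TSS has no bits set at
 * or above the page size, the structure cannot straddle a 4 KiB boundary.
 * The struct layout and SKETCH_* names here are hypothetical, chosen only
 * for illustration.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096UL
#define SKETCH_PAGE_MASK (~(SKETCH_PAGE_SIZE - 1))

struct hw_tss { char bytes[104]; };                 /* stand-in for x86_hw_tss */
struct tss    { char pad[32]; struct hw_tss hw; };  /* hypothetical container  */

int main(void)
{
	size_t start = offsetof(struct tss, hw);       /* first byte the CPU reads */
	size_t end   = start + sizeof(struct hw_tss);  /* one past the last byte   */

	/* Same shape as the BUILD_BUG_ON(): both offsets share one page. */
	assert(((start ^ end) & SKETCH_PAGE_MASK) == 0);
	printf("hw tss occupies [%zu, %zu), within one %lu-byte page\n",
	       start, end, SKETCH_PAGE_SIZE);
	return 0;
}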
@@ -747,7 +836,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 {
 	int i;
 
-	for (i = 0; i < NCAPINTS; i++) {
+	for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
 		c->x86_capability[i] &= ~cpu_caps_cleared[i];
 		c->x86_capability[i] |= cpu_caps_set[i];
 	}
@@ -1250,7 +1339,7 @@ void enable_sep_cpu(void)
 		return;
 
 	cpu = get_cpu();
-	tss = &per_cpu(cpu_tss, cpu);
+	tss = &per_cpu(cpu_tss_rw, cpu);
 
 	/*
 	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1348,7 @@ void enable_sep_cpu(void)
 
 	tss->x86_tss.ss1 = __KERNEL_CS;
 	wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
-	wrmsr(MSR_IA32_SYSENTER_ESP,
-	      (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
-	      0);
-
+	wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
 	put_cpu();
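/*
 * A minimal user-space sketch (not kernel code) of the "pointer + 1" idiom
 * the hunk above uses for MSR_IA32_SYSENTER_ESP: adding 1 to a pointer to
 * the stack object yields the address one past its end, which is the initial
 * stack top for a downward-growing stack.  The demo_stack type is a
 * hypothetical stand-in for the per-CPU SYSENTER stack.
 */
#include <stdio.h>

struct demo_stack { unsigned long words[64]; };  /* hypothetical stack object */

int main(void)
{
	static struct demo_stack stack;
	unsigned long top = (unsigned long)(&stack + 1);  /* one past the end */

	printf("base=%p top=%#lx size=%zu\n",
	       (void *)&stack, top, sizeof(stack));
	return 0;
}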
@@ -1357,25 +1442,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-	  [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
-	  [DEBUG_STACK - 1]              = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
+	extern char _entry_trampoline[];
+	extern char entry_SYSCALL_64_trampoline[];
+
+	int cpu = smp_processor_id();
+	unsigned long SYSCALL64_entry_trampoline =
+		(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+		(entry_SYSCALL_64_trampoline - _entry_trampoline);
+
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+	wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
 
 #ifdef CONFIG_IA32_EMULATION
 	wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
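/*
 * A sketch in plain C, with made-up values, of the address arithmetic used
 * for MSR_LSTAR in the hunk above: the entry point sits at the same offset
 * inside the per-CPU alias of the trampoline page as the symbol has inside
 * the kernel's trampoline section, so the target is
 * alias_base + (symbol - section_start).  All addresses below are
 * hypothetical and only illustrate the relocation.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t relocate_entry(uint64_t alias_base,
			       uint64_t section_start, uint64_t symbol)
{
	return alias_base + (symbol - section_start);  /* preserve the offset */
}

int main(void)
{
	uint64_t section_start = 0xffffffff81e00000ULL;  /* section in the kernel image */
	uint64_t symbol        = 0xffffffff81e00040ULL;  /* entry point within it       */
	uint64_t alias_base    = 0xfffffe0000001000ULL;  /* per-CPU alias mapping       */

	printf("relocated entry: %#llx\n",
	       (unsigned long long)relocate_entry(alias_base, section_start, symbol));
	return 0;
}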
@@ -1386,7 +1465,7 @@ void syscall_init(void)
 	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
 	 */
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
 	wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1609,7 @@ void cpu_init(void)
 	if (cpu)
 		load_ucode_ap();
 
-	t = &per_cpu(cpu_tss, cpu);
+	t = &per_cpu(cpu_tss_rw, cpu);
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1569,7 +1648,7 @@ void cpu_init(void)
 	 * set up and load the per-CPU TSS
 	 */
 	if (!oist->ist[0]) {
-		char *estacks = per_cpu(exception_stacks, cpu);
+		char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
 
 		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
 			estacks += exception_stack_sizes[v];
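/*
 * A standalone sketch (not kernel code) of how the loop in the hunk above
 * carves the contiguous exception_stacks buffer into per-vector IST stacks:
 * each stack's top is the running sum of the per-stack sizes, so stack i
 * ends where stack i+1 begins.  The counts and sizes here are hypothetical.
 */
#include <stdio.h>

#define N_STACKS 4
#define STK_4K   4096UL
#define STK_8K   8192UL

int main(void)
{
	static char buffer[3 * STK_4K + STK_8K];         /* one contiguous area */
	unsigned long sizes[N_STACKS] = { STK_4K, STK_8K, STK_4K, STK_4K };
	char *top = buffer;

	for (int i = 0; i < N_STACKS; i++) {
		top += sizes[i];                         /* stack i grows down from 'top' */
		printf("stack %d: top at offset %ld\n", i, (long)(top - buffer));
	}
	return 0;
}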
@@ -1580,7 +1659,7 @@ void cpu_init(void)
 		}
 	}
 
-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 	/*
 	 * <= is required because the CPU will access up to
@@ -1596,11 +1675,12 @@ void cpu_init(void)
 	enter_lazy_tlb(&init_mm, me);
 
 	/*
-	 * Initialize the TSS. Don't bother initializing sp0, as the initial
-	 * task never enters user mode.
+	 * Initialize the TSS. sp0 points to the entry trampoline stack
+	 * regardless of what task is running.
 	 */
-	set_tss_desc(cpu, t);
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 	load_TR_desc();
+	load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
 
 	load_mm_ldt(&init_mm);
 
@@ -1612,7 +1692,6 @@ void cpu_init(void)
 	if (is_uv_system())
 		uv_cpu_init();
 
-	setup_fixmap_gdt(cpu);
 	load_fixmap_gdt(cpu);
 }
 
@@ -1622,7 +1701,7 @@ void cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+	struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
 
 	wait_for_master_cpu(cpu);
 
@@ -1657,12 +1736,12 @@ void cpu_init(void)
 	 * Initialize the TSS. Don't bother initializing sp0, as the initial
 	 * task never enters user mode.
 	 */
-	set_tss_desc(cpu, t);
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 	load_TR_desc();
 
 	load_mm_ldt(&init_mm);
 
-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 #ifdef CONFIG_DOUBLEFAULT
 	/* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1753,6 @@ void cpu_init(void)
 
 	fpu__init_cpu();
 
-	setup_fixmap_gdt(cpu);
 	load_fixmap_gdt(cpu);
 }
 #endif