@@ -15,12 +15,14 @@
 #include <linux/context_tracking.h>	/* exception_enter(), ... */
 #include <linux/uaccess.h>		/* faulthandler_disabled() */
 
+#include <asm/cpufeature.h>		/* boot_cpu_has, ... */
 #include <asm/traps.h>			/* dotraplinkage, ... */
 #include <asm/pgalloc.h>		/* pgd_*(), ... */
 #include <asm/kmemcheck.h>		/* kmemcheck_*(), ... */
 #include <asm/fixmap.h>			/* VSYSCALL_ADDR */
 #include <asm/vsyscall.h>		/* emulate_vsyscall */
 #include <asm/vm86.h>			/* struct vm86 */
+#include <asm/mmu_context.h>		/* vma_pkey() */
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -33,6 +35,7 @@
  * bit 2 ==	0: kernel-mode access	1: user-mode access
  * bit 3 ==				1: use of reserved bit detected
  * bit 4 ==				1: fault was an instruction fetch
+ * bit 5 ==				1: protection keys block access
  */
 enum x86_pf_error_code {
 
@@ -41,6 +44,7 @@ enum x86_pf_error_code {
 	PF_USER		=		1 << 2,
 	PF_RSVD		=		1 << 3,
 	PF_INSTR	=		1 << 4,
+	PF_PK		=		1 << 5,
 };
 
 /*
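
For orientation, a worked decode of the new bit (illustrative only, not part of the patch; PF_PROT and PF_WRITE are bits 0 and 1 of the same enum, just outside this hunk's context). Protection keys are only checked on present pages, so a PF_PK fault is also a PF_PROT (present-page) fault:

/* Illustrative only, not part of the patch: decoding a pkeys fault code. */
#include <stdio.h>

#define PF_PROT  (1 << 0)	/* bit 0: protection fault on a present page */
#define PF_WRITE (1 << 1)
#define PF_USER  (1 << 2)
#define PF_PK    (1 << 5)	/* the bit added by this patch */

int main(void)
{
	/* user-mode write to a present page, blocked only by pkeys */
	unsigned long error_code = PF_PROT | PF_WRITE | PF_USER | PF_PK;

	printf("error_code = %#lx, pkey fault: %s\n",
	       error_code, (error_code & PF_PK) ? "yes" : "no");
	return 0;	/* prints: error_code = 0x27, pkey fault: yes */
}
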
@@ -167,9 +171,60 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
 	return prefetch;
 }
 
+/*
+ * A protection key fault means that the PKRU value did not allow
+ * access to some PTE. Userspace can figure out what PKRU was
+ * from the XSAVE state, and this function fills out a field in
+ * siginfo so userspace can discover which protection key was set
+ * on the PTE.
+ *
+ * If we get here, we know that the hardware signaled a PF_PK
+ * fault and that there was a VMA once we got in the fault
+ * handler. It does *not* guarantee that the VMA we find here
+ * was the one that we faulted on.
+ *
+ * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4);
+ * 2. T1 : set PKRU to deny access to pkey=4, touches page
+ * 3. T1 : faults...
+ * 4. T2 : mprotect_key(foo, PAGE_SIZE, pkey=5);
+ * 5. T1 : enters fault handler, takes mmap_sem, etc...
+ * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
+ *         faulted on a pte with its pkey=4.
+ */
+static void fill_sig_info_pkey(int si_code, siginfo_t *info,
+		struct vm_area_struct *vma)
+{
+	/* This is effectively an #ifdef */
+	if (!boot_cpu_has(X86_FEATURE_OSPKE))
+		return;
+
+	/* Fault not from Protection Keys: nothing to do */
+	if (si_code != SEGV_PKUERR)
+		return;
+	/*
+	 * force_sig_info_fault() is called from a number of
+	 * contexts, some of which have a VMA and some of which
+	 * do not. The PF_PK handling happens after we have a
+	 * valid VMA, so we should never reach this without a
+	 * valid VMA.
+	 */
+	if (!vma) {
+		WARN_ONCE(1, "PKU fault with no VMA passed in");
+		info->si_pkey = 0;
+		return;
+	}
+	/*
+	 * si_pkey should be thought of as a strong hint, but not
+	 * absolutely guaranteed to be 100% accurate because of
+	 * the race explained above.
+	 */
+	info->si_pkey = vma_pkey(vma);
+}
+
 static void
 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
-		     struct task_struct *tsk, int fault)
+		     struct task_struct *tsk, struct vm_area_struct *vma,
+		     int fault)
 {
 	unsigned lsb = 0;
 	siginfo_t info;
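
The net effect of fill_sig_info_pkey() is that a pkeys-denied access reaches userspace as SIGSEGV with si_code set to SEGV_PKUERR and the VMA's key in the new siginfo field. A minimal sketch of a consumer, assuming a libc whose headers expose SEGV_PKUERR and si_pkey (they are not provided by this patch):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void segv_handler(int sig, siginfo_t *info, void *ctx)
{
	if (info->si_code == SEGV_PKUERR)
		fprintf(stderr, "pkey fault at %p, pkey=%d\n",
			info->si_addr, (int)info->si_pkey);
	_exit(1);	/* do not return and retry the faulting access */
}

int main(void)
{
	struct sigaction sa = {
		.sa_sigaction	= segv_handler,
		.sa_flags	= SA_SIGINFO,
	};

	sigaction(SIGSEGV, &sa, NULL);
	/* ... touch a page whose pkey is access-disabled in PKRU ... */
	return 0;
}

Because of the mprotect_key() race documented above, si_pkey is a strong hint rather than a guarantee, so a handler that changes PKRU based on it should tolerate an occasionally stale value.
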
@@ -184,6 +239,8 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		lsb = PAGE_SHIFT;
 	info.si_addr_lsb = lsb;
 
+	fill_sig_info_pkey(si_code, &info, vma);
+
 	force_sig_info(si_signo, &info, tsk);
 }
 
@@ -661,6 +718,8 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	struct task_struct *tsk = current;
 	unsigned long flags;
 	int sig;
+	/* No context means no VMA to pass down */
+	struct vm_area_struct *vma = NULL;
 
 	/* Are we prepared to handle this kernel fault? */
 	if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -684,7 +743,8 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	tsk->thread.cr2 = address;
 
 	/* XXX: hwpoison faults will set the wrong code. */
-	force_sig_info_fault(signal, si_code, address, tsk, 0);
+	force_sig_info_fault(signal, si_code, address,
+			     tsk, vma, 0);
 }
 
 /*
@@ -761,7 +821,8 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
 static void
 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		       unsigned long address, int si_code)
+		       unsigned long address, struct vm_area_struct *vma,
+		       int si_code)
 {
 	struct task_struct *tsk = current;
 
@@ -804,7 +865,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_PF;
 
-		force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
+		force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
 
 		return;
 	}
@@ -817,14 +878,14 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 
 static noinline void
 bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		     unsigned long address)
+		     unsigned long address, struct vm_area_struct *vma)
 {
-	__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
+	__bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
 }
 
 static void
 __bad_area(struct pt_regs *regs, unsigned long error_code,
-	   unsigned long address, int si_code)
+	   unsigned long address, struct vm_area_struct *vma, int si_code)
 {
 	struct mm_struct *mm = current->mm;
 
@@ -834,25 +895,50 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
 	 */
 	up_read(&mm->mmap_sem);
 
-	__bad_area_nosemaphore(regs, error_code, address, si_code);
+	__bad_area_nosemaphore(regs, error_code, address, vma, si_code);
 }
 
 static noinline void
 bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
-	__bad_area(regs, error_code, address, SEGV_MAPERR);
+	__bad_area(regs, error_code, address, NULL, SEGV_MAPERR);
+}
+
+static inline bool bad_area_access_from_pkeys(unsigned long error_code,
+		struct vm_area_struct *vma)
+{
+	/* This code is always called on the current mm */
+	bool foreign = false;
+
+	if (!boot_cpu_has(X86_FEATURE_OSPKE))
+		return false;
+	if (error_code & PF_PK)
+		return true;
+	/* this checks permission keys on the VMA: */
+	if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
+				(error_code & PF_INSTR), foreign))
+		return true;
+	return false;
 }
 
 static noinline void
 bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
-		      unsigned long address)
+		      unsigned long address, struct vm_area_struct *vma)
 {
-	__bad_area(regs, error_code, address, SEGV_ACCERR);
+	/*
+	 * This OSPKE check is not strictly necessary at runtime.
+	 * But, doing it this way allows compiler optimizations
+	 * if pkeys are compiled out.
+	 */
+	if (bad_area_access_from_pkeys(error_code, vma))
+		__bad_area(regs, error_code, address, vma, SEGV_PKUERR);
+	else
+		__bad_area(regs, error_code, address, vma, SEGV_ACCERR);
 }
 
 static void
 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
-	  unsigned int fault)
+	  struct vm_area_struct *vma, unsigned int fault)
 {
 	struct task_struct *tsk = current;
 	int code = BUS_ADRERR;
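
With bad_area_access_error() now distinguishing pkey denials from ordinary permission failures, the new signal can be provoked from userspace. A sketch assuming the glibc pkey_alloc()/pkey_mprotect()/pkey_set() wrappers, which are separate from this patch:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>

int main(void)
{
	char *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	int pkey = pkey_alloc(0, 0);	/* new key, no restrictions yet */

	if (page == MAP_FAILED || pkey < 0) {
		perror("setup");
		return 1;
	}
	pkey_mprotect(page, 4096, PROT_READ | PROT_WRITE, pkey);
	pkey_set(pkey, PKEY_DISABLE_ACCESS);	/* PKRU now denies this key */

	page[0] = 1;	/* faults with PF_PK; delivered as SEGV_PKUERR */
	return 0;
}

The mapping itself allows the write via PROT_WRITE, so only the PKRU check fails; that is exactly the case bad_area_access_from_pkeys() classifies as SEGV_PKUERR rather than SEGV_ACCERR.
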
@@ -879,12 +965,13 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 			code = BUS_MCEERR_AR;
 	}
 #endif
-	force_sig_info_fault(SIGBUS, code, address, tsk, fault);
+	force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
 }
 
 static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
-	       unsigned long address, unsigned int fault)
+	       unsigned long address, struct vm_area_struct *vma,
+	       unsigned int fault)
 {
 	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
 		no_context(regs, error_code, address, 0, 0);
@@ -908,9 +995,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	} else {
 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
 			     VM_FAULT_HWPOISON_LARGE))
-			do_sigbus(regs, error_code, address, fault);
+			do_sigbus(regs, error_code, address, vma, fault);
 		else if (fault & VM_FAULT_SIGSEGV)
-			bad_area_nosemaphore(regs, error_code, address);
+			bad_area_nosemaphore(regs, error_code, address, vma);
 		else
 			BUG();
 	}
@@ -923,6 +1010,12 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
 
 	if ((error_code & PF_INSTR) && !pte_exec(*pte))
 		return 0;
+	/*
+	 * Note: We do not do lazy flushing on protection key
+	 * changes, so no spurious fault will ever set PF_PK.
+	 */
+	if ((error_code & PF_PK))
+		return 1;
 
 	return 1;
 }
@@ -1012,6 +1105,17 @@ int show_unhandled_signals = 1;
 static inline int
 access_error(unsigned long error_code, struct vm_area_struct *vma)
 {
+	/* This is only called for the current mm, so: */
+	bool foreign = false;
+	/*
+	 * Make sure to check the VMA so that we do not perform
+	 * faults just to hit a PF_PK as soon as we fill in a
+	 * page.
+	 */
+	if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
+				(error_code & PF_INSTR), foreign))
+		return 1;
+
 	if (error_code & PF_WRITE) {
 		/* write, present and write, not present: */
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
@@ -1118,7 +1222,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock:
 		 */
-		bad_area_nosemaphore(regs, error_code, address);
+		bad_area_nosemaphore(regs, error_code, address, NULL);
 
 		return;
 	}
@@ -1131,7 +1235,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 		pgtable_bad(regs, error_code, address);
 
 	if (unlikely(smap_violation(error_code, regs))) {
-		bad_area_nosemaphore(regs, error_code, address);
+		bad_area_nosemaphore(regs, error_code, address, NULL);
 		return;
 	}
 
@@ -1140,7 +1244,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 * in a region with pagefaults disabled then we must not take the fault
 	 */
 	if (unlikely(faulthandler_disabled() || !mm)) {
-		bad_area_nosemaphore(regs, error_code, address);
+		bad_area_nosemaphore(regs, error_code, address, NULL);
 		return;
 	}
 
@@ -1164,6 +1268,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 
 	if (error_code & PF_WRITE)
 		flags |= FAULT_FLAG_WRITE;
+	if (error_code & PF_INSTR)
+		flags |= FAULT_FLAG_INSTRUCTION;
 
 	/*
 	 * When running in the kernel we expect faults to occur only to
@@ -1184,7 +1290,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
 		if ((error_code & PF_USER) == 0 &&
 		    !search_exception_tables(regs->ip)) {
-			bad_area_nosemaphore(regs, error_code, address);
+			bad_area_nosemaphore(regs, error_code, address, NULL);
 			return;
 		}
 retry:
@@ -1232,7 +1338,7 @@ retry:
 	 */
 good_area:
 	if (unlikely(access_error(error_code, vma))) {
-		bad_area_access_error(regs, error_code, address);
+		bad_area_access_error(regs, error_code, address, vma);
 		return;
 	}
 
@@ -1270,7 +1376,7 @@ good_area:
 
 	up_read(&mm->mmap_sem);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
-		mm_fault_error(regs, error_code, address, fault);
+		mm_fault_error(regs, error_code, address, vma, fault);
 		return;
 	}
 