Przeglądaj źródła

Merge tag 'lkdtm-next' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux into char-misc-testing

Kees writes:

Become maintainer, add hardening tests for use-after-free and atomic wrapping.
Greg Kroah-Hartman 9 lat temu
rodzic
commit
d74e026ae5

+ 4 - 0
Documentation/watchdog/watchdog-parameters.txt

@@ -400,3 +400,7 @@ wm8350_wdt:
 nowayout: Watchdog cannot be stopped once started
 	(default=kernel config parameter)
 -------------------------------------------------
+sun4v_wdt:
+timeout_ms: Watchdog timeout in milliseconds 1..180000, default=60000)
+nowayout: Watchdog cannot be stopped once started
+-------------------------------------------------

+ 5 - 0
MAINTAINERS

@@ -6581,6 +6581,11 @@ F:	samples/livepatch/
 L:	live-patching@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/livepatching.git
 
+LINUX KERNEL DUMP TEST MODULE (LKDTM)
+M:	Kees Cook <keescook@chromium.org>
+S:	Maintained
+F:	drivers/misc/lkdtm.c
+
 LLC (802.2)
 M:	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 S:	Maintained

+ 6 - 0
arch/sparc/Makefile

@@ -24,7 +24,13 @@ LDFLAGS        := -m elf32_sparc
 export BITS    := 32
 UTS_MACHINE    := sparc
 
+# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some
+# versions of gcc.  Some gcc versions won't pass -Av8 to binutils when you
+# give -mcpu=v8.  This silently worked with older bintutils versions but
+# does not any more.
 KBUILD_CFLAGS  += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
+KBUILD_CFLAGS  += -Wa,-Av8
+
 KBUILD_AFLAGS  += -m32 -Wa,-Av8
 
 else

+ 2 - 1
arch/sparc/include/uapi/asm/unistd.h

@@ -422,8 +422,9 @@
 #define __NR_listen		354
 #define __NR_setsockopt		355
 #define __NR_mlock2		356
+#define __NR_copy_file_range	357
 
-#define NR_syscalls		357
+#define NR_syscalls		358
 
 /* Bitmask values returned from kern_features system call.  */
 #define KERN_FEATURE_MIXED_MODE_STACK	0x00000001

+ 17 - 0
arch/sparc/kernel/entry.S

@@ -948,7 +948,24 @@ linux_syscall_trace:
 	cmp	%o0, 0
 	bne	3f
 	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ld	[%sp + STACKFRAME_SZ + PT_G1], %g1
+	sethi	%hi(sys_call_table), %l7
+	ld	[%sp + STACKFRAME_SZ + PT_I0], %i0
+	or	%l7, %lo(sys_call_table), %l7
+	ld	[%sp + STACKFRAME_SZ + PT_I1], %i1
+	ld	[%sp + STACKFRAME_SZ + PT_I2], %i2
+	ld	[%sp + STACKFRAME_SZ + PT_I3], %i3
+	ld	[%sp + STACKFRAME_SZ + PT_I4], %i4
+	ld	[%sp + STACKFRAME_SZ + PT_I5], %i5
+	cmp	%g1, NR_syscalls
+	bgeu	3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
 	mov	%i0, %o0
+	ld	[%l7 + %l4], %l7
 	mov	%i1, %o1
 	mov	%i2, %o2
 	mov	%i3, %o3

+ 2 - 1
arch/sparc/kernel/hvcalls.S

@@ -338,8 +338,9 @@ ENTRY(sun4v_mach_set_watchdog)
 	mov	%o1, %o4
 	mov	HV_FAST_MACH_SET_WATCHDOG, %o5
 	ta	HV_FAST_TRAP
+	brnz,a,pn %o4, 0f
 	stx	%o1, [%o4]
-	retl
+0:	retl
 	 nop
 ENDPROC(sun4v_mach_set_watchdog)
 

+ 1 - 1
arch/sparc/kernel/signal_64.c

@@ -52,7 +52,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs)
 	unsigned char fenab;
 	int err;
 
-	flush_user_windows();
+	synchronize_user_stack();
 	if (get_thread_wsaved()					||
 	    (((unsigned long)ucp) & (sizeof(unsigned long)-1))	||
 	    (!__access_ok(ucp, sizeof(*ucp))))

+ 1 - 0
arch/sparc/kernel/sparc_ksyms_64.c

@@ -37,6 +37,7 @@ EXPORT_SYMBOL(sun4v_niagara_getperf);
 EXPORT_SYMBOL(sun4v_niagara_setperf);
 EXPORT_SYMBOL(sun4v_niagara2_getperf);
 EXPORT_SYMBOL(sun4v_niagara2_setperf);
+EXPORT_SYMBOL(sun4v_mach_set_watchdog);
 
 /* from hweight.S */
 EXPORT_SYMBOL(__arch_hweight8);

+ 36 - 0
arch/sparc/kernel/syscalls.S

@@ -158,7 +158,25 @@ linux_syscall_trace32:
 	 add	%sp, PTREGS_OFF, %o0
 	brnz,pn	%o0, 3f
 	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
+	sethi	%hi(sys_call_table32), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %i0
+	or	%l7, %lo(sys_call_table32), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I1], %i1
+	ldx	[%sp + PTREGS_OFF + PT_V9_I2], %i2
+	ldx	[%sp + PTREGS_OFF + PT_V9_I3], %i3
+	ldx	[%sp + PTREGS_OFF + PT_V9_I4], %i4
+	ldx	[%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+	cmp	%g1, NR_syscalls
+	bgeu,pn	%xcc, 3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
 	srl	%i0, 0, %o0
+	lduw	[%l7 + %l4], %l7
 	srl	%i4, 0, %o4
 	srl	%i1, 0, %o1
 	srl	%i2, 0, %o2
@@ -170,7 +188,25 @@ linux_syscall_trace:
 	 add	%sp, PTREGS_OFF, %o0
 	brnz,pn	%o0, 3f
 	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
+	sethi	%hi(sys_call_table64), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %i0
+	or	%l7, %lo(sys_call_table64), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I1], %i1
+	ldx	[%sp + PTREGS_OFF + PT_V9_I2], %i2
+	ldx	[%sp + PTREGS_OFF + PT_V9_I3], %i3
+	ldx	[%sp + PTREGS_OFF + PT_V9_I4], %i4
+	ldx	[%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+	cmp	%g1, NR_syscalls
+	bgeu,pn	%xcc, 3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
 	mov	%i0, %o0
+	lduw	[%l7 + %l4], %l7
 	mov	%i1, %o1
 	mov	%i2, %o2
 	mov	%i3, %o3

+ 1 - 1
arch/sparc/kernel/systbls_32.S

@@ -88,4 +88,4 @@ sys_call_table:
 /*340*/	.long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 /*345*/	.long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-/*355*/	.long sys_setsockopt, sys_mlock2
+/*355*/	.long sys_setsockopt, sys_mlock2, sys_copy_file_range

+ 2 - 2
arch/sparc/kernel/systbls_64.S

@@ -89,7 +89,7 @@ sys_call_table32:
 /*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 	.word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-	.word compat_sys_setsockopt, sys_mlock2
+	.word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range
 
 #endif /* CONFIG_COMPAT */
 
@@ -170,4 +170,4 @@ sys_call_table:
 /*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 	.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-	.word sys_setsockopt, sys_mlock2
+	.word sys_setsockopt, sys_mlock2, sys_copy_file_range

+ 119 - 3
drivers/misc/lkdtm.c

@@ -92,6 +92,9 @@ enum ctype {
 	CT_UNALIGNED_LOAD_STORE_WRITE,
 	CT_OVERWRITE_ALLOCATION,
 	CT_WRITE_AFTER_FREE,
+	CT_READ_AFTER_FREE,
+	CT_WRITE_BUDDY_AFTER_FREE,
+	CT_READ_BUDDY_AFTER_FREE,
 	CT_SOFTLOCKUP,
 	CT_HARDLOCKUP,
 	CT_SPINLOCKUP,
@@ -104,6 +107,7 @@ enum ctype {
 	CT_ACCESS_USERSPACE,
 	CT_WRITE_RO,
 	CT_WRITE_KERN,
+	CT_WRAP_ATOMIC
 };
 
 static char* cp_name[] = {
@@ -129,6 +133,9 @@ static char* cp_type[] = {
 	"UNALIGNED_LOAD_STORE_WRITE",
 	"OVERWRITE_ALLOCATION",
 	"WRITE_AFTER_FREE",
+	"READ_AFTER_FREE",
+	"WRITE_BUDDY_AFTER_FREE",
+	"READ_BUDDY_AFTER_FREE",
 	"SOFTLOCKUP",
 	"HARDLOCKUP",
 	"SPINLOCKUP",
@@ -141,6 +148,7 @@ static char* cp_type[] = {
 	"ACCESS_USERSPACE",
 	"WRITE_RO",
 	"WRITE_KERN",
+	"WRAP_ATOMIC"
 };
 
 static struct jprobe lkdtm;
@@ -409,12 +417,109 @@ static void lkdtm_do_action(enum ctype which)
 		break;
 	}
 	case CT_WRITE_AFTER_FREE: {
+		int *base, *again;
 		size_t len = 1024;
-		u32 *data = kmalloc(len, GFP_KERNEL);
+		/*
+		 * The slub allocator uses the first word to store the free
+		 * pointer in some configurations. Use the middle of the
+		 * allocation to avoid running into the freelist
+		 */
+		size_t offset = (len / sizeof(*base)) / 2;
+
+		base = kmalloc(len, GFP_KERNEL);
+		pr_info("Allocated memory %p-%p\n", base, &base[offset * 2]);
+		pr_info("Attempting bad write to freed memory at %p\n",
+			&base[offset]);
+		kfree(base);
+		base[offset] = 0x0abcdef0;
+		/* Attempt to notice the overwrite. */
+		again = kmalloc(len, GFP_KERNEL);
+		kfree(again);
+		if (again != base)
+			pr_info("Hmm, didn't get the same memory range.\n");
 
-		kfree(data);
+		break;
+	}
+	case CT_READ_AFTER_FREE: {
+		int *base, *val, saw;
+		size_t len = 1024;
+		/*
+		 * The slub allocator uses the first word to store the free
+		 * pointer in some configurations. Use the middle of the
+		 * allocation to avoid running into the freelist
+		 */
+		size_t offset = (len / sizeof(*base)) / 2;
+
+		base = kmalloc(len, GFP_KERNEL);
+		if (!base)
+			break;
+
+		val = kmalloc(len, GFP_KERNEL);
+		if (!val)
+			break;
+
+		*val = 0x12345678;
+		base[offset] = *val;
+		pr_info("Value in memory before free: %x\n", base[offset]);
+
+		kfree(base);
+
+		pr_info("Attempting bad read from freed memory\n");
+		saw = base[offset];
+		if (saw != *val) {
+			/* Good! Poisoning happened, so declare a win. */
+			pr_info("Memory correctly poisoned (%x)\n", saw);
+			BUG();
+		}
+		pr_info("Memory was not poisoned\n");
+
+		kfree(val);
+		break;
+	}
+	case CT_WRITE_BUDDY_AFTER_FREE: {
+		unsigned long p = __get_free_page(GFP_KERNEL);
+		if (!p)
+			break;
+		pr_info("Writing to the buddy page before free\n");
+		memset((void *)p, 0x3, PAGE_SIZE);
+		free_page(p);
+		schedule();
+		pr_info("Attempting bad write to the buddy page after free\n");
+		memset((void *)p, 0x78, PAGE_SIZE);
+		/* Attempt to notice the overwrite. */
+		p = __get_free_page(GFP_KERNEL);
+		free_page(p);
 		schedule();
-		memset(data, 0x78, len);
+
+		break;
+	}
+	case CT_READ_BUDDY_AFTER_FREE: {
+		unsigned long p = __get_free_page(GFP_KERNEL);
+		int saw, *val = kmalloc(1024, GFP_KERNEL);
+		int *base;
+
+		if (!p)
+			break;
+
+		if (!val)
+			break;
+
+		base = (int *)p;
+
+		*val = 0x12345678;
+		base[0] = *val;
+		pr_info("Value in memory before free: %x\n", base[0]);
+		free_page(p);
+		pr_info("Attempting to read from freed memory\n");
+		saw = base[0];
+		if (saw != *val) {
+			/* Good! Poisoning happened, so declare a win. */
+			pr_info("Memory correctly poisoned (%x)\n", saw);
+			BUG();
+		}
+		pr_info("Buddy page was not poisoned\n");
+
+		kfree(val);
 		break;
 	}
 	case CT_SOFTLOCKUP:
@@ -528,6 +633,17 @@ static void lkdtm_do_action(enum ctype which)
 		do_overwritten();
 		break;
 	}
+	case CT_WRAP_ATOMIC: {
+		atomic_t under = ATOMIC_INIT(INT_MIN);
+		atomic_t over = ATOMIC_INIT(INT_MAX);
+
+		pr_info("attempting atomic underflow\n");
+		atomic_dec(&under);
+		pr_info("attempting atomic overflow\n");
+		atomic_inc(&over);
+
+		return;
+	}
 	case CT_NONE:
 	default:
 		break;

+ 11 - 0
drivers/watchdog/Kconfig

@@ -1584,6 +1584,17 @@ config WATCHDOG_RIO
 	  machines.  The watchdog timeout period is normally one minute but
 	  can be changed with a boot-time parameter.
 
+config WATCHDOG_SUN4V
+	tristate "Sun4v Watchdog support"
+	select WATCHDOG_CORE
+	depends on SPARC64
+	help
+	  Say Y here to support the hypervisor watchdog capability embedded
+	  in the SPARC sun4v architecture.
+
+	  To compile this driver as a module, choose M here. The module will
+	  be called sun4v_wdt.
+
 # XTENSA Architecture
 
 # Xen Architecture

+ 1 - 0
drivers/watchdog/Makefile

@@ -180,6 +180,7 @@ obj-$(CONFIG_SH_WDT) += shwdt.o
 
 obj-$(CONFIG_WATCHDOG_RIO)		+= riowd.o
 obj-$(CONFIG_WATCHDOG_CP1XXX)		+= cpwd.o
+obj-$(CONFIG_WATCHDOG_SUN4V)		+= sun4v_wdt.o
 
 # XTENSA Architecture
 

+ 191 - 0
drivers/watchdog/sun4v_wdt.c

@@ -0,0 +1,191 @@
+/*
+ *	sun4v watchdog timer
+ *	(c) Copyright 2016 Oracle Corporation
+ *
+ *	Implement a simple watchdog driver using the built-in sun4v hypervisor
+ *	watchdog support. If time expires, the hypervisor stops or bounces
+ *	the guest domain.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/watchdog.h>
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+
+#define WDT_TIMEOUT			60
+#define WDT_MAX_TIMEOUT			31536000
+#define WDT_MIN_TIMEOUT			1
+#define WDT_DEFAULT_RESOLUTION_MS	1000	/* 1 second */
+
+static unsigned int timeout;
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default="
+	__MODULE_STRING(WDT_TIMEOUT) ")");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, S_IRUGO);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+	__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int sun4v_wdt_stop(struct watchdog_device *wdd)
+{
+	sun4v_mach_set_watchdog(0, NULL);
+
+	return 0;
+}
+
+static int sun4v_wdt_ping(struct watchdog_device *wdd)
+{
+	int hverr;
+
+	/*
+	 * HV watchdog timer will round up the timeout
+	 * passed in to the nearest multiple of the
+	 * watchdog resolution in milliseconds.
+	 */
+	hverr = sun4v_mach_set_watchdog(wdd->timeout * 1000, NULL);
+	if (hverr == HV_EINVAL)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int sun4v_wdt_set_timeout(struct watchdog_device *wdd,
+				 unsigned int timeout)
+{
+	wdd->timeout = timeout;
+
+	return 0;
+}
+
+static const struct watchdog_info sun4v_wdt_ident = {
+	.options =	WDIOF_SETTIMEOUT |
+			WDIOF_MAGICCLOSE |
+			WDIOF_KEEPALIVEPING,
+	.identity =	"sun4v hypervisor watchdog",
+	.firmware_version = 0,
+};
+
+static struct watchdog_ops sun4v_wdt_ops = {
+	.owner =	THIS_MODULE,
+	.start =	sun4v_wdt_ping,
+	.stop =		sun4v_wdt_stop,
+	.ping =		sun4v_wdt_ping,
+	.set_timeout =	sun4v_wdt_set_timeout,
+};
+
+static struct watchdog_device wdd = {
+	.info = &sun4v_wdt_ident,
+	.ops = &sun4v_wdt_ops,
+	.min_timeout = WDT_MIN_TIMEOUT,
+	.max_timeout = WDT_MAX_TIMEOUT,
+	.timeout = WDT_TIMEOUT,
+};
+
+static int __init sun4v_wdt_init(void)
+{
+	struct mdesc_handle *handle;
+	u64 node;
+	const u64 *value;
+	int err = 0;
+	unsigned long major = 1, minor = 1;
+
+	/*
+	 * There are 2 properties that can be set from the control
+	 * domain for the watchdog.
+	 * watchdog-resolution
+	 * watchdog-max-timeout
+	 *
+	 * We can expect a handle to be returned otherwise something
+	 * serious is wrong. Correct to return -ENODEV here.
+	 */
+
+	handle = mdesc_grab();
+	if (!handle)
+		return -ENODEV;
+
+	node = mdesc_node_by_name(handle, MDESC_NODE_NULL, "platform");
+	err = -ENODEV;
+	if (node == MDESC_NODE_NULL)
+		goto out_release;
+
+	/*
+	 * This is a safe way to validate if we are on the right
+	 * platform.
+	 */
+	if (sun4v_hvapi_register(HV_GRP_CORE, major, &minor))
+		goto out_hv_unreg;
+
+	/* Allow value of watchdog-resolution up to 1s (default) */
+	value = mdesc_get_property(handle, node, "watchdog-resolution", NULL);
+	err = -EINVAL;
+	if (value) {
+		if (*value == 0 ||
+		    *value > WDT_DEFAULT_RESOLUTION_MS)
+			goto out_hv_unreg;
+	}
+
+	value = mdesc_get_property(handle, node, "watchdog-max-timeout", NULL);
+	if (value) {
+		/*
+		 * If the property value (in ms) is smaller than
+		 * min_timeout, return -EINVAL.
+		 */
+		if (*value < wdd.min_timeout * 1000)
+			goto out_hv_unreg;
+
+		/*
+		 * If the property value is smaller than
+		 * default max_timeout  then set watchdog max_timeout to
+		 * the value of the property in seconds.
+		 */
+		if (*value < wdd.max_timeout * 1000)
+			wdd.max_timeout = *value  / 1000;
+	}
+
+	watchdog_init_timeout(&wdd, timeout, NULL);
+
+	watchdog_set_nowayout(&wdd, nowayout);
+
+	err = watchdog_register_device(&wdd);
+	if (err)
+		goto out_hv_unreg;
+
+	pr_info("initialized (timeout=%ds, nowayout=%d)\n",
+		 wdd.timeout, nowayout);
+
+	mdesc_release(handle);
+
+	return 0;
+
+out_hv_unreg:
+	sun4v_hvapi_unregister(HV_GRP_CORE);
+
+out_release:
+	mdesc_release(handle);
+	return err;
+}
+
+static void __exit sun4v_wdt_exit(void)
+{
+	sun4v_hvapi_unregister(HV_GRP_CORE);
+	watchdog_unregister_device(&wdd);
+}
+
+module_init(sun4v_wdt_init);
+module_exit(sun4v_wdt_exit);
+
+MODULE_AUTHOR("Wim Coekaerts <wim.coekaerts@oracle.com>");
+MODULE_DESCRIPTION("sun4v watchdog driver");
+MODULE_LICENSE("GPL");