
Merge branch 'locking/urgent' into locking/core, to pick up dependent fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar, 8 years ago
parent
commit
1b95b1a06c

+ 2 - 2
arch/tile/kernel/time.c

@@ -218,8 +218,8 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num)
  */
 unsigned long long sched_clock(void)
 {
-	return clocksource_cyc2ns(get_cycles(),
-				  sched_clock_mult, SCHED_CLOCK_SHIFT);
+	return mult_frac(get_cycles(),
+			 sched_clock_mult, 1ULL << SCHED_CLOCK_SHIFT);
 }
 
 int setup_profiling_timer(unsigned int multiplier)
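The replacement leans on the mult_frac() helper from <linux/kernel.h>, which splits the multiplication so the intermediate product can no longer overflow 64 bits the way a plain (cycles * mult) >> shift does once the cycle counter grows large. A minimal userspace sketch of the same arithmetic (illustrative only, not the kernel macro itself):

#include <stdio.h>

/* Same split as mult_frac(x, n, d): compute x * n / d without forming x * n. */
static unsigned long long mult_frac_sketch(unsigned long long x,
					   unsigned long long n,
					   unsigned long long d)
{
	unsigned long long quot = x / d;
	unsigned long long rem  = x % d;

	return quot * n + (rem * n) / d;
}

int main(void)
{
	/* Hypothetical values: a large cycle count, mult = 3, shift = 20. */
	unsigned long long cycles = 0x0123456789abcdefULL;

	printf("%llu\n", mult_frac_sketch(cycles, 3, 1ULL << 20));
	return 0;
}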

+ 2 - 3
arch/x86/boot/compressed/Makefile

@@ -40,8 +40,8 @@ GCOV_PROFILE := n
 UBSAN_SANITIZE :=n
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
-ifeq ($(CONFIG_RELOCATABLE),y)
-# If kernel is relocatable, build compressed kernel as PIE.
+# Compressed kernel should be built as PIE since it may be loaded at any
+# address by the bootloader.
 ifeq ($(CONFIG_X86_32),y)
 LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
 else
@@ -51,7 +51,6 @@ else
 LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
 	&& echo "-z noreloc-overflow -pie --no-dynamic-linker")
 endif
-endif
 LDFLAGS_vmlinux := -T
 
 hostprogs-y	:= mkpiggy

+ 6 - 0
arch/x86/boot/cpu.c

@@ -87,6 +87,12 @@ int validate_cpu(void)
 		return -1;
 	}
 
+	if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) &&
+	    !has_eflag(X86_EFLAGS_ID)) {
+		printf("This kernel requires a CPU with the CPUID instruction.  Build with CONFIG_M486=y to run on this CPU.\n");
+		return -1;
+	}
+
 	if (err_flags) {
 		puts("This kernel requires the following features "
 		     "not present on the CPU:\n");

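For context, has_eflag(X86_EFLAGS_ID) is the classic CPUID-detection probe: bit 21 of EFLAGS can only be toggled on CPUs that implement the CPUID instruction. A rough, illustrative sketch of that probe (not the kernel's boot-code implementation):

/* Returns nonzero if EFLAGS.ID (bit 21) can be toggled, i.e. CPUID exists. */
static int cpu_has_cpuid(void)
{
	unsigned long f0, f1;

	asm volatile("pushf\n\t"
		     "pop %0\n\t"	/* f0 = current EFLAGS */
		     "mov %0, %1\n\t"
		     "xor %2, %1\n\t"	/* flip the ID bit */
		     "push %1\n\t"
		     "popf\n\t"		/* try to write it back */
		     "pushf\n\t"
		     "pop %1"		/* read what actually stuck */
		     : "=&r" (f0), "=&r" (f1)
		     : "ri" (1UL << 21)
		     : "cc");

	return !!((f0 ^ f1) & (1UL << 21));
}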
+ 7 - 1
arch/x86/events/amd/core.c

@@ -662,7 +662,13 @@ static int __init amd_core_pmu_init(void)
 		pr_cont("Fam15h ");
 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
 		break;
-
+	case 0x17:
+		pr_cont("Fam17h ");
+		/*
+		 * In family 17h, there are no event constraints in the PMC hardware.
+		 * We fallback to using default amd_get_event_constraints.
+		 */
+		break;
 	default:
 		pr_err("core perfctr but no constraints; unknown hardware!\n");
 		return -ENODEV;

+ 2 - 8
arch/x86/events/core.c

@@ -2352,7 +2352,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
 		frame.next_frame     = 0;
 		frame.return_address = 0;
 
-		if (!access_ok(VERIFY_READ, fp, 8))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
@@ -2362,9 +2362,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
 		if (bytes != 0)
 			break;
 
-		if (!valid_user_frame(fp, sizeof(frame)))
-			break;
-
 		perf_callchain_store(entry, cs_base + frame.return_address);
 		fp = compat_ptr(ss_base + frame.next_frame);
 	}
@@ -2413,7 +2410,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 		frame.next_frame	     = NULL;
 		frame.return_address = 0;
 
-		if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
@@ -2423,9 +2420,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 		if (bytes != 0)
 			break;
 
-		if (!valid_user_frame(fp, sizeof(frame)))
-			break;
-
 		perf_callchain_store(entry, frame.return_address);
 		fp = (void __user *)frame.next_frame;
 	}
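Both hunks move the sanity check ahead of the NMI-safe copy, so a bogus frame pointer is rejected before it is ever dereferenced. The helper itself is defined elsewhere in this file; its assumed shape (illustrative only, under the assumption that it is a plain user-range check) is roughly:

/* Assumed shape of the helper: reject frame pointers that do not lie
 * entirely inside the user address range. Not the in-tree definition. */
static bool valid_user_frame_sketch(const void __user *fp, unsigned long size)
{
	return access_ok(VERIFY_READ, fp, size);
}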

+ 23 - 12
arch/x86/events/intel/ds.c

@@ -1108,20 +1108,20 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	}
 
 	/*
-	 * We use the interrupt regs as a base because the PEBS record
-	 * does not contain a full regs set, specifically it seems to
-	 * lack segment descriptors, which get used by things like
-	 * user_mode().
+	 * We use the interrupt regs as a base because the PEBS record does not
+	 * contain a full regs set, specifically it seems to lack segment
+	 * descriptors, which get used by things like user_mode().
 	 *
-	 * In the simple case fix up only the IP and BP,SP regs, for
-	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
-	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
+	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
+	 *
+	 * We must however always use BP,SP from iregs for the unwinder to stay
+	 * sane; the record BP,SP can point into thin air when the record is
+	 * from a previous PMI context or an (I)RET happend between the record
+	 * and PMI.
 	 */
 	*regs = *iregs;
 	regs->flags = pebs->flags;
 	set_linear_ip(regs, pebs->ip);
-	regs->bp = pebs->bp;
-	regs->sp = pebs->sp;
 
 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
 		regs->ax = pebs->ax;
@@ -1130,10 +1130,21 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		regs->dx = pebs->dx;
 		regs->si = pebs->si;
 		regs->di = pebs->di;
-		regs->bp = pebs->bp;
-		regs->sp = pebs->sp;
 
-		regs->flags = pebs->flags;
+		/*
+		 * Per the above; only set BP,SP if we don't need callchains.
+		 *
+		 * XXX: does this make sense?
+		 */
+		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+			regs->bp = pebs->bp;
+			regs->sp = pebs->sp;
+		}
+
+		/*
+		 * Preserve PERF_EFLAGS_VM from set_linear_ip().
+		 */
+		regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
 #ifndef CONFIG_X86_32
 		regs->r8 = pebs->r8;
 		regs->r9 = pebs->r9;

+ 4 - 4
arch/x86/events/intel/uncore.c

@@ -319,9 +319,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
  */
 static int uncore_pmu_event_init(struct perf_event *event);
 
-static bool is_uncore_event(struct perf_event *event)
+static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
 {
-	return event->pmu->event_init == uncore_pmu_event_init;
+	return &box->pmu->pmu == event->pmu;
 }
 
 static int
@@ -340,7 +340,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
 
 	n = box->n_events;
 
-	if (is_uncore_event(leader)) {
+	if (is_box_event(box, leader)) {
 		box->event_list[n] = leader;
 		n++;
 	}
@@ -349,7 +349,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
 		return n;
 
 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
-		if (!is_uncore_event(event) ||
+		if (!is_box_event(box, event) ||
 		    event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 

+ 0 - 12
arch/x86/events/intel/uncore_snb.c

@@ -490,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
 
 	snb_uncore_imc_event_start(event, 0);
 
-	box->n_events++;
-
 	return 0;
 }
 
 static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
 {
-	struct intel_uncore_box *box = uncore_event_to_box(event);
-	int i;
-
 	snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
-	for (i = 0; i < box->n_events; i++) {
-		if (event == box->event_list[i]) {
-			--box->n_events;
-			break;
-		}
-	}
 }
 
 int snb_pci2phy_map_init(int devid)

+ 1 - 1
arch/x86/events/perf_event.h

@@ -113,7 +113,7 @@ struct debug_store {
  * Per register state.
  */
 struct er_account {
-	raw_spinlock_t		lock;	/* per-core: protect structure */
+	raw_spinlock_t      lock;	/* per-core: protect structure */
 	u64                 config;	/* extra MSR config */
 	u64                 reg;	/* extra MSR number */
 	atomic_t            ref;	/* reference count */

+ 1 - 1
arch/x86/kernel/dumpstack.c

@@ -112,7 +112,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		for (; stack < stack_info.end; stack++) {
 			unsigned long real_addr;
 			int reliable = 0;
-			unsigned long addr = *stack;
+			unsigned long addr = READ_ONCE_NOCHECK(*stack);
 			unsigned long *ret_addr_p =
 				unwind_get_return_address_ptr(&state);
 

+ 8 - 8
arch/x86/kernel/fpu/core.c

@@ -521,14 +521,14 @@ void fpu__clear(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
-	if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
-		/* FPU state will be reallocated lazily at the first use. */
-		fpu__drop(fpu);
-	} else {
-		if (!fpu->fpstate_active) {
-			fpu__activate_curr(fpu);
-			user_fpu_begin();
-		}
+	fpu__drop(fpu);
+
+	/*
+	 * Make sure fpstate is cleared and initialized.
+	 */
+	if (static_cpu_has(X86_FEATURE_FPU)) {
+		fpu__activate_curr(fpu);
+		user_fpu_begin();
 		copy_init_fpstate_to_fpregs();
 		copy_init_fpstate_to_fpregs();
 	}
 }
+ 6 - 3
arch/x86/kernel/head_32.S

@@ -665,14 +665,17 @@ __PAGE_ALIGNED_BSS
 initial_pg_pmd:
 	.fill 1024*KPMDS,4,0
 #else
-ENTRY(initial_page_table)
+.globl initial_page_table
+initial_page_table:
 	.fill 1024,4,0
 #endif
 initial_pg_fixmap:
 	.fill 1024,4,0
-ENTRY(empty_zero_page)
+.globl empty_zero_page
+empty_zero_page:
 	.fill 4096,1,0
-ENTRY(swapper_pg_dir)
+.globl swapper_pg_dir
+swapper_pg_dir:
 	.fill 1024,4,0
 EXPORT_SYMBOL(empty_zero_page)
 

+ 31 - 8
arch/x86/kernel/sysfb_simplefb.c

@@ -66,13 +66,36 @@ __init int create_simplefb(const struct screen_info *si,
 {
 	struct platform_device *pd;
 	struct resource res;
-	unsigned long len;
+	u64 base, size;
+	u32 length;
 
-	/* don't use lfb_size as it may contain the whole VMEM instead of only
-	 * the part that is occupied by the framebuffer */
-	len = mode->height * mode->stride;
-	len = PAGE_ALIGN(len);
-	if (len > (u64)si->lfb_size << 16) {
+	/*
+	 * If the 64BIT_BASE capability is set, ext_lfb_base will contain the
+	 * upper half of the base address. Assemble the address, then make sure
+	 * it is valid and we can actually access it.
+	 */
+	base = si->lfb_base;
+	if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+		base |= (u64)si->ext_lfb_base << 32;
+	if (!base || (u64)(resource_size_t)base != base) {
+		printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Don't use lfb_size as IORESOURCE size, since it may contain the
+	 * entire VMEM, and thus require huge mappings. Use just the part we
+	 * need, that is, the part where the framebuffer is located. But verify
+	 * that it does not exceed the advertised VMEM.
+	 * Note that in case of VBE, the lfb_size is shifted by 16 bits for
+	 * historical reasons.
+	 */
+	size = si->lfb_size;
+	if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
+		size <<= 16;
+	length = mode->height * mode->stride;
+	length = PAGE_ALIGN(length);
+	if (length > size) {
 		printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
 		return -EINVAL;
 	}
@@ -81,8 +104,8 @@ __init int create_simplefb(const struct screen_info *si,
 	memset(&res, 0, sizeof(res));
 	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	res.name = simplefb_resname;
-	res.start = si->lfb_base;
-	res.end = si->lfb_base + len - 1;
+	res.start = base;
+	res.end = res.start + length - 1;
 	if (res.end <= res.start)
 		return -EINVAL;
 
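A worked example of the new address/size handling, with made-up firmware values: the 64BIT_BASE capability splits the base across lfb_base and ext_lfb_base, and a VBE (VLFB) lfb_size is expressed in 64 KiB units.

/* Hypothetical screen_info contents, for illustration only. */
u64 lfb_base     = 0xC0000000;	/* low 32 bits of the framebuffer base */
u64 ext_lfb_base = 0x1;		/* high 32 bits (64BIT_BASE capability set) */
u64 lfb_size     = 0x0100;	/* VBE size, in units of 64 KiB */

u64 base = lfb_base | (ext_lfb_base << 32);	/* 0x1C0000000 */
u64 size = lfb_size << 16;			/* 0x1000000 = 16 MiB */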

+ 6 - 2
arch/x86/kernel/unwind_guess.c

@@ -7,11 +7,13 @@
 
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
+	unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+
 	if (unwind_done(state))
 		return 0;
 
 	return ftrace_graph_ret_addr(state->task, &state->graph_idx,
-				     *state->sp, state->sp);
+				     addr, state->sp);
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
@@ -23,8 +25,10 @@ bool unwind_next_frame(struct unwind_state *state)
 		return false;
 
 	do {
+		unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+
 		for (state->sp++; state->sp < info->end; state->sp++)
-			if (__kernel_text_address(*state->sp))
+			if (__kernel_text_address(addr))
 				return true;
 
 		state->sp = info->next_sp;

+ 6 - 1
arch/x86/mm/extable.c

@@ -135,7 +135,12 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 	if (early_recursion_flag > 2)
 		goto halt_loop;
 
-	if (regs->cs != __KERNEL_CS)
+	/*
+	 * Old CPUs leave the high bits of CS on the stack
+	 * undefined.  I'm not sure which CPUs do this, but at least
+	 * the 486 DX works this way.
+	 */
+	if ((regs->cs & 0xFFFF) != __KERNEL_CS)
 		goto fail;
 
 	/*

+ 1 - 1
arch/x86/platform/intel-mid/device_libs/Makefile

@@ -28,4 +28,4 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
 obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
 # MISC Devices
 obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
-obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o

+ 28 - 6
arch/x86/platform/intel-mid/device_libs/platform_wdt.c → arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c

@@ -1,5 +1,5 @@
 /*
- * platform_wdt.c: Watchdog platform library file
+ * Intel Merrifield watchdog platform device library file
  *
  * (C) Copyright 2014 Intel Corporation
  * Author: David Cohen <david.a.cohen@linux.intel.com>
@@ -14,7 +14,9 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/intel-mid_wdt.h>
+
 #include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
 #include <asm/io_apic.h>
 
 #define TANGIER_EXT_TIMER0_MSI 15
@@ -50,14 +52,34 @@ static struct intel_mid_wdt_pdata tangier_pdata = {
 	.probe = tangier_probe,
 };
 
-static int __init register_mid_wdt(void)
+static int wdt_scu_status_change(struct notifier_block *nb,
+				 unsigned long code, void *data)
 {
-	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) {
-		wdt_dev.dev.platform_data = &tangier_pdata;
-		return platform_device_register(&wdt_dev);
+	if (code == SCU_DOWN) {
+		platform_device_unregister(&wdt_dev);
+		return 0;
 	}
 
-	return -ENODEV;
+	return platform_device_register(&wdt_dev);
 }
 
+static struct notifier_block wdt_scu_notifier = {
+	.notifier_call	= wdt_scu_status_change,
+};
+
+static int __init register_mid_wdt(void)
+{
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return -ENODEV;
+
+	wdt_dev.dev.platform_data = &tangier_pdata;
+
+	/*
+	 * We need to be sure that the SCU IPC is ready before watchdog device
+	 * can be registered:
+	 */
+	intel_scu_notifier_add(&wdt_scu_notifier);
+
+	return 0;
+}
 rootfs_initcall(register_mid_wdt);

+ 1 - 1
crypto/algif_hash.c

@@ -214,7 +214,7 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
 	ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
 
-	if (!result) {
+	if (!result && !ctx->more) {
 		err = af_alg_wait_for_completion(
 				crypto_ahash_init(&ctx->req),
 				&ctx->completion);

+ 0 - 4
crypto/scatterwalk.c

@@ -68,10 +68,6 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg,
 
 	sg = scatterwalk_ffwd(tmp, sg, start);
 
-	if (sg_page(sg) == virt_to_page(buf) &&
-	    sg->offset == offset_in_page(buf))
-		return;
-
 	scatterwalk_start(&walk, sg);
 	scatterwalk_copychunks(buf, &walk, nbytes, out);
 	scatterwalk_done(&walk, out, 0);

+ 1 - 1
drivers/clk/berlin/bg2.c

@@ -685,7 +685,7 @@ static void __init berlin2_clock_setup(struct device_node *np)
 	}
 
 	/* register clk-provider */
-	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
+	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
 
 	return;
 

+ 1 - 1
drivers/clk/berlin/bg2q.c

@@ -382,7 +382,7 @@ static void __init berlin2q_clock_setup(struct device_node *np)
 	}
 
 	/* register clk-provider */
-	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
+	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
 
 	return;
 

+ 1 - 1
drivers/clk/clk-efm32gg.c

@@ -82,6 +82,6 @@ static void __init efm32gg_cmu_init(struct device_node *np)
 	hws[clk_HFPERCLKDAC0] = clk_hw_register_gate(NULL, "HFPERCLK.DAC0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 17, 0, NULL);
 
-	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
+	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
 }
 CLK_OF_DECLARE(efm32ggcmu, "efm32gg,cmu", efm32gg_cmu_init);
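The three clk fixes above are the same bug: in these drivers clk_data is (assumed here) already a pointer to the onecell table, and of_clk_hw_onecell_get() treats its opaque data argument as that table, so passing &clk_data handed it a pointer-to-pointer. A generic, self-contained illustration of the bug class, with hypothetical names:

#include <stdio.h>

struct table { int nr; };

/* The callee treats its opaque cookie as the table itself, the same way
 * of_clk_hw_onecell_get() treats its data argument. */
static void consume(void *cookie)
{
	struct table *t = cookie;

	printf("%d entries\n", t->nr);
}

int main(void)
{
	static struct table real = { .nr = 42 };
	struct table *tbl = &real;	/* like clk_data: already a pointer */

	consume(tbl);	/* correct */
	/* consume(&tbl) would misread a 'struct table **' as the table */
	return 0;
}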

+ 12 - 0
drivers/clk/sunxi-ng/ccu-sun6i-a31.c

@@ -191,6 +191,8 @@ static struct clk_div_table axi_div_table[] = {
 static SUNXI_CCU_DIV_TABLE(axi_clk, "axi", "cpu",
 			   0x050, 0, 3, axi_div_table, 0);
 
+#define SUN6I_A31_AHB1_REG  0x054
+
 static const char * const ahb1_parents[] = { "osc32k", "osc24M",
 					     "axi", "pll-periph" };
 
@@ -1230,6 +1232,16 @@ static void __init sun6i_a31_ccu_setup(struct device_node *node)
 	val &= BIT(16);
 	writel(val, reg + SUN6I_A31_PLL_MIPI_REG);
 
+	/* Force AHB1 to PLL6 / 3 */
+	val = readl(reg + SUN6I_A31_AHB1_REG);
+	/* set PLL6 pre-div = 3 */
+	val &= ~GENMASK(7, 6);
+	val |= 0x2 << 6;
+	/* select PLL6 / pre-div */
+	val &= ~GENMASK(13, 12);
+	val |= 0x3 << 12;
+	writel(val, reg + SUN6I_A31_AHB1_REG);
+
 	sunxi_ccu_probe(node, reg, &sun6i_a31_ccu_desc);
 
 	ccu_mux_notifier_register(pll_cpu_clk.common.hw.clk,

+ 1 - 1
drivers/clk/sunxi/clk-sunxi.c

@@ -373,7 +373,7 @@ static void sun4i_get_apb1_factors(struct factors_request *req)
 	else
 		calcp = 3;
 
-	calcm = (req->parent_rate >> calcp) - 1;
+	calcm = (div >> calcp) - 1;
 
 	req->rate = (req->parent_rate >> calcp) / (calcm + 1);
 	req->m = calcm;
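The fix derives the m divider from the requested divider rather than from the parent rate itself. A worked example with illustrative numbers (24 MHz parent, 2 MHz target, so div = 12; calcp = 2 is assumed to be the shift picked for that range):

static unsigned long apb1_factors_example(void)
{
	unsigned long parent_rate = 24000000;		/* 24 MHz parent */
	unsigned long div   = 12;			/* parent_rate / 2 MHz target */
	unsigned int  calcp = 2;			/* power-of-two pre-divider: /4 */
	unsigned long calcm = (div >> calcp) - 1;	/* 2, derived from the divider */

	/* (24 MHz >> 2) / (2 + 1) = 2 MHz, as requested. The old code fed
	 * parent_rate into the same expression and produced a huge m. */
	return (parent_rate >> calcp) / (calcm + 1);
}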

+ 14 - 1
drivers/scsi/mpt3sas/mpt3sas_scsih.c

@@ -4010,7 +4010,10 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
 	    SAM_STAT_CHECK_CONDITION;
 }
 
-
+static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
+{
+	return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
+}
 
 /**
  * scsih_qcmd - main scsi request entry point
@@ -4038,6 +4041,13 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
 	if (ioc->logging_level & MPT_DEBUG_SCSI)
 		scsi_print_command(scmd);
 
+	/*
+	 * Lock the device for any subsequent command until command is
+	 * done.
+	 */
+	if (ata_12_16_cmd(scmd))
+		scsi_internal_device_block(scmd->device);
+
 	sas_device_priv_data = scmd->device->hostdata;
 	if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
 		scmd->result = DID_NO_CONNECT << 16;
@@ -4613,6 +4623,9 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
 	if (scmd == NULL)
 		return 1;
 
+	if (ata_12_16_cmd(scmd))
+		scsi_internal_device_unblock(scmd->device, SDEV_RUNNING);
+
 	mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
 
 	if (mpi_reply == NULL) {

+ 13 - 8
drivers/scsi/qla2xxx/qla_os.c

@@ -1456,15 +1456,20 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 		for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
 			sp = req->outstanding_cmds[cnt];
 			if (sp) {
-				/* Get a reference to the sp and drop the lock.
-				 * The reference ensures this sp->done() call
-				 * - and not the call in qla2xxx_eh_abort() -
-				 * ends the SCSI command (with result 'res').
+				/* Don't abort commands in adapter during EEH
+				 * recovery as it's not accessible/responding.
 				 */
-				sp_get(sp);
-				spin_unlock_irqrestore(&ha->hardware_lock, flags);
-				qla2xxx_eh_abort(GET_CMD_SP(sp));
-				spin_lock_irqsave(&ha->hardware_lock, flags);
+				if (!ha->flags.eeh_busy) {
+					/* Get a reference to the sp and drop the lock.
+					 * The reference ensures this sp->done() call
+					 * - and not the call in qla2xxx_eh_abort() -
+					 * ends the SCSI command (with result 'res').
+					 */
+					sp_get(sp);
+					spin_unlock_irqrestore(&ha->hardware_lock, flags);
+					qla2xxx_eh_abort(GET_CMD_SP(sp));
+					spin_lock_irqsave(&ha->hardware_lock, flags);
+				}
 				req->outstanding_cmds[cnt] = NULL;
 				sp->done(vha, sp, res);
 			}

+ 8 - 1
drivers/thermal/intel_powerclamp.c

@@ -669,9 +669,16 @@ static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
 	.set_cur_state = powerclamp_set_cur_state,
 };
 
+static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
+	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT },
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
+
 static int __init powerclamp_probe(void)
 {
-	if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
+
+	if (!x86_match_cpu(intel_powerclamp_ids)) {
 		pr_err("CPU does not support MWAIT");
 		return -ENODEV;
 	}

+ 1 - 1
fs/nfs/callback.c

@@ -197,7 +197,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
 	}
 
 	ret = -EPROTONOSUPPORT;
-	if (minorversion == 0)
+	if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0)
 		ret = nfs4_callback_up_net(serv, net);
 	else if (xprt->ops->bc_up)
 		ret = xprt->ops->bc_up(serv, net);

+ 7 - 0
fs/nfs/nfs4_fs.h

@@ -542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
 	return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
 }
 
+static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *state,
+		const nfs4_stateid *stateid)
+{
+	return test_bit(NFS_OPEN_STATE, &state->flags) &&
+		nfs4_stateid_match_other(&state->open_stateid, stateid);
+}
+
 #else
 
 #define nfs4_close_state(a, b) do { } while (0)

+ 26 - 12
fs/nfs/nfs4proc.c

@@ -1451,7 +1451,6 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
 }
 
 static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
-		nfs4_stateid *arg_stateid,
 		nfs4_stateid *stateid, fmode_t fmode)
 {
 	clear_bit(NFS_O_RDWR_STATE, &state->flags);
@@ -1469,10 +1468,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
 	}
 	if (stateid == NULL)
 		return;
-	/* Handle races with OPEN */
-	if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
-	    (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
-	    !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
+	/* Handle OPEN+OPEN_DOWNGRADE races */
+	if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
+	    !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
 		nfs_resync_open_stateid_locked(state);
 		return;
 	}
@@ -1486,7 +1484,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
 	nfs4_stateid *stateid, fmode_t fmode)
 {
 	write_seqlock(&state->seqlock);
-	nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
+	/* Ignore, if the CLOSE argment doesn't match the current stateid */
+	if (nfs4_state_match_open_stateid_other(state, arg_stateid))
+		nfs_clear_open_stateid_locked(state, stateid, fmode);
 	write_sequnlock(&state->seqlock);
 	if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
 		nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
@@ -2564,15 +2564,23 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
 static int nfs41_check_expired_locks(struct nfs4_state *state)
 {
 	int status, ret = NFS_OK;
-	struct nfs4_lock_state *lsp;
+	struct nfs4_lock_state *lsp, *prev = NULL;
 	struct nfs_server *server = NFS_SERVER(state->inode);
 
 	if (!test_bit(LK_STATE_IN_USE, &state->flags))
 		goto out;
+
+	spin_lock(&state->state_lock);
 	list_for_each_entry(lsp, &state->lock_states, ls_locks) {
 		if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
 			struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
 
+			atomic_inc(&lsp->ls_count);
+			spin_unlock(&state->state_lock);
+
+			nfs4_put_lock_state(prev);
+			prev = lsp;
+
 			status = nfs41_test_and_free_expired_stateid(server,
 					&lsp->ls_stateid,
 					cred);
@@ -2585,10 +2593,14 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
 					set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
 			} else if (status != NFS_OK) {
 				ret = status;
-				break;
+				nfs4_put_lock_state(prev);
+				goto out;
 			}
+			spin_lock(&state->state_lock);
 		}
-	};
+	}
+	spin_unlock(&state->state_lock);
+	nfs4_put_lock_state(prev);
 out:
 	return ret;
 }
@@ -3122,7 +3134,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	} else if (is_rdwr)
 		calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
 
-	if (!nfs4_valid_open_stateid(state))
+	if (!nfs4_valid_open_stateid(state) ||
+	    test_bit(NFS_OPEN_STATE, &state->flags) == 0)
 		call_close = 0;
 	spin_unlock(&state->owner->so_lock);
 
@@ -5569,6 +5582,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 	switch (task->tk_status) {
 	case 0:
 		renew_lease(data->res.server, data->timestamp);
+		break;
 	case -NFS4ERR_ADMIN_REVOKED:
 	case -NFS4ERR_DELEG_REVOKED:
 	case -NFS4ERR_EXPIRED:
@@ -5579,8 +5593,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 	case -NFS4ERR_OLD_STATEID:
 	case -NFS4ERR_STALE_STATEID:
 		task->tk_status = 0;
-		if (data->roc)
-			pnfs_roc_set_barrier(data->inode, data->roc_barrier);
 		break;
 	default:
 		if (nfs4_async_handle_error(task, data->res.server,
@@ -5590,6 +5602,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 		}
 	}
 	data->rpc_status = task->tk_status;
+	if (data->roc && data->rpc_status == 0)
+		pnfs_roc_set_barrier(data->inode, data->roc_barrier);
 }
 
 static void nfs4_delegreturn_release(void *calldata)

+ 1 - 0
fs/nfs/nfs4state.c

@@ -1547,6 +1547,7 @@ restart:
 				ssleep(1);
 			case -NFS4ERR_ADMIN_REVOKED:
 			case -NFS4ERR_STALE_STATEID:
+			case -NFS4ERR_OLD_STATEID:
 			case -NFS4ERR_BAD_STATEID:
 			case -NFS4ERR_RECLAIM_BAD:
 			case -NFS4ERR_RECLAIM_CONFLICT:

+ 2 - 0
include/linux/sched.h

@@ -2571,6 +2571,7 @@ extern void sched_autogroup_create_attach(struct task_struct *p);
 extern void sched_autogroup_detach(struct task_struct *p);
 extern void sched_autogroup_fork(struct signal_struct *sig);
 extern void sched_autogroup_exit(struct signal_struct *sig);
+extern void sched_autogroup_exit_task(struct task_struct *p);
 #ifdef CONFIG_PROC_FS
 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
 extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
@@ -2580,6 +2581,7 @@ static inline void sched_autogroup_create_attach(struct task_struct *p) { }
 static inline void sched_autogroup_detach(struct task_struct *p) { }
 static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit_task(struct task_struct *p) { }
 #endif
 
 extern int yield_to(struct task_struct *p, bool preempt);

+ 13 - 0
kernel/events/core.c

@@ -902,6 +902,17 @@ list_update_cgroup_event(struct perf_event *event,
 	 * this will always be called from the right CPU.
 	 */
 	cpuctx = __get_cpu_context(ctx);
+
+	/* Only set/clear cpuctx->cgrp if current task uses event->cgrp. */
+	if (perf_cgroup_from_task(current, ctx) != event->cgrp) {
+		/*
+		 * We are removing the last cpu event in this context.
+		 * If that event is not active in this cpu, cpuctx->cgrp
+		 * should've been cleared by perf_cgroup_switch.
+		 */
+		WARN_ON_ONCE(!add && cpuctx->cgrp);
+		return;
+	}
 	cpuctx->cgrp = add ? event->cgrp : NULL;
 }
 
@@ -8018,6 +8029,7 @@ restart:
  * if <size> is not specified, the range is treated as a single address.
  */
 enum {
+	IF_ACT_NONE = -1,
 	IF_ACT_FILTER,
 	IF_ACT_START,
 	IF_ACT_STOP,
@@ -8041,6 +8053,7 @@ static const match_table_t if_tokens = {
 	{ IF_SRC_KERNEL,	"%u/%u" },
 	{ IF_SRC_FILEADDR,	"%u@%s" },
 	{ IF_SRC_KERNELADDR,	"%u" },
+	{ IF_ACT_NONE,		NULL },
 };
 
 /*
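The added { IF_ACT_NONE, NULL } entry is the sentinel that match_token() in lib/parser.c scans for: the table must end with an entry whose pattern is NULL, otherwise the lookup walks off the end of the array. A hypothetical table with the expected shape:

#include <linux/parser.h>

enum { OPT_MODE, OPT_UID, OPT_ERR };

static const match_table_t example_tokens = {
	{ OPT_MODE,	"mode=%o" },
	{ OPT_UID,	"uid=%u" },
	{ OPT_ERR,	NULL }		/* terminator that match_token() stops at */
};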

+ 1 - 0
kernel/exit.c

@@ -836,6 +836,7 @@ void __noreturn do_exit(long code)
 	 */
 	perf_event_exit_task(tsk);
 
+	sched_autogroup_exit_task(tsk);
 	cgroup_exit(tsk);
 
 	/*

+ 66 - 2
kernel/locking/rtmutex.c

@@ -65,8 +65,72 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
 
 static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
 {
-	if (!rt_mutex_has_waiters(lock))
-		clear_rt_mutex_waiters(lock);
+	unsigned long owner, *p = (unsigned long *) &lock->owner;
+
+	if (rt_mutex_has_waiters(lock))
+		return;
+
+	/*
+	 * The rbtree has no waiters enqueued, now make sure that the
+	 * lock->owner still has the waiters bit set, otherwise the
+	 * following can happen:
+	 *
+	 * CPU 0	CPU 1		CPU2
+	 * l->owner=T1
+	 *		rt_mutex_lock(l)
+	 *		lock(l->lock)
+	 *		l->owner = T1 | HAS_WAITERS;
+	 *		enqueue(T2)
+	 *		boost()
+	 *		  unlock(l->lock)
+	 *		block()
+	 *
+	 *				rt_mutex_lock(l)
+	 *				lock(l->lock)
+	 *				l->owner = T1 | HAS_WAITERS;
+	 *				enqueue(T3)
+	 *				boost()
+	 *				  unlock(l->lock)
+	 *				block()
+	 *		signal(->T2)	signal(->T3)
+	 *		lock(l->lock)
+	 *		dequeue(T2)
+	 *		deboost()
+	 *		  unlock(l->lock)
+	 *				lock(l->lock)
+	 *				dequeue(T3)
+	 *				 ==> wait list is empty
+	 *				deboost()
+	 *				 unlock(l->lock)
+	 *		lock(l->lock)
+	 *		fixup_rt_mutex_waiters()
+	 *		  if (wait_list_empty(l) {
+	 *		    l->owner = owner
+	 *		    owner = l->owner & ~HAS_WAITERS;
+	 *		      ==> l->owner = T1
+	 *		  }
+	 *				lock(l->lock)
+	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
+	 *				  if (wait_list_empty(l) {
+	 *				    owner = l->owner & ~HAS_WAITERS;
+	 * cmpxchg(l->owner, T1, NULL)
+	 *  ===> Success (l->owner = NULL)
+	 *
+	 *				    l->owner = owner
+	 *				      ==> l->owner = T1
+	 *				  }
+	 *
+	 * With the check for the waiter bit in place T3 on CPU2 will not
+	 * overwrite. All tasks fiddling with the waiters bit are
+	 * serialized by l->lock, so nothing else can modify the waiters
+	 * bit. If the bit is set then nothing can change l->owner either
+	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
+	 * happens in the middle of the RMW because the waiters bit is
+	 * still set.
+	 */
+	owner = READ_ONCE(*p);
+	if (owner & RT_MUTEX_HAS_WAITERS)
+		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
 }
 }
 
 /*
+ 3 - 2
kernel/locking/rtmutex_common.h

@@ -75,8 +75,9 @@ task_top_pi_waiter(struct task_struct *p)
 
 
 static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
 static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
 {
 {
-	return (struct task_struct *)
-		((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
+	unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
+
+	return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL);
 }
 
 /*

+ 28 - 8
kernel/sched/auto_group.c

@@ -111,10 +111,13 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 {
 	if (tg != &root_task_group)
 		return false;
-
 	/*
-	 * We can only assume the task group can't go away on us if
-	 * autogroup_move_group() can see us on ->thread_group list.
+	 * If we race with autogroup_move_group() the caller can use the old
+	 * value of signal->autogroup but in this case sched_move_task() will
+	 * be called again before autogroup_kref_put().
+	 *
+	 * However, there is no way sched_autogroup_exit_task() could tell us
+	 * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case.
 	 */
 	if (p->flags & PF_EXITING)
 		return false;
@@ -122,6 +125,16 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	return true;
 }
 
+void sched_autogroup_exit_task(struct task_struct *p)
+{
+	/*
+	 * We are going to call exit_notify() and autogroup_move_group() can't
+	 * see this thread after that: we can no longer use signal->autogroup.
+	 * See the PF_EXITING check in task_wants_autogroup().
+	 */
+	sched_move_task(p);
+}
+
 static void
 autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 {
@@ -138,13 +151,20 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 	}
 
 	p->signal->autogroup = autogroup_kref_get(ag);
-
-	if (!READ_ONCE(sysctl_sched_autogroup_enabled))
-		goto out;
-
+	/*
+	 * We can't avoid sched_move_task() after we changed signal->autogroup,
+	 * this process can already run with task_group() == prev->tg or we can
+	 * race with cgroup code which can read autogroup = prev under rq->lock.
+	 * In the latter case for_each_thread() can not miss a migrating thread,
+	 * cpu_cgroup_attach() must not be possible after cgroup_exit() and it
+	 * can't be removed from thread list, we hold ->siglock.
+	 *
+	 * If an exiting thread was already removed from thread list we rely on
+	 * sched_autogroup_exit_task().
+	 */
 	for_each_thread(p, t)
 		sched_move_task(t);
-out:
+
 	unlock_task_sighand(p, &flags);
 	autogroup_kref_put(prev);
 }

+ 33 - 33
lib/locking-selftest.c

@@ -980,23 +980,23 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 #ifndef CONFIG_PROVE_LOCKING
 	if (expected == FAILURE && debug_locks) {
 		expected_testcase_failures++;
-		printk("failed|");
+		pr_cont("failed|");
 	}
 	else
 #endif
 	if (debug_locks != expected) {
 		unexpected_testcase_failures++;
-		printk("FAILED|");
+		pr_cont("FAILED|");
 
 		dump_stack();
 	} else {
 		testcase_successes++;
-		printk("  ok  |");
+		pr_cont("  ok  |");
 	}
 	testcase_total++;
 
 	if (debug_locks_verbose)
-		printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
+		pr_cont(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
 			lockclass_mask, debug_locks, expected);
 	/*
 	 * Some tests (e.g. double-unlock) might corrupt the preemption
@@ -1021,26 +1021,26 @@ static inline void print_testname(const char *testname)
 #define DO_TESTCASE_1(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK);		\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_1B(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK);		\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_3(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN);	\
 	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
 	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_3RW(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
 	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
 	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_6(desc, name)				\
 	print_testname(desc);					\
@@ -1050,7 +1050,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_6_SUCCESS(desc, name)			\
 	print_testname(desc);					\
@@ -1060,7 +1060,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM);		\
-	printk("\n");
+	pr_cont("\n");
 
 /*
  * 'read' variant: rlocks must not trigger.
@@ -1073,7 +1073,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_2I(desc, name, nr)				\
 	DO_TESTCASE_1("hard-"desc, name##_hard, nr);		\
@@ -1726,25 +1726,25 @@ static void ww_tests(void)
 	dotest(ww_test_fail_acquire, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_normal, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_unneeded_slow, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("ww contexts mixing");
 	dotest(ww_test_two_contexts, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_diff_class, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("finishing ww context");
 	dotest(ww_test_context_done_twice, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_context_unlock_twice, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_context_fini_early, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_context_lock_after_done, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("locking mismatches");
 	dotest(ww_test_object_unlock_twice, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_object_lock_unbalanced, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_object_lock_stale_context, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("EDEADLK handling");
 	dotest(ww_test_edeadlk_normal, SUCCESS, LOCKTYPE_WW);
@@ -1757,11 +1757,11 @@ static void ww_tests(void)
 	dotest(ww_test_edeadlk_acquire_more_edeadlk_slow, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_edeadlk_acquire_wrong, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_edeadlk_acquire_wrong_slow, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("spinlock nest unlocked");
 	dotest(ww_test_spin_nest_unlocked, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	printk("  -----------------------------------------------------\n");
 	printk("                                 |block | try  |context|\n");
@@ -1771,25 +1771,25 @@ static void ww_tests(void)
 	dotest(ww_test_context_block, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_context_try, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_context_context, SUCCESS, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("try");
 	dotest(ww_test_try_block, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_try_try, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_try_context, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("block");
 	dotest(ww_test_block_block, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_block_try, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_block_context, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("spinlock");
 	dotest(ww_test_spin_block, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_spin_try, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_spin_context, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 }
 
 void locking_selftest(void)
@@ -1829,32 +1829,32 @@ void locking_selftest(void)
 
 	printk("  --------------------------------------------------------------------------\n");
 	print_testname("recursive read-lock");
-	printk("             |");
+	pr_cont("             |");
 	dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
-	printk("             |");
+	pr_cont("             |");
 	dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("recursive read-lock #2");
-	printk("             |");
+	pr_cont("             |");
 	dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
-	printk("             |");
+	pr_cont("             |");
 	dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("mixed read-write-lock");
-	printk("             |");
+	pr_cont("             |");
 	dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
-	printk("             |");
+	pr_cont("             |");
 	dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("mixed write-read-lock");
-	printk("             |");
+	pr_cont("             |");
 	dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
-	printk("             |");
+	pr_cont("             |");
 	dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
-	printk("\n");
+	pr_cont("\n");
 
 	printk("  --------------------------------------------------------------------------\n");
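The wholesale printk() to pr_cont() conversion in this file matters because a bare printk() starts a new message (and, without KERN_CONT, a new output line), while pr_cont() expands to printk(KERN_CONT ...) and appends to the line already in progress, keeping the selftest's one-row-per-test output intact. Minimal usage sketch:

/* One test row built incrementally on a single output line. */
static void report_result(bool ok)
{
	printk(KERN_INFO "selftest: ");	/* opens the line */
	pr_cont(ok ? "  ok  |" : "FAILED|");
	pr_cont("\n");			/* closes it */
}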