瀏覽代碼

Merge tag 'v4.16-rc5' into locking/core, to pick up fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 7 年之前
父節點
當前提交
9884afa2fd
共有 88 個文件被更改,包括 850 次插入1084 次删除
  1. 5 1
      Documentation/devicetree/bindings/dma/mv-xor-v2.txt
  2. 1 1
      Makefile
  3. 1 10
      arch/x86/Kconfig
  4. 1 15
      arch/x86/entry/entry_64_compat.S
  5. 19 19
      arch/x86/entry/syscalls/syscall_32.tbl
  6. 3 13
      arch/x86/entry/vsyscall/vsyscall_64.c
  7. 1 1
      arch/x86/events/intel/uncore_snbep.c
  8. 44 30
      arch/x86/ia32/sys_ia32.c
  9. 0 2
      arch/x86/include/asm/pgtable_types.h
  10. 1 0
      arch/x86/include/asm/sections.h
  11. 30 18
      arch/x86/include/asm/sys_ia32.h
  12. 1 0
      arch/x86/include/uapi/asm/mce.h
  13. 7 0
      arch/x86/kernel/cpu/intel.c
  14. 24 2
      arch/x86/kernel/cpu/mcheck/mce.c
  15. 119 39
      arch/x86/kernel/cpu/microcode/core.c
  16. 40 8
      arch/x86/kernel/cpu/microcode/intel.c
  17. 1 1
      arch/x86/kernel/ioport.c
  18. 9 1
      arch/x86/kernel/kprobes/core.c
  19. 2 0
      arch/x86/kernel/vmlinux.lds.S
  20. 1 1
      arch/x86/mm/pti.c
  21. 1 1
      drivers/block/loop.c
  22. 8 9
      drivers/block/xen-blkfront.c
  23. 1 0
      drivers/clocksource/Kconfig
  24. 20 5
      drivers/dma/mv_xor_v2.c
  25. 1 1
      drivers/dma/sh/rcar-dmac.c
  26. 16 22
      drivers/gpio/gpio-rcar.c
  27. 5 10
      drivers/infiniband/core/addr.c
  28. 11 10
      drivers/infiniband/core/cq.c
  29. 4 2
      drivers/infiniband/core/device.c
  30. 3 4
      drivers/infiniband/core/sa_query.c
  31. 6 0
      drivers/infiniband/core/ucma.c
  32. 19 7
      drivers/infiniband/hw/bnxt_re/ib_verbs.c
  33. 3 0
      drivers/infiniband/hw/bnxt_re/ib_verbs.h
  34. 11 1
      drivers/infiniband/hw/bnxt_re/main.c
  35. 24 85
      drivers/infiniband/hw/bnxt_re/qplib_fp.c
  36. 12 0
      drivers/infiniband/hw/bnxt_re/qplib_fp.h
  37. 6 3
      drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
  38. 1 0
      drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
  39. 2 1
      drivers/infiniband/hw/bnxt_re/qplib_sp.c
  40. 24 1
      drivers/infiniband/hw/bnxt_re/roce_hsi.h
  41. 3 1
      drivers/infiniband/hw/mlx4/cq.c
  42. 7 4
      drivers/infiniband/hw/mlx4/main.c
  43. 8 2
      drivers/infiniband/hw/mlx5/cq.c
  44. 9 12
      drivers/infiniband/hw/mlx5/main.c
  45. 1 1
      drivers/infiniband/hw/mlx5/mr.c
  46. 9 2
      drivers/infiniband/hw/mlx5/qp.c
  47. 11 8
      drivers/infiniband/hw/qedr/qedr_iw_cm.c
  48. 12 1
      drivers/infiniband/hw/qedr/verbs.c
  49. 21 6
      drivers/md/bcache/super.c
  50. 6 10
      drivers/md/dm-bufio.c
  51. 29 37
      drivers/md/dm-mpath.c
  52. 4 3
      drivers/md/dm-raid.c
  53. 6 10
      drivers/md/dm-table.c
  54. 20 15
      drivers/md/dm.c
  55. 1 1
      drivers/net/ethernet/mellanox/mlx5/core/health.c
  56. 0 2
      drivers/nvme/host/core.c
  57. 5 0
      drivers/nvme/host/fabrics.c
  58. 17 10
      drivers/nvme/host/fc.c
  59. 0 30
      drivers/nvme/host/multipath.c
  60. 0 8
      drivers/nvme/host/nvme.h
  61. 8 7
      drivers/nvme/host/pci.c
  62. 20 7
      drivers/platform/x86/Kconfig
  63. 3 2
      drivers/platform/x86/Makefile
  64. 25 4
      drivers/platform/x86/dell-smbios-base.c
  65. 4 14
      drivers/platform/x86/dell-smbios-smm.c
  66. 4 10
      drivers/platform/x86/dell-smbios-wmi.c
  67. 26 1
      drivers/platform/x86/dell-smbios.h
  68. 2 1
      drivers/watchdog/f71808e_wdt.c
  69. 9 492
      drivers/watchdog/hpwdt.c
  70. 2 1
      drivers/watchdog/sbsa_gwdt.c
  71. 3 1
      kernel/events/core.c
  72. 3 2
      kernel/locking/rtmutex.c
  73. 4 4
      scripts/Makefile.lib
  74. 5 10
      scripts/basic/fixdep.c
  75. 1 1
      scripts/bloat-o-meter
  76. 1 0
      tools/arch/x86/include/asm/cpufeatures.h
  77. 2 0
      tools/include/uapi/linux/kvm.h
  78. 7 20
      tools/objtool/check.c
  79. 1 1
      tools/perf/Documentation/perf-kallsyms.txt
  80. 9 0
      tools/perf/builtin-record.c
  81. 1 1
      tools/perf/builtin-stat.c
  82. 1 1
      tools/perf/builtin-top.c
  83. 1 0
      tools/perf/perf.h
  84. 25 0
      tools/perf/ui/browsers/annotate.c
  85. 9 6
      tools/perf/util/auxtrace.c
  86. 6 2
      tools/perf/util/record.c
  87. 5 4
      tools/perf/util/trigger.h
  88. 6 5
      tools/testing/selftests/x86/test_vsyscall.c

+ 5 - 1
Documentation/devicetree/bindings/dma/mv-xor-v2.txt

@@ -11,7 +11,11 @@ Required properties:
   interrupts.
 
 Optional properties:
-- clocks: Optional reference to the clock used by the XOR engine.
+- clocks: Optional reference to the clocks used by the XOR engine.
+- clock-names: mandatory if there is a second clock, in this case the
+   name must be "core" for the first clock and "reg" for the second
+   one
+
 
 Example:
 

+ 1 - 1
Makefile

@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*

+ 1 - 10
arch/x86/Kconfig

@@ -2307,7 +2307,7 @@ choice
 	  it can be used to assist security vulnerability exploitation.
 
 	  This setting can be changed at boot time via the kernel command
-	  line parameter vsyscall=[native|emulate|none].
+	  line parameter vsyscall=[emulate|none].
 
 	  On a system with recent enough glibc (2.14 or newer) and no
 	  static binaries, you can say None without a performance penalty
@@ -2315,15 +2315,6 @@ choice
 
 	  If unsure, select "Emulate".
 
-	config LEGACY_VSYSCALL_NATIVE
-		bool "Native"
-		help
-		  Actual executable code is located in the fixed vsyscall
-		  address mapping, implementing time() efficiently. Since
-		  this makes the mapping executable, it can be used during
-		  security vulnerability exploitation (traditionally as
-		  ROP gadgets). This configuration is not recommended.
-
 	config LEGACY_VSYSCALL_EMULATE
 		bool "Emulate"
 		help

+ 1 - 15
arch/x86/entry/entry_64_compat.S

@@ -363,9 +363,7 @@ ENTRY(entry_INT80_compat)
 	pushq	2*8(%rdi)		/* regs->ip */
 	pushq	1*8(%rdi)		/* regs->orig_ax */
 
-	movq	(%rdi), %rdi		/* restore %rdi */
-
-	pushq	%rdi			/* pt_regs->di */
+	pushq	(%rdi)			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
@@ -406,15 +404,3 @@ ENTRY(entry_INT80_compat)
 	TRACE_IRQS_ON
 	jmp	swapgs_restore_regs_and_return_to_usermode
 END(entry_INT80_compat)
-
-ENTRY(stub32_clone)
-	/*
-	 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
-	 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
-	 *
-	 * The native 64-bit kernel's sys_clone() implements the latter,
-	 * so we need to swap arguments here before calling it:
-	 */
-	xchg	%r8, %rcx
-	jmp	sys_clone
-ENDPROC(stub32_clone)

+ 19 - 19
arch/x86/entry/syscalls/syscall_32.tbl

@@ -8,12 +8,12 @@
 #
 0	i386	restart_syscall		sys_restart_syscall
 1	i386	exit			sys_exit
-2	i386	fork			sys_fork			sys_fork
+2	i386	fork			sys_fork
 3	i386	read			sys_read
 4	i386	write			sys_write
 5	i386	open			sys_open			compat_sys_open
 6	i386	close			sys_close
-7	i386	waitpid			sys_waitpid			sys32_waitpid
+7	i386	waitpid			sys_waitpid			compat_sys_x86_waitpid
 8	i386	creat			sys_creat
 9	i386	link			sys_link
 10	i386	unlink			sys_unlink
@@ -78,7 +78,7 @@
 69	i386	ssetmask		sys_ssetmask
 70	i386	setreuid		sys_setreuid16
 71	i386	setregid		sys_setregid16
-72	i386	sigsuspend		sys_sigsuspend			sys_sigsuspend
+72	i386	sigsuspend		sys_sigsuspend
 73	i386	sigpending		sys_sigpending			compat_sys_sigpending
 74	i386	sethostname		sys_sethostname
 75	i386	setrlimit		sys_setrlimit			compat_sys_setrlimit
@@ -96,7 +96,7 @@
 87	i386	swapon			sys_swapon
 88	i386	reboot			sys_reboot
 89	i386	readdir			sys_old_readdir			compat_sys_old_readdir
-90	i386	mmap			sys_old_mmap			sys32_mmap
+90	i386	mmap			sys_old_mmap			compat_sys_x86_mmap
 91	i386	munmap			sys_munmap
 92	i386	truncate		sys_truncate			compat_sys_truncate
 93	i386	ftruncate		sys_ftruncate			compat_sys_ftruncate
@@ -126,7 +126,7 @@
 117	i386	ipc			sys_ipc				compat_sys_ipc
 118	i386	fsync			sys_fsync
 119	i386	sigreturn		sys_sigreturn			sys32_sigreturn
-120	i386	clone			sys_clone			stub32_clone
+120	i386	clone			sys_clone			compat_sys_x86_clone
 121	i386	setdomainname		sys_setdomainname
 122	i386	uname			sys_newuname
 123	i386	modify_ldt		sys_modify_ldt
@@ -186,8 +186,8 @@
 177	i386	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
 178	i386	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 179	i386	rt_sigsuspend		sys_rt_sigsuspend
-180	i386	pread64			sys_pread64			sys32_pread
-181	i386	pwrite64		sys_pwrite64			sys32_pwrite
+180	i386	pread64			sys_pread64			compat_sys_x86_pread
+181	i386	pwrite64		sys_pwrite64			compat_sys_x86_pwrite
 182	i386	chown			sys_chown16
 183	i386	getcwd			sys_getcwd
 184	i386	capget			sys_capget
@@ -196,14 +196,14 @@
 187	i386	sendfile		sys_sendfile			compat_sys_sendfile
 188	i386	getpmsg
 189	i386	putpmsg
-190	i386	vfork			sys_vfork			sys_vfork
+190	i386	vfork			sys_vfork
 191	i386	ugetrlimit		sys_getrlimit			compat_sys_getrlimit
 192	i386	mmap2			sys_mmap_pgoff
-193	i386	truncate64		sys_truncate64			sys32_truncate64
-194	i386	ftruncate64		sys_ftruncate64			sys32_ftruncate64
-195	i386	stat64			sys_stat64			sys32_stat64
-196	i386	lstat64			sys_lstat64			sys32_lstat64
-197	i386	fstat64			sys_fstat64			sys32_fstat64
+193	i386	truncate64		sys_truncate64			compat_sys_x86_truncate64
+194	i386	ftruncate64		sys_ftruncate64			compat_sys_x86_ftruncate64
+195	i386	stat64			sys_stat64			compat_sys_x86_stat64
+196	i386	lstat64			sys_lstat64			compat_sys_x86_lstat64
+197	i386	fstat64			sys_fstat64			compat_sys_x86_fstat64
 198	i386	lchown32		sys_lchown
 199	i386	getuid32		sys_getuid
 200	i386	getgid32		sys_getgid
@@ -231,7 +231,7 @@
 # 222 is unused
 # 223 is unused
 224	i386	gettid			sys_gettid
-225	i386	readahead		sys_readahead			sys32_readahead
+225	i386	readahead		sys_readahead			compat_sys_x86_readahead
 226	i386	setxattr		sys_setxattr
 227	i386	lsetxattr		sys_lsetxattr
 228	i386	fsetxattr		sys_fsetxattr
@@ -256,7 +256,7 @@
 247	i386	io_getevents		sys_io_getevents		compat_sys_io_getevents
 248	i386	io_submit		sys_io_submit			compat_sys_io_submit
 249	i386	io_cancel		sys_io_cancel
-250	i386	fadvise64		sys_fadvise64			sys32_fadvise64
+250	i386	fadvise64		sys_fadvise64			compat_sys_x86_fadvise64
 # 251 is available for reuse (was briefly sys_set_zone_reclaim)
 252	i386	exit_group		sys_exit_group
 253	i386	lookup_dcookie		sys_lookup_dcookie		compat_sys_lookup_dcookie
@@ -278,7 +278,7 @@
 269	i386	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
 270	i386	tgkill			sys_tgkill
 271	i386	utimes			sys_utimes			compat_sys_utimes
-272	i386	fadvise64_64		sys_fadvise64_64		sys32_fadvise64_64
+272	i386	fadvise64_64		sys_fadvise64_64		compat_sys_x86_fadvise64_64
 273	i386	vserver
 274	i386	mbind			sys_mbind
 275	i386	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
@@ -306,7 +306,7 @@
 297	i386	mknodat			sys_mknodat
 298	i386	fchownat		sys_fchownat
 299	i386	futimesat		sys_futimesat			compat_sys_futimesat
-300	i386	fstatat64		sys_fstatat64			sys32_fstatat
+300	i386	fstatat64		sys_fstatat64			compat_sys_x86_fstatat
 301	i386	unlinkat		sys_unlinkat
 302	i386	renameat		sys_renameat
 303	i386	linkat			sys_linkat
@@ -320,7 +320,7 @@
 311	i386	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
 312	i386	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
 313	i386	splice			sys_splice
-314	i386	sync_file_range		sys_sync_file_range		sys32_sync_file_range
+314	i386	sync_file_range		sys_sync_file_range		compat_sys_x86_sync_file_range
 315	i386	tee			sys_tee
 316	i386	vmsplice		sys_vmsplice			compat_sys_vmsplice
 317	i386	move_pages		sys_move_pages			compat_sys_move_pages
@@ -330,7 +330,7 @@
 321	i386	signalfd		sys_signalfd			compat_sys_signalfd
 322	i386	timerfd_create		sys_timerfd_create
 323	i386	eventfd			sys_eventfd
-324	i386	fallocate		sys_fallocate			sys32_fallocate
+324	i386	fallocate		sys_fallocate			compat_sys_x86_fallocate
 325	i386	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime
 326	i386	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime
 327	i386	signalfd4		sys_signalfd4			compat_sys_signalfd4

+ 3 - 13
arch/x86/entry/vsyscall/vsyscall_64.c

@@ -42,10 +42,8 @@
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
-#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
-	NATIVE;
-#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
+static enum { EMULATE, NONE } vsyscall_mode =
+#ifdef CONFIG_LEGACY_VSYSCALL_NONE
 	NONE;
 #else
 	EMULATE;
@@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
 	if (str) {
 		if (!strcmp("emulate", str))
 			vsyscall_mode = EMULATE;
-		else if (!strcmp("native", str))
-			vsyscall_mode = NATIVE;
 		else if (!strcmp("none", str))
 			vsyscall_mode = NONE;
 		else
@@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 	WARN_ON_ONCE(address != regs->ip);
 
-	/* This should be unreachable in NATIVE mode. */
-	if (WARN_ON(vsyscall_mode == NATIVE))
-		return false;
-
 	if (vsyscall_mode == NONE) {
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall attempted with vsyscall=none");
@@ -370,9 +362,7 @@ void __init map_vsyscall(void)
 
 	if (vsyscall_mode != NONE) {
 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-			     vsyscall_mode == NATIVE
-			     ? PAGE_KERNEL_VSYSCALL
-			     : PAGE_KERNEL_VVAR);
+			     PAGE_KERNEL_VVAR);
 		set_vsyscall_pgtable_user_bits(swapper_pg_dir);
 	}
 

+ 1 - 1
arch/x86/events/intel/uncore_snbep.c

@@ -3606,7 +3606,7 @@ static struct intel_uncore_type skx_uncore_imc = {
 };
 
 static struct attribute *skx_upi_uncore_formats_attr[] = {
-	&format_attr_event_ext.attr,
+	&format_attr_event.attr,
 	&format_attr_umask_ext.attr,
 	&format_attr_edge.attr,
 	&format_attr_inv.attr,

+ 44 - 30
arch/x86/ia32/sys_ia32.c

@@ -51,15 +51,14 @@
 #define AA(__x)		((unsigned long)(__x))
 
 
-asmlinkage long sys32_truncate64(const char __user *filename,
-				 unsigned long offset_low,
-				 unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename,
+		       unsigned long, offset_low, unsigned long, offset_high)
 {
        return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
 }
 
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
-				  unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd,
+		       unsigned long, offset_low, unsigned long, offset_high)
 {
        return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
 }
@@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
 	return 0;
 }
 
-asmlinkage long sys32_stat64(const char __user *filename,
-			     struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename,
+		       struct stat64 __user *, statbuf)
 {
 	struct kstat stat;
 	int ret = vfs_stat(filename, &stat);
@@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename,
 	return ret;
 }
 
-asmlinkage long sys32_lstat64(const char __user *filename,
-			      struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename,
+		       struct stat64 __user *, statbuf)
 {
 	struct kstat stat;
 	int ret = vfs_lstat(filename, &stat);
@@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename,
 	return ret;
 }
 
-asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd,
+		       struct stat64 __user *, statbuf)
 {
 	struct kstat stat;
 	int ret = vfs_fstat(fd, &stat);
@@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
 	return ret;
 }
 
-asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename,
-			      struct stat64 __user *statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd,
+		       const char __user *, filename,
+		       struct stat64 __user *, statbuf, int, flag)
 {
 	struct kstat stat;
 	int error;
@@ -153,7 +154,7 @@ struct mmap_arg_struct32 {
 	unsigned int offset;
 };
 
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
+COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)
 {
 	struct mmap_arg_struct32 a;
 
@@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
 			       a.offset>>PAGE_SHIFT);
 }
 
-asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
-			      int options)
+COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *,
+		       stat_addr, int, options)
 {
 	return compat_sys_wait4(pid, stat_addr, options, NULL);
 }
 
 /* warning: next two assume little endian */
-asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
-			    u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf,
+		       u32, count, u32, poslo, u32, poshi)
 {
 	return sys_pread64(fd, ubuf, count,
 			 ((loff_t)AA(poshi) << 32) | AA(poslo));
 }
 
-asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
-			     u32 count, u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf,
+		       u32, count, u32, poslo, u32, poshi)
 {
 	return sys_pwrite64(fd, ubuf, count,
 			  ((loff_t)AA(poshi) << 32) | AA(poslo));
@@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
  * Some system calls that need sign extended arguments. This could be
  * done by a generic wrapper.
  */
-long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
-			__u32 len_low, __u32 len_high, int advice)
+COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low,
+		       __u32, offset_high, __u32, len_low, __u32, len_high,
+		       int, advice)
 {
 	return sys_fadvise64_64(fd,
 			       (((u64)offset_high)<<32) | offset_low,
@@ -202,31 +204,43 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
 				advice);
 }
 
-asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
-				   size_t count)
+COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo,
+		       unsigned int, off_hi, size_t, count)
 {
 	return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
 }
 
-asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
-				      unsigned n_low, unsigned n_hi,  int flags)
+COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low,
+		       unsigned int, off_hi, unsigned int, n_low,
+		       unsigned int, n_hi, int, flags)
 {
 	return sys_sync_file_range(fd,
 				   ((u64)off_hi << 32) | off_low,
 				   ((u64)n_hi << 32) | n_low, flags);
 }
 
-asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
-				size_t len, int advice)
+COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo,
+		       unsigned int, offset_hi, size_t, len, int, advice)
 {
 	return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
 				len, advice);
 }
 
-asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
-				unsigned offset_hi, unsigned len_lo,
-				unsigned len_hi)
+COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode,
+		       unsigned int, offset_lo, unsigned int, offset_hi,
+		       unsigned int, len_lo, unsigned int, len_hi)
 {
 	return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
 			     ((u64)len_hi << 32) | len_lo);
 }
+
+/*
+ * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS
+ */
+COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
+		       unsigned long, newsp, int __user *, parent_tidptr,
+		       unsigned long, tls_val, int __user *, child_tidptr)
+{
+	return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr,
+			tls_val);
+}

+ 0 - 2
arch/x86/include/asm/pgtable_types.h

@@ -174,7 +174,6 @@ enum page_cache_mode {
 #define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
 #define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
 #define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VSYSCALL		(__PAGE_KERNEL_RX | _PAGE_USER)
 #define __PAGE_KERNEL_VVAR		(__PAGE_KERNEL_RO | _PAGE_USER)
 #define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -206,7 +205,6 @@ enum page_cache_mode {
 #define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE_EXEC	__pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VSYSCALL	__pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
 #define PAGE_KERNEL_VVAR	__pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
 
 #define PAGE_KERNEL_IO		__pgprot(__PAGE_KERNEL_IO)

+ 1 - 0
arch/x86/include/asm/sections.h

@@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[];
 
 #if defined(CONFIG_X86_64)
 extern char __end_rodata_hpage_align[];
+extern char __entry_trampoline_start[], __entry_trampoline_end[];
 #endif
 
 #endif	/* _ASM_X86_SECTIONS_H */

+ 30 - 18
arch/x86/include/asm/sys_ia32.h

@@ -20,31 +20,43 @@
 #include <asm/ia32.h>
 
 /* ia32/sys_ia32.c */
-asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long);
-asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
+asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long,
+					  unsigned long);
+asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long,
+					   unsigned long);
 
-asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
-asmlinkage long sys32_fstatat(unsigned int, const char __user *,
+asmlinkage long compat_sys_x86_stat64(const char __user *,
+				      struct stat64 __user *);
+asmlinkage long compat_sys_x86_lstat64(const char __user *,
+				       struct stat64 __user *);
+asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *);
+asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *,
 			      struct stat64 __user *, int);
 struct mmap_arg_struct32;
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
+asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *);
 
-asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
+asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *,
+				       int);
 
-asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
-asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
+asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32,
+				     u32);
+asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32,
+				      u32, u32);
 
-long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
-long sys32_vm86_warning(void);
+asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32,
+					    int);
 
-asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
-asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
-				      unsigned, unsigned, int);
-asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
-asmlinkage long sys32_fallocate(int, int, unsigned,
-				unsigned, unsigned, unsigned);
+asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int,
+					    size_t);
+asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int,
+					       unsigned int, unsigned int,
+					       int);
+asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int,
+					 size_t, int);
+asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int,
+					 unsigned int, unsigned int);
+asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *,
+				     unsigned long, int __user *);
 
 /* ia32/ia32_signal.c */
 asmlinkage long sys32_sigreturn(void);

+ 1 - 0
arch/x86/include/uapi/asm/mce.h

@@ -30,6 +30,7 @@ struct mce {
 	__u64 synd;	/* MCA_SYND MSR: only valid on SMCA systems */
 	__u64 ipid;	/* MCA_IPID MSR: only valid on SMCA systems */
 	__u64 ppin;	/* Protected Processor Inventory Number */
+	__u32 microcode;/* Microcode revision */
 };
 
 #define MCE_GET_RECORD_LEN   _IOR('M', 1, int)

+ 7 - 0
arch/x86/kernel/cpu/intel.c

@@ -144,6 +144,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 {
 	int i;
 
+	/*
+	 * We know that the hypervisor lie to us on the microcode version so
+	 * we may as well hope that it is running the correct version.
+	 */
+	if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+		return false;
+
 	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
 		if (c->x86_model == spectre_bad_microcodes[i].model &&
 		    c->x86_stepping == spectre_bad_microcodes[i].stepping)

+ 24 - 2
arch/x86/kernel/cpu/mcheck/mce.c

@@ -56,6 +56,9 @@
 
 static DEFINE_MUTEX(mce_log_mutex);
 
+/* sysfs synchronization */
+static DEFINE_MUTEX(mce_sysfs_mutex);
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
@@ -130,6 +133,8 @@ void mce_setup(struct mce *m)
 
 	if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
 		rdmsrl(MSR_PPIN, m->ppin);
+
+	m->microcode = boot_cpu_data.microcode;
 }
 
 DEFINE_PER_CPU(struct mce, injectm);
@@ -262,7 +267,7 @@ static void __print_mce(struct mce *m)
 	 */
 	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
 		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
-		cpu_data(m->extcpu).microcode);
+		m->microcode);
 }
 
 static void print_mce(struct mce *m)
@@ -2086,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s,
 	if (kstrtou64(buf, 0, &new) < 0)
 		return -EINVAL;
 
+	mutex_lock(&mce_sysfs_mutex);
 	if (mca_cfg.ignore_ce ^ !!new) {
 		if (new) {
 			/* disable ce features */
@@ -2098,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s,
 			on_each_cpu(mce_enable_ce, (void *)1, 1);
 		}
 	}
+	mutex_unlock(&mce_sysfs_mutex);
+
 	return size;
 }
 
@@ -2110,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s,
 	if (kstrtou64(buf, 0, &new) < 0)
 		return -EINVAL;
 
+	mutex_lock(&mce_sysfs_mutex);
 	if (mca_cfg.cmci_disabled ^ !!new) {
 		if (new) {
 			/* disable cmci */
@@ -2121,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s,
 			on_each_cpu(mce_enable_ce, NULL, 1);
 		}
 	}
+	mutex_unlock(&mce_sysfs_mutex);
+
 	return size;
 }
 
@@ -2128,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s,
 				      struct device_attribute *attr,
 				      const char *buf, size_t size)
 {
-	ssize_t ret = device_store_int(s, attr, buf, size);
+	unsigned long old_check_interval = check_interval;
+	ssize_t ret = device_store_ulong(s, attr, buf, size);
+
+	if (check_interval == old_check_interval)
+		return ret;
+
+	if (check_interval < 1)
+		check_interval = 1;
+
+	mutex_lock(&mce_sysfs_mutex);
 	mce_restart();
+	mutex_unlock(&mce_sysfs_mutex);
+
 	return ret;
 }
 

+ 119 - 39
arch/x86/kernel/cpu/microcode/core.c

@@ -22,13 +22,16 @@
 #define pr_fmt(fmt) "microcode: " fmt
 
 #include <linux/platform_device.h>
+#include <linux/stop_machine.h>
 #include <linux/syscore_ops.h>
 #include <linux/miscdevice.h>
 #include <linux/capability.h>
 #include <linux/firmware.h>
 #include <linux/kernel.h>
+#include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
+#include <linux/nmi.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
  */
 static DEFINE_MUTEX(microcode_mutex);
 
+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 
 struct cpu_info_ctx {
@@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu)
 	return ret;
 }
 
-struct apply_microcode_ctx {
-	enum ucode_state err;
-};
-
 static void apply_microcode_local(void *arg)
 {
-	struct apply_microcode_ctx *ctx = arg;
+	enum ucode_state *err = arg;
 
-	ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+	*err = microcode_ops->apply_microcode(smp_processor_id());
 }
 
 static int apply_microcode_on_target(int cpu)
 {
-	struct apply_microcode_ctx ctx = { .err = 0 };
+	enum ucode_state err;
 	int ret;
 
-	ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
-	if (!ret)
-		ret = ctx.err;
-
+	ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1);
+	if (!ret) {
+		if (err == UCODE_ERROR)
+			ret = 1;
+	}
 	return ret;
 }
 
@@ -489,19 +494,100 @@ static void __exit microcode_dev_exit(void)
 /* fake device for request_firmware */
 static struct platform_device	*microcode_pdev;
 
-static enum ucode_state reload_for_cpu(int cpu)
+/*
+ * Late loading dance. Why the heavy-handed stomp_machine effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ *   is loading microcode in order to avoid any negative interactions caused by
+ *   the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ *   requirement can be relaxed in the future. Right now, this is conservative
+ *   and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
+static int check_online_cpus(void)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	enum ucode_state ustate;
+	if (num_online_cpus() == num_present_cpus())
+		return 0;
 
-	if (!uci->valid)
-		return UCODE_OK;
+	pr_err("Not all CPUs online, aborting microcode update.\n");
+
+	return -EINVAL;
+}
+
+static atomic_t late_cpus;
+
+/*
+ * Returns:
+ * < 0 - on error
+ *   0 - no update done
+ *   1 - microcode was updated
+ */
+static int __reload_late(void *info)
+{
+	unsigned int timeout = NSEC_PER_SEC;
+	int all_cpus = num_online_cpus();
+	int cpu = smp_processor_id();
+	enum ucode_state err;
+	int ret = 0;
+
+	atomic_dec(&late_cpus);
+
+	/*
+	 * Wait for all CPUs to arrive. A load will not be attempted unless all
+	 * CPUs show up.
+	 * */
+	while (atomic_read(&late_cpus)) {
+		if (timeout < SPINUNIT) {
+			pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+				atomic_read(&late_cpus));
+			return -1;
+		}
 
-	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
-	if (ustate != UCODE_OK)
-		return ustate;
+		ndelay(SPINUNIT);
+		timeout -= SPINUNIT;
 
-	return apply_microcode_on_target(cpu);
+		touch_nmi_watchdog();
+	}
+
+	spin_lock(&update_lock);
+	apply_microcode_local(&err);
+	spin_unlock(&update_lock);
+
+	if (err > UCODE_NFOUND) {
+		pr_warn("Error reloading microcode on CPU %d\n", cpu);
+		ret = -1;
+	} else if (err == UCODE_UPDATED) {
+		ret = 1;
+	}
+
+	atomic_inc(&late_cpus);
+
+	while (atomic_read(&late_cpus) != all_cpus)
+		cpu_relax();
+
+	return ret;
+}
+
+/*
+ * Reload microcode late on all CPUs. Wait for a sec until they
+ * all gather together.
+ */
+static int microcode_reload_late(void)
+{
+	int ret;
+
+	atomic_set(&late_cpus, num_online_cpus());
+
+	ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+	if (ret < 0)
+		return ret;
+	else if (ret > 0)
+		microcode_check();
+
+	return ret;
 }
 
 static ssize_t reload_store(struct device *dev,
@@ -509,10 +595,9 @@ static ssize_t reload_store(struct device *dev,
 			    const char *buf, size_t size)
 {
 	enum ucode_state tmp_ret = UCODE_OK;
-	bool do_callback = false;
+	int bsp = boot_cpu_data.cpu_index;
 	unsigned long val;
 	ssize_t ret = 0;
-	int cpu;
 
 	ret = kstrtoul(buf, 0, &val);
 	if (ret)
@@ -521,29 +606,24 @@ static ssize_t reload_store(struct device *dev,
 	if (val != 1)
 		return size;
 
-	get_online_cpus();
-	mutex_lock(&microcode_mutex);
-	for_each_online_cpu(cpu) {
-		tmp_ret = reload_for_cpu(cpu);
-		if (tmp_ret > UCODE_NFOUND) {
-			pr_warn("Error reloading microcode on CPU %d\n", cpu);
-
-			/* set retval for the first encountered reload error */
-			if (!ret)
-				ret = -EINVAL;
-		}
+	tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
+	if (tmp_ret != UCODE_OK)
+		return size;
 
-		if (tmp_ret == UCODE_UPDATED)
-			do_callback = true;
-	}
+	get_online_cpus();
 
-	if (!ret && do_callback)
-		microcode_check();
+	ret = check_online_cpus();
+	if (ret)
+		goto put;
 
+	mutex_lock(&microcode_mutex);
+	ret = microcode_reload_late();
 	mutex_unlock(&microcode_mutex);
+
+put:
 	put_online_cpus();
 
-	if (!ret)
+	if (ret >= 0)
 		ret = size;
 
 	return ret;

+ 40 - 8
arch/x86/kernel/cpu/microcode/intel.c

@@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 	if (!mc)
 		return 0;
 
+	/*
+	 * Save us the MSR write below - which is a particular expensive
+	 * operation - when the other hyperthread has updated the microcode
+	 * already.
+	 */
+	rev = intel_get_microcode_revision();
+	if (rev >= mc->hdr.rev) {
+		uci->cpu_sig.rev = rev;
+		return UCODE_OK;
+	}
+
+	/*
+	 * Writeback and invalidate caches before updating microcode to avoid
+	 * internal issues depending on what the microcode is updating.
+	 */
+	native_wbinvd();
+
 	/* write microcode via MSR 0x79 */
 	native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
 
@@ -774,9 +791,9 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 
 static enum ucode_state apply_microcode_intel(int cpu)
 {
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct microcode_intel *mc;
-	struct ucode_cpu_info *uci;
-	struct cpuinfo_x86 *c;
 	static int prev_rev;
 	u32 rev;
 
@@ -784,15 +801,32 @@ static enum ucode_state apply_microcode_intel(int cpu)
 	if (WARN_ON(raw_smp_processor_id() != cpu))
 		return UCODE_ERROR;
 
-	uci = ucode_cpu_info + cpu;
-	mc = uci->mc;
+	/* Look for a newer patch in our cache: */
+	mc = find_patch(uci);
 	if (!mc) {
-		/* Look for a newer patch in our cache: */
-		mc = find_patch(uci);
+		mc = uci->mc;
 		if (!mc)
 			return UCODE_NFOUND;
 	}
 
+	/*
+	 * Save us the MSR write below - which is a particular expensive
+	 * operation - when the other hyperthread has updated the microcode
+	 * already.
+	 */
+	rev = intel_get_microcode_revision();
+	if (rev >= mc->hdr.rev) {
+		uci->cpu_sig.rev = rev;
+		c->microcode = rev;
+		return UCODE_OK;
+	}
+
+	/*
+	 * Writeback and invalidate caches before updating microcode to avoid
+	 * internal issues depending on what the microcode is updating.
+	 */
+	native_wbinvd();
+
 	/* write microcode via MSR 0x79 */
 	wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
 
@@ -813,8 +847,6 @@ static enum ucode_state apply_microcode_intel(int cpu)
 		prev_rev = rev;
 	}
 
-	c = &cpu_data(cpu);
-
 	uci->cpu_sig.rev = rev;
 	c->microcode = rev;
 

+ 1 - 1
arch/x86/kernel/ioport.c

@@ -23,7 +23,7 @@
 /*
  * this changes the io permissions bitmap in the current task.
  */
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
 {
 	struct thread_struct *t = &current->thread;
 	struct tss_struct *tss;

+ 9 - 1
arch/x86/kernel/kprobes/core.c

@@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler);
 
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
+	bool is_in_entry_trampoline_section = false;
+
+#ifdef CONFIG_X86_64
+	is_in_entry_trampoline_section =
+		(addr >= (unsigned long)__entry_trampoline_start &&
+		 addr < (unsigned long)__entry_trampoline_end);
+#endif
 	return  (addr >= (unsigned long)__kprobes_text_start &&
 		 addr < (unsigned long)__kprobes_text_end) ||
 		(addr >= (unsigned long)__entry_text_start &&
-		 addr < (unsigned long)__entry_text_end);
+		 addr < (unsigned long)__entry_text_end) ||
+		is_in_entry_trampoline_section;
 }
 
 int __init arch_init_kprobes(void)

+ 2 - 0
arch/x86/kernel/vmlinux.lds.S

@@ -118,9 +118,11 @@ SECTIONS
 
 #ifdef CONFIG_X86_64
 		. = ALIGN(PAGE_SIZE);
+		VMLINUX_SYMBOL(__entry_trampoline_start) = .;
 		_entry_trampoline = .;
 		*(.entry_trampoline)
 		. = ALIGN(PAGE_SIZE);
+		VMLINUX_SYMBOL(__entry_trampoline_end) = .;
 		ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
 #endif
 

+ 1 - 1
arch/x86/mm/pti.c

@@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void)
 }
 
 /*
- * Clone the ESPFIX P4D into the user space visinble page table
+ * Clone the ESPFIX P4D into the user space visible page table
  */
 static void __init pti_setup_espfix64(void)
 {

+ 1 - 1
drivers/block/loop.c

@@ -266,7 +266,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
 	struct iov_iter i;
 	ssize_t bw;
 
-	iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len);
+	iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
 
 	file_start_write(file);
 	bw = vfs_iter_write(file, &i, ppos, 0);

+ 8 - 9
drivers/block/xen-blkfront.c

@@ -262,6 +262,7 @@ static DEFINE_SPINLOCK(minor_lock);
 
 static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
 static void blkfront_gather_backend_features(struct blkfront_info *info);
+static int negotiate_mq(struct blkfront_info *info);
 
 static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
 {
@@ -1774,11 +1775,18 @@ static int talk_to_blkback(struct xenbus_device *dev,
 	unsigned int i, max_page_order;
 	unsigned int ring_page_order;
 
+	if (!info)
+		return -ENODEV;
+
 	max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
 					      "max-ring-page-order", 0);
 	ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
 	info->nr_ring_pages = 1 << ring_page_order;
 
+	err = negotiate_mq(info);
+	if (err)
+		goto destroy_blkring;
+
 	for (i = 0; i < info->nr_rings; i++) {
 		struct blkfront_ring_info *rinfo = &info->rinfo[i];
 
@@ -1978,11 +1986,6 @@ static int blkfront_probe(struct xenbus_device *dev,
 	}
 
 	info->xbdev = dev;
-	err = negotiate_mq(info);
-	if (err) {
-		kfree(info);
-		return err;
-	}
 
 	mutex_init(&info->mutex);
 	info->vdevice = vdevice;
@@ -2099,10 +2102,6 @@ static int blkfront_resume(struct xenbus_device *dev)
 
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
-	err = negotiate_mq(info);
-	if (err)
-		return err;
-
 	err = talk_to_blkback(dev, info);
 	if (!err)
 		blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);

+ 1 - 0
drivers/clocksource/Kconfig

@@ -386,6 +386,7 @@ config ATMEL_PIT
 
 config ATMEL_ST
 	bool "Atmel ST timer support" if COMPILE_TEST
+	depends on HAS_IOMEM
 	select TIMER_OF
 	select MFD_SYSCON
 	help

+ 20 - 5
drivers/dma/mv_xor_v2.c

@@ -163,6 +163,7 @@ struct mv_xor_v2_device {
 	void __iomem *dma_base;
 	void __iomem *glob_base;
 	struct clk *clk;
+	struct clk *reg_clk;
 	struct tasklet_struct irq_tasklet;
 	struct list_head free_sw_desc;
 	struct dma_device dmadev;
@@ -749,13 +750,26 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	xor_dev->reg_clk = devm_clk_get(&pdev->dev, "reg");
+	if (PTR_ERR(xor_dev->reg_clk) != -ENOENT) {
+		if (!IS_ERR(xor_dev->reg_clk)) {
+			ret = clk_prepare_enable(xor_dev->reg_clk);
+			if (ret)
+				return ret;
+		} else {
+			return PTR_ERR(xor_dev->reg_clk);
+		}
+	}
+
 	xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER)
-		return -EPROBE_DEFER;
+	if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) {
+		ret = EPROBE_DEFER;
+		goto disable_reg_clk;
+	}
 	if (!IS_ERR(xor_dev->clk)) {
 		ret = clk_prepare_enable(xor_dev->clk);
 		if (ret)
-			return ret;
+			goto disable_reg_clk;
 	}
 
 	ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1,
@@ -866,8 +880,9 @@ free_hw_desq:
 free_msi_irqs:
 	platform_msi_domain_free_irqs(&pdev->dev);
 disable_clk:
-	if (!IS_ERR(xor_dev->clk))
-		clk_disable_unprepare(xor_dev->clk);
+	clk_disable_unprepare(xor_dev->clk);
+disable_reg_clk:
+	clk_disable_unprepare(xor_dev->reg_clk);
 	return ret;
 }
 

+ 1 - 1
drivers/dma/sh/rcar-dmac.c

@@ -917,7 +917,7 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 
 	rcar_dmac_chan_configure_desc(chan, desc);
 
-	max_chunk_size = (RCAR_DMATCR_MASK + 1) << desc->xfer_shift;
+	max_chunk_size = RCAR_DMATCR_MASK << desc->xfer_shift;
 
 	/*
 	 * Allocate and fill the transfer chunk descriptors. We own the only

+ 16 - 22
drivers/gpio/gpio-rcar.c

@@ -14,7 +14,6 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
 #include <linux/init.h>
@@ -37,10 +36,9 @@ struct gpio_rcar_priv {
 	struct platform_device *pdev;
 	struct gpio_chip gpio_chip;
 	struct irq_chip irq_chip;
-	struct clk *clk;
 	unsigned int irq_parent;
+	atomic_t wakeup_path;
 	bool has_both_edge_trigger;
-	bool needs_clk;
 };
 
 #define IOINTSEL 0x00	/* General IO/Interrupt Switching Register */
@@ -186,13 +184,10 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on)
 		}
 	}
 
-	if (!p->clk)
-		return 0;
-
 	if (on)
-		clk_enable(p->clk);
+		atomic_inc(&p->wakeup_path);
 	else
-		clk_disable(p->clk);
+		atomic_dec(&p->wakeup_path);
 
 	return 0;
 }
@@ -330,17 +325,14 @@ static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset,
 
 struct gpio_rcar_info {
 	bool has_both_edge_trigger;
-	bool needs_clk;
 };
 
 static const struct gpio_rcar_info gpio_rcar_info_gen1 = {
 	.has_both_edge_trigger = false,
-	.needs_clk = false,
 };
 
 static const struct gpio_rcar_info gpio_rcar_info_gen2 = {
 	.has_both_edge_trigger = true,
-	.needs_clk = true,
 };
 
 static const struct of_device_id gpio_rcar_of_table[] = {
@@ -403,7 +395,6 @@ static int gpio_rcar_parse_dt(struct gpio_rcar_priv *p, unsigned int *npins)
 	ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args);
 	*npins = ret == 0 ? args.args[2] : RCAR_MAX_GPIO_PER_BANK;
 	p->has_both_edge_trigger = info->has_both_edge_trigger;
-	p->needs_clk = info->needs_clk;
 
 	if (*npins == 0 || *npins > RCAR_MAX_GPIO_PER_BANK) {
 		dev_warn(&p->pdev->dev,
@@ -440,16 +431,6 @@ static int gpio_rcar_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, p);
 
-	p->clk = devm_clk_get(dev, NULL);
-	if (IS_ERR(p->clk)) {
-		if (p->needs_clk) {
-			dev_err(dev, "unable to get clock\n");
-			ret = PTR_ERR(p->clk);
-			goto err0;
-		}
-		p->clk = NULL;
-	}
-
 	pm_runtime_enable(dev);
 
 	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
@@ -531,11 +512,24 @@ static int gpio_rcar_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int __maybe_unused gpio_rcar_suspend(struct device *dev)
+{
+	struct gpio_rcar_priv *p = dev_get_drvdata(dev);
+
+	if (atomic_read(&p->wakeup_path))
+		device_set_wakeup_path(dev);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(gpio_rcar_pm_ops, gpio_rcar_suspend, NULL);
+
 static struct platform_driver gpio_rcar_device_driver = {
 	.probe		= gpio_rcar_probe,
 	.remove		= gpio_rcar_remove,
 	.driver		= {
 		.name	= "gpio_rcar",
+		.pm     = &gpio_rcar_pm_ops,
 		.of_match_table = of_match_ptr(gpio_rcar_of_table),
 	}
 };

+ 5 - 10
drivers/infiniband/core/addr.c

@@ -550,18 +550,13 @@ static int addr_resolve(struct sockaddr *src_in,
 		dst_release(dst);
 	}
 
-	if (ndev->flags & IFF_LOOPBACK) {
-		ret = rdma_translate_ip(dst_in, addr);
-		/*
-		 * Put the loopback device and get the translated
-		 * device instead.
-		 */
+	if (ndev) {
+		if (ndev->flags & IFF_LOOPBACK)
+			ret = rdma_translate_ip(dst_in, addr);
+		else
+			addr->bound_dev_if = ndev->ifindex;
 		dev_put(ndev);
-		ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
-	} else {
-		addr->bound_dev_if = ndev->ifindex;
 	}
-	dev_put(ndev);
 
 	return ret;
 }

+ 11 - 10
drivers/infiniband/core/cq.c

@@ -17,6 +17,7 @@
 
 /* # of WCs to poll for with a single call to ib_poll_cq */
 #define IB_POLL_BATCH			16
+#define IB_POLL_BATCH_DIRECT		8
 
 /* # of WCs to iterate over before yielding */
 #define IB_POLL_BUDGET_IRQ		256
@@ -25,18 +26,18 @@
 #define IB_POLL_FLAGS \
 	(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
 
-static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
+static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
+			   int batch)
 {
 	int i, n, completed = 0;
-	struct ib_wc *wcs = poll_wc ? : cq->wc;
 
 	/*
 	 * budget might be (-1) if the caller does not
 	 * want to bound this call, thus we need unsigned
 	 * minimum here.
 	 */
-	while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
-			budget - completed), wcs)) > 0) {
+	while ((n = ib_poll_cq(cq, min_t(u32, batch,
+					 budget - completed), wcs)) > 0) {
 		for (i = 0; i < n; i++) {
 			struct ib_wc *wc = &wcs[i];
 
@@ -48,8 +49,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
 
 		completed += n;
 
-		if (n != IB_POLL_BATCH ||
-		    (budget != -1 && completed >= budget))
+		if (n != batch || (budget != -1 && completed >= budget))
 			break;
 	}
 
@@ -72,9 +72,9 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
  */
 int ib_process_cq_direct(struct ib_cq *cq, int budget)
 {
-	struct ib_wc wcs[IB_POLL_BATCH];
+	struct ib_wc wcs[IB_POLL_BATCH_DIRECT];
 
-	return __ib_process_cq(cq, budget, wcs);
+	return __ib_process_cq(cq, budget, wcs, IB_POLL_BATCH_DIRECT);
 }
 EXPORT_SYMBOL(ib_process_cq_direct);
 
@@ -88,7 +88,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
 	struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
 	int completed;
 
-	completed = __ib_process_cq(cq, budget, NULL);
+	completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
 	if (completed < budget) {
 		irq_poll_complete(&cq->iop);
 		if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
@@ -108,7 +108,8 @@ static void ib_cq_poll_work(struct work_struct *work)
 	struct ib_cq *cq = container_of(work, struct ib_cq, work);
 	int completed;
 
-	completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, NULL);
+	completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, cq->wc,
+				    IB_POLL_BATCH);
 	if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
 	    ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
 		queue_work(ib_comp_wq, &cq->work);

+ 4 - 2
drivers/infiniband/core/device.c

@@ -536,14 +536,14 @@ int ib_register_device(struct ib_device *device,
 	ret = device->query_device(device, &device->attrs, &uhw);
 	if (ret) {
 		pr_warn("Couldn't query the device attributes\n");
-		goto cache_cleanup;
+		goto cg_cleanup;
 	}
 
 	ret = ib_device_register_sysfs(device, port_callback);
 	if (ret) {
 		pr_warn("Couldn't register device %s with driver model\n",
 			device->name);
-		goto cache_cleanup;
+		goto cg_cleanup;
 	}
 
 	device->reg_state = IB_DEV_REGISTERED;
@@ -559,6 +559,8 @@ int ib_register_device(struct ib_device *device,
 	mutex_unlock(&device_mutex);
 	return 0;
 
+cg_cleanup:
+	ib_device_unregister_rdmacg(device);
 cache_cleanup:
 	ib_cache_cleanup_one(device);
 	ib_cache_release_one(device);

+ 3 - 4
drivers/infiniband/core/sa_query.c

@@ -1291,10 +1291,9 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 
 		resolved_dev = dev_get_by_index(dev_addr.net,
 						dev_addr.bound_dev_if);
-		if (resolved_dev->flags & IFF_LOOPBACK) {
-			dev_put(resolved_dev);
-			resolved_dev = idev;
-			dev_hold(resolved_dev);
+		if (!resolved_dev) {
+			dev_put(idev);
+			return -ENODEV;
 		}
 		ndev = ib_get_ndev_from_path(rec);
 		rcu_read_lock();

+ 6 - 0
drivers/infiniband/core/ucma.c

@@ -1149,6 +1149,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
 
+	if (cmd.qp_state > IB_QPS_ERR)
+		return -EINVAL;
+
 	ctx = ucma_get_ctx(file, cmd.id);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
@@ -1294,6 +1297,9 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	if (unlikely(cmd.optval > KMALLOC_MAX_SIZE))
+		return -EINVAL;
+
 	optval = memdup_user((void __user *) (unsigned long) cmd.optval,
 			     cmd.optlen);
 	if (IS_ERR(optval)) {

+ 19 - 7
drivers/infiniband/hw/bnxt_re/ib_verbs.c

@@ -785,7 +785,7 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
 	return 0;
 }
 
-static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
 	__acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
 {
 	unsigned long flags;
@@ -799,8 +799,8 @@ static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
 	return flags;
 }
 
-static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
-			       unsigned long flags)
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
+			unsigned long flags)
 	__releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
 {
 	if (qp->rcq != qp->scq)
@@ -1606,6 +1606,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	int status;
 	union ib_gid sgid;
 	struct ib_gid_attr sgid_attr;
+	unsigned int flags;
 	u8 nw_type;
 
 	qp->qplib_qp.modify_flags = 0;
@@ -1634,14 +1635,18 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 			dev_dbg(rdev_to_dev(rdev),
 				"Move QP = %p to flush list\n",
 				qp);
+			flags = bnxt_re_lock_cqs(qp);
 			bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+			bnxt_re_unlock_cqs(qp, flags);
 		}
 		if (!qp->sumem &&
 		    qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
 			dev_dbg(rdev_to_dev(rdev),
 				"Move QP = %p out of flush list\n",
 				qp);
+			flags = bnxt_re_lock_cqs(qp);
 			bnxt_qplib_clean_qp(&qp->qplib_qp);
+			bnxt_re_unlock_cqs(qp, flags);
 		}
 	}
 	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
@@ -2227,10 +2232,13 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
 	wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
 	wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
 
+	/* Need unconditional fence for local invalidate
+	 * opcode to work as expected.
+	 */
+	wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+
 	if (wr->send_flags & IB_SEND_SIGNALED)
 		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
-	if (wr->send_flags & IB_SEND_FENCE)
-		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
 	if (wr->send_flags & IB_SEND_SOLICITED)
 		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
 
@@ -2251,8 +2259,12 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
 	wqe->frmr.levels = qplib_frpl->hwq.level + 1;
 	wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
 
-	if (wr->wr.send_flags & IB_SEND_FENCE)
-		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+	/* Need unconditional fence for reg_mr
+	 * opcode to function as expected.
+	 */
+
+	wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+
 	if (wr->wr.send_flags & IB_SEND_SIGNALED)
 		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
 

+ 3 - 0
drivers/infiniband/hw/bnxt_re/ib_verbs.h

@@ -222,4 +222,7 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
 					   struct ib_udata *udata);
 int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
 int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
 #endif /* __BNXT_RE_IB_VERBS_H__ */

+ 11 - 1
drivers/infiniband/hw/bnxt_re/main.c

@@ -730,6 +730,13 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
 					 struct bnxt_re_qp *qp)
 {
 	struct ib_event event;
+	unsigned int flags;
+
+	if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+		flags = bnxt_re_lock_cqs(qp);
+		bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+		bnxt_re_unlock_cqs(qp, flags);
+	}
 
 	memset(&event, 0, sizeof(event));
 	if (qp->qplib_qp.srq) {
@@ -1416,9 +1423,12 @@ static void bnxt_re_task(struct work_struct *work)
 	switch (re_work->event) {
 	case NETDEV_REGISTER:
 		rc = bnxt_re_ib_reg(rdev);
-		if (rc)
+		if (rc) {
 			dev_err(rdev_to_dev(rdev),
 				"Failed to register with IB: %#x", rc);
+			bnxt_re_remove_one(rdev);
+			bnxt_re_dev_unreg(rdev);
+		}
 		break;
 	case NETDEV_UP:
 		bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,

+ 24 - 85
drivers/infiniband/hw/bnxt_re/qplib_fp.c

@@ -88,75 +88,35 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
 	}
 }
 
-void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
-				 unsigned long *flags)
-	__acquires(&qp->scq->hwq.lock) __acquires(&qp->rcq->hwq.lock)
+static void bnxt_qplib_acquire_cq_flush_locks(struct bnxt_qplib_qp *qp,
+				       unsigned long *flags)
+	__acquires(&qp->scq->flush_lock) __acquires(&qp->rcq->flush_lock)
 {
-	spin_lock_irqsave(&qp->scq->hwq.lock, *flags);
+	spin_lock_irqsave(&qp->scq->flush_lock, *flags);
 	if (qp->scq == qp->rcq)
-		__acquire(&qp->rcq->hwq.lock);
+		__acquire(&qp->rcq->flush_lock);
 	else
-		spin_lock(&qp->rcq->hwq.lock);
+		spin_lock(&qp->rcq->flush_lock);
 }
 
-void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
-				 unsigned long *flags)
-	__releases(&qp->scq->hwq.lock) __releases(&qp->rcq->hwq.lock)
+static void bnxt_qplib_release_cq_flush_locks(struct bnxt_qplib_qp *qp,
+				       unsigned long *flags)
+	__releases(&qp->scq->flush_lock) __releases(&qp->rcq->flush_lock)
 {
 	if (qp->scq == qp->rcq)
-		__release(&qp->rcq->hwq.lock);
+		__release(&qp->rcq->flush_lock);
 	else
-		spin_unlock(&qp->rcq->hwq.lock);
-	spin_unlock_irqrestore(&qp->scq->hwq.lock, *flags);
-}
-
-static struct bnxt_qplib_cq *bnxt_qplib_find_buddy_cq(struct bnxt_qplib_qp *qp,
-						      struct bnxt_qplib_cq *cq)
-{
-	struct bnxt_qplib_cq *buddy_cq = NULL;
-
-	if (qp->scq == qp->rcq)
-		buddy_cq = NULL;
-	else if (qp->scq == cq)
-		buddy_cq = qp->rcq;
-	else
-		buddy_cq = qp->scq;
-	return buddy_cq;
-}
-
-static void bnxt_qplib_lock_buddy_cq(struct bnxt_qplib_qp *qp,
-				     struct bnxt_qplib_cq *cq)
-	__acquires(&buddy_cq->hwq.lock)
-{
-	struct bnxt_qplib_cq *buddy_cq = NULL;
-
-	buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
-	if (!buddy_cq)
-		__acquire(&cq->hwq.lock);
-	else
-		spin_lock(&buddy_cq->hwq.lock);
-}
-
-static void bnxt_qplib_unlock_buddy_cq(struct bnxt_qplib_qp *qp,
-				       struct bnxt_qplib_cq *cq)
-	__releases(&buddy_cq->hwq.lock)
-{
-	struct bnxt_qplib_cq *buddy_cq = NULL;
-
-	buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
-	if (!buddy_cq)
-		__release(&cq->hwq.lock);
-	else
-		spin_unlock(&buddy_cq->hwq.lock);
+		spin_unlock(&qp->rcq->flush_lock);
+	spin_unlock_irqrestore(&qp->scq->flush_lock, *flags);
 }
 
 void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
 {
 	unsigned long flags;
 
-	bnxt_qplib_acquire_cq_locks(qp, &flags);
+	bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
 	__bnxt_qplib_add_flush_qp(qp);
-	bnxt_qplib_release_cq_locks(qp, &flags);
+	bnxt_qplib_release_cq_flush_locks(qp, &flags);
 }
 
 static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
@@ -177,7 +137,7 @@ void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
 {
 	unsigned long flags;
 
-	bnxt_qplib_acquire_cq_locks(qp, &flags);
+	bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
 	__clean_cq(qp->scq, (u64)(unsigned long)qp);
 	qp->sq.hwq.prod = 0;
 	qp->sq.hwq.cons = 0;
@@ -186,7 +146,7 @@ void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
 	qp->rq.hwq.cons = 0;
 
 	__bnxt_qplib_del_flush_qp(qp);
-	bnxt_qplib_release_cq_locks(qp, &flags);
+	bnxt_qplib_release_cq_flush_locks(qp, &flags);
 }
 
 static void bnxt_qpn_cqn_sched_task(struct work_struct *work)
@@ -2107,9 +2067,6 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
 	/* Must block new posting of SQ and RQ */
 	qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
 	bnxt_qplib_cancel_phantom_processing(qp);
-
-	/* Add qp to flush list of the CQ */
-	__bnxt_qplib_add_flush_qp(qp);
 }
 
 /* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
@@ -2285,9 +2242,9 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 				sw_sq_cons, cqe->wr_id, cqe->status);
 			cqe++;
 			(*budget)--;
-			bnxt_qplib_lock_buddy_cq(qp, cq);
 			bnxt_qplib_mark_qp_error(qp);
-			bnxt_qplib_unlock_buddy_cq(qp, cq);
+			/* Add qp to flush list of the CQ */
+			bnxt_qplib_add_flush_qp(qp);
 		} else {
 			if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
 				/* Before we complete, do WA 9060 */
@@ -2403,9 +2360,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
 		if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
 			qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
 			/* Add qp to flush list of the CQ */
-			bnxt_qplib_lock_buddy_cq(qp, cq);
-			__bnxt_qplib_add_flush_qp(qp);
-			bnxt_qplib_unlock_buddy_cq(qp, cq);
+			bnxt_qplib_add_flush_qp(qp);
 		}
 	}
 
@@ -2489,9 +2444,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 		if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
 			qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
 			/* Add qp to flush list of the CQ */
-			bnxt_qplib_lock_buddy_cq(qp, cq);
-			__bnxt_qplib_add_flush_qp(qp);
-			bnxt_qplib_unlock_buddy_cq(qp, cq);
+			bnxt_qplib_add_flush_qp(qp);
 		}
 	}
 done:
@@ -2501,11 +2454,9 @@ done:
 bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
 {
 	struct cq_base *hw_cqe, **hw_cqe_ptr;
-	unsigned long flags;
 	u32 sw_cons, raw_cons;
 	bool rc = true;
 
-	spin_lock_irqsave(&cq->hwq.lock, flags);
 	raw_cons = cq->hwq.cons;
 	sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
 	hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
@@ -2513,7 +2464,6 @@ bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
 
 	 /* Check for Valid bit. If the CQE is valid, return false */
 	rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
-	spin_unlock_irqrestore(&cq->hwq.lock, flags);
 	return rc;
 }
 
@@ -2602,9 +2552,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
 		if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
 			qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
 			/* Add qp to flush list of the CQ */
-			bnxt_qplib_lock_buddy_cq(qp, cq);
-			__bnxt_qplib_add_flush_qp(qp);
-			bnxt_qplib_unlock_buddy_cq(qp, cq);
+			bnxt_qplib_add_flush_qp(qp);
 		}
 	}
 
@@ -2719,9 +2667,7 @@ do_rq:
 	 */
 
 	/* Add qp to flush list of the CQ */
-	bnxt_qplib_lock_buddy_cq(qp, cq);
-	__bnxt_qplib_add_flush_qp(qp);
-	bnxt_qplib_unlock_buddy_cq(qp, cq);
+	bnxt_qplib_add_flush_qp(qp);
 done:
 	return rc;
 }
@@ -2750,7 +2696,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
 	u32 budget = num_cqes;
 	unsigned long flags;
 
-	spin_lock_irqsave(&cq->hwq.lock, flags);
+	spin_lock_irqsave(&cq->flush_lock, flags);
 	list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
 		dev_dbg(&cq->hwq.pdev->dev,
 			"QPLIB: FP: Flushing SQ QP= %p",
@@ -2764,7 +2710,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
 			qp);
 		__flush_rq(&qp->rq, qp, &cqe, &budget);
 	}
-	spin_unlock_irqrestore(&cq->hwq.lock, flags);
+	spin_unlock_irqrestore(&cq->flush_lock, flags);
 
 	return num_cqes - budget;
 }
@@ -2773,11 +2719,9 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 		       int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
 	struct cq_base *hw_cqe, **hw_cqe_ptr;
-	unsigned long flags;
 	u32 sw_cons, raw_cons;
 	int budget, rc = 0;
 
-	spin_lock_irqsave(&cq->hwq.lock, flags);
 	raw_cons = cq->hwq.cons;
 	budget = num_cqes;
 
@@ -2853,20 +2797,15 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 		bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ);
 	}
 exit:
-	spin_unlock_irqrestore(&cq->hwq.lock, flags);
 	return num_cqes - budget;
 }
 
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&cq->hwq.lock, flags);
 	if (arm_type)
 		bnxt_qplib_arm_cq(cq, arm_type);
 	/* Using cq->arm_state variable to track whether to issue cq handler */
 	atomic_set(&cq->arm_state, 1);
-	spin_unlock_irqrestore(&cq->hwq.lock, flags);
 }
 
 void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp)

+ 12 - 0
drivers/infiniband/hw/bnxt_re/qplib_fp.h

@@ -389,6 +389,18 @@ struct bnxt_qplib_cq {
 	struct list_head		sqf_head, rqf_head;
 	atomic_t			arm_state;
 	spinlock_t			compl_lock; /* synch CQ handlers */
+/* Locking Notes:
+ * QP can move to error state from modify_qp, async error event or error
+ * CQE as part of poll_cq. When QP is moved to error state, it gets added
+ * to two flush lists, one each for SQ and RQ.
+ * Each flush list is protected by qplib_cq->flush_lock. Both scq and rcq
+ * flush_locks should be acquired when QP is moved to error. The control path
+ * operations(modify_qp and async error events) are synchronized with poll_cq
+ * using upper level CQ locks (bnxt_re_cq->cq_lock) of both SCQ and RCQ.
+ * The qplib_cq->flush_lock is required to synchronize two instances of poll_cq
+ * of the same QP while manipulating the flush list.
+ */
+	spinlock_t			flush_lock; /* QP flush management */
 };
 
 #define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE	sizeof(struct xrrq_irrq)

+ 6 - 3
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c

@@ -305,9 +305,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 			err_event->res_err_state_reason);
 		if (!qp)
 			break;
-		bnxt_qplib_acquire_cq_locks(qp, &flags);
 		bnxt_qplib_mark_qp_error(qp);
-		bnxt_qplib_release_cq_locks(qp, &flags);
+		rcfw->aeq_handler(rcfw, qp_event, qp);
 		break;
 	default:
 		/* Command Response */
@@ -460,7 +459,11 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 	int rc;
 
 	RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
-
+	/* Supply (log-base-2-of-host-page-size - base-page-shift)
+	 * to bono to adjust the doorbell page sizes.
+	 */
+	req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
+					   RCFW_DBR_BASE_PAGE_SHIFT);
 	/*
 	 * VFs need not setup the HW context area, PF
 	 * shall setup this area for VF. Skipping the

+ 1 - 0
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h

@@ -49,6 +49,7 @@
 #define RCFW_COMM_SIZE			0x104
 
 #define RCFW_DBR_PCI_BAR_REGION		2
+#define RCFW_DBR_BASE_PAGE_SHIFT	12
 
 #define RCFW_CMD_PREP(req, CMD, cmd_flags)				\
 	do {								\

+ 2 - 1
drivers/infiniband/hw/bnxt_re/qplib_sp.c

@@ -139,7 +139,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 	attr->max_pkey = le32_to_cpu(sb->max_pkeys);
 
 	attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
-	attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE;
+	attr->l2_db_size = (sb->l2_db_space_size + 1) *
+			    (0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
 	attr->max_sgid = le32_to_cpu(sb->max_gid);
 
 	bnxt_qplib_query_version(rcfw, attr->fw_ver);

+ 24 - 1
drivers/infiniband/hw/bnxt_re/roce_hsi.h

@@ -1761,7 +1761,30 @@ struct cmdq_initialize_fw {
 	#define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M		   (0x3UL << 4)
 	#define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M		   (0x4UL << 4)
 	#define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G		   (0x5UL << 4)
-	__le16 reserved16;
+	/* This value is (log-base-2-of-DBR-page-size - 12).
+	 * 0 for 4KB. HW supported values are enumerated below.
+	 */
+	__le16  log2_dbr_pg_size;
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_MASK	0xfUL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_SFT		0
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4K	0x0UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8K	0x1UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16K	0x2UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32K	0x3UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64K	0x4UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128K	0x5UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_256K	0x6UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_512K	0x7UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_1M	0x8UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_2M	0x9UL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4M	0xaUL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8M	0xbUL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16M	0xcUL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32M	0xdUL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64M	0xeUL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M	0xfUL
+	#define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_LAST		\
+			CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M
 	__le64 qpc_page_dir;
 	__le64 mrw_page_dir;
 	__le64 srq_page_dir;

+ 3 - 1
drivers/infiniband/hw/mlx4/cq.c

@@ -601,6 +601,7 @@ static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
 	wc->dlid_path_bits = 0;
 
 	if (is_eth) {
+		wc->slid = 0;
 		wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
 		memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
 		memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
@@ -851,7 +852,6 @@ repoll:
 			}
 		}
 
-		wc->slid	   = be16_to_cpu(cqe->rlid);
 		g_mlpath_rqpn	   = be32_to_cpu(cqe->g_mlpath_rqpn);
 		wc->src_qp	   = g_mlpath_rqpn & 0xffffff;
 		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
@@ -860,6 +860,7 @@ repoll:
 		wc->wc_flags	  |= mlx4_ib_ipoib_csum_ok(cqe->status,
 					cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
 		if (is_eth) {
+			wc->slid = 0;
 			wc->sl  = be16_to_cpu(cqe->sl_vid) >> 13;
 			if (be32_to_cpu(cqe->vlan_my_qpn) &
 					MLX4_CQE_CVLAN_PRESENT_MASK) {
@@ -871,6 +872,7 @@ repoll:
 			memcpy(wc->smac, cqe->smac, ETH_ALEN);
 			wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
 		} else {
+			wc->slid = be16_to_cpu(cqe->rlid);
 			wc->sl  = be16_to_cpu(cqe->sl_vid) >> 12;
 			wc->vlan_id = 0xffff;
 		}

+ 7 - 4
drivers/infiniband/hw/mlx4/main.c

@@ -219,8 +219,6 @@ static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
 			gid_tbl[i].version = 2;
 			if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
 				gid_tbl[i].type = 1;
-			else
-				memset(&gid_tbl[i].gid, 0, 12);
 		}
 	}
 
@@ -366,8 +364,13 @@ static int mlx4_ib_del_gid(struct ib_device *device,
 		if (!gids) {
 			ret = -ENOMEM;
 		} else {
-			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
-				memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
+				memcpy(&gids[i].gid,
+				       &port_gid_table->gids[i].gid,
+				       sizeof(union ib_gid));
+				gids[i].gid_type =
+				    port_gid_table->gids[i].gid_type;
+			}
 		}
 	}
 	spin_unlock_bh(&iboe->lock);

+ 8 - 2
drivers/infiniband/hw/mlx5/cq.c

@@ -226,7 +226,6 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
 		break;
 	}
-	wc->slid	   = be16_to_cpu(cqe->slid);
 	wc->src_qp	   = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
 	wc->dlid_path_bits = cqe->ml_path;
 	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
@@ -241,10 +240,12 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 	}
 
 	if (ll != IB_LINK_LAYER_ETHERNET) {
+		wc->slid = be16_to_cpu(cqe->slid);
 		wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
 		return;
 	}
 
+	wc->slid = 0;
 	vlan_present = cqe->l4_l3_hdr_type & 0x1;
 	roce_packet_type   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
 	if (vlan_present) {
@@ -1177,7 +1178,12 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	if (ucmd.reserved0 || ucmd.reserved1)
 		return -EINVAL;
 
-	umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
+	/* check multiplication overflow */
+	if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
+		return -EINVAL;
+
+	umem = ib_umem_get(context, ucmd.buf_addr,
+			   (size_t)ucmd.cqe_size * entries,
 			   IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(umem)) {
 		err = PTR_ERR(umem);

+ 9 - 12
drivers/infiniband/hw/mlx5/main.c

@@ -245,12 +245,16 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
 	struct mlx5_ib_multiport_info *mpi;
 	struct mlx5_ib_port *port;
 
+	if (!mlx5_core_mp_enabled(ibdev->mdev) ||
+	    ll != IB_LINK_LAYER_ETHERNET) {
+		if (native_port_num)
+			*native_port_num = ib_port_num;
+		return ibdev->mdev;
+	}
+
 	if (native_port_num)
 		*native_port_num = 1;
 
-	if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
-		return ibdev->mdev;
-
 	port = &ibdev->port[ib_port_num - 1];
 	if (!port)
 		return NULL;
@@ -3263,7 +3267,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
 	struct mlx5_ib_dev *ibdev;
 	struct ib_event ibev;
 	bool fatal = false;
-	u8 port = 0;
+	u8 port = (u8)work->param;
 
 	if (mlx5_core_is_mp_slave(work->dev)) {
 		ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
@@ -3283,8 +3287,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
 	case MLX5_DEV_EVENT_PORT_UP:
 	case MLX5_DEV_EVENT_PORT_DOWN:
 	case MLX5_DEV_EVENT_PORT_INITIALIZED:
-		port = (u8)work->param;
-
 		/* In RoCE, port up/down events are handled in
 		 * mlx5_netdev_event().
 		 */
@@ -3298,24 +3300,19 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
 
 	case MLX5_DEV_EVENT_LID_CHANGE:
 		ibev.event = IB_EVENT_LID_CHANGE;
-		port = (u8)work->param;
 		break;
 
 	case MLX5_DEV_EVENT_PKEY_CHANGE:
 		ibev.event = IB_EVENT_PKEY_CHANGE;
-		port = (u8)work->param;
-
 		schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
 		break;
 
 	case MLX5_DEV_EVENT_GUID_CHANGE:
 		ibev.event = IB_EVENT_GID_CHANGE;
-		port = (u8)work->param;
 		break;
 
 	case MLX5_DEV_EVENT_CLIENT_REREG:
 		ibev.event = IB_EVENT_CLIENT_REREGISTER;
-		port = (u8)work->param;
 		break;
 	case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
 		schedule_work(&ibdev->delay_drop.delay_drop_work);
@@ -3327,7 +3324,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
 	ibev.device	      = &ibdev->ib_dev;
 	ibev.element.port_num = port;
 
-	if (port < 1 || port > ibdev->num_ports) {
+	if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
 		mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
 		goto out;
 	}

+ 1 - 1
drivers/infiniband/hw/mlx5/mr.c

@@ -1816,7 +1816,6 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
 
 	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
 	mr->ibmr.length = 0;
-	mr->ndescs = sg_nents;
 
 	for_each_sg(sgl, sg, sg_nents, i) {
 		if (unlikely(i >= mr->max_descs))
@@ -1828,6 +1827,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
 
 		sg_offset = 0;
 	}
+	mr->ndescs = i;
 
 	if (sg_offset_p)
 		*sg_offset_p = sg_offset;

+ 9 - 2
drivers/infiniband/hw/mlx5/qp.c

@@ -1584,6 +1584,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	u32 uidx = MLX5_IB_DEFAULT_UIDX;
 	struct mlx5_ib_create_qp ucmd;
 	struct mlx5_ib_qp_base *base;
+	int mlx5_st;
 	void *qpc;
 	u32 *in;
 	int err;
@@ -1592,6 +1593,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	spin_lock_init(&qp->sq.lock);
 	spin_lock_init(&qp->rq.lock);
 
+	mlx5_st = to_mlx5_st(init_attr->qp_type);
+	if (mlx5_st < 0)
+		return -EINVAL;
+
 	if (init_attr->rwq_ind_tbl) {
 		if (!udata)
 			return -ENOSYS;
@@ -1753,7 +1758,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 
-	MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
+	MLX5_SET(qpc, qpc, st, mlx5_st);
 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
 
 	if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
@@ -3095,8 +3100,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		goto out;
 
 	if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
-	    !optab[mlx5_cur][mlx5_new])
+	    !optab[mlx5_cur][mlx5_new]) {
+		err = -EINVAL;
 		goto out;
+	}
 
 	op = optab[mlx5_cur][mlx5_new];
 	optpar = ib_mask_to_mlx5_opt(attr_mask);

+ 11 - 8
drivers/infiniband/hw/qedr/qedr_iw_cm.c

@@ -458,8 +458,7 @@ qedr_addr6_resolve(struct qedr_dev *dev,
 		}
 		return -EINVAL;
 	}
-	neigh = dst_neigh_lookup(dst, &dst_in);
-
+	neigh = dst_neigh_lookup(dst, &fl6.daddr);
 	if (neigh) {
 		rcu_read_lock();
 		if (neigh->nud_state & NUD_VALID) {
@@ -494,10 +493,14 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
 	qp = idr_find(&dev->qpidr, conn_param->qpn);
 
-	laddr = (struct sockaddr_in *)&cm_id->local_addr;
-	raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-	laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
-	raddr6 = (struct sockaddr_in6 *)&cm_id->remote_addr;
+	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+	laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+	raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+	DP_DEBUG(dev, QEDR_MSG_IWARP, "MAPPED %d %d\n",
+		 ntohs(((struct sockaddr_in *)&cm_id->remote_addr)->sin_port),
+		 ntohs(raddr->sin_port));
 
 	DP_DEBUG(dev, QEDR_MSG_IWARP,
 		 "Connect source address: %pISpc, remote address: %pISpc\n",
@@ -599,8 +602,8 @@ int qedr_iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 	int rc;
 	int i;
 
-	laddr = (struct sockaddr_in *)&cm_id->local_addr;
-	laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
+	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
 
 	DP_DEBUG(dev, QEDR_MSG_IWARP,
 		 "Create Listener address: %pISpc\n", &cm_id->local_addr);

+ 12 - 1
drivers/infiniband/hw/qedr/verbs.c

@@ -3034,6 +3034,11 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	switch (wr->opcode) {
 	case IB_WR_SEND_WITH_IMM:
+		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+			rc = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
 		swqe->wqe_size = 2;
@@ -3075,6 +3080,11 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		break;
 
 	case IB_WR_RDMA_WRITE_WITH_IMM:
+		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+			rc = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
 
@@ -3724,7 +3734,7 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
 	struct qedr_cq *cq = get_qedr_cq(ibcq);
-	union rdma_cqe *cqe = cq->latest_cqe;
+	union rdma_cqe *cqe;
 	u32 old_cons, new_cons;
 	unsigned long flags;
 	int update = 0;
@@ -3741,6 +3751,7 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
 
 	spin_lock_irqsave(&cq->cq_lock, flags);
+	cqe = cq->latest_cqe;
 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
 	while (num_entries && is_valid_cqe(cq, cqe)) {
 		struct qedr_qp *qp;

+ 21 - 6
drivers/md/bcache/super.c

@@ -963,6 +963,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
 	uint32_t rtime = cpu_to_le32(get_seconds());
 	struct uuid_entry *u;
 	char buf[BDEVNAME_SIZE];
+	struct cached_dev *exist_dc, *t;
 
 	bdevname(dc->bdev, buf);
 
@@ -987,6 +988,16 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
 		return -EINVAL;
 	}
 
+	/* Check whether already attached */
+	list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
+		if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
+			pr_err("Tried to attach %s but duplicate UUID already attached",
+				buf);
+
+			return -EINVAL;
+		}
+	}
+
 	u = uuid_find(c, dc->sb.uuid);
 
 	if (u &&
@@ -1204,7 +1215,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
 
 	return;
 err:
-	pr_notice("error opening %s: %s", bdevname(bdev, name), err);
+	pr_notice("error %s: %s", bdevname(bdev, name), err);
 	bcache_device_stop(&dc->disk);
 }
 
@@ -1883,6 +1894,8 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
 	const char *err = NULL; /* must be set for any error case */
 	int ret = 0;
 
+	bdevname(bdev, name);
+
 	memcpy(&ca->sb, sb, sizeof(struct cache_sb));
 	ca->bdev = bdev;
 	ca->bdev->bd_holder = ca;
@@ -1891,11 +1904,12 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
 	bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page;
 	get_page(sb_page);
 
-	if (blk_queue_discard(bdev_get_queue(ca->bdev)))
+	if (blk_queue_discard(bdev_get_queue(bdev)))
 		ca->discard = CACHE_DISCARD(&ca->sb);
 
 	ret = cache_alloc(ca);
 	if (ret != 0) {
+		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 		if (ret == -ENOMEM)
 			err = "cache_alloc(): -ENOMEM";
 		else
@@ -1918,14 +1932,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
 		goto out;
 	}
 
-	pr_info("registered cache device %s", bdevname(bdev, name));
+	pr_info("registered cache device %s", name);
 
 out:
 	kobject_put(&ca->kobj);
 
 err:
 	if (err)
-		pr_notice("error opening %s: %s", bdevname(bdev, name), err);
+		pr_notice("error %s: %s", name, err);
 
 	return ret;
 }
@@ -2014,6 +2028,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 	if (err)
 		goto err_close;
 
+	err = "failed to register device";
 	if (SB_IS_BDEV(sb)) {
 		struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
 		if (!dc)
@@ -2028,7 +2043,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 			goto err_close;
 
 		if (register_cache(sb, sb_page, bdev, ca) != 0)
-			goto err_close;
+			goto err;
 	}
 out:
 	if (sb_page)
@@ -2041,7 +2056,7 @@ out:
 err_close:
 	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 err:
-	pr_info("error opening %s: %s", path, err);
+	pr_info("error %s: %s", path, err);
 	ret = -EINVAL;
 	goto out;
 }

+ 6 - 10
drivers/md/dm-bufio.c

@@ -386,9 +386,6 @@ static void __cache_size_refresh(void)
 static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 			       enum data_mode *data_mode)
 {
-	unsigned noio_flag;
-	void *ptr;
-
 	if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
 		*data_mode = DATA_MODE_SLAB;
 		return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
@@ -412,16 +409,15 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 	 * all allocations done by this process (including pagetables) are done
 	 * as if GFP_NOIO was specified.
 	 */
+	if (gfp_mask & __GFP_NORETRY) {
+		unsigned noio_flag = memalloc_noio_save();
+		void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
 
-	if (gfp_mask & __GFP_NORETRY)
-		noio_flag = memalloc_noio_save();
-
-	ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
-
-	if (gfp_mask & __GFP_NORETRY)
 		memalloc_noio_restore(noio_flag);
+		return ptr;
+	}
 
-	return ptr;
+	return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
 }
 
 /*

+ 29 - 37
drivers/md/dm-mpath.c

@@ -22,6 +22,7 @@
 #include <linux/time.h>
 #include <linux/workqueue.h>
 #include <linux/delay.h>
+#include <scsi/scsi_device.h>
 #include <scsi/scsi_dh.h>
 #include <linux/atomic.h>
 #include <linux/blk-mq.h>
@@ -211,25 +212,13 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
 		else
 			m->queue_mode = DM_TYPE_REQUEST_BASED;
 
-	} else if (m->queue_mode == DM_TYPE_BIO_BASED ||
-		   m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+	} else if (m->queue_mode == DM_TYPE_BIO_BASED) {
 		INIT_WORK(&m->process_queued_bios, process_queued_bios);
-
-		if (m->queue_mode == DM_TYPE_BIO_BASED) {
-			/*
-			 * bio-based doesn't support any direct scsi_dh management;
-			 * it just discovers if a scsi_dh is attached.
-			 */
-			set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
-		}
-	}
-
-	if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
-		set_bit(MPATHF_QUEUE_IO, &m->flags);
-		atomic_set(&m->pg_init_in_progress, 0);
-		atomic_set(&m->pg_init_count, 0);
-		m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
-		init_waitqueue_head(&m->pg_init_wait);
+		/*
+		 * bio-based doesn't support any direct scsi_dh management;
+		 * it just discovers if a scsi_dh is attached.
+		 */
+		set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
 	}
 
 	dm_table_set_type(ti->table, m->queue_mode);
@@ -337,14 +326,12 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg)
 {
 	m->current_pg = pg;
 
-	if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
-		return;
-
 	/* Must we initialise the PG first, and queue I/O till it's ready? */
 	if (m->hw_handler_name) {
 		set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
 		set_bit(MPATHF_QUEUE_IO, &m->flags);
 	} else {
+		/* FIXME: not needed if no scsi_dh is attached */
 		clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
 		clear_bit(MPATHF_QUEUE_IO, &m->flags);
 	}
@@ -385,8 +372,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
 	unsigned bypassed = 1;
 
 	if (!atomic_read(&m->nr_valid_paths)) {
-		if (m->queue_mode != DM_TYPE_NVME_BIO_BASED)
-			clear_bit(MPATHF_QUEUE_IO, &m->flags);
+		clear_bit(MPATHF_QUEUE_IO, &m->flags);
 		goto failed;
 	}
 
@@ -599,7 +585,7 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
 	return pgpath;
 }
 
-static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio)
+static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio)
 {
 	struct pgpath *pgpath;
 	unsigned long flags;
@@ -634,8 +620,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,
 {
 	struct pgpath *pgpath;
 
-	if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
-		pgpath = __map_bio_nvme(m, bio);
+	if (!m->hw_handler_name)
+		pgpath = __map_bio_fast(m, bio);
 	else
 		pgpath = __map_bio(m, bio);
 
@@ -675,8 +661,7 @@ static void process_queued_io_list(struct multipath *m)
 {
 	if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
 		dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
-	else if (m->queue_mode == DM_TYPE_BIO_BASED ||
-		 m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+	else if (m->queue_mode == DM_TYPE_BIO_BASED)
 		queue_work(kmultipathd, &m->process_queued_bios);
 }
 
@@ -838,6 +823,16 @@ retain:
 			 */
 			kfree(m->hw_handler_name);
 			m->hw_handler_name = attached_handler_name;
+
+			/*
+			 * Init fields that are only used when a scsi_dh is attached
+			 */
+			if (!test_and_set_bit(MPATHF_QUEUE_IO, &m->flags)) {
+				atomic_set(&m->pg_init_in_progress, 0);
+				atomic_set(&m->pg_init_count, 0);
+				m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
+				init_waitqueue_head(&m->pg_init_wait);
+			}
 		}
 	}
 
@@ -873,6 +868,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
 	int r;
 	struct pgpath *p;
 	struct multipath *m = ti->private;
+	struct scsi_device *sdev;
 
 	/* we need at least a path arg */
 	if (as->argc < 1) {
@@ -891,7 +887,9 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
 		goto bad;
 	}
 
-	if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
+	sdev = scsi_device_from_queue(bdev_get_queue(p->path.dev->bdev));
+	if (sdev) {
+		put_device(&sdev->sdev_gendev);
 		INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
 		r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error);
 		if (r) {
@@ -1001,8 +999,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
 	if (!hw_argc)
 		return 0;
 
-	if (m->queue_mode == DM_TYPE_BIO_BASED ||
-	    m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+	if (m->queue_mode == DM_TYPE_BIO_BASED) {
 		dm_consume_args(as, hw_argc);
 		DMERR("bio-based multipath doesn't allow hardware handler args");
 		return 0;
@@ -1091,8 +1088,6 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
 
 			if (!strcasecmp(queue_mode_name, "bio"))
 				m->queue_mode = DM_TYPE_BIO_BASED;
-			else if (!strcasecmp(queue_mode_name, "nvme"))
-				m->queue_mode = DM_TYPE_NVME_BIO_BASED;
 			else if (!strcasecmp(queue_mode_name, "rq"))
 				m->queue_mode = DM_TYPE_REQUEST_BASED;
 			else if (!strcasecmp(queue_mode_name, "mq"))
@@ -1193,7 +1188,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->num_discard_bios = 1;
 	ti->num_write_same_bios = 1;
 	ti->num_write_zeroes_bios = 1;
-	if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+	if (m->queue_mode == DM_TYPE_BIO_BASED)
 		ti->per_io_data_size = multipath_per_bio_data_size();
 	else
 		ti->per_io_data_size = sizeof(struct dm_mpath_io);
@@ -1730,9 +1725,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 			case DM_TYPE_BIO_BASED:
 				DMEMIT("queue_mode bio ");
 				break;
-			case DM_TYPE_NVME_BIO_BASED:
-				DMEMIT("queue_mode nvme ");
-				break;
 			case DM_TYPE_MQ_REQUEST_BASED:
 				DMEMIT("queue_mode mq ");
 				break;

+ 4 - 3
drivers/md/dm-raid.c

@@ -3408,9 +3408,10 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
 		set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
 
 	} else {
-		if (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
-		    test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
-		    test_bit(MD_RECOVERY_RUNNING, &recovery))
+		if (!test_bit(MD_RECOVERY_INTR, &recovery) &&
+		    (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
+		     test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
+		     test_bit(MD_RECOVERY_RUNNING, &recovery)))
 			r = mddev->curr_resync_completed;
 		else
 			r = mddev->recovery_cp;

+ 6 - 10
drivers/md/dm-table.c

@@ -942,17 +942,12 @@ static int dm_table_determine_type(struct dm_table *t)
 
 	if (t->type != DM_TYPE_NONE) {
 		/* target already set the table's type */
-		if (t->type == DM_TYPE_BIO_BASED)
-			return 0;
-		else if (t->type == DM_TYPE_NVME_BIO_BASED) {
-			if (!dm_table_does_not_support_partial_completion(t)) {
-				DMERR("nvme bio-based is only possible with devices"
-				      " that don't support partial completion");
-				return -EINVAL;
-			}
-			/* Fallthru, also verify all devices are blk-mq */
+		if (t->type == DM_TYPE_BIO_BASED) {
+			/* possibly upgrade to a variant of bio-based */
+			goto verify_bio_based;
 		}
 		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
+		BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED);
 		goto verify_rq_based;
 	}
 
@@ -985,6 +980,7 @@ static int dm_table_determine_type(struct dm_table *t)
 	}
 
 	if (bio_based) {
+verify_bio_based:
 		/* We must use this table as bio-based */
 		t->type = DM_TYPE_BIO_BASED;
 		if (dm_table_supports_dax(t) ||
@@ -1755,7 +1751,7 @@ static int device_no_partial_completion(struct dm_target *ti, struct dm_dev *dev
 	char b[BDEVNAME_SIZE];
 
 	/* For now, NVMe devices are the only devices of this class */
-	return (strncmp(bdevname(dev->bdev, b), "nvme", 3) == 0);
+	return (strncmp(bdevname(dev->bdev, b), "nvme", 4) == 0);
 }
 
 static bool dm_table_does_not_support_partial_completion(struct dm_table *t)

+ 20 - 15
drivers/md/dm.c

@@ -458,9 +458,11 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return dm_get_geometry(md, geo);
 }
 
-static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
-				  struct block_device **bdev,
-				  fmode_t *mode)
+static char *_dm_claim_ptr = "I belong to device-mapper";
+
+static int dm_get_bdev_for_ioctl(struct mapped_device *md,
+				 struct block_device **bdev,
+				 fmode_t *mode)
 {
 	struct dm_target *tgt;
 	struct dm_table *map;
@@ -490,6 +492,10 @@ retry:
 		goto out;
 
 	bdgrab(*bdev);
+	r = blkdev_get(*bdev, *mode, _dm_claim_ptr);
+	if (r < 0)
+		goto out;
+
 	dm_put_live_table(md, srcu_idx);
 	return r;
 
@@ -508,7 +514,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	int r;
 
-	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -528,7 +534,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 
 	r =  __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 out:
-	bdput(bdev);
+	blkdev_put(bdev, mode);
 	return r;
 }
 
@@ -708,14 +714,13 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
 static int open_table_device(struct table_device *td, dev_t dev,
 			     struct mapped_device *md)
 {
-	static char *_claim_ptr = "I belong to device-mapper";
 	struct block_device *bdev;
 
 	int r;
 
 	BUG_ON(td->dm_dev.bdev);
 
-	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
+	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
 	if (IS_ERR(bdev))
 		return PTR_ERR(bdev);
 
@@ -3011,7 +3016,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
 	fmode_t mode;
 	int r;
 
-	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3021,7 +3026,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
 	else
 		r = -EOPNOTSUPP;
 
-	bdput(bdev);
+	blkdev_put(bdev, mode);
 	return r;
 }
 
@@ -3032,7 +3037,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
 	fmode_t mode;
 	int r;
 
-	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3042,7 +3047,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
 	else
 		r = -EOPNOTSUPP;
 
-	bdput(bdev);
+	blkdev_put(bdev, mode);
 	return r;
 }
 
@@ -3054,7 +3059,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
 	fmode_t mode;
 	int r;
 
-	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3064,7 +3069,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
 	else
 		r = -EOPNOTSUPP;
 
-	bdput(bdev);
+	blkdev_put(bdev, mode);
 	return r;
 }
 
@@ -3075,7 +3080,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
 	fmode_t mode;
 	int r;
 
-	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3085,7 +3090,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
 	else
 		r = -EOPNOTSUPP;
 
-	bdput(bdev);
+	blkdev_put(bdev, mode);
 	return r;
 }
 

+ 1 - 1
drivers/net/ethernet/mellanox/mlx5/core/health.c

@@ -124,7 +124,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
 		trigger_cmd_completions(dev);
 	}
 
-	mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
+	mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
 	mlx5_core_err(dev, "end\n");
 
 unlock:

+ 0 - 2
drivers/nvme/host/core.c

@@ -3033,7 +3033,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 			ns->disk->disk_name);
 
 	nvme_mpath_add_disk(ns->head);
-	nvme_mpath_add_disk_links(ns);
 	return;
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
@@ -3053,7 +3052,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		return;
 
 	if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-		nvme_mpath_remove_disk_links(ns);
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_id_attr_group);
 		if (ns->ndev)

+ 5 - 0
drivers/nvme/host/fabrics.c

@@ -650,6 +650,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 				ret = -EINVAL;
 				goto out;
 			}
+			if (opts->discovery_nqn) {
+				pr_debug("Ignoring nr_io_queues value for discovery controller\n");
+				break;
+			}
+
 			opts->nr_io_queues = min_t(unsigned int,
 					num_online_cpus(), token);
 			break;

+ 17 - 10
drivers/nvme/host/fc.c

@@ -1206,7 +1206,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
 				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
 
 	assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
-	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
+	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1);
 	/* Linux supports only Dynamic controllers */
 	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
 	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
@@ -1321,7 +1321,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
 	conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
 	conn_rqst->connect_cmd.qid  = cpu_to_be16(queue->qnum);
-	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize);
+	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1);
 
 	lsop->queue = queue;
 	lsreq->rqstaddr = conn_rqst;
@@ -2481,11 +2481,11 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 		goto out_free_tag_set;
 	}
 
-	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
 		goto out_cleanup_blk_queue;
 
-	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
 		goto out_delete_hw_queues;
 
@@ -2532,11 +2532,11 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_free_io_queues;
 
-	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
 		goto out_free_io_queues;
 
-	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
 		goto out_delete_hw_queues;
 
@@ -2632,13 +2632,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	nvme_fc_init_queue(ctrl, 0);
 
 	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
-				NVME_AQ_BLK_MQ_DEPTH);
+				NVME_AQ_DEPTH);
 	if (ret)
 		goto out_free_queue;
 
 	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
-				NVME_AQ_BLK_MQ_DEPTH,
-				(NVME_AQ_BLK_MQ_DEPTH / 4));
+				NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4));
 	if (ret)
 		goto out_delete_hw_queue;
 
@@ -2666,7 +2665,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
 	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (ret)
@@ -2699,6 +2698,14 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		opts->queue_size = ctrl->ctrl.maxcmd;
 	}
 
+	if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
+		/* warn if sqsize is lower than queue_size */
+		dev_warn(ctrl->ctrl.device,
+			"queue_size %zu > ctrl sqsize %u, clamping down\n",
+			opts->queue_size, ctrl->ctrl.sqsize + 1);
+		opts->queue_size = ctrl->ctrl.sqsize + 1;
+	}
+
 	ret = nvme_fc_init_aen_ops(ctrl);
 	if (ret)
 		goto out_term_aen_ops;

+ 0 - 30
drivers/nvme/host/multipath.c

@@ -210,25 +210,6 @@ void nvme_mpath_add_disk(struct nvme_ns_head *head)
 	mutex_unlock(&head->subsys->lock);
 }
 
-void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
-	struct kobject *slave_disk_kobj, *holder_disk_kobj;
-
-	if (!ns->head->disk)
-		return;
-
-	slave_disk_kobj = &disk_to_dev(ns->disk)->kobj;
-	if (sysfs_create_link(ns->head->disk->slave_dir, slave_disk_kobj,
-			kobject_name(slave_disk_kobj)))
-		return;
-
-	holder_disk_kobj = &disk_to_dev(ns->head->disk)->kobj;
-	if (sysfs_create_link(ns->disk->part0.holder_dir, holder_disk_kobj,
-			kobject_name(holder_disk_kobj)))
-		sysfs_remove_link(ns->head->disk->slave_dir,
-			kobject_name(slave_disk_kobj));
-}
-
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 	if (!head->disk)
@@ -243,14 +224,3 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	blk_cleanup_queue(head->disk->queue);
 	put_disk(head->disk);
 }
-
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
-	if (!ns->head->disk)
-		return;
-
-	sysfs_remove_link(ns->disk->part0.holder_dir,
-			kobject_name(&disk_to_dev(ns->head->disk)->kobj));
-	sysfs_remove_link(ns->head->disk->slave_dir,
-			kobject_name(&disk_to_dev(ns->disk)->kobj));
-}

+ 0 - 8
drivers/nvme/host/nvme.h

@@ -410,9 +410,7 @@ bool nvme_req_needs_failover(struct request *req, blk_status_t error);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
 void nvme_mpath_add_disk(struct nvme_ns_head *head);
-void nvme_mpath_add_disk_links(struct nvme_ns *ns);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns);
 
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
@@ -454,12 +452,6 @@ static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 }
-static inline void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
-}
-static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
-}
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 }

+ 8 - 7
drivers/nvme/host/pci.c

@@ -1153,12 +1153,6 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	if (!(csts & NVME_CSTS_CFS) && !nssro)
 		return false;
 
-	/* If PCI error recovery process is happening, we cannot reset or
-	 * the recovery mechanism will surely fail.
-	 */
-	if (pci_channel_offline(to_pci_dev(dev->dev)))
-		return false;
-
 	return true;
 }
 
@@ -1189,6 +1183,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_command cmd;
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
+	/* If PCI error recovery process is happening, we cannot reset or
+	 * the recovery mechanism will surely fail.
+	 */
+	mb();
+	if (pci_channel_offline(to_pci_dev(dev->dev)))
+		return BLK_EH_RESET_TIMER;
+
 	/*
 	 * Reset immediately if the controller is failed
 	 */
@@ -1913,7 +1914,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	int result, nr_io_queues;
 	unsigned long size;
 
-	nr_io_queues = num_present_cpus();
+	nr_io_queues = num_possible_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
 	if (result < 0)
 		return result;

+ 20 - 7
drivers/platform/x86/Kconfig

@@ -105,31 +105,44 @@ config ASUS_LAPTOP
 
 	  If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
+#
+# If the DELL_SMBIOS_SMM feature is enabled, the DELL_SMBIOS driver
+# becomes dependent on the DCDBAS driver. The "depends" line prevents a
+# configuration where DELL_SMBIOS=y while DCDBAS=m.
+#
 config DELL_SMBIOS
-	tristate
+	tristate "Dell SMBIOS driver"
+	depends on DCDBAS || DCDBAS=n
+	---help---
+	This provides support for the Dell SMBIOS calling interface.
+	If you have a Dell computer you should enable this option.
+
+	Be sure to select at least one backend for it to work properly.
 
 config DELL_SMBIOS_WMI
-	tristate "Dell SMBIOS calling interface (WMI implementation)"
+	bool "Dell SMBIOS driver WMI backend"
+	default y
 	depends on ACPI_WMI
 	select DELL_WMI_DESCRIPTOR
-	select DELL_SMBIOS
+	depends on DELL_SMBIOS
 	---help---
 	This provides an implementation for the Dell SMBIOS calling interface
 	communicated over ACPI-WMI.
 
-	If you have a Dell computer from >2007 you should say Y or M here.
+	If you have a Dell computer from >2007 you should say Y here.
 	If you aren't sure and this module doesn't work for your computer
 	it just won't load.
 
 config DELL_SMBIOS_SMM
-	tristate "Dell SMBIOS calling interface (SMM implementation)"
+	bool "Dell SMBIOS driver SMM backend"
+	default y
 	depends on DCDBAS
-	select DELL_SMBIOS
+	depends on DELL_SMBIOS
 	---help---
 	This provides an implementation for the Dell SMBIOS calling interface
 	communicated over SMI/SMM.
 
-	If you have a Dell computer from <=2017 you should say Y or M here.
+	If you have a Dell computer from <=2017 you should say Y here.
 	If you aren't sure and this module doesn't work for your computer
 	it just won't load.
 

+ 3 - 2
drivers/platform/x86/Makefile

@@ -13,8 +13,9 @@ obj-$(CONFIG_MSI_LAPTOP)	+= msi-laptop.o
 obj-$(CONFIG_ACPI_CMPC)		+= classmate-laptop.o
 obj-$(CONFIG_COMPAL_LAPTOP)	+= compal-laptop.o
 obj-$(CONFIG_DELL_SMBIOS)	+= dell-smbios.o
-obj-$(CONFIG_DELL_SMBIOS_WMI)	+= dell-smbios-wmi.o
-obj-$(CONFIG_DELL_SMBIOS_SMM)	+= dell-smbios-smm.o
+dell-smbios-objs		:= dell-smbios-base.o
+dell-smbios-$(CONFIG_DELL_SMBIOS_WMI)	+= dell-smbios-wmi.o
+dell-smbios-$(CONFIG_DELL_SMBIOS_SMM)	+= dell-smbios-smm.o
 obj-$(CONFIG_DELL_LAPTOP)	+= dell-laptop.o
 obj-$(CONFIG_DELL_WMI)		+= dell-wmi.o
 obj-$(CONFIG_DELL_WMI_DESCRIPTOR)	+= dell-wmi-descriptor.o

+ 25 - 4
drivers/platform/x86/dell-smbios.c → drivers/platform/x86/dell-smbios-base.c

@@ -36,7 +36,7 @@ static DEFINE_MUTEX(smbios_mutex);
 struct smbios_device {
 	struct list_head list;
 	struct device *device;
-	int (*call_fn)(struct calling_interface_buffer *);
+	int (*call_fn)(struct calling_interface_buffer *arg);
 };
 
 struct smbios_call {
@@ -352,8 +352,10 @@ static void __init parse_da_table(const struct dmi_header *dm)
 	struct calling_interface_structure *table =
 		container_of(dm, struct calling_interface_structure, header);
 
-	/* 4 bytes of table header, plus 7 bytes of Dell header, plus at least
-	   6 bytes of entry */
+	/*
+	 * 4 bytes of table header, plus 7 bytes of Dell header
+	 * plus at least 6 bytes of entry
+	 */
 
 	if (dm->length < 17)
 		return;
@@ -554,7 +556,7 @@ static void free_group(struct platform_device *pdev)
 static int __init dell_smbios_init(void)
 {
 	const struct dmi_device *valid;
-	int ret;
+	int ret, wmi, smm;
 
 	valid = dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Dell System", NULL);
 	if (!valid) {
@@ -589,8 +591,24 @@ static int __init dell_smbios_init(void)
 	if (ret)
 		goto fail_create_group;
 
+	/* register backends */
+	wmi = init_dell_smbios_wmi();
+	if (wmi)
+		pr_debug("Failed to initialize WMI backend: %d\n", wmi);
+	smm = init_dell_smbios_smm();
+	if (smm)
+		pr_debug("Failed to initialize SMM backend: %d\n", smm);
+	if (wmi && smm) {
+		pr_err("No SMBIOS backends available (wmi: %d, smm: %d)\n",
+			wmi, smm);
+		goto fail_sysfs;
+	}
+
 	return 0;
 
+fail_sysfs:
+	free_group(platform_device);
+
 fail_create_group:
 	platform_device_del(platform_device);
 
@@ -607,6 +625,8 @@ fail_platform_driver:
 
 static void __exit dell_smbios_exit(void)
 {
+	exit_dell_smbios_wmi();
+	exit_dell_smbios_smm();
 	mutex_lock(&smbios_mutex);
 	if (platform_device) {
 		free_group(platform_device);
@@ -623,5 +643,6 @@ module_exit(dell_smbios_exit);
 MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
 MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
 MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
+MODULE_AUTHOR("Mario Limonciello <mario.limonciello@dell.com>");
 MODULE_DESCRIPTION("Common functions for kernel modules using Dell SMBIOS");
 MODULE_LICENSE("GPL");

+ 4 - 14
drivers/platform/x86/dell-smbios-smm.c

@@ -58,7 +58,7 @@ static const struct dmi_system_id dell_device_table[] __initconst = {
 };
 MODULE_DEVICE_TABLE(dmi, dell_device_table);
 
-static void __init parse_da_table(const struct dmi_header *dm)
+static void parse_da_table(const struct dmi_header *dm)
 {
 	struct calling_interface_structure *table =
 		container_of(dm, struct calling_interface_structure, header);
@@ -73,7 +73,7 @@ static void __init parse_da_table(const struct dmi_header *dm)
 	da_command_code = table->cmdIOCode;
 }
 
-static void __init find_cmd_address(const struct dmi_header *dm, void *dummy)
+static void find_cmd_address(const struct dmi_header *dm, void *dummy)
 {
 	switch (dm->type) {
 	case 0xda: /* Calling interface */
@@ -128,7 +128,7 @@ static bool test_wsmt_enabled(void)
 	return false;
 }
 
-static int __init dell_smbios_smm_init(void)
+int init_dell_smbios_smm(void)
 {
 	int ret;
 	/*
@@ -176,7 +176,7 @@ fail_platform_device_alloc:
 	return ret;
 }
 
-static void __exit dell_smbios_smm_exit(void)
+void exit_dell_smbios_smm(void)
 {
 	if (platform_device) {
 		dell_smbios_unregister_device(&platform_device->dev);
@@ -184,13 +184,3 @@ static void __exit dell_smbios_smm_exit(void)
 		free_page((unsigned long)buffer);
 	}
 }
-
-subsys_initcall(dell_smbios_smm_init);
-module_exit(dell_smbios_smm_exit);
-
-MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
-MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
-MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
-MODULE_AUTHOR("Mario Limonciello <mario.limonciello@dell.com>");
-MODULE_DESCRIPTION("Dell SMBIOS communications over SMI");
-MODULE_LICENSE("GPL");

+ 4 - 10
drivers/platform/x86/dell-smbios-wmi.c

@@ -228,7 +228,7 @@ static const struct wmi_device_id dell_smbios_wmi_id_table[] = {
 	{ },
 };
 
-static void __init parse_b1_table(const struct dmi_header *dm)
+static void parse_b1_table(const struct dmi_header *dm)
 {
 	struct misc_bios_flags_structure *flags =
 	container_of(dm, struct misc_bios_flags_structure, header);
@@ -242,7 +242,7 @@ static void __init parse_b1_table(const struct dmi_header *dm)
 		wmi_supported = 1;
 }
 
-static void __init find_b1(const struct dmi_header *dm, void *dummy)
+static void find_b1(const struct dmi_header *dm, void *dummy)
 {
 	switch (dm->type) {
 	case 0xb1: /* misc bios flags */
@@ -261,7 +261,7 @@ static struct wmi_driver dell_smbios_wmi_driver = {
 	.filter_callback = dell_smbios_wmi_filter,
 };
 
-static int __init init_dell_smbios_wmi(void)
+int init_dell_smbios_wmi(void)
 {
 	dmi_walk(find_b1, NULL);
 
@@ -271,15 +271,9 @@ static int __init init_dell_smbios_wmi(void)
 	return wmi_driver_register(&dell_smbios_wmi_driver);
 }
 
-static void __exit exit_dell_smbios_wmi(void)
+void exit_dell_smbios_wmi(void)
 {
 	wmi_driver_unregister(&dell_smbios_wmi_driver);
 }
 
-module_init(init_dell_smbios_wmi);
-module_exit(exit_dell_smbios_wmi);
-
 MODULE_ALIAS("wmi:" DELL_WMI_SMBIOS_GUID);
-MODULE_AUTHOR("Mario Limonciello <mario.limonciello@dell.com>");
-MODULE_DESCRIPTION("Dell SMBIOS communications over WMI");
-MODULE_LICENSE("GPL");

+ 26 - 1
drivers/platform/x86/dell-smbios.h

@@ -75,4 +75,29 @@ int dell_laptop_register_notifier(struct notifier_block *nb);
 int dell_laptop_unregister_notifier(struct notifier_block *nb);
 void dell_laptop_call_notifier(unsigned long action, void *data);
 
-#endif
+/* for the supported backends */
+#ifdef CONFIG_DELL_SMBIOS_WMI
+int init_dell_smbios_wmi(void);
+void exit_dell_smbios_wmi(void);
+#else /* CONFIG_DELL_SMBIOS_WMI */
+static inline int init_dell_smbios_wmi(void)
+{
+	return -ENODEV;
+}
+static inline void exit_dell_smbios_wmi(void)
+{}
+#endif /* CONFIG_DELL_SMBIOS_WMI */
+
+#ifdef CONFIG_DELL_SMBIOS_SMM
+int init_dell_smbios_smm(void);
+void exit_dell_smbios_smm(void);
+#else /* CONFIG_DELL_SMBIOS_SMM */
+static inline int init_dell_smbios_smm(void)
+{
+	return -ENODEV;
+}
+static inline void exit_dell_smbios_smm(void)
+{}
+#endif /* CONFIG_DELL_SMBIOS_SMM */
+
+#endif /* _DELL_SMBIOS_H_ */

+ 2 - 1
drivers/watchdog/f71808e_wdt.c

@@ -566,7 +566,8 @@ static ssize_t watchdog_write(struct file *file, const char __user *buf,
 				char c;
 				if (get_user(c, buf + i))
 					return -EFAULT;
-				expect_close = (c == 'V');
+				if (c == 'V')
+					expect_close = true;
 			}
 
 			/* Properly order writes across fork()ed processes */

+ 9 - 492
drivers/watchdog/hpwdt.c

@@ -28,16 +28,7 @@
 #include <linux/types.h>
 #include <linux/uaccess.h>
 #include <linux/watchdog.h>
-#ifdef CONFIG_HPWDT_NMI_DECODING
-#include <linux/dmi.h>
-#include <linux/spinlock.h>
-#include <linux/nmi.h>
-#include <linux/kdebug.h>
-#include <linux/notifier.h>
-#include <asm/set_memory.h>
-#endif /* CONFIG_HPWDT_NMI_DECODING */
 #include <asm/nmi.h>
-#include <asm/frame.h>
 
 #define HPWDT_VERSION			"1.4.0"
 #define SECS_TO_TICKS(secs)		((secs) * 1000 / 128)
@@ -48,6 +39,9 @@
 static unsigned int soft_margin = DEFAULT_MARGIN;	/* in seconds */
 static unsigned int reload;			/* the computed soft_margin */
 static bool nowayout = WATCHDOG_NOWAYOUT;
+#ifdef CONFIG_HPWDT_NMI_DECODING
+static unsigned int allow_kdump = 1;
+#endif
 static char expect_release;
 static unsigned long hpwdt_is_open;
 
@@ -63,373 +57,6 @@ static const struct pci_device_id hpwdt_devices[] = {
 };
 MODULE_DEVICE_TABLE(pci, hpwdt_devices);
 
-#ifdef CONFIG_HPWDT_NMI_DECODING
-#define PCI_BIOS32_SD_VALUE		0x5F32335F	/* "_32_" */
-#define CRU_BIOS_SIGNATURE_VALUE	0x55524324
-#define PCI_BIOS32_PARAGRAPH_LEN	16
-#define PCI_ROM_BASE1			0x000F0000
-#define ROM_SIZE			0x10000
-
-struct bios32_service_dir {
-	u32 signature;
-	u32 entry_point;
-	u8 revision;
-	u8 length;
-	u8 checksum;
-	u8 reserved[5];
-};
-
-/* type 212 */
-struct smbios_cru64_info {
-	u8 type;
-	u8 byte_length;
-	u16 handle;
-	u32 signature;
-	u64 physical_address;
-	u32 double_length;
-	u32 double_offset;
-};
-#define SMBIOS_CRU64_INFORMATION	212
-
-/* type 219 */
-struct smbios_proliant_info {
-	u8 type;
-	u8 byte_length;
-	u16 handle;
-	u32 power_features;
-	u32 omega_features;
-	u32 reserved;
-	u32 misc_features;
-};
-#define SMBIOS_ICRU_INFORMATION		219
-
-
-struct cmn_registers {
-	union {
-		struct {
-			u8 ral;
-			u8 rah;
-			u16 rea2;
-		};
-		u32 reax;
-	} u1;
-	union {
-		struct {
-			u8 rbl;
-			u8 rbh;
-			u8 reb2l;
-			u8 reb2h;
-		};
-		u32 rebx;
-	} u2;
-	union {
-		struct {
-			u8 rcl;
-			u8 rch;
-			u16 rec2;
-		};
-		u32 recx;
-	} u3;
-	union {
-		struct {
-			u8 rdl;
-			u8 rdh;
-			u16 red2;
-		};
-		u32 redx;
-	} u4;
-
-	u32 resi;
-	u32 redi;
-	u16 rds;
-	u16 res;
-	u32 reflags;
-}  __attribute__((packed));
-
-static unsigned int hpwdt_nmi_decoding;
-static unsigned int allow_kdump = 1;
-static unsigned int is_icru;
-static unsigned int is_uefi;
-static DEFINE_SPINLOCK(rom_lock);
-static void *cru_rom_addr;
-static struct cmn_registers cmn_regs;
-
-extern asmlinkage void asminline_call(struct cmn_registers *pi86Regs,
-						unsigned long *pRomEntry);
-
-#ifdef CONFIG_X86_32
-/* --32 Bit Bios------------------------------------------------------------ */
-
-#define HPWDT_ARCH	32
-
-asm(".text                          \n\t"
-    ".align 4                       \n\t"
-    ".globl asminline_call	    \n"
-    "asminline_call:                \n\t"
-    "pushl       %ebp               \n\t"
-    "movl        %esp, %ebp         \n\t"
-    "pusha                          \n\t"
-    "pushf                          \n\t"
-    "push        %es                \n\t"
-    "push        %ds                \n\t"
-    "pop         %es                \n\t"
-    "movl        8(%ebp),%eax       \n\t"
-    "movl        4(%eax),%ebx       \n\t"
-    "movl        8(%eax),%ecx       \n\t"
-    "movl        12(%eax),%edx      \n\t"
-    "movl        16(%eax),%esi      \n\t"
-    "movl        20(%eax),%edi      \n\t"
-    "movl        (%eax),%eax        \n\t"
-    "push        %cs                \n\t"
-    "call        *12(%ebp)          \n\t"
-    "pushf                          \n\t"
-    "pushl       %eax               \n\t"
-    "movl        8(%ebp),%eax       \n\t"
-    "movl        %ebx,4(%eax)       \n\t"
-    "movl        %ecx,8(%eax)       \n\t"
-    "movl        %edx,12(%eax)      \n\t"
-    "movl        %esi,16(%eax)      \n\t"
-    "movl        %edi,20(%eax)      \n\t"
-    "movw        %ds,24(%eax)       \n\t"
-    "movw        %es,26(%eax)       \n\t"
-    "popl        %ebx               \n\t"
-    "movl        %ebx,(%eax)        \n\t"
-    "popl        %ebx               \n\t"
-    "movl        %ebx,28(%eax)      \n\t"
-    "pop         %es                \n\t"
-    "popf                           \n\t"
-    "popa                           \n\t"
-    "leave                          \n\t"
-    "ret                            \n\t"
-    ".previous");
-
-
-/*
- *	cru_detect
- *
- *	Routine Description:
- *	This function uses the 32-bit BIOS Service Directory record to
- *	search for a $CRU record.
- *
- *	Return Value:
- *	0        :  SUCCESS
- *	<0       :  FAILURE
- */
-static int cru_detect(unsigned long map_entry,
-	unsigned long map_offset)
-{
-	void *bios32_map;
-	unsigned long *bios32_entrypoint;
-	unsigned long cru_physical_address;
-	unsigned long cru_length;
-	unsigned long physical_bios_base = 0;
-	unsigned long physical_bios_offset = 0;
-	int retval = -ENODEV;
-
-	bios32_map = ioremap(map_entry, (2 * PAGE_SIZE));
-
-	if (bios32_map == NULL)
-		return -ENODEV;
-
-	bios32_entrypoint = bios32_map + map_offset;
-
-	cmn_regs.u1.reax = CRU_BIOS_SIGNATURE_VALUE;
-
-	set_memory_x((unsigned long)bios32_map, 2);
-	asminline_call(&cmn_regs, bios32_entrypoint);
-
-	if (cmn_regs.u1.ral != 0) {
-		pr_warn("Call succeeded but with an error: 0x%x\n",
-			cmn_regs.u1.ral);
-	} else {
-		physical_bios_base = cmn_regs.u2.rebx;
-		physical_bios_offset = cmn_regs.u4.redx;
-		cru_length = cmn_regs.u3.recx;
-		cru_physical_address =
-			physical_bios_base + physical_bios_offset;
-
-		/* If the values look OK, then map it in. */
-		if ((physical_bios_base + physical_bios_offset)) {
-			cru_rom_addr =
-				ioremap(cru_physical_address, cru_length);
-			if (cru_rom_addr) {
-				set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK,
-					(cru_length + PAGE_SIZE - 1) >> PAGE_SHIFT);
-				retval = 0;
-			}
-		}
-
-		pr_debug("CRU Base Address:   0x%lx\n", physical_bios_base);
-		pr_debug("CRU Offset Address: 0x%lx\n", physical_bios_offset);
-		pr_debug("CRU Length:         0x%lx\n", cru_length);
-		pr_debug("CRU Mapped Address: %p\n", &cru_rom_addr);
-	}
-	iounmap(bios32_map);
-	return retval;
-}
-
-/*
- *	bios_checksum
- */
-static int bios_checksum(const char __iomem *ptr, int len)
-{
-	char sum = 0;
-	int i;
-
-	/*
-	 * calculate checksum of size bytes. This should add up
-	 * to zero if we have a valid header.
-	 */
-	for (i = 0; i < len; i++)
-		sum += ptr[i];
-
-	return ((sum == 0) && (len > 0));
-}
-
-/*
- *	bios32_present
- *
- *	Routine Description:
- *	This function finds the 32-bit BIOS Service Directory
- *
- *	Return Value:
- *	0        :  SUCCESS
- *	<0       :  FAILURE
- */
-static int bios32_present(const char __iomem *p)
-{
-	struct bios32_service_dir *bios_32_ptr;
-	int length;
-	unsigned long map_entry, map_offset;
-
-	bios_32_ptr = (struct bios32_service_dir *) p;
-
-	/*
-	 * Search for signature by checking equal to the swizzled value
-	 * instead of calling another routine to perform a strcmp.
-	 */
-	if (bios_32_ptr->signature == PCI_BIOS32_SD_VALUE) {
-		length = bios_32_ptr->length * PCI_BIOS32_PARAGRAPH_LEN;
-		if (bios_checksum(p, length)) {
-			/*
-			 * According to the spec, we're looking for the
-			 * first 4KB-aligned address below the entrypoint
-			 * listed in the header. The Service Directory code
-			 * is guaranteed to occupy no more than 2 4KB pages.
-			 */
-			map_entry = bios_32_ptr->entry_point & ~(PAGE_SIZE - 1);
-			map_offset = bios_32_ptr->entry_point - map_entry;
-
-			return cru_detect(map_entry, map_offset);
-		}
-	}
-	return -ENODEV;
-}
-
-static int detect_cru_service(void)
-{
-	char __iomem *p, *q;
-	int rc = -1;
-
-	/*
-	 * Search from 0x0f0000 through 0x0fffff, inclusive.
-	 */
-	p = ioremap(PCI_ROM_BASE1, ROM_SIZE);
-	if (p == NULL)
-		return -ENOMEM;
-
-	for (q = p; q < p + ROM_SIZE; q += 16) {
-		rc = bios32_present(q);
-		if (!rc)
-			break;
-	}
-	iounmap(p);
-	return rc;
-}
-/* ------------------------------------------------------------------------- */
-#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_64
-/* --64 Bit Bios------------------------------------------------------------ */
-
-#define HPWDT_ARCH	64
-
-asm(".text                      \n\t"
-    ".align 4                   \n\t"
-    ".globl asminline_call	\n\t"
-    ".type asminline_call, @function \n\t"
-    "asminline_call:            \n\t"
-    FRAME_BEGIN
-    "pushq      %rax            \n\t"
-    "pushq      %rbx            \n\t"
-    "pushq      %rdx            \n\t"
-    "pushq      %r12            \n\t"
-    "pushq      %r9             \n\t"
-    "movq       %rsi, %r12      \n\t"
-    "movq       %rdi, %r9       \n\t"
-    "movl       4(%r9),%ebx     \n\t"
-    "movl       8(%r9),%ecx     \n\t"
-    "movl       12(%r9),%edx    \n\t"
-    "movl       16(%r9),%esi    \n\t"
-    "movl       20(%r9),%edi    \n\t"
-    "movl       (%r9),%eax      \n\t"
-    "call       *%r12           \n\t"
-    "pushfq                     \n\t"
-    "popq        %r12           \n\t"
-    "movl       %eax, (%r9)     \n\t"
-    "movl       %ebx, 4(%r9)    \n\t"
-    "movl       %ecx, 8(%r9)    \n\t"
-    "movl       %edx, 12(%r9)   \n\t"
-    "movl       %esi, 16(%r9)   \n\t"
-    "movl       %edi, 20(%r9)   \n\t"
-    "movq       %r12, %rax      \n\t"
-    "movl       %eax, 28(%r9)   \n\t"
-    "popq       %r9             \n\t"
-    "popq       %r12            \n\t"
-    "popq       %rdx            \n\t"
-    "popq       %rbx            \n\t"
-    "popq       %rax            \n\t"
-    FRAME_END
-    "ret                        \n\t"
-    ".previous");
-
-/*
- *	dmi_find_cru
- *
- *	Routine Description:
- *	This function checks whether or not a SMBIOS/DMI record is
- *	the 64bit CRU info or not
- */
-static void dmi_find_cru(const struct dmi_header *dm, void *dummy)
-{
-	struct smbios_cru64_info *smbios_cru64_ptr;
-	unsigned long cru_physical_address;
-
-	if (dm->type == SMBIOS_CRU64_INFORMATION) {
-		smbios_cru64_ptr = (struct smbios_cru64_info *) dm;
-		if (smbios_cru64_ptr->signature == CRU_BIOS_SIGNATURE_VALUE) {
-			cru_physical_address =
-				smbios_cru64_ptr->physical_address +
-				smbios_cru64_ptr->double_offset;
-			cru_rom_addr = ioremap(cru_physical_address,
-				smbios_cru64_ptr->double_length);
-			set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK,
-				smbios_cru64_ptr->double_length >> PAGE_SHIFT);
-		}
-	}
-}
-
-static int detect_cru_service(void)
-{
-	cru_rom_addr = NULL;
-
-	dmi_walk(dmi_find_cru, NULL);
-
-	/* if cru_rom_addr has been set then we found a CRU service */
-	return ((cru_rom_addr != NULL) ? 0 : -ENODEV);
-}
-/* ------------------------------------------------------------------------- */
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_HPWDT_NMI_DECODING */
 
 /*
  *	Watchdog operations
@@ -486,30 +113,12 @@ static int hpwdt_my_nmi(void)
  */
 static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs)
 {
-	unsigned long rom_pl;
-	static int die_nmi_called;
-
-	if (!hpwdt_nmi_decoding)
-		return NMI_DONE;
-
 	if ((ulReason == NMI_UNKNOWN) && !hpwdt_my_nmi())
 		return NMI_DONE;
 
-	spin_lock_irqsave(&rom_lock, rom_pl);
-	if (!die_nmi_called && !is_icru && !is_uefi)
-		asminline_call(&cmn_regs, cru_rom_addr);
-	die_nmi_called = 1;
-	spin_unlock_irqrestore(&rom_lock, rom_pl);
-
 	if (allow_kdump)
 		hpwdt_stop();
 
-	if (!is_icru && !is_uefi) {
-		if (cmn_regs.u1.ral == 0) {
-			nmi_panic(regs, "An NMI occurred, but unable to determine source.\n");
-			return NMI_HANDLED;
-		}
-	}
 	nmi_panic(regs, "An NMI occurred. Depending on your system the reason "
 		"for the NMI is logged in any one of the following "
 		"resources:\n"
@@ -675,84 +284,11 @@ static struct miscdevice hpwdt_miscdev = {
  *	Init & Exit
  */
 
-#ifdef CONFIG_HPWDT_NMI_DECODING
-#ifdef CONFIG_X86_LOCAL_APIC
-static void hpwdt_check_nmi_decoding(struct pci_dev *dev)
-{
-	/*
-	 * If nmi_watchdog is turned off then we can turn on
-	 * our nmi decoding capability.
-	 */
-	hpwdt_nmi_decoding = 1;
-}
-#else
-static void hpwdt_check_nmi_decoding(struct pci_dev *dev)
-{
-	dev_warn(&dev->dev, "NMI decoding is disabled. "
-		"Your kernel does not support a NMI Watchdog.\n");
-}
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-/*
- *	dmi_find_icru
- *
- *	Routine Description:
- *	This function checks whether or not we are on an iCRU-based server.
- *	This check is independent of architecture and needs to be made for
- *	any ProLiant system.
- */
-static void dmi_find_icru(const struct dmi_header *dm, void *dummy)
-{
-	struct smbios_proliant_info *smbios_proliant_ptr;
-
-	if (dm->type == SMBIOS_ICRU_INFORMATION) {
-		smbios_proliant_ptr = (struct smbios_proliant_info *) dm;
-		if (smbios_proliant_ptr->misc_features & 0x01)
-			is_icru = 1;
-		if (smbios_proliant_ptr->misc_features & 0x1400)
-			is_uefi = 1;
-	}
-}
 
 static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
+#ifdef CONFIG_HPWDT_NMI_DECODING
 	int retval;
-
-	/*
-	 * On typical CRU-based systems we need to map that service in
-	 * the BIOS. For 32 bit Operating Systems we need to go through
-	 * the 32 Bit BIOS Service Directory. For 64 bit Operating
-	 * Systems we get that service through SMBIOS.
-	 *
-	 * On systems that support the new iCRU service all we need to
-	 * do is call dmi_walk to get the supported flag value and skip
-	 * the old cru detect code.
-	 */
-	dmi_walk(dmi_find_icru, NULL);
-	if (!is_icru && !is_uefi) {
-
-		/*
-		* We need to map the ROM to get the CRU service.
-		* For 32 bit Operating Systems we need to go through the 32 Bit
-		* BIOS Service Directory
-		* For 64 bit Operating Systems we get that service through SMBIOS.
-		*/
-		retval = detect_cru_service();
-		if (retval < 0) {
-			dev_warn(&dev->dev,
-				"Unable to detect the %d Bit CRU Service.\n",
-				HPWDT_ARCH);
-			return retval;
-		}
-
-		/*
-		* We know this is the only CRU call we need to make so lets keep as
-		* few instructions as possible once the NMI comes in.
-		*/
-		cmn_regs.u1.rah = 0x0D;
-		cmn_regs.u1.ral = 0x02;
-	}
-
 	/*
 	 * Only one function can register for NMI_UNKNOWN
 	 */
@@ -780,44 +316,25 @@ error:
 	dev_warn(&dev->dev,
 		"Unable to register a die notifier (err=%d).\n",
 		retval);
-	if (cru_rom_addr)
-		iounmap(cru_rom_addr);
 	return retval;
+#endif	/* CONFIG_HPWDT_NMI_DECODING */
+	return 0;
 }
 
 static void hpwdt_exit_nmi_decoding(void)
 {
+#ifdef CONFIG_HPWDT_NMI_DECODING
 	unregister_nmi_handler(NMI_UNKNOWN, "hpwdt");
 	unregister_nmi_handler(NMI_SERR, "hpwdt");
 	unregister_nmi_handler(NMI_IO_CHECK, "hpwdt");
-	if (cru_rom_addr)
-		iounmap(cru_rom_addr);
-}
-#else /* !CONFIG_HPWDT_NMI_DECODING */
-static void hpwdt_check_nmi_decoding(struct pci_dev *dev)
-{
-}
-
-static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
-{
-	return 0;
+#endif
 }
 
-static void hpwdt_exit_nmi_decoding(void)
-{
-}
-#endif /* CONFIG_HPWDT_NMI_DECODING */
-
 static int hpwdt_init_one(struct pci_dev *dev,
 					const struct pci_device_id *ent)
 {
 	int retval;
 
-	/*
-	 * Check if we can do NMI decoding or not
-	 */
-	hpwdt_check_nmi_decoding(dev);
-
 	/*
 	 * First let's find out if we are on an iLO2+ server. We will
 	 * not run on a legacy ASM box.
@@ -922,6 +439,6 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 #ifdef CONFIG_HPWDT_NMI_DECODING
 module_param(allow_kdump, int, 0);
 MODULE_PARM_DESC(allow_kdump, "Start a kernel dump after NMI occurs");
-#endif /* !CONFIG_HPWDT_NMI_DECODING */
+#endif /* CONFIG_HPWDT_NMI_DECODING */
 
 module_pci_driver(hpwdt_driver);

+ 2 - 1
drivers/watchdog/sbsa_gwdt.c

@@ -50,6 +50,7 @@
  */
 
 #include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -159,7 +160,7 @@ static unsigned int sbsa_gwdt_get_timeleft(struct watchdog_device *wdd)
 	    !(readl(gwdt->control_base + SBSA_GWDT_WCS) & SBSA_GWDT_WCS_WS0))
 		timeleft += readl(gwdt->control_base + SBSA_GWDT_WOR);
 
-	timeleft += readq(gwdt->control_base + SBSA_GWDT_WCV) -
+	timeleft += lo_hi_readq(gwdt->control_base + SBSA_GWDT_WCV) -
 		    arch_counter_get_cntvct();
 
 	do_div(timeleft, gwdt->clk);

+ 3 - 1
kernel/events/core.c

@@ -2246,7 +2246,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 			struct perf_event_context *task_ctx,
 			enum event_type_t event_type)
 {
-	enum event_type_t ctx_event_type = event_type & EVENT_ALL;
+	enum event_type_t ctx_event_type;
 	bool cpu_event = !!(event_type & EVENT_CPU);
 
 	/*
@@ -2256,6 +2256,8 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 	if (event_type & EVENT_PINNED)
 		event_type |= EVENT_FLEXIBLE;
 
+	ctx_event_type = event_type & EVENT_ALL;
+
 	perf_pmu_disable(cpuctx->ctx.pmu);
 	if (task_ctx)
 		task_ctx_sched_out(cpuctx, task_ctx, event_type);

+ 3 - 2
kernel/locking/rtmutex.c

@@ -1616,11 +1616,12 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
 void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
 {
 	DEFINE_WAKE_Q(wake_q);
+	unsigned long flags;
 	bool postunlock;
 
-	raw_spin_lock_irq(&lock->wait_lock);
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
-	raw_spin_unlock_irq(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	if (postunlock)
 		rt_mutex_postunlock(&wake_q);

+ 4 - 4
scripts/Makefile.lib

@@ -297,11 +297,11 @@ cmd_dt_S_dtb=						\
 	echo '\#include <asm-generic/vmlinux.lds.h>'; 	\
 	echo '.section .dtb.init.rodata,"a"';		\
 	echo '.balign STRUCT_ALIGNMENT';		\
-	echo '.global __dtb_$(*F)_begin';		\
-	echo '__dtb_$(*F)_begin:';			\
+	echo '.global __dtb_$(subst -,_,$(*F))_begin';	\
+	echo '__dtb_$(subst -,_,$(*F))_begin:';		\
 	echo '.incbin "$<" ';				\
-	echo '__dtb_$(*F)_end:';			\
-	echo '.global __dtb_$(*F)_end';			\
+	echo '__dtb_$(subst -,_,$(*F))_end:';		\
+	echo '.global __dtb_$(subst -,_,$(*F))_end';	\
 	echo '.balign STRUCT_ALIGNMENT'; 		\
 ) > $@
 

+ 5 - 10
scripts/basic/fixdep.c

@@ -93,14 +93,6 @@
  * (Note: it'd be easy to port over the complete mkdep state machine,
  *  but I don't think the added complexity is worth it)
  */
-/*
- * Note 2: if somebody writes HELLO_CONFIG_BOOM in a file, it will depend onto
- * CONFIG_BOOM. This could seem a bug (not too hard to fix), but please do not
- * fix it! Some UserModeLinux files (look at arch/um/) call CONFIG_BOOM as
- * UML_CONFIG_BOOM, to avoid conflicts with /usr/include/linux/autoconf.h,
- * through arch/um/include/uml-config.h; this fixdep "bug" makes sure that
- * those files will have correct dependencies.
- */
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -233,8 +225,13 @@ static int str_ends_with(const char *s, int slen, const char *sub)
 static void parse_config_file(const char *p)
 {
 	const char *q, *r;
+	const char *start = p;
 
 	while ((p = strstr(p, "CONFIG_"))) {
+		if (p > start && (isalnum(p[-1]) || p[-1] == '_')) {
+			p += 7;
+			continue;
+		}
 		p += 7;
 		q = p;
 		while (*q && (isalnum(*q) || *q == '_'))
@@ -286,8 +283,6 @@ static int is_ignored_file(const char *s, int len)
 {
 	return str_ends_with(s, len, "include/generated/autoconf.h") ||
 	       str_ends_with(s, len, "include/generated/autoksyms.h") ||
-	       str_ends_with(s, len, "arch/um/include/uml-config.h") ||
-	       str_ends_with(s, len, "include/linux/kconfig.h") ||
 	       str_ends_with(s, len, ".ver");
 }
 

+ 1 - 1
scripts/bloat-o-meter

@@ -15,7 +15,7 @@ signal(SIGPIPE, SIG_DFL)
 if len(sys.argv) < 3:
     sys.stderr.write("usage: %s [option] file1 file2\n" % sys.argv[0])
     sys.stderr.write("The options are:\n")
-    sys.stderr.write("-c	cateogrize output based on symbole type\n")
+    sys.stderr.write("-c	categorize output based on symbol type\n")
     sys.stderr.write("-d	Show delta of Data Section\n")
     sys.stderr.write("-t	Show delta of text Section\n")
     sys.exit(-1)

+ 1 - 0
tools/arch/x86/include/asm/cpufeatures.h

@@ -213,6 +213,7 @@
 #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */

+ 2 - 0
tools/include/uapi/linux/kvm.h

@@ -761,6 +761,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
 #define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
 #define KVM_GET_EMULATED_CPUID	  _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
+#define KVM_GET_MSR_FEATURE_INDEX_LIST    _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
 
 /*
  * Extension capability list.
@@ -934,6 +935,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_AIS_MIGRATION 150
 #define KVM_CAP_PPC_GET_CPU_CHAR 151
 #define KVM_CAP_S390_BPB 152
+#define KVM_CAP_GET_MSR_FEATURES 153
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

+ 7 - 20
tools/objtool/check.c

@@ -1116,42 +1116,29 @@ static int read_unwind_hints(struct objtool_file *file)
 
 static int read_retpoline_hints(struct objtool_file *file)
 {
-	struct section *sec, *relasec;
+	struct section *sec;
 	struct instruction *insn;
 	struct rela *rela;
-	int i;
 
-	sec = find_section_by_name(file->elf, ".discard.retpoline_safe");
+	sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
 	if (!sec)
 		return 0;
 
-	relasec = sec->rela;
-	if (!relasec) {
-		WARN("missing .rela.discard.retpoline_safe section");
-		return -1;
-	}
-
-	if (sec->len % sizeof(unsigned long)) {
-		WARN("retpoline_safe size mismatch: %d %ld", sec->len, sizeof(unsigned long));
-		return -1;
-	}
-
-	for (i = 0; i < sec->len / sizeof(unsigned long); i++) {
-		rela = find_rela_by_dest(sec, i * sizeof(unsigned long));
-		if (!rela) {
-			WARN("can't find rela for retpoline_safe[%d]", i);
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
 			return -1;
 		}
 
 		insn = find_insn(file, rela->sym->sec, rela->addend);
 		if (!insn) {
-			WARN("can't find insn for retpoline_safe[%d]", i);
+			WARN("bad .discard.retpoline_safe entry");
 			return -1;
 		}
 
 		if (insn->type != INSN_JUMP_DYNAMIC &&
 		    insn->type != INSN_CALL_DYNAMIC) {
-			WARN_FUNC("retpoline_safe hint not a indirect jump/call",
+			WARN_FUNC("retpoline_safe hint not an indirect jump/call",
 				  insn->sec, insn->offset);
 			return -1;
 		}

+ 1 - 1
tools/perf/Documentation/perf-kallsyms.txt

@@ -8,7 +8,7 @@ perf-kallsyms - Searches running kernel for symbols
 SYNOPSIS
 --------
 [verse]
-'perf kallsyms <options> symbol_name[,symbol_name...]'
+'perf kallsyms' [<options>] symbol_name[,symbol_name...]
 
 DESCRIPTION
 -----------

+ 9 - 0
tools/perf/builtin-record.c

@@ -881,6 +881,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		}
 	}
 
+	/*
+	 * If we have just single event and are sending data
+	 * through pipe, we need to force the ids allocation,
+	 * because we synthesize event name through the pipe
+	 * and need the id for that.
+	 */
+	if (data->is_pipe && rec->evlist->nr_entries == 1)
+		rec->opts.sample_id = true;
+
 	if (record__open(rec) != 0) {
 		err = -1;
 		goto out_child;

+ 1 - 1
tools/perf/builtin-stat.c

@@ -917,7 +917,7 @@ static void print_metric_csv(void *ctx,
 	char buf[64], *vals, *ends;
 
 	if (unit == NULL || fmt == NULL) {
-		fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+		fprintf(out, "%s%s", csv_sep, csv_sep);
 		return;
 	}
 	snprintf(buf, sizeof(buf), fmt, val);

+ 1 - 1
tools/perf/builtin-top.c

@@ -991,7 +991,7 @@ static int perf_top_overwrite_fallback(struct perf_top *top,
 	evlist__for_each_entry(evlist, counter)
 		counter->attr.write_backward = false;
 	opts->overwrite = false;
-	ui__warning("fall back to non-overwrite mode\n");
+	pr_debug2("fall back to non-overwrite mode\n");
 	return 1;
 }
 

+ 1 - 0
tools/perf/perf.h

@@ -61,6 +61,7 @@ struct record_opts {
 	bool	     tail_synthesize;
 	bool	     overwrite;
 	bool	     ignore_missing_thread;
+	bool	     sample_id;
 	unsigned int freq;
 	unsigned int mmap_pages;
 	unsigned int auxtrace_mmap_pages;

+ 25 - 0
tools/perf/ui/browsers/annotate.c

@@ -327,7 +327,32 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 	if (!disasm_line__is_valid_jump(cursor, sym))
 		return;
 
+	/*
+	 * This first was seen with a gcc function, _cpp_lex_token, that
+	 * has the usual jumps:
+	 *
+	 *  │1159e6c: ↓ jne    115aa32 <_cpp_lex_token@@Base+0xf92>
+	 *
+	 * I.e. jumps to a label inside that function (_cpp_lex_token), and
+	 * those works, but also this kind:
+	 *
+	 *  │1159e8b: ↓ jne    c469be <cpp_named_operator2name@@Base+0xa72>
+	 *
+	 *  I.e. jumps to another function, outside _cpp_lex_token, which
+	 *  are not being correctly handled generating as a side effect references
+	 *  to ab->offset[] entries that are set to NULL, so to make this code
+	 *  more robust, check that here.
+	 *
+	 *  A proper fix for will be put in place, looking at the function
+	 *  name right after the '<' token and probably treating this like a
+	 *  'call' instruction.
+	 */
 	target = ab->offsets[cursor->ops.target.offset];
+	if (target == NULL) {
+		ui_helpline__printf("WARN: jump target inconsistency, press 'o', ab->offsets[%#x] = NULL\n",
+				    cursor->ops.target.offset);
+		return;
+	}
 
 	bcursor = browser_line(&cursor->al);
 	btarget = browser_line(target);

+ 9 - 6
tools/perf/util/auxtrace.c

@@ -60,6 +60,12 @@
 #include "sane_ctype.h"
 #include "symbol/kallsyms.h"
 
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+	return !session->itrace_synth_opts ||
+	       session->itrace_synth_opts->dont_decode;
+}
+
 int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
 			struct auxtrace_mmap_params *mp,
 			void *userpg, int fd)
@@ -762,6 +768,9 @@ int auxtrace_queues__process_index(struct auxtrace_queues *queues,
 	size_t i;
 	int err;
 
+	if (auxtrace__dont_decode(session))
+		return 0;
+
 	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
 		for (i = 0; i < auxtrace_index->nr; i++) {
 			ent = &auxtrace_index->entries[i];
@@ -892,12 +901,6 @@ out_free:
 	return err;
 }
 
-static bool auxtrace__dont_decode(struct perf_session *session)
-{
-	return !session->itrace_synth_opts ||
-	       session->itrace_synth_opts->dont_decode;
-}
-
 int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
 				      union perf_event *event,
 				      struct perf_session *session)

+ 6 - 2
tools/perf/util/record.c

@@ -137,6 +137,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
 	struct perf_evsel *evsel;
 	bool use_sample_identifier = false;
 	bool use_comm_exec;
+	bool sample_id = opts->sample_id;
 
 	/*
 	 * Set the evsel leader links before we configure attributes,
@@ -163,8 +164,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
 		 * match the id.
 		 */
 		use_sample_identifier = perf_can_sample_identifier();
-		evlist__for_each_entry(evlist, evsel)
-			perf_evsel__set_sample_id(evsel, use_sample_identifier);
+		sample_id = true;
 	} else if (evlist->nr_entries > 1) {
 		struct perf_evsel *first = perf_evlist__first(evlist);
 
@@ -174,6 +174,10 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
 			use_sample_identifier = perf_can_sample_identifier();
 			break;
 		}
+		sample_id = true;
+	}
+
+	if (sample_id) {
 		evlist__for_each_entry(evlist, evsel)
 			perf_evsel__set_sample_id(evsel, use_sample_identifier);
 	}

+ 5 - 4
tools/perf/util/trigger.h

@@ -12,7 +12,7 @@
  * States and transits:
  *
  *
- *  OFF--(on)--> READY --(hit)--> HIT
+ *  OFF--> ON --> READY --(hit)--> HIT
  *                 ^               |
  *                 |            (ready)
  *                 |               |
@@ -27,8 +27,9 @@ struct trigger {
 	volatile enum {
 		TRIGGER_ERROR		= -2,
 		TRIGGER_OFF		= -1,
-		TRIGGER_READY		= 0,
-		TRIGGER_HIT		= 1,
+		TRIGGER_ON		= 0,
+		TRIGGER_READY		= 1,
+		TRIGGER_HIT		= 2,
 	} state;
 	const char *name;
 };
@@ -50,7 +51,7 @@ static inline bool trigger_is_error(struct trigger *t)
 static inline void trigger_on(struct trigger *t)
 {
 	TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
-	t->state = TRIGGER_READY;
+	t->state = TRIGGER_ON;
 }
 
 static inline void trigger_ready(struct trigger *t)

+ 6 - 5
tools/testing/selftests/x86/test_vsyscall.c

@@ -450,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
 		num_vsyscall_traps++;
 }
 
-static int test_native_vsyscall(void)
+static int test_emulation(void)
 {
 	time_t tmp;
 	bool is_native;
@@ -458,7 +458,7 @@ static int test_native_vsyscall(void)
 	if (!vtime)
 		return 0;
 
-	printf("[RUN]\tchecking for native vsyscall\n");
+	printf("[RUN]\tchecking that vsyscalls are emulated\n");
 	sethandler(SIGTRAP, sigtrap, 0);
 	set_eflags(get_eflags() | X86_EFLAGS_TF);
 	vtime(&tmp);
@@ -474,11 +474,12 @@ static int test_native_vsyscall(void)
 	 */
 	is_native = (num_vsyscall_traps > 1);
 
-	printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+	printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+	       (is_native ? "FAIL" : "OK"),
 	       (is_native ? "native" : "emulated"),
 	       (int)num_vsyscall_traps);
 
-	return 0;
+	return is_native;
 }
 #endif
 
@@ -498,7 +499,7 @@ int main(int argc, char **argv)
 	nerrs += test_vsys_r();
 
 #ifdef __x86_64__
-	nerrs += test_native_vsyscall();
+	nerrs += test_emulation();
 #endif
 
 	return nerrs ? 1 : 0;