Browse Source

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

David S. Miller 9 years ago
parent
commit
c07f30ad68
100 changed files with 928 additions and 518 deletions
  1. 1 1
      Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
  2. 6 1
      Documentation/devicetree/bindings/mtd/partition.txt
  3. 3 3
      Documentation/devicetree/bindings/net/cpsw.txt
  4. 8 0
      MAINTAINERS
  5. 1 1
      Makefile
  6. 1 0
      arch/arc/Kconfig
  7. 1 1
      arch/arc/Makefile
  8. 1 0
      arch/arc/boot/dts/axs10x_mb.dtsi
  9. 2 1
      arch/arc/boot/dts/nsim_hs.dts
  10. 0 2
      arch/arc/include/asm/cache.h
  11. 2 2
      arch/arc/include/asm/mach_desc.h
  12. 2 2
      arch/arc/include/asm/smp.h
  13. 0 4
      arch/arc/include/asm/unwind.h
  14. 13 2
      arch/arc/kernel/intc-arcv2.c
  15. 24 9
      arch/arc/kernel/irq.c
  16. 1 1
      arch/arc/kernel/mcip.c
  17. 9 23
      arch/arc/kernel/perf_event.c
  18. 0 1
      arch/arc/kernel/setup.c
  19. 4 4
      arch/arc/kernel/smp.c
  20. 34 23
      arch/arc/kernel/unwind.c
  21. 2 2
      arch/arc/mm/highmem.c
  22. 3 1
      arch/arc/mm/init.c
  23. 1 1
      arch/arm/boot/dts/imx6q-gw5400-a.dts
  24. 1 1
      arch/arm/boot/dts/imx6qdl-gw51xx.dtsi
  25. 1 1
      arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
  26. 1 1
      arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
  27. 1 1
      arch/arm/boot/dts/imx6qdl-gw54xx.dtsi
  28. 3 3
      arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
  29. 4 0
      arch/arm/boot/dts/omap4-duovero-parlor.dts
  30. 1 0
      arch/arm/boot/dts/sun6i-a31s-primo81.dts
  31. 1 1
      arch/arm/boot/dts/tegra124-nyan.dtsi
  32. 37 36
      arch/arm/kernel/sys_oabi-compat.c
  33. 2 0
      arch/arm/mach-omap2/Kconfig
  34. 6 0
      arch/arm/mach-omap2/timer.c
  35. 1 0
      arch/m32r/include/asm/Kbuild
  36. 9 1
      arch/m32r/include/asm/io.h
  37. 35 17
      arch/mips/include/asm/uaccess.h
  38. 0 2
      arch/mips/kernel/cps-vec.S
  39. 2 0
      arch/mips/kernel/mips_ksyms.c
  40. 2 0
      arch/mips/lib/memset.S
  41. 0 1
      arch/mips/pci/pci-rt2880.c
  42. 0 1
      arch/mips/pmcs-msp71xx/msp_setup.c
  43. 1 1
      arch/mips/sni/reset.c
  44. 2 2
      arch/mips/vdso/Makefile
  45. 52 12
      arch/parisc/kernel/signal.c
  46. 12 12
      arch/powerpc/include/asm/systbl.h
  47. 0 12
      arch/powerpc/include/uapi/asm/unistd.h
  48. 6 0
      arch/powerpc/kvm/book3s_hv.c
  49. 13 1
      arch/powerpc/platforms/powernv/opal-irqchip.c
  50. 1 1
      arch/powerpc/platforms/powernv/opal.c
  51. 12 5
      arch/s390/kernel/dis.c
  52. 1 0
      arch/sparc/include/asm/elf_64.h
  53. 6 1
      arch/sparc/include/uapi/asm/unistd.h
  54. 13 0
      arch/sparc/kernel/head_64.S
  55. 11 0
      arch/sparc/kernel/perf_event.c
  56. 7 1
      arch/sparc/kernel/rtrap_64.S
  57. 5 4
      arch/sparc/kernel/setup_64.c
  58. 10 9
      arch/sparc/kernel/systbls_32.S
  59. 10 8
      arch/sparc/kernel/systbls_64.S
  60. 8 0
      arch/sparc/lib/NG2copy_from_user.S
  61. 8 0
      arch/sparc/lib/NG2copy_to_user.S
  62. 62 56
      arch/sparc/lib/NG2memcpy.S
  63. 8 0
      arch/sparc/lib/NG4copy_from_user.S
  64. 8 0
      arch/sparc/lib/NG4copy_to_user.S
  65. 23 17
      arch/sparc/lib/NG4memcpy.S
  66. 8 0
      arch/sparc/lib/U1copy_from_user.S
  67. 8 0
      arch/sparc/lib/U1copy_to_user.S
  68. 27 21
      arch/sparc/lib/U1memcpy.S
  69. 8 0
      arch/sparc/lib/U3copy_from_user.S
  70. 8 0
      arch/sparc/lib/U3copy_to_user.S
  71. 46 40
      arch/sparc/lib/U3memcpy.S
  72. 8 0
      arch/x86/kvm/cpuid.h
  73. 19 6
      arch/x86/kvm/mtrr.c
  74. 2 2
      arch/x86/kvm/svm.c
  75. 4 3
      arch/x86/kvm/vmx.c
  76. 8 4
      arch/x86/kvm/x86.c
  77. 1 1
      arch/x86/um/signal.c
  78. 2 7
      arch/x86/xen/mmu.c
  79. 11 10
      arch/x86/xen/suspend.c
  80. 18 2
      block/blk-core.c
  81. 1 1
      block/blk-merge.c
  82. 30 31
      crypto/algif_skcipher.c
  83. 2 1
      drivers/acpi/processor_driver.c
  84. 22 11
      drivers/base/power/domain.c
  85. 7 8
      drivers/block/null_blk.c
  86. 10 5
      drivers/block/xen-blkback/blkback.c
  87. 4 4
      drivers/block/xen-blkback/common.h
  88. 4 4
      drivers/bus/sunxi-rsb.c
  89. 1 1
      drivers/cpufreq/Kconfig.arm
  90. 1 1
      drivers/cpufreq/intel_pstate.c
  91. 1 1
      drivers/cpufreq/scpi-cpufreq.c
  92. 1 1
      drivers/gpio/gpio-ath79.c
  93. 2 2
      drivers/gpio/gpio-generic.c
  94. 7 1
      drivers/gpio/gpiolib.c
  95. 2 1
      drivers/gpu/drm/amd/amdgpu/amdgpu.h
  96. 42 21
      drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
  97. 3 0
      drivers/gpu/drm/exynos/exynos_drm_crtc.c
  98. 20 8
      drivers/gpu/drm/i915/i915_drv.h
  99. 84 27
      drivers/gpu/drm/i915/i915_gem.c
  100. 1 0
      drivers/gpu/drm/i915/i915_gem_gtt.c

+ 1 - 1
Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt

@@ -12,7 +12,7 @@ Each key is represented as a sub-node of "allwinner,sun4i-a10-lradc-keys":
 Required subnode-properties:
 	- label: Descriptive name of the key.
 	- linux,code: Keycode to emit.
-	- channel: Channel this key is attached to, mut be 0 or 1.
+	- channel: Channel this key is attached to, must be 0 or 1.
 	- voltage: Voltage in µV at lradc input when this key is pressed.
 
 Example:

+ 6 - 1
Documentation/devicetree/bindings/mtd/partition.txt

@@ -6,7 +6,9 @@ used for what purposes, but which don't use an on-flash partition table such
 as RedBoot.
 
 The partition table should be a subnode of the mtd node and should be named
-'partitions'. Partitions are defined in subnodes of the partitions node.
+'partitions'. This node should have the following property:
+- compatible : (required) must be "fixed-partitions"
+Partitions are then defined in subnodes of the partitions node.
 
 For backwards compatibility partitions as direct subnodes of the mtd device are
 supported. This use is discouraged.
@@ -36,6 +38,7 @@ Examples:
 
 flash@0 {
 	partitions {
+		compatible = "fixed-partitions";
 		#address-cells = <1>;
 		#size-cells = <1>;
 
@@ -53,6 +56,7 @@ flash@0 {
 
 flash@1 {
 	partitions {
+		compatible = "fixed-partitions";
 		#address-cells = <1>;
 		#size-cells = <2>;
 
@@ -66,6 +70,7 @@ flash@1 {
 
 flash@2 {
 	partitions {
+		compatible = "fixed-partitions";
 		#address-cells = <2>;
 		#size-cells = <2>;
 

+ 3 - 3
Documentation/devicetree/bindings/net/cpsw.txt

@@ -40,18 +40,18 @@ Optional properties:
 
 Slave Properties:
 Required properties:
-- phy_id		: Specifies slave phy id
 - phy-mode		: See ethernet.txt file in the same directory
 
 Optional properties:
 - dual_emac_res_vlan	: Specifies VID to be used to segregate the ports
 - mac-address		: See ethernet.txt file in the same directory
+- phy_id		: Specifies slave phy id
 - phy-handle		: See ethernet.txt file in the same directory
 
 Slave sub-nodes:
 - fixed-link		: See fixed-link.txt file in the same directory
-			  Either the properties phy_id and phy-mode,
-			  or the sub-node fixed-link can be specified
+			  Either the property phy_id, or the sub-node
+			  fixed-link can be specified
 
 Note: "ti,hwmods" field is used to fetch the base address and irq
 resources from TI, omap hwmod data base during device registration.

+ 8 - 0
MAINTAINERS

@@ -8403,6 +8403,14 @@ L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/pinctrl/samsung/
 
+PIN CONTROLLER - SINGLE
+M:	Tony Lindgren <tony@atomide.com>
+M:	Haojian Zhuang <haojian.zhuang@linaro.org>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	drivers/pinctrl/pinctrl-single.c
+
 PIN CONTROLLER - ST SPEAR
 M:	Viresh Kumar <vireshk@kernel.org>
 L:	spear-devel@list.st.com

+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 4
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc7
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*

+ 1 - 0
arch/arc/Kconfig

@@ -445,6 +445,7 @@ config LINUX_LINK_BASE
 	  However some customers have peripherals mapped at this addr, so
 	  Linux needs to be scooted a bit.
 	  If you don't know what the above means, leave this setting alone.
+	  This needs to match memory start address specified in Device Tree
 
 config HIGHMEM
 	bool "High Memory Support"

+ 1 - 1
arch/arc/Makefile

@@ -81,7 +81,7 @@ endif
 LIBGCC	:= $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
 
 # Modules with short calls might break for calls into builtin-kernel
-KBUILD_CFLAGS_MODULE	+= -mlong-calls
+KBUILD_CFLAGS_MODULE	+= -mlong-calls -mno-millicode
 
 # Finally dump eveything into kernel build system
 KBUILD_CFLAGS	+= $(cflags-y)

+ 1 - 0
arch/arc/boot/dts/axs10x_mb.dtsi

@@ -46,6 +46,7 @@
 			snps,pbl = < 32 >;
 			clocks = <&apbclk>;
 			clock-names = "stmmaceth";
+			max-speed = <100>;
 		};
 
 		ehci@0x40000 {

+ 2 - 1
arch/arc/boot/dts/nsim_hs.dts

@@ -17,7 +17,8 @@
 
 	memory {
 		device_type = "memory";
-		reg = <0x0 0x80000000 0x0 0x40000000	/* 1 GB low mem */
+		/* CONFIG_LINUX_LINK_BASE needs to match low mem start */
+		reg = <0x0 0x80000000 0x0 0x20000000	/* 512 MB low mem */
 		       0x1 0x00000000 0x0 0x40000000>;	/* 1 GB highmem */
 	};
 

+ 0 - 2
arch/arc/include/asm/cache.h

@@ -62,9 +62,7 @@ extern int ioc_exists;
 #define ARC_REG_IC_IVIC		0x10
 #define ARC_REG_IC_CTRL		0x11
 #define ARC_REG_IC_IVIL		0x19
-#if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
 #define ARC_REG_IC_PTAG		0x1E
-#endif
 #define ARC_REG_IC_PTAG_HI	0x1F
 
 /* Bit val in IC_CTRL */

+ 2 - 2
arch/arc/include/asm/mach_desc.h

@@ -23,7 +23,7 @@
  * @dt_compat:		Array of device tree 'compatible' strings
  * 			(XXX: although only 1st entry is looked at)
  * @init_early:		Very early callback [called from setup_arch()]
- * @init_cpu_smp:	for each CPU as it is coming up (SMP as well as UP)
+ * @init_per_cpu:	for each CPU as it is coming up (SMP as well as UP)
  * 			[(M):init_IRQ(), (o):start_kernel_secondary()]
  * @init_machine:	arch initcall level callback (e.g. populate static
  * 			platform devices or parse Devicetree)
@@ -35,7 +35,7 @@ struct machine_desc {
 	const char		**dt_compat;
 	void			(*init_early)(void);
 #ifdef CONFIG_SMP
-	void			(*init_cpu_smp)(unsigned int);
+	void			(*init_per_cpu)(unsigned int);
 #endif
 	void			(*init_machine)(void);
 	void			(*init_late)(void);

+ 2 - 2
arch/arc/include/asm/smp.h

@@ -48,7 +48,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq);
  * @init_early_smp:	A SMP specific h/w block can init itself
  * 			Could be common across platforms so not covered by
  * 			mach_desc->init_early()
- * @init_irq_cpu:	Called for each core so SMP h/w block driver can do
+ * @init_per_cpu:	Called for each core so SMP h/w block driver can do
  * 			any needed setup per cpu (e.g. IPI request)
  * @cpu_kick:		For Master to kickstart a cpu (optionally at a PC)
  * @ipi_send:		To send IPI to a @cpu
@@ -57,7 +57,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq);
 struct plat_smp_ops {
 	const char 	*info;
 	void		(*init_early_smp)(void);
-	void		(*init_irq_cpu)(int cpu);
+	void		(*init_per_cpu)(int cpu);
 	void		(*cpu_kick)(int cpu, unsigned long pc);
 	void		(*ipi_send)(int cpu);
 	void		(*ipi_clear)(int irq);

+ 0 - 4
arch/arc/include/asm/unwind.h

@@ -112,7 +112,6 @@ struct unwind_frame_info {
 
 extern int arc_unwind(struct unwind_frame_info *frame);
 extern void arc_unwind_init(void);
-extern void arc_unwind_setup(void);
 extern void *unwind_add_table(struct module *module, const void *table_start,
 			      unsigned long table_size);
 extern void unwind_remove_table(void *handle, int init_only);
@@ -152,9 +151,6 @@ static inline void arc_unwind_init(void)
 {
 }
 
-static inline void arc_unwind_setup(void)
-{
-}
 #define unwind_add_table(a, b, c)
 #define unwind_remove_table(a, b)
 

+ 13 - 2
arch/arc/kernel/intc-arcv2.c

@@ -106,10 +106,21 @@ static struct irq_chip arcv2_irq_chip = {
 static int arcv2_irq_map(struct irq_domain *d, unsigned int irq,
 			 irq_hw_number_t hw)
 {
-	if (irq == TIMER0_IRQ || irq == IPI_IRQ)
+	/*
+	 * core intc IRQs [16, 23]:
+	 * Statically assigned always private-per-core (Timers, WDT, IPI, PCT)
+	 */
+	if (hw < 24) {
+		/*
+		 * A subsequent request_percpu_irq() fails if percpu_devid is
+		 * not set. That in turns sets NOAUTOEN, meaning each core needs
+		 * to call enable_percpu_irq()
+		 */
+		irq_set_percpu_devid(irq);
 		irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq);
-	else
+	} else {
 		irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq);
+	}
 
 	return 0;
 }

+ 24 - 9
arch/arc/kernel/irq.c

@@ -29,11 +29,11 @@ void __init init_IRQ(void)
 
 #ifdef CONFIG_SMP
 	/* a SMP H/w block could do IPI IRQ request here */
-	if (plat_smp_ops.init_irq_cpu)
-		plat_smp_ops.init_irq_cpu(smp_processor_id());
+	if (plat_smp_ops.init_per_cpu)
+		plat_smp_ops.init_per_cpu(smp_processor_id());
 
-	if (machine_desc->init_cpu_smp)
-		machine_desc->init_cpu_smp(smp_processor_id());
+	if (machine_desc->init_per_cpu)
+		machine_desc->init_per_cpu(smp_processor_id());
 #endif
 }
 
@@ -51,6 +51,18 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
+/*
+ * API called for requesting percpu interrupts - called by each CPU
+ *  - For boot CPU, actually request the IRQ with genirq core + enables
+ *  - For subsequent callers only enable called locally
+ *
+ * Relies on being called by boot cpu first (i.e. request called ahead) of
+ * any enable as expected by genirq. Hence Suitable only for TIMER, IPI
+ * which are guaranteed to be setup on boot core first.
+ * Late probed peripherals such as perf can't use this as there no guarantee
+ * of being called on boot CPU first.
+ */
+
 void arc_request_percpu_irq(int irq, int cpu,
                             irqreturn_t (*isr)(int irq, void *dev),
                             const char *irq_nm,
@@ -60,14 +72,17 @@ void arc_request_percpu_irq(int irq, int cpu,
 	if (!cpu) {
 		int rc;
 
+#ifdef CONFIG_ISA_ARCOMPACT
 		/*
-		 * These 2 calls are essential to making percpu IRQ APIs work
-		 * Ideally these details could be hidden in irq chip map function
-		 * but the issue is IPIs IRQs being static (non-DT) and platform
-		 * specific, so we can't identify them there.
+		 * A subsequent request_percpu_irq() fails if percpu_devid is
+		 * not set. That in turns sets NOAUTOEN, meaning each core needs
+		 * to call enable_percpu_irq()
+		 *
+		 * For ARCv2, this is done in irq map function since we know
+		 * which irqs are strictly per cpu
 		 */
 		irq_set_percpu_devid(irq);
-		irq_modify_status(irq, IRQ_NOAUTOEN, 0);  /* @irq, @clr, @set */
+#endif
 
 		rc = request_percpu_irq(irq, isr, irq_nm, percpu_dev);
 		if (rc)

+ 1 - 1
arch/arc/kernel/mcip.c

@@ -132,7 +132,7 @@ static void mcip_probe_n_setup(void)
 struct plat_smp_ops plat_smp_ops = {
 	.info		= smp_cpuinfo_buf,
 	.init_early_smp	= mcip_probe_n_setup,
-	.init_irq_cpu	= mcip_setup_per_cpu,
+	.init_per_cpu	= mcip_setup_per_cpu,
 	.ipi_send	= mcip_ipi_send,
 	.ipi_clear	= mcip_ipi_clear,
 };

+ 9 - 23
arch/arc/kernel/perf_event.c

@@ -428,12 +428,11 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
 
 #endif /* CONFIG_ISA_ARCV2 */
 
-void arc_cpu_pmu_irq_init(void)
+static void arc_cpu_pmu_irq_init(void *data)
 {
-	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+	int irq = *(int *)data;
 
-	arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr,
-			       "ARC perf counters", pmu_cpu);
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
 
 	/* Clear all pending interrupt flags */
 	write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
@@ -515,7 +514,6 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 
 	if (has_interrupts) {
 		int irq = platform_get_irq(pdev, 0);
-		unsigned long flags;
 
 		if (irq < 0) {
 			pr_err("Cannot get IRQ number for the platform\n");
@@ -524,24 +522,12 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 
 		arc_pmu->irq = irq;
 
-		/*
-		 * arc_cpu_pmu_irq_init() needs to be called on all cores for
-		 * their respective local PMU.
-		 * However we use opencoded on_each_cpu() to ensure it is called
-		 * on core0 first, so that arc_request_percpu_irq() sets up
-		 * AUTOEN etc. Otherwise enable_percpu_irq() fails to enable
-		 * perf IRQ on non master cores.
-		 * see arc_request_percpu_irq()
-		 */
-		preempt_disable();
-		local_irq_save(flags);
-		arc_cpu_pmu_irq_init();
-		local_irq_restore(flags);
-		smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1);
-		preempt_enable();
-
-		/* Clean all pending interrupt flags */
-		write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
+		/* intc map function ensures irq_set_percpu_devid() called */
+		request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
+				   this_cpu_ptr(&arc_pmu_cpu));
+
+		on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
+
 	} else
 		arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 

+ 0 - 1
arch/arc/kernel/setup.c

@@ -429,7 +429,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	arc_unwind_init();
-	arc_unwind_setup();
 }
 
 static int __init customize_machine(void)

+ 4 - 4
arch/arc/kernel/smp.c

@@ -132,11 +132,11 @@ void start_kernel_secondary(void)
 	pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
 
 	/* Some SMP H/w setup - for each cpu */
-	if (plat_smp_ops.init_irq_cpu)
-		plat_smp_ops.init_irq_cpu(cpu);
+	if (plat_smp_ops.init_per_cpu)
+		plat_smp_ops.init_per_cpu(cpu);
 
-	if (machine_desc->init_cpu_smp)
-		machine_desc->init_cpu_smp(cpu);
+	if (machine_desc->init_per_cpu)
+		machine_desc->init_per_cpu(cpu);
 
 	arc_local_timer_setup();
 

+ 34 - 23
arch/arc/kernel/unwind.c

@@ -170,6 +170,23 @@ static struct unwind_table *find_table(unsigned long pc)
 
 static unsigned long read_pointer(const u8 **pLoc,
 				  const void *end, signed ptrType);
+static void init_unwind_hdr(struct unwind_table *table,
+			    void *(*alloc) (unsigned long));
+
+/*
+ * wrappers for header alloc (vs. calling one vs. other at call site)
+ * to elide section mismatches warnings
+ */
+static void *__init unw_hdr_alloc_early(unsigned long sz)
+{
+	return __alloc_bootmem_nopanic(sz, sizeof(unsigned int),
+				       MAX_DMA_ADDRESS);
+}
+
+static void *unw_hdr_alloc(unsigned long sz)
+{
+	return kmalloc(sz, GFP_KERNEL);
+}
 
 static void init_unwind_table(struct unwind_table *table, const char *name,
 			      const void *core_start, unsigned long core_size,
@@ -209,6 +226,8 @@ void __init arc_unwind_init(void)
 			  __start_unwind, __end_unwind - __start_unwind,
 			  NULL, 0);
 	  /*__start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);*/
+
+	init_unwind_hdr(&root_table, unw_hdr_alloc_early);
 }
 
 static const u32 bad_cie, not_fde;
@@ -241,8 +260,8 @@ static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
 	e2->fde = v;
 }
 
-static void __init setup_unwind_table(struct unwind_table *table,
-				      void *(*alloc) (unsigned long))
+static void init_unwind_hdr(struct unwind_table *table,
+			    void *(*alloc) (unsigned long))
 {
 	const u8 *ptr;
 	unsigned long tableSize = table->size, hdrSize;
@@ -277,10 +296,10 @@ static void __init setup_unwind_table(struct unwind_table *table,
 		if (cie == &not_fde)
 			continue;
 		if (cie == NULL || cie == &bad_cie)
-			return;
+			goto ret_err;
 		ptrType = fde_pointer_type(cie);
 		if (ptrType < 0)
-			return;
+			goto ret_err;
 
 		ptr = (const u8 *)(fde + 2);
 		if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde,
@@ -296,13 +315,15 @@ static void __init setup_unwind_table(struct unwind_table *table,
 	}
 
 	if (tableSize || !n)
-		return;
+		goto ret_err;
 
 	hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
 	    + 2 * n * sizeof(unsigned long);
+
 	header = alloc(hdrSize);
 	if (!header)
-		return;
+		goto ret_err;
+
 	header->version = 1;
 	header->eh_frame_ptr_enc = DW_EH_PE_abs | DW_EH_PE_native;
 	header->fde_count_enc = DW_EH_PE_abs | DW_EH_PE_data4;
@@ -340,18 +361,10 @@ static void __init setup_unwind_table(struct unwind_table *table,
 	table->hdrsz = hdrSize;
 	smp_wmb();
 	table->header = (const void *)header;
-}
-
-static void *__init balloc(unsigned long sz)
-{
-	return __alloc_bootmem_nopanic(sz,
-				       sizeof(unsigned int),
-				       __pa(MAX_DMA_ADDRESS));
-}
+	return;
 
-void __init arc_unwind_setup(void)
-{
-	setup_unwind_table(&root_table, balloc);
+ret_err:
+	panic("Attention !!! Dwarf FDE parsing errors\n");;
 }
 
 #ifdef CONFIG_MODULES
@@ -377,6 +390,8 @@ void *unwind_add_table(struct module *module, const void *table_start,
 			  table_start, table_size,
 			  NULL, 0);
 
+	init_unwind_hdr(table, unw_hdr_alloc);
+
 #ifdef UNWIND_DEBUG
 	unw_debug("Table added for [%s] %lx %lx\n",
 		module->name, table->core.pc, table->core.range);
@@ -439,6 +454,7 @@ void unwind_remove_table(void *handle, int init_only)
 	info.init_only = init_only;
 
 	unlink_table(&info); /* XXX: SMP */
+	kfree(table->header);
 	kfree(table);
 }
 
@@ -588,9 +604,6 @@ static signed fde_pointer_type(const u32 *cie)
 	const u8 *ptr = (const u8 *)(cie + 2);
 	unsigned version = *ptr;
 
-	if (version != 1)
-		return -1;	/* unsupported */
-
 	if (*++ptr) {
 		const char *aug;
 		const u8 *end = (const u8 *)(cie + 1) + *cie;
@@ -1002,9 +1015,7 @@ int arc_unwind(struct unwind_frame_info *frame)
 		ptr = (const u8 *)(cie + 2);
 		end = (const u8 *)(cie + 1) + *cie;
 		frame->call_frame = 1;
-		if ((state.version = *ptr) != 1)
-			cie = NULL;	/* unsupported version */
-		else if (*++ptr) {
+		if (*++ptr) {
 			/* check if augmentation size is first (thus present) */
 			if (*ptr == 'z') {
 				while (++ptr < end && *ptr) {

+ 2 - 2
arch/arc/mm/highmem.c

@@ -111,7 +111,7 @@ void __kunmap_atomic(void *kv)
 }
 EXPORT_SYMBOL(__kunmap_atomic);
 
-noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr)
+static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
 {
 	pgd_t *pgd_k;
 	pud_t *pud_k;
@@ -127,7 +127,7 @@ noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr)
 	return pte_k;
 }
 
-void kmap_init(void)
+void __init kmap_init(void)
 {
 	/* Due to recursive include hell, we can't do this in processor.h */
 	BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE));

+ 3 - 1
arch/arc/mm/init.c

@@ -51,7 +51,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	int in_use = 0;
 
 	if (!low_mem_sz) {
-		BUG_ON(base != low_mem_start);
+		if (base != low_mem_start)
+			panic("CONFIG_LINUX_LINK_BASE != DT memory { }");
+
 		low_mem_sz = size;
 		in_use = 1;
 	} else {

+ 1 - 1
arch/arm/boot/dts/imx6q-gw5400-a.dts

@@ -154,7 +154,7 @@
 &fec {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };

+ 1 - 1
arch/arm/boot/dts/imx6qdl-gw51xx.dtsi

@@ -94,7 +94,7 @@
 &fec {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };

+ 1 - 1
arch/arm/boot/dts/imx6qdl-gw52xx.dtsi

@@ -154,7 +154,7 @@
 &fec {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };

+ 1 - 1
arch/arm/boot/dts/imx6qdl-gw53xx.dtsi

@@ -155,7 +155,7 @@
 &fec {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };

+ 1 - 1
arch/arm/boot/dts/imx6qdl-gw54xx.dtsi

@@ -145,7 +145,7 @@
 &fec {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };

+ 3 - 3
arch/arm/boot/dts/imx6qdl-sabreauto.dtsi

@@ -113,14 +113,14 @@
 &clks {
 	assigned-clocks = <&clks IMX6QDL_PLL4_BYPASS_SRC>,
 			  <&clks IMX6QDL_PLL4_BYPASS>,
-			  <&clks IMX6QDL_CLK_PLL4_POST_DIV>,
 			  <&clks IMX6QDL_CLK_LDB_DI0_SEL>,
-			  <&clks IMX6QDL_CLK_LDB_DI1_SEL>;
+			  <&clks IMX6QDL_CLK_LDB_DI1_SEL>,
+			  <&clks IMX6QDL_CLK_PLL4_POST_DIV>;
 	assigned-clock-parents = <&clks IMX6QDL_CLK_LVDS2_IN>,
 				 <&clks IMX6QDL_PLL4_BYPASS_SRC>,
 				 <&clks IMX6QDL_CLK_PLL3_USB_OTG>,
 				 <&clks IMX6QDL_CLK_PLL3_USB_OTG>;
-	assigned-clock-rates = <0>, <0>, <24576000>;
+	assigned-clock-rates = <0>, <0>, <0>, <0>, <24576000>;
 };
 
 &ecspi1 {

+ 4 - 0
arch/arm/boot/dts/omap4-duovero-parlor.dts

@@ -189,3 +189,7 @@
 	};
 };
 
+&uart3 {
+	interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
+			       &omap4_pmx_core OMAP4_UART3_RX>;
+};

+ 1 - 0
arch/arm/boot/dts/sun6i-a31s-primo81.dts

@@ -83,6 +83,7 @@
 		reg = <0x5d>;
 		interrupt-parent = <&pio>;
 		interrupts = <0 3 IRQ_TYPE_LEVEL_HIGH>; /* PA3 */
+		touchscreen-swapped-x-y;
 	};
 };
 

+ 1 - 1
arch/arm/boot/dts/tegra124-nyan.dtsi

@@ -399,7 +399,7 @@
 
 	/* CPU DFLL clock */
 	clock@0,70110000 {
-		status = "okay";
+		status = "disabled";
 		vdd-cpu-supply = <&vdd_cpu>;
 		nvidia,i2c-fs-rate = <400000>;
 	};

+ 37 - 36
arch/arm/kernel/sys_oabi-compat.c

@@ -193,15 +193,44 @@ struct oabi_flock64 {
 	pid_t	l_pid;
 } __attribute__ ((packed,aligned(4)));
 
-asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
+static long do_locks(unsigned int fd, unsigned int cmd,
 				 unsigned long arg)
 {
-	struct oabi_flock64 user;
 	struct flock64 kernel;
-	mm_segment_t fs = USER_DS; /* initialized to kill a warning */
-	unsigned long local_arg = arg;
-	int ret;
+	struct oabi_flock64 user;
+	mm_segment_t fs;
+	long ret;
+
+	if (copy_from_user(&user, (struct oabi_flock64 __user *)arg,
+			   sizeof(user)))
+		return -EFAULT;
+	kernel.l_type	= user.l_type;
+	kernel.l_whence	= user.l_whence;
+	kernel.l_start	= user.l_start;
+	kernel.l_len	= user.l_len;
+	kernel.l_pid	= user.l_pid;
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	ret = sys_fcntl64(fd, cmd, (unsigned long)&kernel);
+	set_fs(fs);
+
+	if (!ret && (cmd == F_GETLK64 || cmd == F_OFD_GETLK)) {
+		user.l_type	= kernel.l_type;
+		user.l_whence	= kernel.l_whence;
+		user.l_start	= kernel.l_start;
+		user.l_len	= kernel.l_len;
+		user.l_pid	= kernel.l_pid;
+		if (copy_to_user((struct oabi_flock64 __user *)arg,
+				 &user, sizeof(user)))
+			ret = -EFAULT;
+	}
+	return ret;
+}
 
+asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
+				 unsigned long arg)
+{
 	switch (cmd) {
 	case F_OFD_GETLK:
 	case F_OFD_SETLK:
@@ -209,39 +238,11 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
 	case F_GETLK64:
 	case F_SETLK64:
 	case F_SETLKW64:
-		if (copy_from_user(&user, (struct oabi_flock64 __user *)arg,
-				   sizeof(user)))
-			return -EFAULT;
-		kernel.l_type	= user.l_type;
-		kernel.l_whence	= user.l_whence;
-		kernel.l_start	= user.l_start;
-		kernel.l_len	= user.l_len;
-		kernel.l_pid	= user.l_pid;
-		local_arg = (unsigned long)&kernel;
-		fs = get_fs();
-		set_fs(KERNEL_DS);
-	}
-
-	ret = sys_fcntl64(fd, cmd, local_arg);
+		return do_locks(fd, cmd, arg);
 
-	switch (cmd) {
-	case F_GETLK64:
-		if (!ret) {
-			user.l_type	= kernel.l_type;
-			user.l_whence	= kernel.l_whence;
-			user.l_start	= kernel.l_start;
-			user.l_len	= kernel.l_len;
-			user.l_pid	= kernel.l_pid;
-			if (copy_to_user((struct oabi_flock64 __user *)arg,
-					 &user, sizeof(user)))
-				ret = -EFAULT;
-		}
-	case F_SETLK64:
-	case F_SETLKW64:
-		set_fs(fs);
+	default:
+		return sys_fcntl64(fd, cmd, arg);
 	}
-
-	return ret;
 }
 
 struct oabi_epoll_event {

+ 2 - 0
arch/arm/mach-omap2/Kconfig

@@ -65,6 +65,8 @@ config SOC_AM43XX
 	select MACH_OMAP_GENERIC
 	select MIGHT_HAVE_CACHE_L2X0
 	select HAVE_ARM_SCU
+	select GENERIC_CLOCKEVENTS_BROADCAST
+	select HAVE_ARM_TWD
 
 config SOC_DRA7XX
 	bool "TI DRA7XX"

+ 6 - 0
arch/arm/mach-omap2/timer.c

@@ -320,6 +320,12 @@ static int __init omap_dm_timer_init_one(struct omap_dm_timer *timer,
 	return r;
 }
 
+#if !defined(CONFIG_SMP) && defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)
+void tick_broadcast(const struct cpumask *mask)
+{
+}
+#endif
+
 static void __init omap2_gp_clockevent_init(int gptimer_id,
 						const char *fck_source,
 						const char *property)

+ 1 - 0
arch/m32r/include/asm/Kbuild

@@ -3,6 +3,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += module.h

+ 9 - 1
arch/m32r/include/asm/io.h

@@ -168,13 +168,21 @@ static inline void _writel(unsigned long l, unsigned long addr)
 #define writew_relaxed writew
 #define writel_relaxed writel
 
-#define ioread8 read
+#define ioread8 readb
 #define ioread16 readw
 #define ioread32 readl
 #define iowrite8 writeb
 #define iowrite16 writew
 #define iowrite32 writel
 
+#define ioread8_rep(p, dst, count) insb((unsigned long)(p), (dst), (count))
+#define ioread16_rep(p, dst, count) insw((unsigned long)(p), (dst), (count))
+#define ioread32_rep(p, dst, count) insl((unsigned long)(p), (dst), (count))
+
+#define iowrite8_rep(p, src, count) outsb((unsigned long)(p), (src), (count))
+#define iowrite16_rep(p, src, count) outsw((unsigned long)(p), (src), (count))
+#define iowrite32_rep(p, src, count) outsl((unsigned long)(p), (src), (count))
+
 #define ioread16be(addr)	be16_to_cpu(readw(addr))
 #define ioread32be(addr)	be32_to_cpu(readl(addr))
 #define iowrite16be(v, addr)	writew(cpu_to_be16(v), (addr))

+ 35 - 17
arch/mips/include/asm/uaccess.h

@@ -599,7 +599,7 @@ extern void __put_user_unknown(void);
  * On error, the variable @x is set to zero.
  */
 #define __get_user_unaligned(x,ptr) \
-	__get_user__unalignednocheck((x),(ptr),sizeof(*(ptr)))
+	__get_user_unaligned_nocheck((x),(ptr),sizeof(*(ptr)))
 
 /*
  * Yuck.  We need two variants, one for 64bit operation and one
@@ -620,8 +620,8 @@ extern void __get_user_unaligned_unknown(void);
 do {									\
 	switch (size) {							\
 	case 1: __get_data_asm(val, "lb", ptr); break;			\
-	case 2: __get_user_unaligned_asm(val, "ulh", ptr); break;	\
-	case 4: __get_user_unaligned_asm(val, "ulw", ptr); break;	\
+	case 2: __get_data_unaligned_asm(val, "ulh", ptr); break;	\
+	case 4: __get_data_unaligned_asm(val, "ulw", ptr); break;	\
 	case 8: __GET_USER_UNALIGNED_DW(val, ptr); break;		\
 	default: __get_user_unaligned_unknown(); break;			\
 	}								\
@@ -1122,9 +1122,15 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
 	__cu_to = (to);							\
 	__cu_from = (from);						\
 	__cu_len = (n);							\
-	might_fault();							\
-	__cu_len = __invoke_copy_from_user(__cu_to, __cu_from,		\
-					   __cu_len);			\
+	if (eva_kernel_access()) {					\
+		__cu_len = __invoke_copy_from_kernel(__cu_to,		\
+						     __cu_from,		\
+						     __cu_len);		\
+	} else {							\
+		might_fault();						\
+		__cu_len = __invoke_copy_from_user(__cu_to, __cu_from,	\
+						   __cu_len);		\
+	}								\
 	__cu_len;							\
 })
 
@@ -1229,16 +1235,28 @@ __clear_user(void __user *addr, __kernel_size_t size)
 {
 	__kernel_size_t res;
 
-	might_fault();
-	__asm__ __volatile__(
-		"move\t$4, %1\n\t"
-		"move\t$5, $0\n\t"
-		"move\t$6, %2\n\t"
-		__MODULE_JAL(__bzero)
-		"move\t%0, $6"
-		: "=r" (res)
-		: "r" (addr), "r" (size)
-		: "$4", "$5", "$6", __UA_t0, __UA_t1, "$31");
+	if (eva_kernel_access()) {
+		__asm__ __volatile__(
+			"move\t$4, %1\n\t"
+			"move\t$5, $0\n\t"
+			"move\t$6, %2\n\t"
+			__MODULE_JAL(__bzero_kernel)
+			"move\t%0, $6"
+			: "=r" (res)
+			: "r" (addr), "r" (size)
+			: "$4", "$5", "$6", __UA_t0, __UA_t1, "$31");
+	} else {
+		might_fault();
+		__asm__ __volatile__(
+			"move\t$4, %1\n\t"
+			"move\t$5, $0\n\t"
+			"move\t$6, %2\n\t"
+			__MODULE_JAL(__bzero)
+			"move\t%0, $6"
+			: "=r" (res)
+			: "r" (addr), "r" (size)
+			: "$4", "$5", "$6", __UA_t0, __UA_t1, "$31");
+	}
 
 	return res;
 }
@@ -1384,7 +1402,7 @@ static inline long strlen_user(const char __user *s)
 		might_fault();
 		__asm__ __volatile__(
 			"move\t$4, %1\n\t"
-			__MODULE_JAL(__strlen_kernel_asm)
+			__MODULE_JAL(__strlen_user_asm)
 			"move\t%0, $2"
 			: "=r" (res)
 			: "r" (s)

+ 0 - 2
arch/mips/kernel/cps-vec.S

@@ -257,7 +257,6 @@ LEAF(mips_cps_core_init)
 	has_mt	t0, 3f
 
 	.set	push
-	.set	mips64r2
 	.set	mt
 
 	/* Only allow 1 TC per VPE to execute... */
@@ -376,7 +375,6 @@ LEAF(mips_cps_boot_vpes)
 	 nop
 
 	.set	push
-	.set	mips64r2
 	.set	mt
 
 1:	/* Enter VPE configuration state */

+ 2 - 0
arch/mips/kernel/mips_ksyms.c

@@ -17,6 +17,7 @@
 #include <asm/fpu.h>
 #include <asm/msa.h>
 
+extern void *__bzero_kernel(void *__s, size_t __count);
 extern void *__bzero(void *__s, size_t __count);
 extern long __strncpy_from_kernel_nocheck_asm(char *__to,
 					      const char *__from, long __len);
@@ -64,6 +65,7 @@ EXPORT_SYMBOL(__copy_from_user_eva);
 EXPORT_SYMBOL(__copy_in_user_eva);
 EXPORT_SYMBOL(__copy_to_user_eva);
 EXPORT_SYMBOL(__copy_user_inatomic_eva);
+EXPORT_SYMBOL(__bzero_kernel);
 #endif
 EXPORT_SYMBOL(__bzero);
 EXPORT_SYMBOL(__strncpy_from_kernel_nocheck_asm);

+ 2 - 0
arch/mips/lib/memset.S

@@ -283,6 +283,8 @@ LEAF(memset)
 1:
 #ifndef CONFIG_EVA
 FEXPORT(__bzero)
+#else
+FEXPORT(__bzero_kernel)
 #endif
 	__BUILD_BZERO LEGACY_MODE
 

+ 0 - 1
arch/mips/pci/pci-rt2880.c

@@ -221,7 +221,6 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 static int rt288x_pci_probe(struct platform_device *pdev)
 {
 	void __iomem *io_map_base;
-	int i;
 
 	rt2880_pci_base = ioremap_nocache(RT2880_PCI_BASE, PAGE_SIZE);
 

+ 0 - 1
arch/mips/pmcs-msp71xx/msp_setup.c

@@ -39,7 +39,6 @@ extern void msp_serial_setup(void);
 void msp7120_reset(void)
 {
 	void *start, *end, *iptr;
-	register int i;
 
 	/* Diasble all interrupts */
 	local_irq_disable();

+ 1 - 1
arch/mips/sni/reset.c

@@ -26,7 +26,7 @@ static inline void kb_wait(void)
 /* XXX This ends up at the ARC firmware prompt ...  */
 void sni_machine_restart(char *command)
 {
-	int i, j;
+	int i;
 
 	/* This does a normal via the keyboard controller like a PC.
 	   We can do that easier ...  */

+ 2 - 2
arch/mips/vdso/Makefile

@@ -26,8 +26,8 @@ aflags-vdso := $(ccflags-vdso) \
 # the comments on that file.
 #
 ifndef CONFIG_CPU_MIPSR6
-  ifeq ($(call ld-ifversion, -gt, 22400000, y),)
-    $(warning MIPS VDSO requires binutils > 2.24)
+  ifeq ($(call ld-ifversion, -lt, 22500000, y),)
+    $(warning MIPS VDSO requires binutils >= 2.25)
     obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y))
     ccflags-vdso += -DDISABLE_MIPS_VDSO
   endif

+ 52 - 12
arch/parisc/kernel/signal.c

@@ -435,6 +435,55 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall)
 		regs->gr[28]);
 }
 
+/*
+ * Check how the syscall number gets loaded into %r20 within
+ * the delay branch in userspace and adjust as needed.
+ */
+
+static void check_syscallno_in_delay_branch(struct pt_regs *regs)
+{
+	u32 opcode, source_reg;
+	u32 __user *uaddr;
+	int err;
+
+	/* Usually we don't have to restore %r20 (the system call number)
+	 * because it gets loaded in the delay slot of the branch external
+	 * instruction via the ldi instruction.
+	 * In some cases a register-to-register copy instruction might have
+	 * been used instead, in which case we need to copy the syscall
+	 * number into the source register before returning to userspace.
+	 */
+
+	/* A syscall is just a branch, so all we have to do is fiddle the
+	 * return pointer so that the ble instruction gets executed again.
+	 */
+	regs->gr[31] -= 8; /* delayed branching */
+
+	/* Get assembler opcode of code in delay branch */
+	uaddr = (unsigned int *) ((regs->gr[31] & ~3) + 4);
+	err = get_user(opcode, uaddr);
+	if (err)
+		return;
+
+	/* Check if delay branch uses "ldi int,%r20" */
+	if ((opcode & 0xffff0000) == 0x34140000)
+		return;	/* everything ok, just return */
+
+	/* Check if delay branch uses "nop" */
+	if (opcode == INSN_NOP)
+		return;
+
+	/* Check if delay branch uses "copy %rX,%r20" */
+	if ((opcode & 0xffe0ffff) == 0x08000254) {
+		source_reg = (opcode >> 16) & 31;
+		regs->gr[source_reg] = regs->gr[20];
+		return;
+	}
+
+	pr_warn("syscall restart: %s (pid %d): unexpected opcode 0x%08x\n",
+		current->comm, task_pid_nr(current), opcode);
+}
+
 static inline void
 syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
 {
@@ -457,10 +506,7 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
 		}
 		/* fallthrough */
 	case -ERESTARTNOINTR:
-		/* A syscall is just a branch, so all
-		 * we have to do is fiddle the return pointer.
-		 */
-		regs->gr[31] -= 8; /* delayed branching */
+		check_syscallno_in_delay_branch(regs);
 		break;
 	}
 }
@@ -510,15 +556,9 @@ insert_restart_trampoline(struct pt_regs *regs)
 	}
 	case -ERESTARTNOHAND:
 	case -ERESTARTSYS:
-	case -ERESTARTNOINTR: {
-		/* Hooray for delayed branching.  We don't
-		 * have to restore %r20 (the system call
-		 * number) because it gets loaded in the delay
-		 * slot of the branch external instruction.
-		 */
-		regs->gr[31] -= 8;
+	case -ERESTARTNOINTR:
+		check_syscallno_in_delay_branch(regs);
 		return;
-	}
 	default:
 		break;
 	}

+ 12 - 12
arch/powerpc/include/asm/systbl.h

@@ -370,16 +370,16 @@ COMPAT_SYS(execveat)
 PPC64ONLY(switch_endian)
 SYSCALL_SPU(userfaultfd)
 SYSCALL_SPU(membarrier)
-SYSCALL(semop)
-SYSCALL(semget)
-COMPAT_SYS(semctl)
-COMPAT_SYS(semtimedop)
-COMPAT_SYS(msgsnd)
-COMPAT_SYS(msgrcv)
-SYSCALL(msgget)
-COMPAT_SYS(msgctl)
-COMPAT_SYS(shmat)
-SYSCALL(shmdt)
-SYSCALL(shmget)
-COMPAT_SYS(shmctl)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
 SYSCALL(mlock2)

+ 0 - 12
arch/powerpc/include/uapi/asm/unistd.h

@@ -388,18 +388,6 @@
 #define __NR_switch_endian	363
 #define __NR_userfaultfd	364
 #define __NR_membarrier		365
-#define __NR_semop		366
-#define __NR_semget		367
-#define __NR_semctl		368
-#define __NR_semtimedop		369
-#define __NR_msgsnd		370
-#define __NR_msgrcv		371
-#define __NR_msgget		372
-#define __NR_msgctl		373
-#define __NR_shmat		374
-#define __NR_shmdt		375
-#define __NR_shmget		376
-#define __NR_shmctl		377
 #define __NR_mlock2		378
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */

+ 6 - 0
arch/powerpc/kvm/book3s_hv.c

@@ -224,6 +224,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 
 static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 {
+	/*
+	 * Check for illegal transactional state bit combination
+	 * and if we find it, force the TS field to a safe state.
+	 */
+	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
+		msr &= ~MSR_TS_MASK;
 	vcpu->arch.shregs.msr = msr;
 	kvmppc_end_cede(vcpu);
 }

+ 13 - 1
arch/powerpc/platforms/powernv/opal-irqchip.c

@@ -83,7 +83,19 @@ static void opal_event_unmask(struct irq_data *d)
 	set_bit(d->hwirq, &opal_event_irqchip.mask);
 
 	opal_poll_events(&events);
-	opal_handle_events(be64_to_cpu(events));
+	last_outstanding_events = be64_to_cpu(events);
+
+	/*
+	 * We can't just handle the events now with opal_handle_events().
+	 * If we did we would deadlock when opal_event_unmask() is called from
+	 * handle_level_irq() with the irq descriptor lock held, because
+	 * calling opal_handle_events() would call generic_handle_irq() and
+	 * then handle_level_irq() which would try to take the descriptor lock
+	 * again. Instead queue the events for later.
+	 */
+	if (last_outstanding_events & opal_event_irqchip.mask)
+		/* Need to retrigger the interrupt */
+		irq_work_queue(&opal_event_irq_work);
 }
 
 static int opal_event_set_type(struct irq_data *d, unsigned int flow_type)

+ 1 - 1
arch/powerpc/platforms/powernv/opal.c

@@ -278,7 +278,7 @@ static void opal_handle_message(void)
 
 	/* Sanity check */
 	if (type >= OPAL_MSG_TYPE_MAX) {
-		pr_warning("%s: Unknown message type: %u\n", __func__, type);
+		pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
 		return;
 	}
 	opal_message_do_notify(type, (void *)&msg);

+ 12 - 5
arch/s390/kernel/dis.c

@@ -1920,16 +1920,23 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
 			}
 			if (separator)
 				ptr += sprintf(ptr, "%c", separator);
+			/*
+			 * Use four '%' characters below because of the
+			 * following two conversions:
+			 *
+			 *  1) sprintf: %%%%r -> %%r
+			 *  2) printk : %%r   -> %r
+			 */
 			if (operand->flags & OPERAND_GPR)
-				ptr += sprintf(ptr, "%%r%i", value);
+				ptr += sprintf(ptr, "%%%%r%i", value);
 			else if (operand->flags & OPERAND_FPR)
-				ptr += sprintf(ptr, "%%f%i", value);
+				ptr += sprintf(ptr, "%%%%f%i", value);
 			else if (operand->flags & OPERAND_AR)
-				ptr += sprintf(ptr, "%%a%i", value);
+				ptr += sprintf(ptr, "%%%%a%i", value);
 			else if (operand->flags & OPERAND_CR)
-				ptr += sprintf(ptr, "%%c%i", value);
+				ptr += sprintf(ptr, "%%%%c%i", value);
 			else if (operand->flags & OPERAND_VR)
-				ptr += sprintf(ptr, "%%v%i", value);
+				ptr += sprintf(ptr, "%%%%v%i", value);
 			else if (operand->flags & OPERAND_PCREL)
 				ptr += sprintf(ptr, "%lx", (signed int) value
 								      + addr);

+ 1 - 0
arch/sparc/include/asm/elf_64.h

@@ -95,6 +95,7 @@
  * really available.  So we simply advertise only "crypto" support.
  */
 #define HWCAP_SPARC_CRYPTO	0x04000000 /* CRYPTO insns available */
+#define HWCAP_SPARC_ADI		0x08000000 /* ADI available */
 
 #define CORE_DUMP_USE_REGSET
 

+ 6 - 1
arch/sparc/include/uapi/asm/unistd.h

@@ -417,8 +417,13 @@
 #define __NR_bpf		349
 #define __NR_execveat		350
 #define __NR_membarrier		351
+#define __NR_userfaultfd	352
+#define __NR_bind		353
+#define __NR_listen		354
+#define __NR_setsockopt		355
+#define __NR_mlock2		356
 
-#define NR_syscalls		352
+#define NR_syscalls		357
 
 /* Bitmask values returned from kern_features system call.  */
 #define KERN_FEATURE_MIXED_MODE_STACK	0x00000001

+ 13 - 0
arch/sparc/kernel/head_64.S

@@ -946,6 +946,12 @@ ENTRY(__retl_one)
 	 mov	1, %o0
 ENDPROC(__retl_one)
 
+ENTRY(__retl_one_fp)
+	VISExitHalf
+	retl
+	 mov	1, %o0
+ENDPROC(__retl_one_fp)
+
 ENTRY(__ret_one_asi)
 	wr	%g0, ASI_AIUS, %asi
 	ret
@@ -958,6 +964,13 @@ ENTRY(__retl_one_asi)
 	 mov	1, %o0
 ENDPROC(__retl_one_asi)
 
+ENTRY(__retl_one_asi_fp)
+	wr	%g0, ASI_AIUS, %asi
+	VISExitHalf
+	retl
+	 mov	1, %o0
+ENDPROC(__retl_one_asi_fp)
+
 ENTRY(__retl_o1)
 	retl
 	 mov	%o1, %o0

+ 11 - 0
arch/sparc/kernel/perf_event.c

@@ -1828,11 +1828,18 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
 void
 perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
+	u64 saved_fault_address = current_thread_info()->fault_address;
+	u8 saved_fault_code = get_thread_fault_code();
+	mm_segment_t old_fs;
+
 	perf_callchain_store(entry, regs->tpc);
 
 	if (!current->mm)
 		return;
 
+	old_fs = get_fs();
+	set_fs(USER_DS);
+
 	flushw_user();
 
 	pagefault_disable();
@@ -1843,4 +1850,8 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		perf_callchain_user_64(entry, regs);
 
 	pagefault_enable();
+
+	set_fs(old_fs);
+	set_thread_fault_code(saved_fault_code);
+	current_thread_info()->fault_address = saved_fault_address;
 }

+ 7 - 1
arch/sparc/kernel/rtrap_64.S

@@ -73,7 +73,13 @@ rtrap_nmi:	ldx			[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
 		andn			%l1, %l4, %l1
 		srl			%l4, 20, %l4
 		ba,pt			%xcc, rtrap_no_irq_enable
-		 wrpr			%l4, %pil
+		nop
+		/* Do not actually set the %pil here.  We will do that
+		 * below after we clear PSTATE_IE in the %pstate register.
+		 * If we re-enable interrupts here, we can recurse down
+		 * the hardirq stack potentially endlessly, causing a
+		 * stack overflow.
+		 */
 
 		.align			64
 		.globl			rtrap_irq, rtrap, irqsz_patchme, rtrap_xcall

+ 5 - 4
arch/sparc/kernel/setup_64.c

@@ -380,7 +380,8 @@ static const char *hwcaps[] = {
 	 */
 	"mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
 	"ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
-	"ima", "cspare", "pause", "cbcond",
+	"ima", "cspare", "pause", "cbcond", NULL /*reserved for crypto */,
+	"adp",
 };
 
 static const char *crypto_hwcaps[] = {
@@ -396,7 +397,7 @@ void cpucap_info(struct seq_file *m)
 	seq_puts(m, "cpucaps\t\t: ");
 	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
 		unsigned long bit = 1UL << i;
-		if (caps & bit) {
+		if (hwcaps[i] && (caps & bit)) {
 			seq_printf(m, "%s%s",
 				   printed ? "," : "", hwcaps[i]);
 			printed++;
@@ -450,7 +451,7 @@ static void __init report_hwcaps(unsigned long caps)
 
 	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
 		unsigned long bit = 1UL << i;
-		if (caps & bit)
+		if (hwcaps[i] && (caps & bit))
 			report_one_hwcap(&printed, hwcaps[i]);
 	}
 	if (caps & HWCAP_SPARC_CRYPTO)
@@ -485,7 +486,7 @@ static unsigned long __init mdesc_cpu_hwcap_list(void)
 		for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
 			unsigned long bit = 1UL << i;
 
-			if (!strcmp(prop, hwcaps[i])) {
+			if (hwcaps[i] && !strcmp(prop, hwcaps[i])) {
 				caps |= bit;
 				break;
 			}

+ 10 - 9
arch/sparc/kernel/systbls_32.S

@@ -35,18 +35,18 @@ sys_call_table:
 /*80*/	.long sys_setgroups16, sys_getpgrp, sys_setgroups, sys_setitimer, sys_ftruncate64
 /*85*/	.long sys_swapon, sys_getitimer, sys_setuid, sys_sethostname, sys_setgid
 /*90*/	.long sys_dup2, sys_setfsuid, sys_fcntl, sys_select, sys_setfsgid
-/*95*/	.long sys_fsync, sys_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
+/*95*/	.long sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
 /*100*/	.long sys_getpriority, sys_rt_sigreturn, sys_rt_sigaction, sys_rt_sigprocmask, sys_rt_sigpending
 /*105*/	.long sys_rt_sigtimedwait, sys_rt_sigqueueinfo, sys_rt_sigsuspend, sys_setresuid, sys_getresuid
-/*110*/	.long sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall
-/*115*/	.long sys_getgroups, sys_gettimeofday, sys_getrusage, sys_nis_syscall, sys_getcwd
+/*110*/	.long sys_setresgid, sys_getresgid, sys_setregid, sys_recvmsg, sys_sendmsg
+/*115*/	.long sys_getgroups, sys_gettimeofday, sys_getrusage, sys_getsockopt, sys_getcwd
 /*120*/	.long sys_readv, sys_writev, sys_settimeofday, sys_fchown16, sys_fchmod
-/*125*/	.long sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate
-/*130*/	.long sys_ftruncate, sys_flock, sys_lstat64, sys_nis_syscall, sys_nis_syscall
-/*135*/	.long sys_nis_syscall, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64
-/*140*/	.long sys_sendfile64, sys_nis_syscall, sys_futex, sys_gettid, sys_getrlimit
+/*125*/	.long sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate
+/*130*/	.long sys_ftruncate, sys_flock, sys_lstat64, sys_sendto, sys_shutdown
+/*135*/	.long sys_socketpair, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64
+/*140*/	.long sys_sendfile64, sys_getpeername, sys_futex, sys_gettid, sys_getrlimit
 /*145*/	.long sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
-/*150*/	.long sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+/*150*/	.long sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
 /*155*/	.long sys_fcntl64, sys_inotify_rm_watch, sys_statfs, sys_fstatfs, sys_oldumount
 /*160*/	.long sys_sched_setaffinity, sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall
 /*165*/	.long sys_quotactl, sys_set_tid_address, sys_mount, sys_ustat, sys_setxattr
@@ -87,4 +87,5 @@ sys_call_table:
 /*335*/	.long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
 /*340*/	.long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 /*345*/	.long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
-/*350*/	.long sys_execveat, sys_membarrier
+/*350*/	.long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
+/*355*/	.long sys_setsockopt, sys_mlock2

+ 10 - 8
arch/sparc/kernel/systbls_64.S

@@ -37,15 +37,15 @@ sys_call_table32:
 /*80*/	.word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, sys32_ftruncate64
 	.word sys_swapon, compat_sys_getitimer, sys_setuid, sys_sethostname, sys_setgid
 /*90*/	.word sys_dup2, sys_setfsuid, compat_sys_fcntl, sys32_select, sys_setfsgid
-	.word sys_fsync, sys_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
+	.word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
 /*100*/ .word sys_getpriority, sys32_rt_sigreturn, compat_sys_rt_sigaction, compat_sys_rt_sigprocmask, compat_sys_rt_sigpending
 	.word compat_sys_rt_sigtimedwait, compat_sys_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid
-/*110*/	.word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall
-	.word sys_getgroups, compat_sys_gettimeofday, compat_sys_getrusage, sys_nis_syscall, sys_getcwd
+/*110*/	.word sys_setresgid, sys_getresgid, sys_setregid, compat_sys_recvmsg, compat_sys_sendmsg
+	.word sys_getgroups, compat_sys_gettimeofday, compat_sys_getrusage, compat_sys_getsockopt, sys_getcwd
 /*120*/	.word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod
-	.word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
-/*130*/	.word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall
-	.word sys_nis_syscall, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
+	.word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
+/*130*/	.word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown
+	.word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
 /*140*/	.word sys_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit
 	.word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
 /*150*/	.word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
@@ -88,7 +88,8 @@ sys_call_table32:
 	.word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev
 /*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 	.word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
-/*350*/	.word sys32_execveat, sys_membarrier
+/*350*/	.word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
+	.word compat_sys_setsockopt, sys_mlock2
 
 #endif /* CONFIG_COMPAT */
 
@@ -168,4 +169,5 @@ sys_call_table:
 	.word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
 /*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 	.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
-/*350*/	.word sys64_execveat, sys_membarrier
+/*350*/	.word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
+	.word sys_setsockopt, sys_mlock2

+ 8 - 0
arch/sparc/lib/NG2copy_from_user.S

@@ -11,6 +11,14 @@
 	.text;			\
 	.align 4;
 
+#define EX_LD_FP(x)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_one_asi_fp;\
+	.text;			\
+	.align 4;
+
 #ifndef ASI_AIUS
 #define ASI_AIUS	0x11
 #endif

+ 8 - 0
arch/sparc/lib/NG2copy_to_user.S

@@ -11,6 +11,14 @@
 	.text;			\
 	.align 4;
 
+#define EX_ST_FP(x)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_one_asi_fp;\
+	.text;			\
+	.align 4;
+
 #ifndef ASI_AIUS
 #define ASI_AIUS	0x11
 #endif

+ 62 - 56
arch/sparc/lib/NG2memcpy.S

@@ -34,10 +34,16 @@
 #ifndef EX_LD
 #define EX_LD(x)	x
 #endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x)	x
+#endif
 
 #ifndef EX_ST
 #define EX_ST(x)	x
 #endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x)	x
+#endif
 
 #ifndef EX_RETVAL
 #define EX_RETVAL(x)	x
@@ -134,40 +140,40 @@
 	fsrc2		%x6, %f12; \
 	fsrc2		%x7, %f14;
 #define FREG_LOAD_1(base, x0) \
-	EX_LD(LOAD(ldd, base + 0x00, %x0))
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
 #define FREG_LOAD_2(base, x0, x1) \
-	EX_LD(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD(LOAD(ldd, base + 0x08, %x1));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
 #define FREG_LOAD_3(base, x0, x1, x2) \
-	EX_LD(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD(LOAD(ldd, base + 0x10, %x2));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
 #define FREG_LOAD_4(base, x0, x1, x2, x3) \
-	EX_LD(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD(LOAD(ldd, base + 0x18, %x3));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
 #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
-	EX_LD(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD(LOAD(ldd, base + 0x18, %x3)); \
-	EX_LD(LOAD(ldd, base + 0x20, %x4));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
 #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
-	EX_LD(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD(LOAD(ldd, base + 0x18, %x3)); \
-	EX_LD(LOAD(ldd, base + 0x20, %x4)); \
-	EX_LD(LOAD(ldd, base + 0x28, %x5));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
+	EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
 #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
-	EX_LD(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD(LOAD(ldd, base + 0x18, %x3)); \
-	EX_LD(LOAD(ldd, base + 0x20, %x4)); \
-	EX_LD(LOAD(ldd, base + 0x28, %x5)); \
-	EX_LD(LOAD(ldd, base + 0x30, %x6));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
+	EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
+	EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
 
 	.register	%g2,#scratch
 	.register	%g3,#scratch
@@ -275,11 +281,11 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 nop
 	/* fall through for 0 < low bits < 8 */
 110:	sub		%o4, 64, %g2
-	EX_LD(LOAD_BLK(%g2, %f0))
-1:	EX_ST(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD(LOAD_BLK(%o4, %f16))
+	EX_LD_FP(LOAD_BLK(%g2, %f0))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f16))
 	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
-	EX_ST(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
 	FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -290,10 +296,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 120:	sub		%o4, 56, %g2
 	FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
-1:	EX_ST(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f16))
 	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
-	EX_ST(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
 	FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -304,10 +310,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 130:	sub		%o4, 48, %g2
 	FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
-1:	EX_ST(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f16))
 	FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
-	EX_ST(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
 	FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -318,10 +324,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 140:	sub		%o4, 40, %g2
 	FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
-1:	EX_ST(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f16))
 	FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
-	EX_ST(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
 	FREG_MOVE_5(f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -332,10 +338,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 150:	sub		%o4, 32, %g2
 	FREG_LOAD_4(%g2, f0, f2, f4, f6)
-1:	EX_ST(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f16))
 	FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
-	EX_ST(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
 	FREG_MOVE_4(f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -346,10 +352,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 160:	sub		%o4, 24, %g2
 	FREG_LOAD_3(%g2, f0, f2, f4)
-1:	EX_ST(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f16))
 	FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
-	EX_ST(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
 	FREG_MOVE_3(f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -360,10 +366,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 170:	sub		%o4, 16, %g2
 	FREG_LOAD_2(%g2, f0, f2)
-1:	EX_ST(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f16))
 	FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
-	EX_ST(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
 	FREG_MOVE_2(f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -374,10 +380,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 180:	sub		%o4, 8, %g2
 	FREG_LOAD_1(%g2, f0)
-1:	EX_ST(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f16))
 	FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
-	EX_ST(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
 	FREG_MOVE_1(f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -387,10 +393,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 nop
 
 190:
-1:	EX_ST(STORE_INIT(%g0, %o4 + %g3))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
 	subcc		%g1, 64, %g1
-	EX_LD(LOAD_BLK(%o4, %f0))
-	EX_ST(STORE_BLK(%f0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f0))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
 	add		%o4, 64, %o4
 	bne,pt		%xcc, 1b
 	 LOAD(prefetch, %o4 + 64, #one_read)

+ 8 - 0
arch/sparc/lib/NG4copy_from_user.S

@@ -11,6 +11,14 @@
 	.text;			\
 	.align 4;
 
+#define EX_LD_FP(x)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_one_asi_fp;\
+	.text;			\
+	.align 4;
+
 #ifndef ASI_AIUS
 #define ASI_AIUS	0x11
 #endif

+ 8 - 0
arch/sparc/lib/NG4copy_to_user.S

@@ -11,6 +11,14 @@
 	.text;			\
 	.align 4;
 
+#define EX_ST_FP(x)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_one_asi_fp;\
+	.text;			\
+	.align 4;
+
 #ifndef ASI_AIUS
 #define ASI_AIUS	0x11
 #endif

+ 23 - 17
arch/sparc/lib/NG4memcpy.S

@@ -48,10 +48,16 @@
 #ifndef EX_LD
 #define EX_LD(x)	x
 #endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x)	x
+#endif
 
 #ifndef EX_ST
 #define EX_ST(x)	x
 #endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x)	x
+#endif
 
 #ifndef EX_RETVAL
 #define EX_RETVAL(x)	x
@@ -210,17 +216,17 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	sub		%o2, %o4, %o2
 	alignaddr	%o1, %g0, %g1
 	add		%o1, %o4, %o1
-	EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
-1:	EX_LD(LOAD(ldd, %g1 + 0x08, %f2))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
+1:	EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
 	subcc		%o4, 0x40, %o4
-	EX_LD(LOAD(ldd, %g1 + 0x10, %f4))
-	EX_LD(LOAD(ldd, %g1 + 0x18, %f6))
-	EX_LD(LOAD(ldd, %g1 + 0x20, %f8))
-	EX_LD(LOAD(ldd, %g1 + 0x28, %f10))
-	EX_LD(LOAD(ldd, %g1 + 0x30, %f12))
-	EX_LD(LOAD(ldd, %g1 + 0x38, %f14))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
 	faligndata	%f0, %f2, %f16
-	EX_LD(LOAD(ldd, %g1 + 0x40, %f0))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
 	faligndata	%f2, %f4, %f18
 	add		%g1, 0x40, %g1
 	faligndata	%f4, %f6, %f20
@@ -229,14 +235,14 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	faligndata	%f10, %f12, %f26
 	faligndata	%f12, %f14, %f28
 	faligndata	%f14, %f0, %f30
-	EX_ST(STORE(std, %f16, %o0 + 0x00))
-	EX_ST(STORE(std, %f18, %o0 + 0x08))
-	EX_ST(STORE(std, %f20, %o0 + 0x10))
-	EX_ST(STORE(std, %f22, %o0 + 0x18))
-	EX_ST(STORE(std, %f24, %o0 + 0x20))
-	EX_ST(STORE(std, %f26, %o0 + 0x28))
-	EX_ST(STORE(std, %f28, %o0 + 0x30))
-	EX_ST(STORE(std, %f30, %o0 + 0x38))
+	EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
+	EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
+	EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
+	EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
+	EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
+	EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
+	EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
+	EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
 	add		%o0, 0x40, %o0
 	bne,pt		%icc, 1b
 	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)

+ 8 - 0
arch/sparc/lib/U1copy_from_user.S

@@ -11,6 +11,14 @@
 	.text;			\
 	.align 4;
 
+#define EX_LD_FP(x)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_one_fp;\
+	.text;			\
+	.align 4;
+
 #define FUNC_NAME		___copy_from_user
 #define LOAD(type,addr,dest)	type##a [addr] %asi, dest
 #define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_AIUS, dest

+ 8 - 0
arch/sparc/lib/U1copy_to_user.S

@@ -11,6 +11,14 @@
 	.text;			\
 	.align 4;
 
+#define EX_ST_FP(x)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_one_fp;\
+	.text;			\
+	.align 4;
+
 #define FUNC_NAME		___copy_to_user
 #define STORE(type,src,addr)	type##a src, [addr] ASI_AIUS
 #define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_AIUS

+ 27 - 21
arch/sparc/lib/U1memcpy.S

@@ -25,10 +25,16 @@
 #ifndef EX_LD
 #define EX_LD(x)	x
 #endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x)	x
+#endif
 
 #ifndef EX_ST
 #define EX_ST(x)	x
 #endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x)	x
+#endif
 
 #ifndef EX_RETVAL
 #define EX_RETVAL(x)	x
@@ -73,8 +79,8 @@
 	faligndata		%f8, %f9, %f62;
 
 #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)	\
-	EX_LD(LOAD_BLK(%src, %fdest));				\
-	EX_ST(STORE_BLK(%fsrc, %dest));				\
+	EX_LD_FP(LOAD_BLK(%src, %fdest));				\
+	EX_ST_FP(STORE_BLK(%fsrc, %dest));				\
 	add			%src, 0x40, %src;		\
 	subcc			%len, 0x40, %len;		\
 	be,pn			%xcc, jmptgt;			\
@@ -89,12 +95,12 @@
 
 #define DO_SYNC			membar	#Sync;
 #define STORE_SYNC(dest, fsrc)				\
-	EX_ST(STORE_BLK(%fsrc, %dest));			\
+	EX_ST_FP(STORE_BLK(%fsrc, %dest));			\
 	add			%dest, 0x40, %dest;	\
 	DO_SYNC
 
 #define STORE_JUMP(dest, fsrc, target)			\
-	EX_ST(STORE_BLK(%fsrc, %dest));			\
+	EX_ST_FP(STORE_BLK(%fsrc, %dest));			\
 	add			%dest, 0x40, %dest;	\
 	ba,pt			%xcc, target;		\
 	 nop;
@@ -103,7 +109,7 @@
 	subcc			%left, 8, %left;\
 	bl,pn			%xcc, 95f;	\
 	 faligndata		%f0, %f1, %f48;	\
-	EX_ST(STORE(std, %f48, %dest));		\
+	EX_ST_FP(STORE(std, %f48, %dest));		\
 	add			%dest, 8, %dest;
 
 #define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\
@@ -160,8 +166,8 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	 and		%g2, 0x38, %g2
 
 1:	subcc		%g1, 0x1, %g1
-	EX_LD(LOAD(ldub, %o1 + 0x00, %o3))
-	EX_ST(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
+	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
+	EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
 	bgu,pt		%XCC, 1b
 	 add		%o1, 0x1, %o1
 
@@ -172,20 +178,20 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	be,pt		%icc, 3f
 	 alignaddr	%o1, %g0, %o1
 
-	EX_LD(LOAD(ldd, %o1, %f4))
-1:	EX_LD(LOAD(ldd, %o1 + 0x8, %f6))
+	EX_LD_FP(LOAD(ldd, %o1, %f4))
+1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f4, %f6, %f0
-	EX_ST(STORE(std, %f0, %o0))
+	EX_ST_FP(STORE(std, %f0, %o0))
 	be,pn		%icc, 3f
 	 add		%o0, 0x8, %o0
 
-	EX_LD(LOAD(ldd, %o1 + 0x8, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f6, %f4, %f0
-	EX_ST(STORE(std, %f0, %o0))
+	EX_ST_FP(STORE(std, %f0, %o0))
 	bne,pt		%icc, 1b
 	 add		%o0, 0x8, %o0
 
@@ -208,13 +214,13 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	add		%g1, %GLOBAL_SPARE, %g1
 	subcc		%o2, %g3, %o2
 
-	EX_LD(LOAD_BLK(%o1, %f0))
+	EX_LD_FP(LOAD_BLK(%o1, %f0))
 	add		%o1, 0x40, %o1
 	add		%g1, %g3, %g1
-	EX_LD(LOAD_BLK(%o1, %f16))
+	EX_LD_FP(LOAD_BLK(%o1, %f16))
 	add		%o1, 0x40, %o1
 	sub		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
-	EX_LD(LOAD_BLK(%o1, %f32))
+	EX_LD_FP(LOAD_BLK(%o1, %f32))
 	add		%o1, 0x40, %o1
 
 	/* There are 8 instances of the unrolled loop,
@@ -426,28 +432,28 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 62:	FINISH_VISCHUNK(o0, f44, f46, g3)
 63:	UNEVEN_VISCHUNK_LAST(o0, f46, f0,  g3)
 
-93:	EX_LD(LOAD(ldd, %o1, %f2))
+93:	EX_LD_FP(LOAD(ldd, %o1, %f2))
 	add		%o1, 8, %o1
 	subcc		%g3, 8, %g3
 	faligndata	%f0, %f2, %f8
-	EX_ST(STORE(std, %f8, %o0))
+	EX_ST_FP(STORE(std, %f8, %o0))
 	bl,pn		%xcc, 95f
 	 add		%o0, 8, %o0
-	EX_LD(LOAD(ldd, %o1, %f0))
+	EX_LD_FP(LOAD(ldd, %o1, %f0))
 	add		%o1, 8, %o1
 	subcc		%g3, 8, %g3
 	faligndata	%f2, %f0, %f8
-	EX_ST(STORE(std, %f8, %o0))
+	EX_ST_FP(STORE(std, %f8, %o0))
 	bge,pt		%xcc, 93b
 	 add		%o0, 8, %o0
 
 95:	brz,pt		%o2, 2f
 	 mov		%g1, %o1
 
-1:	EX_LD(LOAD(ldub, %o1, %o3))
+1:	EX_LD_FP(LOAD(ldub, %o1, %o3))
 	add		%o1, 1, %o1
 	subcc		%o2, 1, %o2
-	EX_ST(STORE(stb, %o3, %o0))
+	EX_ST_FP(STORE(stb, %o3, %o0))
 	bne,pt		%xcc, 1b
 	 add		%o0, 1, %o0
 

+ 8 - 0
arch/sparc/lib/U3copy_from_user.S

@@ -11,6 +11,14 @@
 	.text;			\
 	.align 4;
 
+#define EX_LD_FP(x)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_one_fp;\
+	.text;			\
+	.align 4;
+
 #define FUNC_NAME		U3copy_from_user
 #define LOAD(type,addr,dest)	type##a [addr] %asi, dest
 #define EX_RETVAL(x)		0

+ 8 - 0
arch/sparc/lib/U3copy_to_user.S

@@ -11,6 +11,14 @@
 	.text;			\
 	.align 4;
 
+#define EX_ST_FP(x)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_one_fp;\
+	.text;			\
+	.align 4;
+
 #define FUNC_NAME		U3copy_to_user
 #define STORE(type,src,addr)	type##a src, [addr] ASI_AIUS
 #define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_AIUS

+ 46 - 40
arch/sparc/lib/U3memcpy.S

@@ -24,10 +24,16 @@
 #ifndef EX_LD
 #define EX_LD(x)	x
 #endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x)	x
+#endif
 
 #ifndef EX_ST
 #define EX_ST(x)	x
 #endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x)	x
+#endif
 
 #ifndef EX_RETVAL
 #define EX_RETVAL(x)	x
@@ -120,8 +126,8 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 and		%g2, 0x38, %g2
 
 1:	subcc		%g1, 0x1, %g1
-	EX_LD(LOAD(ldub, %o1 + 0x00, %o3))
-	EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
+	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
+	EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
 	bgu,pt		%XCC, 1b
 	 add		%o1, 0x1, %o1
 
@@ -132,20 +138,20 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	be,pt		%icc, 3f
 	 alignaddr	%o1, %g0, %o1
 
-	EX_LD(LOAD(ldd, %o1, %f4))
-1:	EX_LD(LOAD(ldd, %o1 + 0x8, %f6))
+	EX_LD_FP(LOAD(ldd, %o1, %f4))
+1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f4, %f6, %f0
-	EX_ST(STORE(std, %f0, %o0))
+	EX_ST_FP(STORE(std, %f0, %o0))
 	be,pn		%icc, 3f
 	 add		%o0, 0x8, %o0
 
-	EX_LD(LOAD(ldd, %o1 + 0x8, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f6, %f4, %f2
-	EX_ST(STORE(std, %f2, %o0))
+	EX_ST_FP(STORE(std, %f2, %o0))
 	bne,pt		%icc, 1b
 	 add		%o0, 0x8, %o0
 
@@ -155,25 +161,25 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	LOAD(prefetch, %o1 + 0x080, #one_read)
 	LOAD(prefetch, %o1 + 0x0c0, #one_read)
 	LOAD(prefetch, %o1 + 0x100, #one_read)
-	EX_LD(LOAD(ldd, %o1 + 0x000, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
 	LOAD(prefetch, %o1 + 0x140, #one_read)
-	EX_LD(LOAD(ldd, %o1 + 0x008, %f2))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
 	LOAD(prefetch, %o1 + 0x180, #one_read)
-	EX_LD(LOAD(ldd, %o1 + 0x010, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
 	LOAD(prefetch, %o1 + 0x1c0, #one_read)
 	faligndata	%f0, %f2, %f16
-	EX_LD(LOAD(ldd, %o1 + 0x018, %f6))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
 	faligndata	%f2, %f4, %f18
-	EX_LD(LOAD(ldd, %o1 + 0x020, %f8))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
 	faligndata	%f4, %f6, %f20
-	EX_LD(LOAD(ldd, %o1 + 0x028, %f10))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
 	faligndata	%f6, %f8, %f22
 
-	EX_LD(LOAD(ldd, %o1 + 0x030, %f12))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
 	faligndata	%f8, %f10, %f24
-	EX_LD(LOAD(ldd, %o1 + 0x038, %f14))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
 	faligndata	%f10, %f12, %f26
-	EX_LD(LOAD(ldd, %o1 + 0x040, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
 
 	subcc		GLOBAL_SPARE, 0x80, GLOBAL_SPARE
 	add		%o1, 0x40, %o1
@@ -184,26 +190,26 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 	.align		64
 1:
-	EX_LD(LOAD(ldd, %o1 + 0x008, %f2))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
 	faligndata	%f12, %f14, %f28
-	EX_LD(LOAD(ldd, %o1 + 0x010, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
 	faligndata	%f14, %f0, %f30
-	EX_ST(STORE_BLK(%f16, %o0))
-	EX_LD(LOAD(ldd, %o1 + 0x018, %f6))
+	EX_ST_FP(STORE_BLK(%f16, %o0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
 	faligndata	%f0, %f2, %f16
 	add		%o0, 0x40, %o0
 
-	EX_LD(LOAD(ldd, %o1 + 0x020, %f8))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
 	faligndata	%f2, %f4, %f18
-	EX_LD(LOAD(ldd, %o1 + 0x028, %f10))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
 	faligndata	%f4, %f6, %f20
-	EX_LD(LOAD(ldd, %o1 + 0x030, %f12))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
 	subcc		%o3, 0x01, %o3
 	faligndata	%f6, %f8, %f22
-	EX_LD(LOAD(ldd, %o1 + 0x038, %f14))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
 
 	faligndata	%f8, %f10, %f24
-	EX_LD(LOAD(ldd, %o1 + 0x040, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
 	LOAD(prefetch, %o1 + 0x1c0, #one_read)
 	faligndata	%f10, %f12, %f26
 	bg,pt		%XCC, 1b
@@ -211,29 +217,29 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 	/* Finally we copy the last full 64-byte block. */
 2:
-	EX_LD(LOAD(ldd, %o1 + 0x008, %f2))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
 	faligndata	%f12, %f14, %f28
-	EX_LD(LOAD(ldd, %o1 + 0x010, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
 	faligndata	%f14, %f0, %f30
-	EX_ST(STORE_BLK(%f16, %o0))
-	EX_LD(LOAD(ldd, %o1 + 0x018, %f6))
+	EX_ST_FP(STORE_BLK(%f16, %o0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
 	faligndata	%f0, %f2, %f16
-	EX_LD(LOAD(ldd, %o1 + 0x020, %f8))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
 	faligndata	%f2, %f4, %f18
-	EX_LD(LOAD(ldd, %o1 + 0x028, %f10))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
 	faligndata	%f4, %f6, %f20
-	EX_LD(LOAD(ldd, %o1 + 0x030, %f12))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
 	faligndata	%f6, %f8, %f22
-	EX_LD(LOAD(ldd, %o1 + 0x038, %f14))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
 	faligndata	%f8, %f10, %f24
 	cmp		%g1, 0
 	be,pt		%XCC, 1f
 	 add		%o0, 0x40, %o0
-	EX_LD(LOAD(ldd, %o1 + 0x040, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
 1:	faligndata	%f10, %f12, %f26
 	faligndata	%f12, %f14, %f28
 	faligndata	%f14, %f0, %f30
-	EX_ST(STORE_BLK(%f16, %o0))
+	EX_ST_FP(STORE_BLK(%f16, %o0))
 	add		%o0, 0x40, %o0
 	add		%o1, 0x40, %o1
 	membar		#Sync
@@ -253,20 +259,20 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 	sub		%o2, %g2, %o2
 	be,a,pt		%XCC, 1f
-	 EX_LD(LOAD(ldd, %o1 + 0x00, %f0))
+	 EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
 
-1:	EX_LD(LOAD(ldd, %o1 + 0x08, %f2))
+1:	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f0, %f2, %f8
-	EX_ST(STORE(std, %f8, %o0))
+	EX_ST_FP(STORE(std, %f8, %o0))
 	be,pn		%XCC, 2f
 	 add		%o0, 0x8, %o0
-	EX_LD(LOAD(ldd, %o1 + 0x08, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f2, %f0, %f8
-	EX_ST(STORE(std, %f8, %o0))
+	EX_ST_FP(STORE(std, %f8, %o0))
 	bne,pn		%XCC, 1b
 	 add		%o0, 0x8, %o0
 

+ 8 - 0
arch/x86/kvm/cpuid.h

@@ -38,6 +38,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
 }
 
+static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	return best && (best->edx & bit(X86_FEATURE_MTRR));
+}
+
 static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;

+ 19 - 6
arch/x86/kvm/mtrr.c

@@ -120,14 +120,22 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
 	return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
 }
 
-static u8 mtrr_disabled_type(void)
+static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
 {
 	/*
 	 * Intel SDM 11.11.2.2: all MTRRs are disabled when
 	 * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
 	 * memory type is applied to all of physical memory.
+	 *
+	 * However, virtual machines can be run with CPUID such that
+	 * there are no MTRRs.  In that case, the firmware will never
+	 * enable MTRRs and it is obviously undesirable to run the
+	 * guest entirely with UC memory and we use WB.
 	 */
-	return MTRR_TYPE_UNCACHABLE;
+	if (guest_cpuid_has_mtrr(vcpu))
+		return MTRR_TYPE_UNCACHABLE;
+	else
+		return MTRR_TYPE_WRBACK;
 }
 
 /*
@@ -267,7 +275,7 @@ static int fixed_mtrr_addr_to_seg(u64 addr)
 
 	for (seg = 0; seg < seg_num; seg++) {
 		mtrr_seg = &fixed_seg_table[seg];
-		if (mtrr_seg->start >= addr && addr < mtrr_seg->end)
+		if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
 			return seg;
 	}
 
@@ -300,7 +308,6 @@ static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
 	*start = range->base & PAGE_MASK;
 
 	mask = range->mask & PAGE_MASK;
-	mask |= ~0ULL << boot_cpu_data.x86_phys_bits;
 
 	/* This cannot overflow because writing to the reserved bits of
 	 * variable MTRRs causes a #GP.
@@ -356,10 +363,14 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	if (var_mtrr_range_is_valid(cur))
 		list_del(&mtrr_state->var_ranges[index].node);
 
+	/* Extend the mask with all 1 bits to the left, since those
+	 * bits must implicitly be 0.  The bits are then cleared
+	 * when reading them.
+	 */
 	if (!is_mtrr_mask)
 		cur->base = data;
 	else
-		cur->mask = data;
+		cur->mask = data | (-1LL << cpuid_maxphyaddr(vcpu));
 
 	/* add it to the list if it's enabled. */
 	if (var_mtrr_range_is_valid(cur)) {
@@ -426,6 +437,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 			*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
 		else
 			*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
+
+		*pdata &= (1ULL << cpuid_maxphyaddr(vcpu)) - 1;
 	}
 
 	return 0;
@@ -670,7 +683,7 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 	}
 
 	if (iter.mtrr_disabled)
-		return mtrr_disabled_type();
+		return mtrr_disabled_type(vcpu);
 
 	/* not contained in any MTRRs. */
 	if (type == -1)

+ 2 - 2
arch/x86/kvm/svm.c

@@ -3422,6 +3422,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 	struct kvm_run *kvm_run = vcpu->run;
 	u32 exit_code = svm->vmcb->control.exit_code;
 
+	trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
+
 	if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
 		vcpu->arch.cr0 = svm->vmcb->save.cr0;
 	if (npt_enabled)
@@ -3892,8 +3894,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
 	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
 
-	trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
-
 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
 		kvm_before_handle_nmi(&svm->vcpu);
 

+ 4 - 3
arch/x86/kvm/vmx.c

@@ -2803,7 +2803,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vcpu->arch.ia32_xss;
 		break;
 	case MSR_TSC_AUX:
-		if (!guest_cpuid_has_rdtscp(vcpu))
+		if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
 			return 1;
 		/* Otherwise falls through */
 	default:
@@ -2909,7 +2909,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
 		break;
 	case MSR_TSC_AUX:
-		if (!guest_cpuid_has_rdtscp(vcpu))
+		if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
 			return 1;
 		/* Check reserved bit, higher 32 bits should be zero */
 		if ((data >> 32) != 0)
@@ -8042,6 +8042,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	u32 exit_reason = vmx->exit_reason;
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
+	trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
+
 	/*
 	 * Flush logged GPAs PML buffer, this will make dirty_bitmap more
 	 * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
@@ -8668,7 +8670,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->loaded_vmcs->launched = 1;
 
 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
-	trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
 
 	/*
 	 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if

+ 8 - 4
arch/x86/kvm/x86.c

@@ -3572,9 +3572,11 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 
 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
+	int i;
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
-	kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
+	for (i = 0; i < 3; i++)
+		kvm_pit_load_count(kvm, i, ps->channels[i].count, 0);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	return 0;
 }
@@ -3593,6 +3595,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 {
 	int start = 0;
+	int i;
 	u32 prev_legacy, cur_legacy;
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
@@ -3602,7 +3605,8 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 	memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
 	       sizeof(kvm->arch.vpit->pit_state.channels));
 	kvm->arch.vpit->pit_state.flags = ps->flags;
-	kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
+	for (i = 0; i < 3; i++)
+		kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, start);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	return 0;
 }
@@ -6515,6 +6519,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (req_immediate_exit)
 		smp_send_reschedule(vcpu->cpu);
 
+	trace_kvm_entry(vcpu->vcpu_id);
+	wait_lapic_expire(vcpu);
 	__kvm_guest_enter();
 
 	if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -6527,8 +6533,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	}
 
-	trace_kvm_entry(vcpu->vcpu_id);
-	wait_lapic_expire(vcpu);
 	kvm_x86_ops->run(vcpu);
 
 	/*

+ 1 - 1
arch/x86/um/signal.c

@@ -470,7 +470,7 @@ long sys_sigreturn(void)
 	struct sigcontext __user *sc = &frame->sc;
 	int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long);
 
-	if (copy_from_user(&set.sig[0], (void *)sc->oldmask, sizeof(set.sig[0])) ||
+	if (copy_from_user(&set.sig[0], &sc->oldmask, sizeof(set.sig[0])) ||
 	    copy_from_user(&set.sig[1], frame->extramask, sig_size))
 		goto segfault;
 

+ 2 - 7
arch/x86/xen/mmu.c

@@ -2495,14 +2495,9 @@ void __init xen_init_mmu_ops(void)
 {
 	x86_init.paging.pagetable_init = xen_pagetable_init;
 
-	/* Optimization - we can use the HVM one but it has no idea which
-	 * VCPUs are descheduled - which means that it will needlessly IPI
-	 * them. Xen knows so let it do the job.
-	 */
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return;
-	}
+
 	pv_mmu_ops = xen_mmu_ops;
 
 	memset(dummy_mapping, 0xff, PAGE_SIZE);

+ 11 - 10
arch/x86/xen/suspend.c

@@ -1,6 +1,7 @@
 #include <linux/types.h>
 #include <linux/tick.h>
 
+#include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
 #include <xen/events.h>
@@ -68,26 +69,16 @@ static void xen_pv_post_suspend(int suspend_cancelled)
 
 void xen_arch_pre_suspend(void)
 {
-	int cpu;
-
-	for_each_online_cpu(cpu)
-		xen_pmu_finish(cpu);
-
 	if (xen_pv_domain())
 		xen_pv_pre_suspend();
 }
 
 void xen_arch_post_suspend(int cancelled)
 {
-	int cpu;
-
 	if (xen_pv_domain())
 		xen_pv_post_suspend(cancelled);
 	else
 		xen_hvm_post_suspend(cancelled);
-
-	for_each_online_cpu(cpu)
-		xen_pmu_init(cpu);
 }
 
 static void xen_vcpu_notify_restore(void *data)
@@ -106,10 +97,20 @@ static void xen_vcpu_notify_suspend(void *data)
 
 void xen_arch_resume(void)
 {
+	int cpu;
+
 	on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
+
+	for_each_online_cpu(cpu)
+		xen_pmu_init(cpu);
 }
 
 void xen_arch_suspend(void)
 {
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		xen_pmu_finish(cpu);
+
 	on_each_cpu(xen_vcpu_notify_suspend, NULL, 1);
 }

+ 18 - 2
block/blk-core.c

@@ -206,6 +206,22 @@ void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 }
 EXPORT_SYMBOL(blk_delay_queue);
 
+/**
+ * blk_start_queue_async - asynchronously restart a previously stopped queue
+ * @q:    The &struct request_queue in question
+ *
+ * Description:
+ *   blk_start_queue_async() will clear the stop flag on the queue, and
+ *   ensure that the request_fn for the queue is run from an async
+ *   context.
+ **/
+void blk_start_queue_async(struct request_queue *q)
+{
+	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+	blk_run_queue_async(q);
+}
+EXPORT_SYMBOL(blk_start_queue_async);
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
@@ -1689,8 +1705,6 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	struct request *req;
 	unsigned int request_count = 0;
 
-	blk_queue_split(q, &bio, q->bio_split);
-
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
@@ -1698,6 +1712,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 */
 	blk_queue_bounce(q, &bio);
 
+	blk_queue_split(q, &bio, q->bio_split);
+
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio->bi_error = -EIO;
 		bio_endio(bio);

+ 1 - 1
block/blk-merge.c

@@ -81,7 +81,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	struct bio *new = NULL;
 
 	bio_for_each_segment(bv, bio, iter) {
-		if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
+		if (sectors + (bv.bv_len >> 9) > blk_max_size_offset(q, bio->bi_iter.bi_sector))
 			goto split;
 
 		/*

+ 30 - 31
crypto/algif_skcipher.c

@@ -47,7 +47,7 @@ struct skcipher_ctx {
 	bool merge;
 	bool enc;
 
-	struct ablkcipher_request req;
+	struct skcipher_request req;
 };
 
 struct skcipher_async_rsgl {
@@ -64,13 +64,13 @@ struct skcipher_async_req {
 };
 
 #define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq + \
-	crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req)))
+	crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req)))
 
 #define GET_REQ_SIZE(ctx) \
-	crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req))
+	crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req))
 
 #define GET_IV_SIZE(ctx) \
-	crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(&ctx->req))
+	crypto_skcipher_ivsize(crypto_skcipher_reqtfm(&ctx->req))
 
 #define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
 		      sizeof(struct scatterlist) - 1)
@@ -302,8 +302,8 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req);
-	unsigned ivsize = crypto_ablkcipher_ivsize(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
+	unsigned ivsize = crypto_skcipher_ivsize(tfm);
 	struct skcipher_sg_list *sgl;
 	struct af_alg_control con = {};
 	long copied = 0;
@@ -507,7 +507,7 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
 	struct skcipher_sg_list *sgl;
 	struct scatterlist *sg;
 	struct skcipher_async_req *sreq;
-	struct ablkcipher_request *req;
+	struct skcipher_request *req;
 	struct skcipher_async_rsgl *last_rsgl = NULL;
 	unsigned int txbufs = 0, len = 0, tx_nents = skcipher_all_sg_nents(ctx);
 	unsigned int reqlen = sizeof(struct skcipher_async_req) +
@@ -531,9 +531,9 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
 	}
 	sg_init_table(sreq->tsg, tx_nents);
 	memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx));
-	ablkcipher_request_set_tfm(req, crypto_ablkcipher_reqtfm(&ctx->req));
-	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-					skcipher_async_cb, sk);
+	skcipher_request_set_tfm(req, crypto_skcipher_reqtfm(&ctx->req));
+	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      skcipher_async_cb, sk);
 
 	while (iov_iter_count(&msg->msg_iter)) {
 		struct skcipher_async_rsgl *rsgl;
@@ -608,10 +608,10 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
 	if (mark)
 		sg_mark_end(sreq->tsg + txbufs - 1);
 
-	ablkcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
-				     len, sreq->iv);
-	err = ctx->enc ? crypto_ablkcipher_encrypt(req) :
-			 crypto_ablkcipher_decrypt(req);
+	skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
+				   len, sreq->iv);
+	err = ctx->enc ? crypto_skcipher_encrypt(req) :
+			 crypto_skcipher_decrypt(req);
 	if (err == -EINPROGRESS) {
 		atomic_inc(&ctx->inflight);
 		err = -EIOCBQUEUED;
@@ -632,7 +632,7 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
-	unsigned bs = crypto_ablkcipher_blocksize(crypto_ablkcipher_reqtfm(
+	unsigned bs = crypto_skcipher_blocksize(crypto_skcipher_reqtfm(
 		&ctx->req));
 	struct skcipher_sg_list *sgl;
 	struct scatterlist *sg;
@@ -669,14 +669,13 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 		if (!used)
 			goto free;
 
-		ablkcipher_request_set_crypt(&ctx->req, sg,
-					     ctx->rsgl.sg, used,
-					     ctx->iv);
+		skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
+					   ctx->iv);
 
 		err = af_alg_wait_for_completion(
 				ctx->enc ?
-					crypto_ablkcipher_encrypt(&ctx->req) :
-					crypto_ablkcipher_decrypt(&ctx->req),
+					crypto_skcipher_encrypt(&ctx->req) :
+					crypto_skcipher_decrypt(&ctx->req),
 				&ctx->completion);
 
 free:
@@ -751,17 +750,17 @@ static struct proto_ops algif_skcipher_ops = {
 
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
 {
-	return crypto_alloc_ablkcipher(name, type, mask);
+	return crypto_alloc_skcipher(name, type, mask);
 }
 
 static void skcipher_release(void *private)
 {
-	crypto_free_ablkcipher(private);
+	crypto_free_skcipher(private);
 }
 
 static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-	return crypto_ablkcipher_setkey(private, key, keylen);
+	return crypto_skcipher_setkey(private, key, keylen);
 }
 
 static void skcipher_wait(struct sock *sk)
@@ -778,13 +777,13 @@ static void skcipher_sock_destruct(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
 
 	if (atomic_read(&ctx->inflight))
 		skcipher_wait(sk);
 
 	skcipher_free_sgl(sk);
-	sock_kzfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm));
+	sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
 }
@@ -793,20 +792,20 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
 {
 	struct skcipher_ctx *ctx;
 	struct alg_sock *ask = alg_sk(sk);
-	unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(private);
+	unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(private);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
-	ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(private),
+	ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(private),
 			       GFP_KERNEL);
 	if (!ctx->iv) {
 		sock_kfree_s(sk, ctx, len);
 		return -ENOMEM;
 	}
 
-	memset(ctx->iv, 0, crypto_ablkcipher_ivsize(private));
+	memset(ctx->iv, 0, crypto_skcipher_ivsize(private));
 
 	INIT_LIST_HEAD(&ctx->tsgl);
 	ctx->len = len;
@@ -819,9 +818,9 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
 
 	ask->private = ctx;
 
-	ablkcipher_request_set_tfm(&ctx->req, private);
-	ablkcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-					af_alg_complete, &ctx->completion);
+	skcipher_request_set_tfm(&ctx->req, private);
+	skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      af_alg_complete, &ctx->completion);
 
 	sk->sk_destruct = skcipher_sock_destruct;
 

+ 2 - 1
drivers/acpi/processor_driver.c

@@ -200,7 +200,8 @@ static int acpi_pss_perf_init(struct acpi_processor *pr,
 		goto err_remove_sysfs_thermal;
 	}
 
-	sysfs_remove_link(&pr->cdev->device.kobj, "device");
+	return 0;
+
  err_remove_sysfs_thermal:
 	sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
  err_thermal_unregister:

+ 22 - 11
drivers/base/power/domain.c

@@ -390,6 +390,7 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 	struct generic_pm_domain *genpd;
 	bool (*stop_ok)(struct device *__dev);
 	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+	bool runtime_pm = pm_runtime_enabled(dev);
 	ktime_t time_start;
 	s64 elapsed_ns;
 	int ret;
@@ -400,12 +401,19 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
+	/*
+	 * A runtime PM centric subsystem/driver may re-use the runtime PM
+	 * callbacks for other purposes than runtime PM. In those scenarios
+	 * runtime PM is disabled. Under these circumstances, we shall skip
+	 * validating/measuring the PM QoS latency.
+	 */
 	stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL;
-	if (stop_ok && !stop_ok(dev))
+	if (runtime_pm && stop_ok && !stop_ok(dev))
 		return -EBUSY;
 
 	/* Measure suspend latency. */
-	time_start = ktime_get();
+	if (runtime_pm)
+		time_start = ktime_get();
 
 	ret = genpd_save_dev(genpd, dev);
 	if (ret)
@@ -418,13 +426,15 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 	}
 
 	/* Update suspend latency value if the measured time exceeds it. */
-	elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
-	if (elapsed_ns > td->suspend_latency_ns) {
-		td->suspend_latency_ns = elapsed_ns;
-		dev_dbg(dev, "suspend latency exceeded, %lld ns\n",
-			elapsed_ns);
-		genpd->max_off_time_changed = true;
-		td->constraint_changed = true;
+	if (runtime_pm) {
+		elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
+		if (elapsed_ns > td->suspend_latency_ns) {
+			td->suspend_latency_ns = elapsed_ns;
+			dev_dbg(dev, "suspend latency exceeded, %lld ns\n",
+				elapsed_ns);
+			genpd->max_off_time_changed = true;
+			td->constraint_changed = true;
+		}
 	}
 
 	/*
@@ -453,6 +463,7 @@ static int pm_genpd_runtime_resume(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
 	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+	bool runtime_pm = pm_runtime_enabled(dev);
 	ktime_t time_start;
 	s64 elapsed_ns;
 	int ret;
@@ -479,14 +490,14 @@ static int pm_genpd_runtime_resume(struct device *dev)
 
  out:
 	/* Measure resume latency. */
-	if (timed)
+	if (timed && runtime_pm)
 		time_start = ktime_get();
 
 	genpd_start_dev(genpd, dev);
 	genpd_restore_dev(genpd, dev);
 
 	/* Update resume latency value if the measured time exceeds it. */
-	if (timed) {
+	if (timed && runtime_pm) {
 		elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
 		if (elapsed_ns > td->resume_latency_ns) {
 			td->resume_latency_ns = elapsed_ns;

+ 7 - 8
drivers/block/null_blk.c

@@ -219,6 +219,9 @@ static void end_cmd(struct nullb_cmd *cmd)
 {
 	struct request_queue *q = NULL;
 
+	if (cmd->rq)
+		q = cmd->rq->q;
+
 	switch (queue_mode)  {
 	case NULL_Q_MQ:
 		blk_mq_end_request(cmd->rq, 0);
@@ -229,23 +232,19 @@ static void end_cmd(struct nullb_cmd *cmd)
 		break;
 	case NULL_Q_BIO:
 		bio_endio(cmd->bio);
-		goto free_cmd;
+		break;
 	}
 
-	if (cmd->rq)
-		q = cmd->rq->q;
+	free_cmd(cmd);
 
 	/* Restart queue if needed, as we are freeing a tag */
-	if (q && !q->mq_ops && blk_queue_stopped(q)) {
+	if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(q->queue_lock, flags);
-		if (blk_queue_stopped(q))
-			blk_start_queue(q);
+		blk_start_queue_async(q);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 	}
-free_cmd:
-	free_cmd(cmd);
 }
 
 static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)

+ 10 - 5
drivers/block/xen-blkback/blkback.c

@@ -950,6 +950,8 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 		goto unmap;
 
 	for (n = 0, i = 0; n < nseg; n++) {
+		uint8_t first_sect, last_sect;
+
 		if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
 			/* Map indirect segments */
 			if (segments)
@@ -957,15 +959,18 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 			segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
 		}
 		i = n % SEGS_PER_INDIRECT_FRAME;
+
 		pending_req->segments[n]->gref = segments[i].gref;
-		seg[n].nsec = segments[i].last_sect -
-			segments[i].first_sect + 1;
-		seg[n].offset = (segments[i].first_sect << 9);
-		if ((segments[i].last_sect >= (XEN_PAGE_SIZE >> 9)) ||
-		    (segments[i].last_sect < segments[i].first_sect)) {
+
+		first_sect = READ_ONCE(segments[i].first_sect);
+		last_sect = READ_ONCE(segments[i].last_sect);
+		if (last_sect >= (XEN_PAGE_SIZE >> 9) || last_sect < first_sect) {
 			rc = -EINVAL;
 			goto unmap;
 		}
+
+		seg[n].nsec = last_sect - first_sect + 1;
+		seg[n].offset = first_sect << 9;
 		preq->nr_sects += seg[n].nsec;
 	}
 

+ 4 - 4
drivers/block/xen-blkback/common.h

@@ -408,8 +408,8 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 					struct blkif_x86_32_request *src)
 {
 	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
-	dst->operation = src->operation;
-	switch (src->operation) {
+	dst->operation = READ_ONCE(src->operation);
+	switch (dst->operation) {
 	case BLKIF_OP_READ:
 	case BLKIF_OP_WRITE:
 	case BLKIF_OP_WRITE_BARRIER:
@@ -456,8 +456,8 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 					struct blkif_x86_64_request *src)
 {
 	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
-	dst->operation = src->operation;
-	switch (src->operation) {
+	dst->operation = READ_ONCE(src->operation);
+	switch (dst->operation) {
 	case BLKIF_OP_READ:
 	case BLKIF_OP_WRITE:
 	case BLKIF_OP_WRITE_BARRIER:

+ 4 - 4
drivers/bus/sunxi-rsb.c

@@ -342,13 +342,13 @@ static int sunxi_rsb_read(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr,
 
 	ret = _sunxi_rsb_run_xfer(rsb);
 	if (ret)
-		goto out;
+		goto unlock;
 
 	*buf = readl(rsb->regs + RSB_DATA);
 
+unlock:
 	mutex_unlock(&rsb->lock);
 
-out:
 	return ret;
 }
 
@@ -527,9 +527,9 @@ static int sunxi_rsb_init_device_mode(struct sunxi_rsb *rsb)
  */
 
 static const struct sunxi_rsb_addr_map sunxi_rsb_addr_maps[] = {
-	{ 0x3e3, 0x2d }, /* Primary PMIC: AXP223, AXP809, AXP81X, ... */
+	{ 0x3a3, 0x2d }, /* Primary PMIC: AXP223, AXP809, AXP81X, ... */
 	{ 0x745, 0x3a }, /* Secondary PMIC: AXP806, ... */
-	{ 0xe89, 0x45 }, /* Peripheral IC: AC100, ... */
+	{ 0xe89, 0x4e }, /* Peripheral IC: AC100, ... */
 };
 
 static u8 sunxi_rsb_get_rtaddr(u16 hwaddr)

+ 1 - 1
drivers/cpufreq/Kconfig.arm

@@ -226,7 +226,7 @@ config ARM_TEGRA20_CPUFREQ
 
 config ARM_TEGRA124_CPUFREQ
 	tristate "Tegra124 CPUFreq support"
-	depends on ARCH_TEGRA && CPUFREQ_DT
+	depends on ARCH_TEGRA && CPUFREQ_DT && REGULATOR
 	default y
 	help
 	  This adds the CPUFreq driver support for Tegra124 SOCs.

+ 1 - 1
drivers/cpufreq/intel_pstate.c

@@ -1123,7 +1123,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 				   limits->max_sysfs_pct);
 	limits->max_perf_pct = max(limits->min_policy_pct,
 				   limits->max_perf_pct);
-	limits->max_perf = round_up(limits->max_perf, 8);
+	limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
 
 	/* Make sure min_perf_pct <= max_perf_pct */
 	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);

+ 1 - 1
drivers/cpufreq/scpi-cpufreq.c

@@ -31,7 +31,7 @@ static struct scpi_ops *scpi_ops;
 
 static struct scpi_dvfs_info *scpi_get_dvfs_info(struct device *cpu_dev)
 {
-	u8 domain = topology_physical_package_id(cpu_dev->id);
+	int domain = topology_physical_package_id(cpu_dev->id);
 
 	if (domain < 0)
 		return ERR_PTR(-EINVAL);

+ 1 - 1
drivers/gpio/gpio-ath79.c

@@ -113,7 +113,7 @@ static int ar934x_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
 		__raw_writel(BIT(offset), ctrl->base + AR71XX_GPIO_REG_CLEAR);
 
 	__raw_writel(
-		__raw_readl(ctrl->base + AR71XX_GPIO_REG_OE) & BIT(offset),
+		__raw_readl(ctrl->base + AR71XX_GPIO_REG_OE) & ~BIT(offset),
 		ctrl->base + AR71XX_GPIO_REG_OE);
 
 	spin_unlock_irqrestore(&ctrl->lock, flags);

+ 2 - 2
drivers/gpio/gpio-generic.c

@@ -141,9 +141,9 @@ static int bgpio_get_set(struct gpio_chip *gc, unsigned int gpio)
 	unsigned long pinmask = bgc->pin2mask(bgc, gpio);
 
 	if (bgc->dir & pinmask)
-		return bgc->read_reg(bgc->reg_set) & pinmask;
+		return !!(bgc->read_reg(bgc->reg_set) & pinmask);
 	else
-		return bgc->read_reg(bgc->reg_dat) & pinmask;
+		return !!(bgc->read_reg(bgc->reg_dat) & pinmask);
 }
 
 static int bgpio_get(struct gpio_chip *gc, unsigned int gpio)

+ 7 - 1
drivers/gpio/gpiolib.c

@@ -1279,7 +1279,13 @@ static int _gpiod_get_raw_value(const struct gpio_desc *desc)
 	chip = desc->chip;
 	offset = gpio_chip_hwgpio(desc);
 	value = chip->get ? chip->get(chip, offset) : -EIO;
-	value = value < 0 ? value : !!value;
+	/*
+	 * FIXME: fix all drivers to clamp to [0,1] or return negative,
+	 * then change this to:
+	 * value = value < 0 ? value : !!value;
+	 * so we can properly propagate error codes.
+	 */
+	value = !!value;
 	trace_gpio_value(desc_to_gpio(desc), 1, value);
 	return value;
 }

+ 2 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -1264,7 +1264,8 @@ struct amdgpu_cs_parser {
 	struct ww_acquire_ctx	ticket;
 
 	/* user fence */
-	struct amdgpu_user_fence uf;
+	struct amdgpu_user_fence	uf;
+	struct amdgpu_bo_list_entry	uf_entry;
 };
 
 struct amdgpu_job {

+ 42 - 21
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -127,6 +127,37 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 	return 0;
 }
 
+static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
+				      struct drm_amdgpu_cs_chunk_fence *fence_data)
+{
+	struct drm_gem_object *gobj;
+	uint32_t handle;
+
+	handle = fence_data->handle;
+	gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
+				     fence_data->handle);
+	if (gobj == NULL)
+		return -EINVAL;
+
+	p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	p->uf.offset = fence_data->offset;
+
+	if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) {
+		drm_gem_object_unreference_unlocked(gobj);
+		return -EINVAL;
+	}
+
+	p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo);
+	p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
+	p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+	p->uf_entry.priority = 0;
+	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+	p->uf_entry.tv.shared = true;
+
+	drm_gem_object_unreference_unlocked(gobj);
+	return 0;
+}
+
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	union drm_amdgpu_cs *cs = data;
@@ -207,28 +238,15 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 		case AMDGPU_CHUNK_ID_FENCE:
 			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
-			if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) {
-				uint32_t handle;
-				struct drm_gem_object *gobj;
-				struct drm_amdgpu_cs_chunk_fence *fence_data;
-
-				fence_data = (void *)p->chunks[i].kdata;
-				handle = fence_data->handle;
-				gobj = drm_gem_object_lookup(p->adev->ddev,
-							     p->filp, handle);
-				if (gobj == NULL) {
-					ret = -EINVAL;
-					goto free_partial_kdata;
-				}
-
-				p->uf.bo = gem_to_amdgpu_bo(gobj);
-				amdgpu_bo_ref(p->uf.bo);
-				drm_gem_object_unreference_unlocked(gobj);
-				p->uf.offset = fence_data->offset;
-			} else {
+			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
 				ret = -EINVAL;
 				goto free_partial_kdata;
 			}
+
+			ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata);
+			if (ret)
+				goto free_partial_kdata;
+
 			break;
 
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
@@ -391,6 +409,9 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
 	p->vm_bos = amdgpu_vm_get_bos(p->adev, &fpriv->vm,
 				      &p->validated);
 
+	if (p->uf.bo)
+		list_add(&p->uf_entry.tv.head, &p->validated);
+
 	if (need_mmap_lock)
 		down_read(&current->mm->mmap_sem);
 
@@ -488,8 +509,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		for (i = 0; i < parser->num_ibs; i++)
 			amdgpu_ib_free(parser->adev, &parser->ibs[i]);
 	kfree(parser->ibs);
-	if (parser->uf.bo)
-		amdgpu_bo_unref(&parser->uf.bo);
+	amdgpu_bo_unref(&parser->uf.bo);
+	amdgpu_bo_unref(&parser->uf_entry.robj);
 }
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,

+ 3 - 0
drivers/gpu/drm/exynos/exynos_drm_crtc.c

@@ -55,6 +55,9 @@ static int exynos_crtc_atomic_check(struct drm_crtc *crtc,
 {
 	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
 
+	if (!state->enable)
+		return 0;
+
 	if (exynos_crtc->ops->atomic_check)
 		return exynos_crtc->ops->atomic_check(exynos_crtc, state);
 

+ 20 - 8
drivers/gpu/drm/i915/i915_drv.h

@@ -2193,8 +2193,17 @@ struct drm_i915_gem_request {
 	struct drm_i915_private *i915;
 	struct intel_engine_cs *ring;
 
-	/** GEM sequence number associated with this request. */
-	uint32_t seqno;
+	 /** GEM sequence number associated with the previous request,
+	  * when the HWS breadcrumb is equal to this the GPU is processing
+	  * this request.
+	  */
+	u32 previous_seqno;
+
+	 /** GEM sequence number associated with this request,
+	  * when the HWS breadcrumb is equal or greater than this the GPU
+	  * has finished processing this request.
+	  */
+	u32 seqno;
 
 	/** Position in the ringbuffer of the start of the request */
 	u32 head;
@@ -2839,6 +2848,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 
 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		  u32 flags);
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
 /*
  * BEWARE: Do not use the function below unless you can _absolutely_
@@ -2910,15 +2920,17 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 	return (int32_t)(seq1 - seq2) >= 0;
 }
 
+static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
+					   bool lazy_coherency)
+{
+	u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
+	return i915_seqno_passed(seqno, req->previous_seqno);
+}
+
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
 					      bool lazy_coherency)
 {
-	u32 seqno;
-
-	BUG_ON(req == NULL);
-
-	seqno = req->ring->get_seqno(req->ring, lazy_coherency);
-
+	u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
 	return i915_seqno_passed(seqno, req->seqno);
 }
 

+ 84 - 27
drivers/gpu/drm/i915/i915_gem.c

@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+	unsigned long t;
+
+	/* Cheaply and approximately convert from nanoseconds to microseconds.
+	 * The result and subsequent calculations are also defined in the same
+	 * approximate microseconds units. The principal source of timing
+	 * error here is from the simple truncation.
+	 *
+	 * Note that local_clock() is only defined wrt to the current CPU;
+	 * the comparisons are no longer valid if we switch CPUs. Instead of
+	 * blocking preemption for the entire busywait, we can detect the CPU
+	 * switch and use that as indicator of system load and a reason to
+	 * stop busywaiting, see busywait_stop().
+	 */
+	*cpu = get_cpu();
+	t = local_clock() >> 10;
+	put_cpu();
+
+	return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+	unsigned this_cpu;
+
+	if (time_after(local_clock_us(&this_cpu), timeout))
+		return true;
+
+	return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 {
 	unsigned long timeout;
+	unsigned cpu;
+
+	/* When waiting for high frequency requests, e.g. during synchronous
+	 * rendering split between the CPU and GPU, the finite amount of time
+	 * required to set up the irq and wait upon it limits the response
+	 * rate. By busywaiting on the request completion for a short while we
+	 * can service the high frequency waits as quick as possible. However,
+	 * if it is a slow request, we want to sleep as quickly as possible.
+	 * The tradeoff between waiting and sleeping is roughly the time it
+	 * takes to sleep on a request, on the order of a microsecond.
+	 */
 
-	if (i915_gem_request_get_ring(req)->irq_refcount)
+	if (req->ring->irq_refcount)
 		return -EBUSY;
 
-	timeout = jiffies + 1;
+	/* Only spin if we know the GPU is processing this request */
+	if (!i915_gem_request_started(req, true))
+		return -EAGAIN;
+
+	timeout = local_clock_us(&cpu) + 5;
 	while (!need_resched()) {
 		if (i915_gem_request_completed(req, true))
 			return 0;
 
-		if (time_after_eq(jiffies, timeout))
+		if (signal_pending_state(state, current))
+			break;
+
+		if (busywait_stop(timeout, cpu))
 			break;
 
 		cpu_relax_lowlatency();
 	}
+
 	if (i915_gem_request_completed(req, false))
 		return 0;
 
@@ -1197,6 +1248,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
 	s64 before, now;
@@ -1229,7 +1281,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	before = ktime_get_raw_ns();
 
 	/* Optimistic spin for the next jiffie before touching IRQs */
-	ret = __i915_spin_request(req);
+	ret = __i915_spin_request(req, state);
 	if (ret == 0)
 		goto out;
 
@@ -1241,8 +1293,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	for (;;) {
 		struct timer_list timer;
 
-		prepare_to_wait(&ring->irq_queue, &wait,
-				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(&ring->irq_queue, &wait, state);
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
@@ -1260,7 +1311,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 			break;
 		}
 
-		if (interruptible && signal_pending(current)) {
+		if (signal_pending_state(state, current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
@@ -2554,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	request->batch_obj = obj;
 
 	request->emitted_jiffies = jiffies;
+	request->previous_seqno = ring->last_submitted_seqno;
 	ring->last_submitted_seqno = request->seqno;
 	list_add_tail(&request->list, &ring->request_list);
 
@@ -4080,6 +4132,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	return false;
 }
 
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	bool mappable, fenceable;
+	u32 fence_size, fence_alignment;
+
+	fence_size = i915_gem_get_gtt_size(obj->base.dev,
+					   obj->base.size,
+					   obj->tiling_mode);
+	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+						     obj->base.size,
+						     obj->tiling_mode,
+						     true);
+
+	fenceable = (vma->node.size == fence_size &&
+		     (vma->node.start & (fence_alignment - 1)) == 0);
+
+	mappable = (vma->node.start + fence_size <=
+		    to_i915(obj->base.dev)->gtt.mappable_end);
+
+	obj->map_and_fenceable = mappable && fenceable;
+}
+
 static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
@@ -4147,25 +4222,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 	    (bound ^ vma->bound) & GLOBAL_BIND) {
-		bool mappable, fenceable;
-		u32 fence_size, fence_alignment;
-
-		fence_size = i915_gem_get_gtt_size(obj->base.dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-
-		fenceable = (vma->node.size == fence_size &&
-			     (vma->node.start & (fence_alignment - 1)) == 0);
-
-		mappable = (vma->node.start + fence_size <=
-			    dev_priv->gtt.mappable_end);
-
-		obj->map_and_fenceable = mappable && fenceable;
-
+		__i915_vma_set_map_and_fenceable(vma);
 		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 	}
 

+ 1 - 0
drivers/gpu/drm/i915/i915_gem_gtt.c

@@ -2676,6 +2676,7 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
 			return ret;
 		}
 		vma->bound |= GLOBAL_BIND;
+		__i915_vma_set_map_and_fenceable(vma);
 		list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
 	}
 

Some files were not shown because too many files changed in this diff