
Merge 4.9-rc5 into char-misc-next

We want those fixes in here as well.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Greg Kroah-Hartman 8 years ago
parent
commit
b7d91c9152
100 files changed, 1361 insertions(+), 816 deletions(-)
  1. +2 -2		Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
  2. +5 -0		Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
  3. +1 -0		Documentation/devicetree/bindings/net/marvell-orion-net.txt
  4. +8 -3		Documentation/devicetree/bindings/pci/rockchip-pcie.txt
  5. +5 -5		Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
  6. +0 -1		Documentation/filesystems/Locking
  7. +0 -1		Documentation/filesystems/vfs.txt
  8. +4 -4		Documentation/networking/netdev-FAQ.txt
  9. +0 -18	Documentation/networking/nf_conntrack-sysctl.txt
  10. +11 -1	Documentation/virtual/kvm/locking.txt
  11. +40 -20	MAINTAINERS
  12. +7 -5	Makefile
  13. +6 -1	arch/arc/Makefile
  14. +1 -1	arch/arc/boot/dts/axc001.dtsi
  15. +1 -1	arch/arc/boot/dts/nsim_700.dts
  16. +4 -0	arch/arc/boot/dts/nsimosci.dts
  17. +1 -0	arch/arc/configs/nsim_700_defconfig
  18. +1 -0	arch/arc/configs/nsim_hs_defconfig
  19. +1 -0	arch/arc/configs/nsim_hs_smp_defconfig
  20. +1 -0	arch/arc/configs/nsimosci_defconfig
  21. +1 -0	arch/arc/configs/nsimosci_hs_defconfig
  22. +1 -2	arch/arc/configs/nsimosci_hs_smp_defconfig
  23. +2 -0	arch/arc/include/asm/arcregs.h
  24. +2 -2	arch/arc/include/asm/smp.h
  25. +2 -0	arch/arc/kernel/devtree.c
  26. +20 -12	arch/arc/kernel/mcip.c
  27. +11 -9	arch/arc/kernel/process.c
  28. +15 -8	arch/arc/kernel/smp.c
  29. +11 -8	arch/arc/kernel/time.c
  30. +26 -0	arch/arc/mm/dma.c
  31. +0 -6	arch/arc/plat-eznps/smp.c
  32. +1 -0	arch/arm/include/asm/kvm_asm.h
  33. +3 -0	arch/arm/include/asm/kvm_host.h
  34. +1 -0	arch/arm/include/asm/kvm_hyp.h
  35. +1 -1	arch/arm/include/asm/unistd.h
  36. +3 -0	arch/arm/include/uapi/asm/unistd.h
  37. +3 -0	arch/arm/kernel/calls.S
  38. +26 -1	arch/arm/kvm/arm.c
  39. +15 -0	arch/arm/kvm/hyp/tlb.c
  40. +24 -10	arch/arm/mm/abort-lv4t.S
  41. +5 -2	arch/arm64/boot/dts/rockchip/rk3399.dtsi
  42. +1 -1	arch/arm64/include/asm/alternative.h
  43. +40 -0	arch/arm64/include/asm/cpucaps.h
  44. +1 -19	arch/arm64/include/asm/cpufeature.h
  45. +1 -0	arch/arm64/include/asm/kvm_asm.h
  46. +3 -0	arch/arm64/include/asm/kvm_host.h
  47. +1 -1	arch/arm64/include/asm/kvm_mmu.h
  48. +0 -1	arch/arm64/include/asm/lse.h
  49. +15 -0	arch/arm64/kvm/hyp/tlb.c
  50. +1 -1	arch/mips/Makefile
  51. +2 -1	arch/mips/boot/dts/mti/malta.dts
  52. +10 -6	arch/mips/generic/init.c
  53. +13 -0	arch/mips/include/asm/fpu_emulator.h
  54. +4 -3	arch/mips/include/asm/kvm_host.h
  55. +18 -0	arch/mips/include/asm/switch_to.h
  56. +10 -1	arch/mips/kernel/mips-cpc.c
  57. +5 -5	arch/mips/kernel/mips-r2-to-r6-emul.c
  58. +4 -4	arch/mips/kernel/ptrace.c
  59. +58 -80	arch/mips/kernel/r2300_fpu.S
  60. +48 -41	arch/mips/kernel/r6000_fpu.S
  61. +1 -1	arch/mips/kernel/relocate.c
  62. +13 -0	arch/mips/kernel/setup.c
  63. +73 -64	arch/mips/kernel/traps.c
  64. +19 -13	arch/mips/kvm/emulate.c
  65. +4 -1	arch/mips/kvm/mips.c
  66. +0 -4	arch/mips/kvm/mmu.c
  67. +22 -22	arch/mips/lib/dump_tlb.c
  68. +9 -9	arch/mips/lib/r3k_dump_tlb.c
  69. +1 -0	arch/nios2/kernel/time.c
  70. +2 -0	arch/openrisc/include/asm/cache.h
  71. +3 -1	arch/parisc/include/uapi/asm/unistd.h
  72. +3 -3	arch/parisc/kernel/drivers.c
  73. +34 -32	arch/parisc/kernel/syscall.S
  74. +4 -8	arch/powerpc/include/asm/checksum.h
  75. +3 -3	arch/s390/hypfs/hypfs_diag.c
  76. +2 -0	arch/s390/kernel/vmlinux.lds.S
  77. +2 -2	arch/s390/kvm/sthyi.c
  78. +1 -1	arch/s390/pci/pci_dma.c
  79. +3 -2	arch/sparc/include/asm/cpudata_64.h
  80. +1 -1	arch/sparc/include/asm/spinlock_32.h
  81. +6 -6	arch/sparc/include/asm/spinlock_64.h
  82. +7 -1	arch/sparc/include/asm/topology_64.h
  83. +3 -25	arch/sparc/include/asm/uaccess_64.h
  84. +0 -37	arch/sparc/kernel/head_64.S
  85. +17 -6	arch/sparc/kernel/jump_label.c
  86. +28 -18	arch/sparc/kernel/mdesc.c
  87. +8 -0	arch/sparc/kernel/smp_64.c
  88. +2 -2	arch/sparc/lib/GENcopy_from_user.S
  89. +2 -2	arch/sparc/lib/GENcopy_to_user.S
  90. +34 -14	arch/sparc/lib/GENmemcpy.S
  91. +1 -1	arch/sparc/lib/Makefile
  92. +4 -4	arch/sparc/lib/NG2copy_from_user.S
  93. +4 -4	arch/sparc/lib/NG2copy_to_user.S
  94. +145 -83	arch/sparc/lib/NG2memcpy.S
  95. +4 -4	arch/sparc/lib/NG4copy_from_user.S
  96. +4 -4	arch/sparc/lib/NG4copy_to_user.S
  97. +223 -71	arch/sparc/lib/NG4memcpy.S
  98. +2 -2	arch/sparc/lib/NGcopy_from_user.S
  99. +2 -2	arch/sparc/lib/NGcopy_to_user.S
  100. +158 -75	arch/sparc/lib/NGmemcpy.S

+ 2 - 2
Documentation/ABI/testing/sysfs-devices-system-ibm-rtl

@@ -1,4 +1,4 @@
-What:           state
+What:           /sys/devices/system/ibm_rtl/state
 Date:           Sep 2010
 KernelVersion:  2.6.37
 Contact:        Vernon Mauery <vernux@us.ibm.com>
@@ -10,7 +10,7 @@ Description:    The state file allows a means by which to change in and
 Users:          The ibm-prtm userspace daemon uses this interface.


-What:           version
+What:           /sys/devices/system/ibm_rtl/version
 Date:           Sep 2010
 KernelVersion:  2.6.37
 Contact:        Vernon Mauery <vernux@us.ibm.com>

+ 5 - 0
Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt

@@ -43,6 +43,9 @@ Optional properties:
   reset signal present internally in some host controller IC designs.
   See Documentation/devicetree/bindings/reset/reset.txt for details.

+* reset-names: request name for using "resets" property. Must be "reset".
+	(It will be used together with "resets" property.)
+
 * clocks: from common clock binding: handle to biu and ciu clocks for the
   bus interface unit clock and the card interface unit clock.

@@ -103,6 +106,8 @@ board specific portions as listed below.
 		interrupts = <0 75 0>;
 		#address-cells = <1>;
 		#size-cells = <0>;
+		resets = <&rst 20>;
+		reset-names = "reset";
 	};

 [board specific internal DMA resources]

+ 1 - 0
Documentation/devicetree/bindings/net/marvell-orion-net.txt

@@ -49,6 +49,7 @@ Optional port properties:
 and

  - phy-handle: See ethernet.txt file in the same directory.
+ - phy-mode: See ethernet.txt file in the same directory.

 or


+ 8 - 3
Documentation/devicetree/bindings/pci/rockchip-pcie.txt

@@ -26,13 +26,16 @@ Required properties:
 	- "sys"
 	- "legacy"
 	- "client"
-- resets: Must contain five entries for each entry in reset-names.
+- resets: Must contain seven entries for each entry in reset-names.
 	   See ../reset/reset.txt for details.
 - reset-names: Must include the following names
 	- "core"
 	- "mgmt"
 	- "mgmt-sticky"
 	- "pipe"
+	- "pm"
+	- "aclk"
+	- "pclk"
 - pinctrl-names : The pin control state names
 - pinctrl-0: The "default" pinctrl state
 - #interrupt-cells: specifies the number of cells needed to encode an
@@ -86,8 +89,10 @@ pcie0: pcie@f8000000 {
 	reg = <0x0 0xf8000000 0x0 0x2000000>, <0x0 0xfd000000 0x0 0x1000000>;
 	reg-names = "axi-base", "apb-base";
 	resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>,
-		 <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>;
-	reset-names = "core", "mgmt", "mgmt-sticky", "pipe";
+		 <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE> ,
+		 <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>, <&cru SRST_A_PCIE>;
+	reset-names = "core", "mgmt", "mgmt-sticky", "pipe",
+		      "pm", "pclk", "aclk";
 	phys = <&pcie_phy>;
 	phy-names = "pcie-phy";
 	pinctrl-names = "default";

+ 5 - 5
Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt

@@ -14,11 +14,6 @@ Required properies:
 - #size-cells	: The value of this property must be 1
 - ranges	: defines mapping between pin controller node (parent) to
   gpio-bank node (children).
- - interrupt-parent: phandle of the interrupt parent to which the external
-   GPIO interrupts are forwarded to.
- - st,syscfg: Should be phandle/offset pair. The phandle to the syscon node
-   which includes IRQ mux selection register, and the offset of the IRQ mux
-   selection register.
 - pins-are-numbered: Specify the subnodes are using numbered pinmux to
   specify pins.

@@ -37,6 +32,11 @@ Required properties:

 Optional properties:
 - reset:	  : Reference to the reset controller
+ - interrupt-parent: phandle of the interrupt parent to which the external
+   GPIO interrupts are forwarded to.
+ - st,syscfg: Should be phandle/offset pair. The phandle to the syscon node
+   which includes IRQ mux selection register, and the offset of the IRQ mux
+   selection register.

 Example:
 #include <dt-bindings/pinctrl/stm32f429-pinfunc.h>

+ 0 - 1
Documentation/filesystems/Locking

@@ -447,7 +447,6 @@ prototypes:
 	int (*flush) (struct file *);
 	int (*release) (struct inode *, struct file *);
 	int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
-	int (*aio_fsync) (struct kiocb *, int datasync);
 	int (*fasync) (int, struct file *, int);
 	int (*lock) (struct file *, int, struct file_lock *);
 	ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,

+ 0 - 1
Documentation/filesystems/vfs.txt

@@ -828,7 +828,6 @@ struct file_operations {
 	int (*flush) (struct file *, fl_owner_t id);
 	int (*release) (struct inode *, struct file *);
 	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
-	int (*aio_fsync) (struct kiocb *, int datasync);
 	int (*fasync) (int, struct file *, int);
 	int (*lock) (struct file *, int, struct file_lock *);
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);

+ 4 - 4
Documentation/networking/netdev-FAQ.txt

@@ -29,8 +29,8 @@ A: There are always two trees (git repositories) in play.  Both are driven
    Linus, and net-next is where the new code goes for the future release.
    You can find the trees here:

-	http://git.kernel.org/?p=linux/kernel/git/davem/net.git
-	http://git.kernel.org/?p=linux/kernel/git/davem/net-next.git
+        https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+        https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git

 Q: How often do changes from these trees make it to the mainline Linus tree?

@@ -76,7 +76,7 @@ Q: So where are we now in this cycle?

 A: Load the mainline (Linus) page here:

-	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git
+	https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

    and note the top of the "tags" section.  If it is rc1, it is early
    in the dev cycle.  If it was tagged rc7 a week ago, then a release
@@ -123,7 +123,7 @@ A: Normally Greg Kroah-Hartman collects stable commits himself, but

    It contains the patches which Dave has selected, but not yet handed
    off to Greg.  If Greg already has the patch, then it will be here:
-	http://git.kernel.org/cgit/linux/kernel/git/stable/stable-queue.git
+	https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git

    A quick way to find whether the patch is in this stable-queue is
    to simply clone the repo, and then git grep the mainline commit ID, e.g.

+ 0 - 18
Documentation/networking/nf_conntrack-sysctl.txt

@@ -33,24 +33,6 @@ nf_conntrack_events - BOOLEAN
 	If this option is enabled, the connection tracking code will
 	provide userspace with connection tracking events via ctnetlink.

-nf_conntrack_events_retry_timeout - INTEGER (seconds)
-	default 15
-
-	This option is only relevant when "reliable connection tracking
-	events" are used.  Normally, ctnetlink is "lossy", that is,
-	events are normally dropped when userspace listeners can't keep up.
-
-	Userspace can request "reliable event mode".  When this mode is
-	active, the conntrack will only be destroyed after the event was
-	delivered.  If event delivery fails, the kernel periodically
-	re-tries to send the event to userspace.
-
-	This is the maximum interval the kernel should use when re-trying
-	to deliver the destroy event.
-
-	A higher number means there will be fewer delivery retries and it
-	will take longer for a backlog to be processed.
-
 nf_conntrack_expect_max - INTEGER
 	Maximum size of expectation table.  Default value is
 	nf_conntrack_buckets / 256. Minimum is 1.

+ 11 - 1
Documentation/virtual/kvm/locking.txt

@@ -4,7 +4,17 @@ KVM Lock Overview
 1. Acquisition Orders
 ---------------------

-(to be written)
+The acquisition orders for mutexes are as follows:
+
+- kvm->lock is taken outside vcpu->mutex
+
+- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
+
+- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
+  them together is quite rare.
+
+For spinlocks, kvm_lock is taken outside kvm->mmu_lock.  Everything
+else is a leaf: no other lock is taken inside the critical sections.

 2: Exception
 ------------

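Read as code, the ordering rules added in the hunk above translate into a fixed nesting of mutex_lock() calls. A minimal sketch of a conforming call site follows; the helper function is hypothetical, while the locks are the kvm and vcpu members named in the new text:

	/*
	 * Illustrative only: a hypothetical helper that takes the documented
	 * mutexes in the required order (kvm->lock outermost, then
	 * vcpu->mutex; kvm->slots_lock before kvm->irq_lock).
	 */
	static void example_reconfigure(struct kvm *kvm, struct kvm_vcpu *vcpu)
	{
		mutex_lock(&kvm->lock);		/* outermost */
		mutex_lock(&vcpu->mutex);	/* inside kvm->lock */
		mutex_lock(&kvm->slots_lock);	/* inside kvm->lock */
		mutex_lock(&kvm->irq_lock);	/* innermost; rarely held together */

		/* ... update memslots / irq routing / vcpu state ... */

		mutex_unlock(&kvm->irq_lock);
		mutex_unlock(&kvm->slots_lock);
		mutex_unlock(&vcpu->mutex);
		mutex_unlock(&kvm->lock);
	}

Unlocking order is shown as the reverse of locking; only the acquisition order is what the documentation actually constrains.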
+ 40 - 20
MAINTAINERS

@@ -2552,15 +2552,18 @@ S:	Supported
 F:	drivers/net/ethernet/broadcom/genet/

 BROADCOM BNX2 GIGABIT ETHERNET DRIVER
-M:	Sony Chacko <sony.chacko@qlogic.com>
-M:	Dept-HSGLinuxNICDev@qlogic.com
+M:	Rasesh Mody <rasesh.mody@cavium.com>
+M:	Harish Patil <harish.patil@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/broadcom/bnx2.*
 F:	drivers/net/ethernet/broadcom/bnx2_*

 BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
-M:	Ariel Elior <ariel.elior@qlogic.com>
+M:	Yuval Mintz <Yuval.Mintz@cavium.com>
+M:	Ariel Elior <ariel.elior@cavium.com>
+M:	everest-linux-l2@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/broadcom/bnx2x/
@@ -2767,7 +2770,9 @@ S:	Supported
 F:	drivers/scsi/bfa/

 BROCADE BNA 10 GIGABIT ETHERNET DRIVER
-M:	Rasesh Mody <rasesh.mody@qlogic.com>
+M:	Rasesh Mody <rasesh.mody@cavium.com>
+M:	Sudarsana Kalluru <sudarsana.kalluru@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/brocade/bna/
@@ -7926,6 +7931,10 @@ F:	mm/
 MEMORY TECHNOLOGY DEVICES (MTD)
 M:	David Woodhouse <dwmw2@infradead.org>
 M:	Brian Norris <computersforpeace@gmail.com>
+M:	Boris Brezillon <boris.brezillon@free-electrons.com>
+M:	Marek Vasut <marek.vasut@gmail.com>
+M:	Richard Weinberger <richard@nod.at>
+M:	Cyrille Pitchen <cyrille.pitchen@atmel.com>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
@@ -8523,11 +8532,10 @@ F:	Documentation/devicetree/bindings/net/wireless/
 F:	drivers/net/wireless/

 NETXEN (1/10) GbE SUPPORT
-M:	Manish Chopra <manish.chopra@qlogic.com>
-M:	Sony Chacko <sony.chacko@qlogic.com>
-M:	Rajesh Borundia <rajesh.borundia@qlogic.com>
+M:	Manish Chopra <manish.chopra@cavium.com>
+M:	Rahul Verma <rahul.verma@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
-W:	http://www.qlogic.com
 S:	Supported
 F:	drivers/net/ethernet/qlogic/netxen/

@@ -9333,7 +9341,7 @@ PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD)
 M:	Keith Busch <keith.busch@intel.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
-F:	arch/x86/pci/vmd.c
+F:	drivers/pci/host/vmd.c

 PCIE DRIVER FOR ST SPEAR13XX
 M:	Pratyush Anand <pratyush.anand@gmail.com>
@@ -9903,33 +9911,32 @@ F:	Documentation/scsi/LICENSE.qla4xxx
 F:	drivers/scsi/qla4xxx/

 QLOGIC QLA3XXX NETWORK DRIVER
-M:	Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
-M:	Ron Mercer <ron.mercer@qlogic.com>
-M:	linux-driver@qlogic.com
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	Documentation/networking/LICENSE.qla3xxx
 F:	drivers/net/ethernet/qlogic/qla3xxx.*

 QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
-M:	Dept-GELinuxNICDev@qlogic.com
+M:	Harish Patil <harish.patil@cavium.com>
+M:	Manish Chopra <manish.chopra@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/qlogic/qlcnic/

 QLOGIC QLGE 10Gb ETHERNET DRIVER
-M:	Harish Patil <harish.patil@qlogic.com>
-M:	Sudarsana Kalluru <sudarsana.kalluru@qlogic.com>
-M:	Dept-GELinuxNICDev@qlogic.com
-M:	linux-driver@qlogic.com
+M:	Harish Patil <harish.patil@cavium.com>
+M:	Manish Chopra <manish.chopra@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/qlogic/qlge/

 QLOGIC QL4xxx ETHERNET DRIVER
-M:	Yuval Mintz <Yuval.Mintz@qlogic.com>
-M:	Ariel Elior <Ariel.Elior@qlogic.com>
-M:	everest-linux-l2@qlogic.com
+M:	Yuval Mintz <Yuval.Mintz@cavium.com>
+M:	Ariel Elior <Ariel.Elior@cavium.com>
+M:	everest-linux-l2@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/qlogic/qed/
@@ -11412,6 +11419,17 @@ W:	http://www.st.com/spear
 S:	Maintained
 F:	drivers/clk/spear/

+SPI NOR SUBSYSTEM
+M:	Cyrille Pitchen <cyrille.pitchen@atmel.com>
+M:	Marek Vasut <marek.vasut@gmail.com>
+L:	linux-mtd@lists.infradead.org
+W:	http://www.linux-mtd.infradead.org/
+Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
+T:	git git://github.com/spi-nor/linux.git
+S:	Maintained
+F:	drivers/mtd/spi-nor/
+F:	include/linux/mtd/spi-nor.h
+
 SPI SUBSYSTEM
 M:	Mark Brown <broonie@kernel.org>
 L:	linux-spi@vger.kernel.org
@@ -12791,6 +12809,7 @@ F:	include/uapi/linux/virtio_console.h

 VIRTIO CORE, NET AND BLOCK DRIVERS
 M:	"Michael S. Tsirkin" <mst@redhat.com>
+M:	Jason Wang <jasowang@redhat.com>
 L:	virtualization@lists.linux-foundation.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/virtio/
@@ -12821,6 +12840,7 @@ F:	include/uapi/linux/virtio_gpu.h

 VIRTIO HOST (VHOST)
 M:	"Michael S. Tsirkin" <mst@redhat.com>
+M:	Jason Wang <jasowang@redhat.com>
 L:	kvm@vger.kernel.org
 L:	virtualization@lists.linux-foundation.org
 L:	netdev@vger.kernel.org

+ 7 - 5
Makefile

@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 9
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc5
 NAME = Psychotic Stoned Sheep

 # *DOCUMENTATION*
@@ -370,7 +370,7 @@ LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
 LDFLAGS_vmlinux =
-CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
 CFLAGS_KCOV	:= $(call cc-option,-fsanitize-coverage=trace-pc,)


@@ -620,7 +620,6 @@ ARCH_CFLAGS :=
 include arch/$(SRCARCH)/Makefile

 KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
-KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)

 ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
@@ -629,15 +628,18 @@ KBUILD_CFLAGS	+= $(call cc-option,-fdata-sections,)
 endif

 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-KBUILD_CFLAGS	+= -Os
+KBUILD_CFLAGS	+= -Os $(call cc-disable-warning,maybe-uninitialized,)
 else
 ifdef CONFIG_PROFILE_ALL_BRANCHES
-KBUILD_CFLAGS	+= -O2
+KBUILD_CFLAGS	+= -O2 $(call cc-disable-warning,maybe-uninitialized,)
 else
 KBUILD_CFLAGS   += -O2
 endif
 endif

+KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \
+			$(call cc-disable-warning,maybe-uninitialized,))
+
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS	+= $(call cc-option,--param=allow-store-data-races=0)


+ 6 - 1
arch/arc/Makefile

@@ -50,6 +50,9 @@ atleast_gcc44 :=  $(call cc-ifversion, -ge, 0404, y)

 cflags-$(atleast_gcc44)			+= -fsection-anchors

+cflags-$(CONFIG_ARC_HAS_LLSC)		+= -mlock
+cflags-$(CONFIG_ARC_HAS_SWAPE)		+= -mswape
+
 ifdef CONFIG_ISA_ARCV2

 ifndef CONFIG_ARC_HAS_LL64
@@ -68,7 +71,9 @@ cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -fasynchronous-unwind-tables $(cfi)
 ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
 # Generic build system uses -O2, we want -O3
 # Note: No need to add to cflags-y as that happens anyways
-ARCH_CFLAGS += -O3
+#
+# Disable the false maybe-uninitialized warnings gcc spits out at -O3
+ARCH_CFLAGS += -O3 $(call cc-disable-warning,maybe-uninitialized,)
 endif

 # small data is default for elf32 tool-chain. If not usable, disable it

+ 1 - 1
arch/arc/boot/dts/axc001.dtsi

@@ -71,7 +71,7 @@
 			reg-io-width = <4>;
 		};

-		arcpmu0: pmu {
+		arcpct0: pct {
 			compatible = "snps,arc700-pct";
 		};
 	};

+ 1 - 1
arch/arc/boot/dts/nsim_700.dts

@@ -69,7 +69,7 @@
 			};
 		};

-		arcpmu0: pmu {
+		arcpct0: pct {
 			compatible = "snps,arc700-pct";
 		};
 	};

+ 4 - 0
arch/arc/boot/dts/nsimosci.dts

@@ -83,5 +83,9 @@
 			reg = <0xf0003000 0x44>;
 			interrupts = <7>;
 		};
+
+		arcpct0: pct {
+			compatible = "snps,arc700-pct";
+		};
 	};
 };

+ 1 - 0
arch/arc/configs/nsim_700_defconfig

@@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y

+ 1 - 0
arch/arc/configs/nsim_hs_defconfig

@@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y

+ 1 - 0
arch/arc/configs/nsim_hs_smp_defconfig

@@ -12,6 +12,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y

+ 1 - 0
arch/arc/configs/nsimosci_defconfig

@@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y

+ 1 - 0
arch/arc/configs/nsimosci_hs_defconfig

@@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y

+ 1 - 2
arch/arc/configs/nsimosci_hs_smp_defconfig

@@ -10,6 +10,7 @@ CONFIG_IKCONFIG_PROC=y
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
@@ -34,7 +35,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
@@ -72,7 +72,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_DRM=y
 CONFIG_DRM_ARCPGU=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set

+ 2 - 0
arch/arc/include/asm/arcregs.h

@@ -43,12 +43,14 @@
 #define STATUS_AE_BIT		5	/* Exception active */
 #define STATUS_DE_BIT		6	/* PC is in delay slot */
 #define STATUS_U_BIT		7	/* User/Kernel mode */
+#define STATUS_Z_BIT		11	/* Zero flag */
 #define STATUS_L_BIT		12	/* Loop inhibit */

 /* These masks correspond to the status word(STATUS_32) bits */
 #define STATUS_AE_MASK		(1<<STATUS_AE_BIT)
 #define STATUS_DE_MASK		(1<<STATUS_DE_BIT)
 #define STATUS_U_MASK		(1<<STATUS_U_BIT)
+#define STATUS_Z_MASK		(1<<STATUS_Z_BIT)
 #define STATUS_L_MASK		(1<<STATUS_L_BIT)

 /*

+ 2 - 2
arch/arc/include/asm/smp.h

@@ -37,9 +37,9 @@ extern const char *arc_platform_smp_cpuinfo(void);
  * API expected BY platform smp code (FROM arch smp code)
  *
  * smp_ipi_irq_setup:
- *	Takes @cpu and @irq to which the arch-common ISR is hooked up
+ *	Takes @cpu and @hwirq to which the arch-common ISR is hooked up
  */
-extern int smp_ipi_irq_setup(int cpu, int irq);
+extern int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq);

 /*
  * struct plat_smp_ops	- SMP callbacks provided by platform to ARC SMP

+ 2 - 0
arch/arc/kernel/devtree.c

@@ -31,6 +31,8 @@ static void __init arc_set_early_base_baud(unsigned long dt_root)
 		arc_base_baud = 166666666;	/* Fixed 166.6MHz clk (TB10x) */
 	else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp"))
 		arc_base_baud = 33333333;	/* Fixed 33MHz clk (AXS10x) */
+	else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps"))
+		arc_base_baud = 800000000;	/* Fixed 800MHz clk (NPS) */
 	else
 		arc_base_baud = 50000000;	/* Fixed default 50MHz */
 }

+ 20 - 12
arch/arc/kernel/mcip.c

@@ -181,6 +181,8 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
 {
 	unsigned long flags;
 	cpumask_t online;
+	unsigned int destination_bits;
+	unsigned int distribution_mode;

 	/* errout if no online cpu per @cpumask */
 	if (!cpumask_and(&online, cpumask, cpu_online_mask))
@@ -188,8 +190,15 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,

 	raw_spin_lock_irqsave(&mcip_lock, flags);

-	idu_set_dest(data->hwirq, cpumask_bits(&online)[0]);
-	idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
+	destination_bits = cpumask_bits(&online)[0];
+	idu_set_dest(data->hwirq, destination_bits);
+
+	if (ffs(destination_bits) == fls(destination_bits))
+		distribution_mode = IDU_M_DISTRI_DEST;
+	else
+		distribution_mode = IDU_M_DISTRI_RR;
+
+	idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, distribution_mode);

 	raw_spin_unlock_irqrestore(&mcip_lock, flags);

@@ -207,16 +216,15 @@ static struct irq_chip idu_irq_chip = {

 };

-static int idu_first_irq;
+static irq_hw_number_t idu_first_hwirq;

 static void idu_cascade_isr(struct irq_desc *desc)
 {
-	struct irq_domain *domain = irq_desc_get_handler_data(desc);
-	unsigned int core_irq = irq_desc_get_irq(desc);
-	unsigned int idu_irq;
+	struct irq_domain *idu_domain = irq_desc_get_handler_data(desc);
+	irq_hw_number_t core_hwirq = irqd_to_hwirq(irq_desc_get_irq_data(desc));
+	irq_hw_number_t idu_hwirq = core_hwirq - idu_first_hwirq;

-	idu_irq = core_irq - idu_first_irq;
-	generic_handle_irq(irq_find_mapping(domain, idu_irq));
+	generic_handle_irq(irq_find_mapping(idu_domain, idu_hwirq));
 }

 static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
@@ -282,7 +290,7 @@ idu_of_init(struct device_node *intc, struct device_node *parent)
 	struct irq_domain *domain;
 	/* Read IDU BCR to confirm nr_irqs */
 	int nr_irqs = of_irq_count(intc);
-	int i, irq;
+	int i, virq;
 	struct mcip_bcr mp;

 	READ_BCR(ARC_REG_MCIP_BCR, mp);
@@ -303,11 +311,11 @@ idu_of_init(struct device_node *intc, struct device_node *parent)
 		 * however we need it to get the parent virq and set IDU handler
 		 * as first level isr
 		 */
-		irq = irq_of_parse_and_map(intc, i);
+		virq = irq_of_parse_and_map(intc, i);
 		if (!i)
-			idu_first_irq = irq;
+			idu_first_hwirq = irqd_to_hwirq(irq_get_irq_data(virq));

-		irq_set_chained_handler_and_data(irq, idu_cascade_isr, domain);
+		irq_set_chained_handler_and_data(virq, idu_cascade_isr, domain);
 	}

 	__mcip_cmd(CMD_IDU_ENABLE, 0);

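The new ffs()/fls() comparison in idu_irq_set_affinity() is a compact single-bit test: the lowest and highest set bits of the affinity mask coincide exactly when one CPU is selected, in which case the IDU can use direct-destination mode instead of round-robin. The same test, restated as a freestanding C program (the helper name is hypothetical):

	#include <stdio.h>

	/*
	 * ffs() returns the lowest set bit (1-based), fls() the highest;
	 * they agree exactly when a single bit, i.e. a single destination
	 * CPU, is set.
	 */
	static int is_single_cpu(unsigned int mask)
	{
		int ffs_bit = __builtin_ffs(mask);			/* lowest set bit  */
		int fls_bit = mask ? 32 - __builtin_clz(mask) : 0;	/* highest set bit */

		return mask && ffs_bit == fls_bit;
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       is_single_cpu(0x4),	/* 1: only CPU 2   */
		       is_single_cpu(0x6),	/* 0: CPUs 1 and 2 */
		       is_single_cpu(0x0));	/* 0: no CPU       */
		return 0;
	}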
+ 11 - 9
arch/arc/kernel/process.c

@@ -43,8 +43,8 @@ SYSCALL_DEFINE0(arc_gettls)

 SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
 {
-	int uval;
-	int ret;
+	struct pt_regs *regs = current_pt_regs();
+	int uval = -EFAULT;

 	/*
 	 * This is only for old cores lacking LLOCK/SCOND, which by definition
@@ -54,24 +54,26 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
 	 */
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));

+	/* Z indicates to userspace if operation succeeded */
+	regs->status32 &= ~STATUS_Z_MASK;
+
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;

 	preempt_disable();

-	ret = __get_user(uval, uaddr);
-	if (ret)
+	if (__get_user(uval, uaddr))
 		goto done;

-	if (uval != expected)
-		ret = -EAGAIN;
-	else
-		ret = __put_user(new, uaddr);
+	if (uval == expected) {
+		if (!__put_user(new, uaddr))
+			regs->status32 |= STATUS_Z_MASK;
+	}

 done:
 	preempt_enable();

-	return ret;
+	return uval;
 }

 void arch_cpu_idle(void)

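The reworked syscall reports its outcome in two redundant ways: the Z flag in the saved status32, and the old value returned in r0. A hedged sketch of a userspace shim built only on the return value; the syscall number below is a placeholder, not the real one:

	#include <unistd.h>
	#include <sys/syscall.h>

	#ifndef __NR_arc_usr_cmpxchg
	#define __NR_arc_usr_cmpxchg 0	/* placeholder: take the real number from the uapi headers */
	#endif

	/*
	 * The kernel returns the value it read from *uaddr; barring a fault
	 * on the store, reading back `expected` means the swap took place
	 * (asm callers can test the Z flag directly instead).
	 */
	static int user_cmpxchg(int *uaddr, int expected, int new_val)
	{
		return (int)syscall(__NR_arc_usr_cmpxchg, uaddr, expected, new_val);
	}

	/* classic compare-and-swap retry loop built on top of it */
	static void atomic_add(int *counter, int delta)
	{
		int old;

		do {
			old = *counter;
		} while (user_cmpxchg(counter, old, old + delta) != old);
	}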
+ 15 - 8
arch/arc/kernel/smp.c

@@ -22,6 +22,7 @@
 #include <linux/atomic.h>
 #include <linux/cpumask.h>
 #include <linux/reboot.h>
+#include <linux/irqdomain.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/mach_desc.h>
@@ -67,11 +68,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	int i;

 	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
+	 * if platform didn't set the present map already, do it now
+	 * boot cpu is set to present already by init/main.c
 	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
+	if (num_present_cpus() <= 1) {
+		for (i = 0; i < max_cpus; i++)
+			set_cpu_present(i, true);
+	}
 }

 void __init smp_cpus_done(unsigned int max_cpus)
@@ -351,20 +354,24 @@ irqreturn_t do_IPI(int irq, void *dev_id)
  */
 static DEFINE_PER_CPU(int, ipi_dev);

-int smp_ipi_irq_setup(int cpu, int irq)
+int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq)
 {
 	int *dev = per_cpu_ptr(&ipi_dev, cpu);
+	unsigned int virq = irq_find_mapping(NULL, hwirq);
+
+	if (!virq)
+		panic("Cannot find virq for root domain and hwirq=%lu", hwirq);

 	/* Boot cpu calls request, all call enable */
 	if (!cpu) {
 		int rc;

-		rc = request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev);
+		rc = request_percpu_irq(virq, do_IPI, "IPI Interrupt", dev);
 		if (rc)
-			panic("Percpu IRQ request failed for %d\n", irq);
+			panic("Percpu IRQ request failed for %u\n", virq);
 	}

-	enable_percpu_irq(irq, 0);
+	enable_percpu_irq(virq, 0);

 	return 0;
 }

+ 11 - 8
arch/arc/kernel/time.c

@@ -152,14 +152,17 @@ static cycle_t arc_read_rtc(struct clocksource *cs)
 		cycle_t  full;
 	} stamp;

-
-	__asm__ __volatile(
-	"1:						\n"
-	"	lr		%0, [AUX_RTC_LOW]	\n"
-	"	lr		%1, [AUX_RTC_HIGH]	\n"
-	"	lr		%2, [AUX_RTC_CTRL]	\n"
-	"	bbit0.nt	%2, 31, 1b		\n"
-	: "=r" (stamp.low), "=r" (stamp.high), "=r" (status));
+	/*
+	 * hardware has an internal state machine which tracks readout of
+	 * low/high and updates the CTRL.status if
+	 *  - interrupt/exception taken between the two reads
+	 *  - high increments after low has been read
+	 */
+	do {
+		stamp.low = read_aux_reg(AUX_RTC_LOW);
+		stamp.high = read_aux_reg(AUX_RTC_HIGH);
+		status = read_aux_reg(AUX_RTC_CTRL);
+	} while (!(status & _BITUL(31)));

 	return stamp.full;
 }

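The rewritten reader above is the standard retry pattern for a 64-bit counter exposed as two 32-bit halves plus a hardware consistency bit. The same pattern restated as freestanding C; the accessor functions are hypothetical stand-ins for read_aux_reg():

	#include <stdint.h>

	/* hypothetical stand-ins for read_aux_reg(AUX_RTC_*) */
	extern uint32_t rtc_low(void);
	extern uint32_t rtc_high(void);
	extern uint32_t rtc_ctrl(void);	/* bit 31 set => low/high were consistent */

	static uint64_t rtc_read64(void)
	{
		uint32_t lo, hi, status;

		do {
			lo = rtc_low();
			hi = rtc_high();
			status = rtc_ctrl();	/* cleared if an IRQ hit, or HIGH moved */
		} while (!(status & (UINT32_C(1) << 31)));

		return ((uint64_t)hi << 32) | lo;
	}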
+ 26 - 0
arch/arc/mm/dma.c

@@ -105,6 +105,31 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
 	__free_pages(page, get_order(size));
 }

+static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+			void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			unsigned long attrs)
+{
+	unsigned long user_count = vma_pages(vma);
+	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long pfn = __phys_to_pfn(plat_dma_to_phys(dev, dma_addr));
+	unsigned long off = vma->vm_pgoff;
+	int ret = -ENXIO;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	if (off < count && user_count <= (count - off)) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      pfn + off,
+				      user_count << PAGE_SHIFT,
+				      vma->vm_page_prot);
+	}
+
+	return ret;
+}
+
 /*
  * streaming DMA Mapping API...
  * CPU accesses page via normal paddr, thus needs to explicitly made
@@ -193,6 +218,7 @@ static int arc_dma_supported(struct device *dev, u64 dma_mask)
 struct dma_map_ops arc_dma_ops = {
 	.alloc			= arc_dma_alloc,
 	.free			= arc_dma_free,
+	.mmap			= arc_dma_mmap,
 	.map_page		= arc_dma_map_page,
 	.map_sg			= arc_dma_map_sg,
 	.sync_single_for_device	= arc_dma_sync_single_for_device,

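With .mmap wired into arc_dma_ops, a driver can export a coherent buffer to userspace through the generic DMA API. A hedged sketch of the call path from a driver's mmap fop; the driver structure and names are hypothetical:

	#include <linux/dma-mapping.h>
	#include <linux/fs.h>

	/* hypothetical driver state: a buffer from dma_alloc_coherent() */
	struct mydev {
		struct device *dma_dev;
		void *cpu_addr;
		dma_addr_t dma_handle;
	};

	static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
	{
		struct mydev *dev = file->private_data;

		/* routed through arc_dma_ops.mmap (arc_dma_mmap) on ARC */
		return dma_mmap_coherent(dev->dma_dev, vma, dev->cpu_addr,
					 dev->dma_handle,
					 vma->vm_end - vma->vm_start);
	}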
+ 0 - 6
arch/arc/plat-eznps/smp.c

@@ -140,16 +140,10 @@ static void eznps_init_per_cpu(int cpu)
 	mtm_enable_core(cpu);
 }

-static void eznps_ipi_clear(int irq)
-{
-	write_aux_reg(CTOP_AUX_IACK, 1 << irq);
-}
-
 struct plat_smp_ops plat_smp_ops = {
 	.info		= smp_cpuinfo_buf,
 	.init_early_smp	= eznps_init_cpumasks,
 	.cpu_kick	= eznps_smp_wakeup_cpu,
 	.ipi_send	= eznps_ipi_send,
 	.init_per_cpu	= eznps_init_per_cpu,
-	.ipi_clear	= eznps_ipi_clear,
 };

+ 1 - 0
arch/arm/include/asm/kvm_asm.h

@@ -66,6 +66,7 @@ extern char __kvm_hyp_vector[];
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);

 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);


+ 3 - 0
arch/arm/include/asm/kvm_host.h

@@ -57,6 +57,9 @@ struct kvm_arch {
 	/* VTTBR value associated with below pgd and vmid */
 	u64    vttbr;

+	/* The last vcpu id that ran on each physical CPU */
+	int __percpu *last_vcpu_ran;
+
 	/* Timer */
 	struct arch_timer_kvm	timer;


+ 1 - 0
arch/arm/include/asm/kvm_hyp.h

@@ -71,6 +71,7 @@
 #define ICIALLUIS	__ACCESS_CP15(c7, 0, c1, 0)
 #define ATS1CPR		__ACCESS_CP15(c7, 0, c8, 0)
 #define TLBIALLIS	__ACCESS_CP15(c8, 0, c3, 0)
+#define TLBIALL		__ACCESS_CP15(c8, 0, c7, 0)
 #define TLBIALLNSNHIS	__ACCESS_CP15(c8, 4, c3, 4)
 #define PRRR		__ACCESS_CP15(c10, 0, c2, 0)
 #define NMRR		__ACCESS_CP15(c10, 0, c2, 1)

+ 1 - 1
arch/arm/include/asm/unistd.h

@@ -19,7 +19,7 @@
  * This may need to be greater than __NR_last_syscall+1 in order to
  * account for the padding in the syscall table
  */
-#define __NR_syscalls  (396)
+#define __NR_syscalls  (400)

 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME

+ 3 - 0
arch/arm/include/uapi/asm/unistd.h

@@ -420,6 +420,9 @@
 #define __NR_copy_file_range		(__NR_SYSCALL_BASE+391)
 #define __NR_preadv2			(__NR_SYSCALL_BASE+392)
 #define __NR_pwritev2			(__NR_SYSCALL_BASE+393)
+#define __NR_pkey_mprotect		(__NR_SYSCALL_BASE+394)
+#define __NR_pkey_alloc			(__NR_SYSCALL_BASE+395)
+#define __NR_pkey_free			(__NR_SYSCALL_BASE+396)

 /*
  * The following SWIs are ARM private.

+ 3 - 0
arch/arm/kernel/calls.S

@@ -403,6 +403,9 @@
 		CALL(sys_copy_file_range)
 		CALL(sys_preadv2)
 		CALL(sys_pwritev2)
+		CALL(sys_pkey_mprotect)
+/* 395 */	CALL(sys_pkey_alloc)
+		CALL(sys_pkey_free)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
 #define syscalls_counted

+ 26 - 1
arch/arm/kvm/arm.c

@@ -114,11 +114,18 @@ void kvm_arch_check_processor_compat(void *rtn)
  */
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-	int ret = 0;
+	int ret, cpu;

 	if (type)
 		return -EINVAL;

+	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
+	if (!kvm->arch.last_vcpu_ran)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
+
 	ret = kvm_alloc_stage2_pgd(kvm);
 	if (ret)
 		goto out_fail_alloc;
@@ -141,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 out_free_stage2_pgd:
 	kvm_free_stage2_pgd(kvm);
 out_fail_alloc:
+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
 	return ret;
 }

@@ -168,6 +177,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	int i;

+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
+
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -312,6 +324,19 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+	int *last_ran;
+
+	last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
+
+	/*
+	 * We might get preempted before the vCPU actually runs, but
+	 * over-invalidation doesn't affect correctness.
+	 */
+	if (*last_ran != vcpu->vcpu_id) {
+		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+		*last_ran = vcpu->vcpu_id;
+	}
+
 	vcpu->cpu = cpu;
 	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);


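The last_vcpu_ran bookkeeping added above is an instance of a general pattern: remember the last logical owner per physical CPU and pay an idempotent flush only when the owner changes. A freestanding toy version of the same idea, with all names hypothetical:

	#include <stdio.h>

	#define NR_PCPUS 4

	static int last_owner[NR_PCPUS] = { -1, -1, -1, -1 };

	/* stand-in for __kvm_tlb_flush_local_vmid(): drop this CPU's stale entries */
	static void flush_local_tlb(int pcpu)
	{
		printf("flush TLB on pcpu %d\n", pcpu);
	}

	static void load_vcpu(int pcpu, int vcpu_id)
	{
		if (last_owner[pcpu] != vcpu_id) {	/* owner changed: flush once */
			flush_local_tlb(pcpu);
			last_owner[pcpu] = vcpu_id;
		}
	}

	int main(void)
	{
		load_vcpu(0, 1);	/* flush: first vCPU on pcpu 0 */
		load_vcpu(0, 1);	/* no flush: same vCPU again   */
		load_vcpu(0, 2);	/* flush: a different vCPU     */
		return 0;
	}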
+ 15 - 0
arch/arm/kvm/hyp/tlb.c

@@ -55,6 +55,21 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	__kvm_tlb_flush_vmid(kvm);
 }

+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
+
+	/* Switch to requested VMID */
+	write_sysreg(kvm->arch.vttbr, VTTBR);
+	isb();
+
+	write_sysreg(0, TLBIALL);
+	dsb(nsh);
+	isb();
+
+	write_sysreg(0, VTTBR);
+}
+
 void __hyp_text __kvm_flush_vm_context(void)
 {
 	write_sysreg(0, TLBIALLNSNHIS);

+ 24 - 10
arch/arm/mm/abort-lv4t.S

@@ -7,7 +7,7 @@
  *	   : r4 = aborted context pc
  *	   : r5 = aborted context psr
  *
- * Returns : r4-r5, r10-r11, r13 preserved
+ * Returns : r4-r5, r9-r11, r13 preserved
  *
  * Purpose : obtain information about current aborted instruction.
  * Note: we read user space.  This means we might cause a data
@@ -48,7 +48,10 @@ ENTRY(v4t_late_abort)
 /* c */	b	do_DataAbort			@ ldc	rd, [rn], #m	@ Same as ldr	rd, [rn], #m
 /* d */	b	do_DataAbort			@ ldc	rd, [rn, #m]
 /* e */	b	.data_unknown
-/* f */
+/* f */	b	.data_unknown
+
+.data_unknown_r9:
+	ldr	r9, [sp], #4
 .data_unknown:	@ Part of jumptable
 	mov	r0, r4
 	mov	r1, r8
@@ -57,6 +60,7 @@ ENTRY(v4t_late_abort)
 .data_arm_ldmstm:
 	tst	r8, #1 << 21			@ check writeback bit
 	beq	do_DataAbort			@ no writeback -> no fixup
+	str	r9, [sp, #-4]!
 	mov	r7, #0x11
 	orr	r7, r7, #0x1100
 	and	r6, r8, r7
@@ -75,12 +79,14 @@ ENTRY(v4t_late_abort)
 	subne	r7, r7, r6, lsl #2		@ Undo increment
 	addeq	r7, r7, r6, lsl #2		@ Undo decrement
 	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	ldr	r9, [sp], #4
 	b	do_DataAbort

 .data_arm_lateldrhpre:
 	tst	r8, #1 << 21			@ Check writeback bit
 	beq	do_DataAbort			@ No writeback -> no fixup
 .data_arm_lateldrhpost:
+	str	r9, [sp, #-4]!
 	and	r9, r8, #0x00f			@ get Rm / low nibble of immediate value
 	tst	r8, #1 << 22			@ if (immediate offset)
 	andne	r6, r8, #0xf00			@ { immediate high nibble
@@ -93,6 +99,7 @@ ENTRY(v4t_late_abort)
 	subne	r7, r7, r6			@ Undo incrmenet
 	addeq	r7, r7, r6			@ Undo decrement
 	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	ldr	r9, [sp], #4
 	b	do_DataAbort

 .data_arm_lateldrpreconst:
@@ -101,12 +108,14 @@ ENTRY(v4t_late_abort)
 .data_arm_lateldrpostconst:
 	movs	r6, r8, lsl #20			@ Get offset
 	beq	do_DataAbort			@ zero -> no fixup
+	str	r9, [sp, #-4]!
 	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
 	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
 	tst	r8, #1 << 23			@ Check U bit
 	subne	r7, r7, r6, lsr #20		@ Undo increment
 	addeq	r7, r7, r6, lsr #20		@ Undo decrement
 	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	ldr	r9, [sp], #4
 	b	do_DataAbort

 .data_arm_lateldrprereg:
@@ -115,6 +124,7 @@ ENTRY(v4t_late_abort)
 .data_arm_lateldrpostreg:
 	and	r7, r8, #15			@ Extract 'm' from instruction
 	ldr	r6, [r2, r7, lsl #2]		@ Get register 'Rm'
+	str	r9, [sp, #-4]!
 	mov	r9, r8, lsr #7			@ get shift count
 	ands	r9, r9, #31
 	and	r7, r8, #0x70			@ get shift type
@@ -126,33 +136,33 @@ ENTRY(v4t_late_abort)
 	b	.data_arm_apply_r6_and_rn
 	b	.data_arm_apply_r6_and_rn	@ 1: LSL #0
 	nop
-	b	.data_unknown			@ 2: MUL?
+	b	.data_unknown_r9		@ 2: MUL?
 	nop
-	b	.data_unknown			@ 3: MUL?
+	b	.data_unknown_r9		@ 3: MUL?
 	nop
 	mov	r6, r6, lsr r9			@ 4: LSR #!0
 	b	.data_arm_apply_r6_and_rn
 	mov	r6, r6, lsr #32			@ 5: LSR #32
 	b	.data_arm_apply_r6_and_rn
-	b	.data_unknown			@ 6: MUL?
+	b	.data_unknown_r9		@ 6: MUL?
 	nop
-	b	.data_unknown			@ 7: MUL?
+	b	.data_unknown_r9		@ 7: MUL?
 	nop
 	mov	r6, r6, asr r9			@ 8: ASR #!0
 	b	.data_arm_apply_r6_and_rn
 	mov	r6, r6, asr #32			@ 9: ASR #32
 	b	.data_arm_apply_r6_and_rn
-	b	.data_unknown			@ A: MUL?
+	b	.data_unknown_r9		@ A: MUL?
 	nop
-	b	.data_unknown			@ B: MUL?
+	b	.data_unknown_r9		@ B: MUL?
 	nop
 	mov	r6, r6, ror r9			@ C: ROR #!0
 	b	.data_arm_apply_r6_and_rn
 	mov	r6, r6, rrx			@ D: RRX
 	b	.data_arm_apply_r6_and_rn
-	b	.data_unknown			@ E: MUL?
+	b	.data_unknown_r9		@ E: MUL?
 	nop
-	b	.data_unknown			@ F: MUL?
+	b	.data_unknown_r9		@ F: MUL?

 .data_thumb_abort:
 	ldrh	r8, [r4]			@ read instruction
@@ -190,6 +200,7 @@ ENTRY(v4t_late_abort)
 .data_thumb_pushpop:
 	tst	r8, #1 << 10
 	beq	.data_unknown
+	str	r9, [sp, #-4]!
 	and	r6, r8, #0x55			@ hweight8(r8) + R bit
 	and	r9, r8, #0xaa
 	add	r6, r6, r9, lsr #1
@@ -204,9 +215,11 @@ ENTRY(v4t_late_abort)
 	addeq	r7, r7, r6, lsl #2		@ increment SP if PUSH
 	subne	r7, r7, r6, lsl #2		@ decrement SP if POP
 	str	r7, [r2, #13 << 2]
+	ldr	r9, [sp], #4
 	b	do_DataAbort

.data_thumb_ldmstm:
+	str	r9, [sp, #-4]!
 	and	r6, r8, #0x55			@ hweight8(r8)
 	and	r9, r8, #0xaa
 	add	r6, r6, r9, lsr #1
@@ -219,4 +232,5 @@ ENTRY(v4t_late_abort)
 	and	r6, r6, #15			@ number of regs to transfer
 	sub	r7, r7, r6, lsl #2		@ always decrement
 	str	r7, [r2, r9, lsr #6]
+	ldr	r9, [sp], #4
 	b	do_DataAbort

+ 5 - 2
arch/arm64/boot/dts/rockchip/rk3399.dtsi

@@ -300,8 +300,11 @@
 		ranges = <0x83000000 0x0 0xfa000000 0x0 0xfa000000 0x0 0x600000
 			  0x81000000 0x0 0xfa600000 0x0 0xfa600000 0x0 0x100000>;
 		resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>,
-			 <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>;
-		reset-names = "core", "mgmt", "mgmt-sticky", "pipe";
+			 <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>,
+			 <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>,
+			 <&cru SRST_A_PCIE>;
+		reset-names = "core", "mgmt", "mgmt-sticky", "pipe",
+			      "pm", "pclk", "aclk";
 		status = "disabled";

 		pcie0_intc: interrupt-controller {
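
The longer reset list above is consumed by name on the driver side: each string in reset-names becomes a handle that a reset_control lookup can resolve. A minimal consumer-side sketch, assuming a hypothetical probe helper (only devm_reset_control_get() and reset_control_deassert() are real kernel API here):

	#include <linux/err.h>
	#include <linux/reset.h>

	static int example_pcie_enable(struct device *dev)
	{
		/* "pm" must match an entry in the reset-names property */
		struct reset_control *pm_rst = devm_reset_control_get(dev, "pm");

		if (IS_ERR(pm_rst))
			return PTR_ERR(pm_rst);

		/* release the block from reset before touching its registers */
		return reset_control_deassert(pm_rst);
	}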

+ 1 - 1
arch/arm64/include/asm/alternative.h

@@ -1,7 +1,7 @@
 #ifndef __ASM_ALTERNATIVE_H
 #define __ASM_ALTERNATIVE_H

-#include <asm/cpufeature.h>
+#include <asm/cpucaps.h>
 #include <asm/insn.h>

 #ifndef __ASSEMBLY__

+ 40 - 0
arch/arm64/include/asm/cpucaps.h

@@ -0,0 +1,40 @@
+/*
+ * arch/arm64/include/asm/cpucaps.h
+ *
+ * Copyright (C) 2016 ARM Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CPUCAPS_H
+#define __ASM_CPUCAPS_H
+
+#define ARM64_WORKAROUND_CLEAN_CACHE		0
+#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
+#define ARM64_WORKAROUND_845719			2
+#define ARM64_HAS_SYSREG_GIC_CPUIF		3
+#define ARM64_HAS_PAN				4
+#define ARM64_HAS_LSE_ATOMICS			5
+#define ARM64_WORKAROUND_CAVIUM_23154		6
+#define ARM64_WORKAROUND_834220			7
+#define ARM64_HAS_NO_HW_PREFETCH		8
+#define ARM64_HAS_UAO				9
+#define ARM64_ALT_PAN_NOT_UAO			10
+#define ARM64_HAS_VIRT_HOST_EXTN		11
+#define ARM64_WORKAROUND_CAVIUM_27456		12
+#define ARM64_HAS_32BIT_EL0			13
+#define ARM64_HYP_OFFSET_LOW			14
+#define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
+
+#define ARM64_NCAPS				16
+
+#endif /* __ASM_CPUCAPS_H */
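
Each constant above is a bit position in a capability bitmap sized by ARM64_NCAPS. A minimal sketch of that consumption pattern (the demo_* names are illustrative stand-ins; the kernel's real accessor is cpus_have_cap() in <asm/cpufeature.h>):

	#include <linux/bitops.h>
	#include <linux/types.h>

	static DECLARE_BITMAP(demo_cpu_caps, ARM64_NCAPS);

	static bool demo_have_cap(unsigned int num)
	{
		if (num >= ARM64_NCAPS)		/* reject out-of-range indices */
			return false;
		return test_bit(num, demo_cpu_caps);
	}

Keeping the bare indices in their own header is what lets <asm/alternative.h> stop including the much heavier <asm/cpufeature.h>, as the preceding hunk shows.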

+ 1 - 19
arch/arm64/include/asm/cpufeature.h

@@ -11,6 +11,7 @@

 #include <linux/jump_label.h>

+#include <asm/cpucaps.h>
 #include <asm/hwcap.h>
 #include <asm/sysreg.h>

@@ -24,25 +25,6 @@
 #define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
 #define cpu_feature(x)		ilog2(HWCAP_ ## x)

-#define ARM64_WORKAROUND_CLEAN_CACHE		0
-#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
-#define ARM64_WORKAROUND_845719			2
-#define ARM64_HAS_SYSREG_GIC_CPUIF		3
-#define ARM64_HAS_PAN				4
-#define ARM64_HAS_LSE_ATOMICS			5
-#define ARM64_WORKAROUND_CAVIUM_23154		6
-#define ARM64_WORKAROUND_834220			7
-#define ARM64_HAS_NO_HW_PREFETCH		8
-#define ARM64_HAS_UAO				9
-#define ARM64_ALT_PAN_NOT_UAO			10
-#define ARM64_HAS_VIRT_HOST_EXTN		11
-#define ARM64_WORKAROUND_CAVIUM_27456		12
-#define ARM64_HAS_32BIT_EL0			13
-#define ARM64_HYP_OFFSET_LOW			14
-#define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
-
-#define ARM64_NCAPS				16
-
 #ifndef __ASSEMBLY__

 #include <linux/kernel.h>

+ 1 - 0
arch/arm64/include/asm/kvm_asm.h

@@ -54,6 +54,7 @@ extern char __kvm_hyp_vector[];
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);

 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);


+ 3 - 0
arch/arm64/include/asm/kvm_host.h

@@ -62,6 +62,9 @@ struct kvm_arch {
 	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;

+	/* The last vcpu id that ran on each physical CPU */
+	int __percpu *last_vcpu_ran;
+
 	/* The maximum number of vCPUs depends on the used GIC model */
 	int max_vcpus;


+ 1 - 1
arch/arm64/include/asm/kvm_mmu.h

@@ -128,7 +128,7 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
 	return v;
 }

-#define kern_hyp_va(v) 	(typeof(v))(__kern_hyp_va((unsigned long)(v)))
+#define kern_hyp_va(v) 	((typeof(v))(__kern_hyp_va((unsigned long)(v))))

 /*
  * We currently only support a 40bit IPA.
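
The extra outer parentheses matter because postfix operators bind tighter than a cast. A small illustration with made-up macros (nothing here is kernel API):

	/* without outer parens, subscripting binds to the inner value */
	#define BAD_VA(v)  (unsigned int *)((unsigned long)(v) + 0x100)
	#define GOOD_VA(v) ((unsigned int *)((unsigned long)(v) + 0x100))

	unsigned int first_word(void *p)
	{
		/* BAD_VA(p)[0] would try to subscript an unsigned long and
		 * fail to compile; GOOD_VA(p)[0] indexes the cast pointer. */
		return GOOD_VA(p)[0];
	}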

+ 0 - 1
arch/arm64/include/asm/lse.h

@@ -5,7 +5,6 @@

 #include <linux/stringify.h>
 #include <asm/alternative.h>
-#include <asm/cpufeature.h>

 #ifdef __ASSEMBLER__


+ 15 - 0
arch/arm64/kvm/hyp/tlb.c

@@ -64,6 +64,21 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 	write_sysreg(0, vttbr_el2);
 }

+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
+
+	/* Switch to requested VMID */
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+
+	asm volatile("tlbi vmalle1" : : );
+	dsb(nsh);
+	isb();
+
+	write_sysreg(0, vttbr_el2);
+}
+
 void __hyp_text __kvm_flush_vm_context(void)
 {
 	dsb(ishst);
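
A sketch of the caller side, closely modelled on the matching arch/arm/kvm/arm.c change in this merge (not excerpted here), using the last_vcpu_ran field added to struct kvm_arch above:

	static void example_vcpu_load(struct kvm_vcpu *vcpu)
	{
		int *last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);

		/*
		 * If a different vCPU of this VM last ran on this physical
		 * CPU, its TLB entries share our VMID and must be zapped
		 * locally. Over-invalidation is harmless, so no locking.
		 */
		if (*last_ran != vcpu->vcpu_id) {
			kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
			*last_ran = vcpu->vcpu_id;
		}
	}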

+ 1 - 1
arch/mips/Makefile

@@ -263,7 +263,7 @@ KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)

 bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y) \
 		  VMLINUX_ENTRY_ADDRESS=$(entry-y) \
-		  PLATFORM=$(platform-y)
+		  PLATFORM="$(platform-y)"
 ifdef CONFIG_32BIT
 bootvars-y	+= ADDR_BITS=32
 endif

+ 2 - 1
arch/mips/boot/dts/mti/malta.dts

@@ -84,12 +84,13 @@
 	fpga_regs: system-controller@1f000000 {
 		compatible = "mti,malta-fpga", "syscon", "simple-mfd";
 		reg = <0x1f000000 0x1000>;
+		native-endian;

 		reboot {
 			compatible = "syscon-reboot";
 			regmap = <&fpga_regs>;
 			offset = <0x500>;
-			mask = <0x4d>;
+			mask = <0x42>;
 		};
 	};


+ 10 - 6
arch/mips/generic/init.c

@@ -29,10 +29,20 @@ static __initdata const struct mips_machine *mach;
 static __initdata const void *mach_match_data;

 void __init prom_init(void)
+{
+	plat_get_fdt();
+	BUG_ON(!fdt);
+}
+
+void __init *plat_get_fdt(void)
 {
 	const struct mips_machine *check_mach;
 	const struct of_device_id *match;

+	if (fdt)
+		/* Already set up */
+		return (void *)fdt;
+
 	if ((fw_arg0 == -2) && !fdt_check_header((void *)fw_arg1)) {
 		/*
 		 * We booted using the UHI boot protocol, so we have been
@@ -75,12 +85,6 @@ void __init prom_init(void)
 		/* Retrieve the machine's FDT */
 		fdt = mach->fdt;
 	}
-
-	BUG_ON(!fdt);
-}
-
-void __init *plat_get_fdt(void)
-{
 	return (void *)fdt;
 }


+ 13 - 0
arch/mips/include/asm/fpu_emulator.h

@@ -63,6 +63,8 @@ do {									\
 extern int fpu_emulator_cop1Handler(struct pt_regs *xcp,
 				    struct mips_fpu_struct *ctx, int has_fpu,
 				    void *__user *fault_addr);
+void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
+		     struct task_struct *tsk);
 int process_fpemu_return(int sig, void __user *fault_addr,
 			 unsigned long fcr31);
 int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
@@ -81,4 +83,15 @@ static inline void fpu_emulator_init_fpu(void)
 		set_fpr64(&t->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
 }

+/*
+ * Mask the FCSR Cause bits according to the Enable bits, observing
+ * that Unimplemented is always enabled.
+ */
+static inline unsigned long mask_fcr31_x(unsigned long fcr31)
+{
+	return fcr31 & (FPU_CSR_UNI_X |
+			((fcr31 & FPU_CSR_ALL_E) <<
+			 (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E))));
+}
+
 #endif /* _ASM_FPU_EMULATOR_H */
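
A worked example of the masking above, using the standard FCSR layout (Cause in bits 17:12, Enable in bits 11:7, so the ffs() difference is 5); the raw bit values are spelled out for illustration only:

	static unsigned long mask_fcr31_x_example(void)
	{
		/* Cause.O (bit 14) and Cause.I (bit 12) raised together,
		 * but the program only enabled Overflow (Enable.O, bit 9) */
		unsigned long fcr31 = (1UL << 14) | (1UL << 12) | (1UL << 9);

		/* Enable bits shifted up 5 line up with their Cause bits;
		 * Unimplemented (bit 17) always counts as enabled.
		 * Result == 1 << 14: Overflow signals, Inexact is filtered. */
		return fcr31 & ((1UL << 17) | ((fcr31 & 0x00000f80UL) << 5));
	}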

+ 4 - 3
arch/mips/include/asm/kvm_host.h

@@ -293,7 +293,10 @@ struct kvm_vcpu_arch {
 	/* Host KSEG0 address of the EI/DI offset */
 	void *kseg0_commpage;

-	u32 io_gpr;		/* GPR used as IO source/target */
+	/* Resume PC after MMIO completion */
+	unsigned long io_pc;
+	/* GPR used as IO source/target */
+	u32 io_gpr;

 	struct hrtimer comparecount_timer;
 	/* Count timer control KVM register */
@@ -315,8 +318,6 @@ struct kvm_vcpu_arch {
 	/* Bitmask of pending exceptions to be cleared */
 	unsigned long pending_exceptions_clr;

-	u32 pending_load_cause;
-
 	/* Save/Restore the entryhi register when are are preempted/scheduled back in */
 	unsigned long preempt_entryhi;


+ 18 - 0
arch/mips/include/asm/switch_to.h

@@ -75,6 +75,22 @@ do {	if (cpu_has_rw_llb) {						\
 	}								\
 } while (0)

+/*
+ * Check FCSR for any unmasked exceptions pending set with `ptrace',
+ * clear them and send a signal.
+ */
+#define __sanitize_fcr31(next)						\
+do {									\
+	unsigned long fcr31 = mask_fcr31_x(next->thread.fpu.fcr31);	\
+	void __user *pc;						\
+									\
+	if (unlikely(fcr31)) {						\
+		pc = (void __user *)task_pt_regs(next)->cp0_epc;	\
+		next->thread.fpu.fcr31 &= ~fcr31;			\
+		force_fcr31_sig(fcr31, pc, next);			\
+	}								\
+} while (0)
+
 /*
  * For newly created kernel threads switch_to() will return to
  * ret_from_kernel_thread, newly created user threads to ret_from_fork.
@@ -85,6 +101,8 @@ do {	if (cpu_has_rw_llb) {						\
 do {									\
 	__mips_mt_fpaff_switch_to(prev);				\
 	lose_fpu_inatomic(1, prev);					\
+	if (tsk_used_math(next))					\
+		__sanitize_fcr31(next);					\
 	if (cpu_has_dsp) {						\
 		__save_dsp(prev);					\
 		__restore_dsp(next);					\

+ 10 - 1
arch/mips/kernel/mips-cpc.c

@@ -21,6 +21,11 @@ static DEFINE_PER_CPU_ALIGNED(spinlock_t, cpc_core_lock);

 static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);

+phys_addr_t __weak mips_cpc_default_phys_base(void)
+{
+	return 0;
+}
+
 /**
  * mips_cpc_phys_base - retrieve the physical base address of the CPC
  *
@@ -43,8 +48,12 @@ static phys_addr_t mips_cpc_phys_base(void)
 	if (cpc_base & CM_GCR_CPC_BASE_CPCEN_MSK)
 		return cpc_base & CM_GCR_CPC_BASE_CPCBASE_MSK;

-	/* Otherwise, give it the default address & enable it */
+	/* Otherwise, use the default address */
 	cpc_base = mips_cpc_default_phys_base();
 	cpc_base = mips_cpc_default_phys_base();
+	if (!cpc_base)
+		return cpc_base;
+
+	/* Enable the CPC, mapped at the default address */
 	write_gcr_cpc_base(cpc_base | CM_GCR_CPC_BASE_CPCEN_MSK);
 	return cpc_base;
 }
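
The __weak definition above is the standard linker-level override hook: the generic build gets a "no CPC" answer, and any platform that knows where its Cluster Power Controller lives supplies a strong symbol of the same name. A sketch of such an override (address and file placement invented for illustration):

	/* in a hypothetical arch/mips/<platform>/setup.c */
	phys_addr_t mips_cpc_default_phys_base(void)
	{
		return 0x1bde0000;	/* example CPC base, not a real board */
	}

Because the strong definition wins at link time, no registration call or config option is needed.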

+ 5 - 5
arch/mips/kernel/mips-r2-to-r6-emul.c

@@ -899,7 +899,7 @@ static inline int mipsr2_find_op_func(struct pt_regs *regs, u32 inst,
  * mipsr2_decoder: Decode and emulate a MIPS R2 instruction
  * @regs: Process register set
  * @inst: Instruction to decode and emulate
- * @fcr31: Floating Point Control and Status Register returned
+ * @fcr31: Floating Point Control and Status Register Cause bits returned
  */
 int mipsr2_decoder(struct pt_regs *regs, u32 inst, unsigned long *fcr31)
 {
@@ -1172,13 +1172,13 @@ fpu_emul:

 		err = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 0,
 					       &fault_addr);
-		*fcr31 = current->thread.fpu.fcr31;

 		/*
-		 * We can't allow the emulated instruction to leave any of
-		 * the cause bits set in $fcr31.
+		 * We can't allow the emulated instruction to leave any
+		 * enabled Cause bits set in $fcr31.
 		 */
-		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+		*fcr31 = res = mask_fcr31_x(current->thread.fpu.fcr31);
+		current->thread.fpu.fcr31 &= ~res;

 		/*
 		 * this is a tricky issue - lose_fpu() uses LL/SC atomics

+ 4 - 4
arch/mips/kernel/ptrace.c

@@ -79,16 +79,15 @@ void ptrace_disable(struct task_struct *child)
 }

 /*
- * Poke at FCSR according to its mask.  Don't set the cause bits as
- * this is currently not handled correctly in FP context restoration
- * and will cause an oops if a corresponding enable bit is set.
+ * Poke at FCSR according to its mask.  Set the Cause bits even
+ * if a corresponding Enable bit is set.  This will be noticed at
+ * the time the thread is switched to and SIGFPE thrown accordingly.
  */
 static void ptrace_setfcr31(struct task_struct *child, u32 value)
 {
 	u32 fcr31;
 	u32 mask;

-	value &= ~FPU_CSR_ALL_X;
 	fcr31 = child->thread.fpu.fcr31;
 	mask = boot_cpu_data.fpu_msk31;
 	child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
@@ -817,6 +816,7 @@ long arch_ptrace(struct task_struct *child, long request,
 			break;
 #endif
 		case FPC_CSR:
+			init_fp_ctx(child);
 			ptrace_setfcr31(child, data);
 			break;
 		case DSP_BASE ... DSP_BASE + 5: {

+ 58 - 80
arch/mips/kernel/r2300_fpu.S

@@ -19,108 +19,86 @@
 #include <asm/regdef.h>

 #define EX(a,b)							\
+9:	a,##b;							\
+	.section __ex_table,"a";				\
+	PTR	9b,fault;					\
+	.previous
+
+#define EX2(a,b)						\
 9:	a,##b;							\
 	.section __ex_table,"a";				\
 	PTR	9b,bad_stack;					\
+	PTR	9b+4,bad_stack;					\
 	.previous

 	.set	noreorder
 	.set	mips1
-	/* Save floating point context */
+
+/**
+ * _save_fp_context() - save FP context from the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
+ *
+ * Save FP context, including the 32 FP data registers and the FP
+ * control & status register, from the FPU to signal context.
+ */
 LEAF(_save_fp_context)
 	.set	push
 	SET_HARDFLOAT
 	li	v0, 0					# assume success
-	cfc1	t1,fcr31
-	EX(swc1 $f0,(SC_FPREGS+0)(a0))
-	EX(swc1 $f1,(SC_FPREGS+8)(a0))
-	EX(swc1 $f2,(SC_FPREGS+16)(a0))
-	EX(swc1 $f3,(SC_FPREGS+24)(a0))
-	EX(swc1 $f4,(SC_FPREGS+32)(a0))
-	EX(swc1 $f5,(SC_FPREGS+40)(a0))
-	EX(swc1 $f6,(SC_FPREGS+48)(a0))
-	EX(swc1 $f7,(SC_FPREGS+56)(a0))
-	EX(swc1 $f8,(SC_FPREGS+64)(a0))
-	EX(swc1 $f9,(SC_FPREGS+72)(a0))
-	EX(swc1 $f10,(SC_FPREGS+80)(a0))
-	EX(swc1 $f11,(SC_FPREGS+88)(a0))
-	EX(swc1 $f12,(SC_FPREGS+96)(a0))
-	EX(swc1 $f13,(SC_FPREGS+104)(a0))
-	EX(swc1 $f14,(SC_FPREGS+112)(a0))
-	EX(swc1 $f15,(SC_FPREGS+120)(a0))
-	EX(swc1 $f16,(SC_FPREGS+128)(a0))
-	EX(swc1 $f17,(SC_FPREGS+136)(a0))
-	EX(swc1 $f18,(SC_FPREGS+144)(a0))
-	EX(swc1 $f19,(SC_FPREGS+152)(a0))
-	EX(swc1 $f20,(SC_FPREGS+160)(a0))
-	EX(swc1 $f21,(SC_FPREGS+168)(a0))
-	EX(swc1 $f22,(SC_FPREGS+176)(a0))
-	EX(swc1 $f23,(SC_FPREGS+184)(a0))
-	EX(swc1 $f24,(SC_FPREGS+192)(a0))
-	EX(swc1 $f25,(SC_FPREGS+200)(a0))
-	EX(swc1 $f26,(SC_FPREGS+208)(a0))
-	EX(swc1 $f27,(SC_FPREGS+216)(a0))
-	EX(swc1 $f28,(SC_FPREGS+224)(a0))
-	EX(swc1 $f29,(SC_FPREGS+232)(a0))
-	EX(swc1 $f30,(SC_FPREGS+240)(a0))
-	EX(swc1 $f31,(SC_FPREGS+248)(a0))
-	EX(sw	t1,(SC_FPC_CSR)(a0))
-	cfc1	t0,$0				# implementation/version
+	cfc1	t1, fcr31
+	EX2(s.d $f0, 0(a0))
+	EX2(s.d $f2, 16(a0))
+	EX2(s.d $f4, 32(a0))
+	EX2(s.d $f6, 48(a0))
+	EX2(s.d $f8, 64(a0))
+	EX2(s.d $f10, 80(a0))
+	EX2(s.d $f12, 96(a0))
+	EX2(s.d $f14, 112(a0))
+	EX2(s.d $f16, 128(a0))
+	EX2(s.d $f18, 144(a0))
+	EX2(s.d $f20, 160(a0))
+	EX2(s.d $f22, 176(a0))
+	EX2(s.d $f24, 192(a0))
+	EX2(s.d $f26, 208(a0))
+	EX2(s.d $f28, 224(a0))
+	EX2(s.d $f30, 240(a0))
 	jr	ra
+	 EX(sw	t1, (a1))
 	.set	pop
-	.set	nomacro
-	 EX(sw	t0,(SC_FPC_EIR)(a0))
-	.set	macro
 	END(_save_fp_context)

-/*
- * Restore FPU state:
- *  - fp gp registers
- *  - cp1 status/control register
+/**
+ * _restore_fp_context() - restore FP context to the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
  *
- * We base the decision which registers to restore from the signal stack
- * frame on the current content of c0_status, not on the content of the
- * stack frame which might have been changed by the user.
+ * Restore FP context, including the 32 FP data registers and the FP
+ * control & status register, from signal context to the FPU.
  */
 LEAF(_restore_fp_context)
 	.set	push
 	SET_HARDFLOAT
 	li	v0, 0					# assume success
-	EX(lw t0,(SC_FPC_CSR)(a0))
-	EX(lwc1 $f0,(SC_FPREGS+0)(a0))
-	EX(lwc1 $f1,(SC_FPREGS+8)(a0))
-	EX(lwc1 $f2,(SC_FPREGS+16)(a0))
-	EX(lwc1 $f3,(SC_FPREGS+24)(a0))
-	EX(lwc1 $f4,(SC_FPREGS+32)(a0))
-	EX(lwc1 $f5,(SC_FPREGS+40)(a0))
-	EX(lwc1 $f6,(SC_FPREGS+48)(a0))
-	EX(lwc1 $f7,(SC_FPREGS+56)(a0))
-	EX(lwc1 $f8,(SC_FPREGS+64)(a0))
-	EX(lwc1 $f9,(SC_FPREGS+72)(a0))
-	EX(lwc1 $f10,(SC_FPREGS+80)(a0))
-	EX(lwc1 $f11,(SC_FPREGS+88)(a0))
-	EX(lwc1 $f12,(SC_FPREGS+96)(a0))
-	EX(lwc1 $f13,(SC_FPREGS+104)(a0))
-	EX(lwc1 $f14,(SC_FPREGS+112)(a0))
-	EX(lwc1 $f15,(SC_FPREGS+120)(a0))
-	EX(lwc1 $f16,(SC_FPREGS+128)(a0))
-	EX(lwc1 $f17,(SC_FPREGS+136)(a0))
-	EX(lwc1 $f18,(SC_FPREGS+144)(a0))
-	EX(lwc1 $f19,(SC_FPREGS+152)(a0))
-	EX(lwc1 $f20,(SC_FPREGS+160)(a0))
-	EX(lwc1 $f21,(SC_FPREGS+168)(a0))
-	EX(lwc1 $f22,(SC_FPREGS+176)(a0))
-	EX(lwc1 $f23,(SC_FPREGS+184)(a0))
-	EX(lwc1 $f24,(SC_FPREGS+192)(a0))
-	EX(lwc1 $f25,(SC_FPREGS+200)(a0))
-	EX(lwc1 $f26,(SC_FPREGS+208)(a0))
-	EX(lwc1 $f27,(SC_FPREGS+216)(a0))
-	EX(lwc1 $f28,(SC_FPREGS+224)(a0))
-	EX(lwc1 $f29,(SC_FPREGS+232)(a0))
-	EX(lwc1 $f30,(SC_FPREGS+240)(a0))
-	EX(lwc1 $f31,(SC_FPREGS+248)(a0))
+	EX(lw t0, (a1))
+	EX2(l.d $f0, 0(a0))
+	EX2(l.d $f2, 16(a0))
+	EX2(l.d $f4, 32(a0))
+	EX2(l.d $f6, 48(a0))
+	EX2(l.d $f8, 64(a0))
+	EX2(l.d $f10, 80(a0))
+	EX2(l.d $f12, 96(a0))
+	EX2(l.d $f14, 112(a0))
+	EX2(l.d $f16, 128(a0))
+	EX2(l.d $f18, 144(a0))
+	EX2(l.d $f20, 160(a0))
+	EX2(l.d $f22, 176(a0))
+	EX2(l.d $f24, 192(a0))
+	EX2(l.d $f26, 208(a0))
+	EX2(l.d $f28, 224(a0))
+	EX2(l.d $f30, 240(a0))
 	jr	ra
-	 ctc1	t0,fcr31
+	 ctc1	t0, fcr31
 	.set	pop
 	END(_restore_fp_context)
 	.set	reorder

+ 48 - 41
arch/mips/kernel/r6000_fpu.S

@@ -21,7 +21,14 @@
 	.set	push
 	SET_HARDFLOAT

-	/* Save floating point context */
+/**
+ * _save_fp_context() - save FP context from the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
+ *
+ * Save FP context, including the 32 FP data registers and the FP
+ * control & status register, from the FPU to signal context.
+ */
 	LEAF(_save_fp_context)
 	mfc0	t0,CP0_STATUS
 	sll	t0,t0,2
@@ -30,59 +37,59 @@

 	cfc1	t1,fcr31
 	/* Store the 16 double precision registers */
-	sdc1	$f0,(SC_FPREGS+0)(a0)
-	sdc1	$f2,(SC_FPREGS+16)(a0)
-	sdc1	$f4,(SC_FPREGS+32)(a0)
-	sdc1	$f6,(SC_FPREGS+48)(a0)
-	sdc1	$f8,(SC_FPREGS+64)(a0)
-	sdc1	$f10,(SC_FPREGS+80)(a0)
-	sdc1	$f12,(SC_FPREGS+96)(a0)
-	sdc1	$f14,(SC_FPREGS+112)(a0)
-	sdc1	$f16,(SC_FPREGS+128)(a0)
-	sdc1	$f18,(SC_FPREGS+144)(a0)
-	sdc1	$f20,(SC_FPREGS+160)(a0)
-	sdc1	$f22,(SC_FPREGS+176)(a0)
-	sdc1	$f24,(SC_FPREGS+192)(a0)
-	sdc1	$f26,(SC_FPREGS+208)(a0)
-	sdc1	$f28,(SC_FPREGS+224)(a0)
-	sdc1	$f30,(SC_FPREGS+240)(a0)
+	sdc1	$f0,0(a0)
+	sdc1	$f2,16(a0)
+	sdc1	$f4,32(a0)
+	sdc1	$f6,48(a0)
+	sdc1	$f8,64(a0)
+	sdc1	$f10,80(a0)
+	sdc1	$f12,96(a0)
+	sdc1	$f14,112(a0)
+	sdc1	$f16,128(a0)
+	sdc1	$f18,144(a0)
+	sdc1	$f20,160(a0)
+	sdc1	$f22,176(a0)
+	sdc1	$f24,192(a0)
+	sdc1	$f26,208(a0)
+	sdc1	$f28,224(a0)
+	sdc1	$f30,240(a0)
 	jr	ra
-	 sw	t0,SC_FPC_CSR(a0)
+	 sw	t0,(a1)
 1:	jr	ra
 	 nop
 	END(_save_fp_context)

-/* Restore FPU state:
- *  - fp gp registers
- *  - cp1 status/control register
+/**
+ * _restore_fp_context() - restore FP context to the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
  *
- * We base the decision which registers to restore from the signal stack
- * frame on the current content of c0_status, not on the content of the
- * stack frame which might have been changed by the user.
+ * Restore FP context, including the 32 FP data registers and the FP
+ * control & status register, from signal context to the FPU.
  */
 	LEAF(_restore_fp_context)
 	mfc0	t0,CP0_STATUS
 	sll	t0,t0,2

 	bgez	t0,1f
-	 lw	t0,SC_FPC_CSR(a0)
+	 lw	t0,(a1)
 	/* Restore the 16 double precision registers */
-	ldc1	$f0,(SC_FPREGS+0)(a0)
-	ldc1	$f2,(SC_FPREGS+16)(a0)
-	ldc1	$f4,(SC_FPREGS+32)(a0)
-	ldc1	$f6,(SC_FPREGS+48)(a0)
-	ldc1	$f8,(SC_FPREGS+64)(a0)
-	ldc1	$f10,(SC_FPREGS+80)(a0)
-	ldc1	$f12,(SC_FPREGS+96)(a0)
-	ldc1	$f14,(SC_FPREGS+112)(a0)
-	ldc1	$f16,(SC_FPREGS+128)(a0)
-	ldc1	$f18,(SC_FPREGS+144)(a0)
-	ldc1	$f20,(SC_FPREGS+160)(a0)
-	ldc1	$f22,(SC_FPREGS+176)(a0)
-	ldc1	$f24,(SC_FPREGS+192)(a0)
-	ldc1	$f26,(SC_FPREGS+208)(a0)
-	ldc1	$f28,(SC_FPREGS+224)(a0)
-	ldc1	$f30,(SC_FPREGS+240)(a0)
+	ldc1	$f0,0(a0)
+	ldc1	$f2,16(a0)
+	ldc1	$f4,32(a0)
+	ldc1	$f6,48(a0)
+	ldc1	$f8,64(a0)
+	ldc1	$f10,80(a0)
+	ldc1	$f12,96(a0)
+	ldc1	$f14,112(a0)
+	ldc1	$f16,128(a0)
+	ldc1	$f18,144(a0)
+	ldc1	$f20,160(a0)
+	ldc1	$f22,176(a0)
+	ldc1	$f24,192(a0)
+	ldc1	$f26,208(a0)
+	ldc1	$f28,224(a0)
+	ldc1	$f30,240(a0)
 	jr	ra
 	 ctc1	t0,fcr31
 1:	jr	ra

+ 1 - 1
arch/mips/kernel/relocate.c

@@ -200,7 +200,7 @@ static inline __init unsigned long get_random_boot(void)

 #if defined(CONFIG_USE_OF)
 	/* Get any additional entropy passed in device tree */
-	{
+	if (initial_boot_params) {
 		int node, len;
 		u64 *prop;


+ 13 - 0
arch/mips/kernel/setup.c

@@ -368,6 +368,19 @@ static void __init bootmem_init(void)
 		end = PFN_DOWN(boot_mem_map.map[i].addr
 				+ boot_mem_map.map[i].size);

+#ifndef CONFIG_HIGHMEM
+		/*
+		 * Skip highmem here so we get an accurate max_low_pfn if low
+		 * memory stops short of high memory.
+		 * If the region overlaps HIGHMEM_START, end is clipped so
+		 * max_pfn excludes the highmem portion.
+		 */
+		if (start >= PFN_DOWN(HIGHMEM_START))
+			continue;
+		if (end > PFN_DOWN(HIGHMEM_START))
+			end = PFN_DOWN(HIGHMEM_START);
+#endif
+
 		if (end > max_low_pfn)
 			max_low_pfn = end;
 		if (start < min_low_pfn)
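
To make the clipping concrete, illustrative numbers assuming the common MIPS32 value HIGHMEM_START == 0x20000000 (512 MiB) and 4 KiB pages; the region size is made up:

	/*
	 * PFN_DOWN(HIGHMEM_START) == 0x20000
	 *
	 * A 1 GiB memory region at physical address 0:
	 *	start = 0x00000
	 *	end   = PFN_DOWN(0x40000000) = 0x40000
	 *
	 * After the clip: end = 0x20000, so max_low_pfn covers only the
	 * first 512 MiB; the upper 512 MiB is left to the highmem code.
	 */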

+ 73 - 64
arch/mips/kernel/traps.c

@@ -156,7 +156,7 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs)
 		print_ip_sym(pc);
 		pc = unwind_stack(task, &sp, pc, &ra);
 	} while (pc);
-	printk("\n");
+	pr_cont("\n");
 }

 /*
@@ -174,22 +174,24 @@ static void show_stacktrace(struct task_struct *task,
 	printk("Stack :");
 	printk("Stack :");
 	i = 0;
 	i = 0;
 	while ((unsigned long) sp & (PAGE_SIZE - 1)) {
 	while ((unsigned long) sp & (PAGE_SIZE - 1)) {
-		if (i && ((i % (64 / field)) == 0))
-			printk("\n	 ");
+		if (i && ((i % (64 / field)) == 0)) {
+			pr_cont("\n");
+			printk("       ");
+		}
 		if (i > 39) {
 		if (i > 39) {
-			printk(" ...");
+			pr_cont(" ...");
 			break;
 			break;
 		}
 		}
 
 
 		if (__get_user(stackdata, sp++)) {
 		if (__get_user(stackdata, sp++)) {
-			printk(" (Bad stack address)");
+			pr_cont(" (Bad stack address)");
 			break;
 			break;
 		}
 		}
 
 
-		printk(" %0*lx", field, stackdata);
+		pr_cont(" %0*lx", field, stackdata);
 		i++;
 		i++;
 	}
 	}
-	printk("\n");
+	pr_cont("\n");
 	show_backtrace(task, regs);
 	show_backtrace(task, regs);
 }
 }
 
 
@@ -229,18 +231,19 @@ static void show_code(unsigned int __user *pc)
 	long i;
 	unsigned short __user *pc16 = NULL;

-	printk("\nCode:");
+	printk("Code:");

 	if ((unsigned long)pc & 1)
 		pc16 = (unsigned short __user *)((unsigned long)pc & ~1);
 	for(i = -3 ; i < 6 ; i++) {
 		unsigned int insn;
 		if (pc16 ? __get_user(insn, pc16 + i) : __get_user(insn, pc + i)) {
-			printk(" (Bad address in epc)\n");
+			pr_cont(" (Bad address in epc)\n");
 			break;
 		}
-		printk("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>'));
+		pr_cont("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>'));
 	}
+	pr_cont("\n");
 }

 static void __show_regs(const struct pt_regs *regs)
@@ -259,15 +262,15 @@ static void __show_regs(const struct pt_regs *regs)
 		if ((i % 4) == 0)
 			printk("$%2d   :", i);
 		if (i == 0)
-			printk(" %0*lx", field, 0UL);
+			pr_cont(" %0*lx", field, 0UL);
 		else if (i == 26 || i == 27)
-			printk(" %*s", field, "");
+			pr_cont(" %*s", field, "");
 		else
-			printk(" %0*lx", field, regs->regs[i]);
+			pr_cont(" %0*lx", field, regs->regs[i]);

 		i++;
 		if ((i % 4) == 0)
-			printk("\n");
+			pr_cont("\n");
 	}

 #ifdef CONFIG_CPU_HAS_SMARTMIPS
@@ -288,46 +291,46 @@

 	if (cpu_has_3kex) {
 		if (regs->cp0_status & ST0_KUO)
-			printk("KUo ");
+			pr_cont("KUo ");
 		if (regs->cp0_status & ST0_IEO)
-			printk("IEo ");
+			pr_cont("IEo ");
 		if (regs->cp0_status & ST0_KUP)
-			printk("KUp ");
+			pr_cont("KUp ");
 		if (regs->cp0_status & ST0_IEP)
-			printk("IEp ");
+			pr_cont("IEp ");
 		if (regs->cp0_status & ST0_KUC)
-			printk("KUc ");
+			pr_cont("KUc ");
 		if (regs->cp0_status & ST0_IEC)
-			printk("IEc ");
+			pr_cont("IEc ");
 	} else if (cpu_has_4kex) {
 		if (regs->cp0_status & ST0_KX)
-			printk("KX ");
+			pr_cont("KX ");
 		if (regs->cp0_status & ST0_SX)
-			printk("SX ");
+			pr_cont("SX ");
 		if (regs->cp0_status & ST0_UX)
-			printk("UX ");
+			pr_cont("UX ");
 		switch (regs->cp0_status & ST0_KSU) {
 		case KSU_USER:
-			printk("USER ");
+			pr_cont("USER ");
 			break;
 		case KSU_SUPERVISOR:
-			printk("SUPERVISOR ");
+			pr_cont("SUPERVISOR ");
 			break;
 		case KSU_KERNEL:
-			printk("KERNEL ");
+			pr_cont("KERNEL ");
 			break;
 		default:
-			printk("BAD_MODE ");
+			pr_cont("BAD_MODE ");
 			break;
 		}
 		if (regs->cp0_status & ST0_ERL)
-			printk("ERL ");
+			pr_cont("ERL ");
 		if (regs->cp0_status & ST0_EXL)
-			printk("EXL ");
+			pr_cont("EXL ");
 		if (regs->cp0_status & ST0_IE)
-			printk("IE ");
+			pr_cont("IE ");
 	}
-	printk("\n");
+	pr_cont("\n");

 	exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
 	printk("Cause : %08x (ExcCode %02x)\n", cause, exccode);
@@ -705,6 +708,32 @@ asmlinkage void do_ov(struct pt_regs *regs)
 	exception_exit(prev_state);
 }

+/*
+ * Send SIGFPE according to FCSR Cause bits, which must have already
+ * been masked against Enable bits.  This is important as Inexact can
+ * happen together with Overflow or Underflow, and `ptrace' can set
+ * any bits.
+ */
+void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
+		     struct task_struct *tsk)
+{
+	struct siginfo si = { .si_addr = fault_addr, .si_signo = SIGFPE };
+
+	if (fcr31 & FPU_CSR_INV_X)
+		si.si_code = FPE_FLTINV;
+	else if (fcr31 & FPU_CSR_DIV_X)
+		si.si_code = FPE_FLTDIV;
+	else if (fcr31 & FPU_CSR_OVF_X)
+		si.si_code = FPE_FLTOVF;
+	else if (fcr31 & FPU_CSR_UDF_X)
+		si.si_code = FPE_FLTUND;
+	else if (fcr31 & FPU_CSR_INE_X)
+		si.si_code = FPE_FLTRES;
+	else
+		si.si_code = __SI_FAULT;
+	force_sig_info(SIGFPE, &si, tsk);
+}
+
 int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 {
 	struct siginfo si = { 0 };
@@ -715,27 +744,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 		return 0;

 	case SIGFPE:
-		si.si_addr = fault_addr;
-		si.si_signo = sig;
-		/*
-		 * Inexact can happen together with Overflow or Underflow.
-		 * Respect the mask to deliver the correct exception.
-		 */
-		fcr31 &= (fcr31 & FPU_CSR_ALL_E) <<
-			 (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E));
-		if (fcr31 & FPU_CSR_INV_X)
-			si.si_code = FPE_FLTINV;
-		else if (fcr31 & FPU_CSR_DIV_X)
-			si.si_code = FPE_FLTDIV;
-		else if (fcr31 & FPU_CSR_OVF_X)
-			si.si_code = FPE_FLTOVF;
-		else if (fcr31 & FPU_CSR_UDF_X)
-			si.si_code = FPE_FLTUND;
-		else if (fcr31 & FPU_CSR_INE_X)
-			si.si_code = FPE_FLTRES;
-		else
-			si.si_code = __SI_FAULT;
-		force_sig_info(sig, &si, current);
+		force_fcr31_sig(fcr31, fault_addr, current);
 		return 1;

 	case SIGBUS:
@@ -799,13 +808,13 @@ static int simulate_fp(struct pt_regs *regs, unsigned int opcode,
 	/* Run the emulator */
 	sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
 				       &fault_addr);
-	fcr31 = current->thread.fpu.fcr31;

 	/*
-	 * We can't allow the emulated instruction to leave any of
-	 * the cause bits set in $fcr31.
+	 * We can't allow the emulated instruction to leave any
+	 * enabled Cause bits set in $fcr31.
 	 */
-	current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+	fcr31 = mask_fcr31_x(current->thread.fpu.fcr31);
+	current->thread.fpu.fcr31 &= ~fcr31;

 	/* Restore the hardware register state */
 	own_fpu(1);
@@ -831,7 +840,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
 		goto out;

 	/* Clear FCSR.Cause before enabling interrupts */
-	write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X);
+	write_32bit_cp1_register(CP1_STATUS, fcr31 & ~mask_fcr31_x(fcr31));
 	local_irq_enable();

 	die_if_kernel("FP exception in kernel code", regs);
@@ -853,13 +862,13 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
 		/* Run the emulator */
 		sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
 					       &fault_addr);
-		fcr31 = current->thread.fpu.fcr31;

 		/*
-		 * We can't allow the emulated instruction to leave any of
-		 * the cause bits set in $fcr31.
+		 * We can't allow the emulated instruction to leave any
+		 * enabled Cause bits set in $fcr31.
 		 */
-		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+		fcr31 = mask_fcr31_x(current->thread.fpu.fcr31);
+		current->thread.fpu.fcr31 &= ~fcr31;

 		/* Restore the hardware register state */
 		own_fpu(1);	/* Using the FPU again.	 */
@@ -1424,13 +1433,13 @@ asmlinkage void do_cpu(struct pt_regs *regs)

 		sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 0,
 					       &fault_addr);
-		fcr31 = current->thread.fpu.fcr31;

 		/*
 		 * We can't allow the emulated instruction to leave
-		 * any of the cause bits set in $fcr31.
+		 * any enabled Cause bits set in $fcr31.
 		 */
-		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+		fcr31 = mask_fcr31_x(current->thread.fpu.fcr31);
+		current->thread.fpu.fcr31 &= ~fcr31;

 		/* Send a signal if required.  */
 		if (!process_fpemu_return(sig, fault_addr, fcr31) && !err)

+ 19 - 13
arch/mips/kvm/emulate.c

@@ -790,15 +790,15 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu)
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	enum emulation_result er = EMULATE_DONE;

-	if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
+	if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
+		kvm_clear_c0_guest_status(cop0, ST0_ERL);
+		vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
+	} else if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
 		kvm_debug("[%#lx] ERET to %#lx\n", vcpu->arch.pc,
 		kvm_debug("[%#lx] ERET to %#lx\n", vcpu->arch.pc,
 			  kvm_read_c0_guest_epc(cop0));
 			  kvm_read_c0_guest_epc(cop0));
 		kvm_clear_c0_guest_status(cop0, ST0_EXL);
 		kvm_clear_c0_guest_status(cop0, ST0_EXL);
 		vcpu->arch.pc = kvm_read_c0_guest_epc(cop0);
 		vcpu->arch.pc = kvm_read_c0_guest_epc(cop0);
 
 
-	} else if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
-		kvm_clear_c0_guest_status(cop0, ST0_ERL);
-		vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
 	} else {
 		kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n",
 			vcpu->arch.pc);
@@ -1528,13 +1528,25 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
 					    struct kvm_vcpu *vcpu)
 {
 	enum emulation_result er = EMULATE_DO_MMIO;
+	unsigned long curr_pc;
 	u32 op, rt;
 	u32 bytes;

 	rt = inst.i_format.rt;
 	op = inst.i_format.opcode;

-	vcpu->arch.pending_load_cause = cause;
+	/*
+	 * Find the resume PC now while we have safe and easy access to the
+	 * prior branch instruction, and save it for
+	 * kvm_mips_complete_mmio_load() to restore later.
+	 */
+	curr_pc = vcpu->arch.pc;
+	er = update_pc(vcpu, cause);
+	if (er == EMULATE_FAIL)
+		return er;
+	vcpu->arch.io_pc = vcpu->arch.pc;
+	vcpu->arch.pc = curr_pc;
+
 	vcpu->arch.io_gpr = rt;

 	switch (op) {
@@ -2494,9 +2506,8 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 		goto done;
 	}

-	er = update_pc(vcpu, vcpu->arch.pending_load_cause);
-	if (er == EMULATE_FAIL)
-		return er;
+	/* Restore saved resume PC */
+	vcpu->arch.pc = vcpu->arch.io_pc;

 	switch (run->mmio.len) {
 	case 4:
@@ -2518,11 +2529,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 		break;
 	}

-	if (vcpu->arch.pending_load_cause & CAUSEF_BD)
-		kvm_debug("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n",
-			  vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr,
-			  vcpu->mmio_needed);
-
 done:
 	return er;
 }

+ 4 - 1
arch/mips/kvm/mips.c

@@ -426,7 +426,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	int cpu = smp_processor_id();
+	int i, cpu = smp_processor_id();
 	unsigned int gasid;

 	/*
@@ -442,6 +442,9 @@ static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
 						vcpu);
 			vcpu->arch.guest_user_asid[cpu] =
 				vcpu->arch.guest_user_mm.context.asid[cpu];
+			for_each_possible_cpu(i)
+				if (i != cpu)
+					vcpu->arch.guest_user_asid[i] = 0;
 			vcpu->arch.last_user_gasid = gasid;
 		}
 	}

+ 0 - 4
arch/mips/kvm/mmu.c

@@ -260,13 +260,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

 	if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) &
 						asid_version_mask(cpu)) {
-		u32 gasid = kvm_read_c0_guest_entryhi(vcpu->arch.cop0) &
-				KVM_ENTRYHI_ASID;
-
 		kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu);
 		vcpu->arch.guest_user_asid[cpu] =
 		    vcpu->arch.guest_user_mm.context.asid[cpu];
-		vcpu->arch.last_user_gasid = gasid;
 		newasid++;

 		kvm_debug("[%d]: cpu_context: %#lx\n", cpu,

+ 22 - 22
arch/mips/lib/dump_tlb.c

@@ -135,42 +135,42 @@ static void dump_tlb(int first, int last)
 		c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
 		c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;

-		printk("va=%0*lx asid=%0*lx",
-		       vwidth, (entryhi & ~0x1fffUL),
-		       asidwidth, entryhi & asidmask);
+		pr_cont("va=%0*lx asid=%0*lx",
+			vwidth, (entryhi & ~0x1fffUL),
+			asidwidth, entryhi & asidmask);
 		if (cpu_has_guestid)
-			printk(" gid=%02lx",
-			       (guestctl1 & MIPS_GCTL1_RID)
+			pr_cont(" gid=%02lx",
+				(guestctl1 & MIPS_GCTL1_RID)
 					>> MIPS_GCTL1_RID_SHIFT);
 		/* RI/XI are in awkward places, so mask them off separately */
 		pa = entrylo0 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI);
 		if (xpa)
 			pa |= (unsigned long long)readx_c0_entrylo0() << 30;
 		pa = (pa << 6) & PAGE_MASK;
-		printk("\n\t[");
+		pr_cont("\n\t[");
 		if (cpu_has_rixi)
-			printk("ri=%d xi=%d ",
-			       (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0,
-			       (entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0);
-		printk("pa=%0*llx c=%d d=%d v=%d g=%d] [",
-		       pwidth, pa, c0,
-		       (entrylo0 & ENTRYLO_D) ? 1 : 0,
-		       (entrylo0 & ENTRYLO_V) ? 1 : 0,
-		       (entrylo0 & ENTRYLO_G) ? 1 : 0);
+			pr_cont("ri=%d xi=%d ",
+				(entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0,
+				(entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0);
+		pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d] [",
+			pwidth, pa, c0,
+			(entrylo0 & ENTRYLO_D) ? 1 : 0,
+			(entrylo0 & ENTRYLO_V) ? 1 : 0,
+			(entrylo0 & ENTRYLO_G) ? 1 : 0);
 		/* RI/XI are in awkward places, so mask them off separately */
 		pa = entrylo1 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI);
 		if (xpa)
 			pa |= (unsigned long long)readx_c0_entrylo1() << 30;
 		pa = (pa << 6) & PAGE_MASK;
 		if (cpu_has_rixi)
-			printk("ri=%d xi=%d ",
-			       (entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0,
-			       (entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0);
-		printk("pa=%0*llx c=%d d=%d v=%d g=%d]\n",
-		       pwidth, pa, c1,
-		       (entrylo1 & ENTRYLO_D) ? 1 : 0,
-		       (entrylo1 & ENTRYLO_V) ? 1 : 0,
-		       (entrylo1 & ENTRYLO_G) ? 1 : 0);
+			pr_cont("ri=%d xi=%d ",
+				(entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0,
+				(entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0);
+		pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d]\n",
+			pwidth, pa, c1,
+			(entrylo1 & ENTRYLO_D) ? 1 : 0,
+			(entrylo1 & ENTRYLO_V) ? 1 : 0,
+			(entrylo1 & ENTRYLO_G) ? 1 : 0);
 	}
 	printk("\n");


+ 9 - 9
arch/mips/lib/r3k_dump_tlb.c

@@ -53,15 +53,15 @@ static void dump_tlb(int first, int last)
 			 */
 			printk("Index: %2d ", i);

-			printk("va=%08lx asid=%08lx"
-			       "  [pa=%06lx n=%d d=%d v=%d g=%d]",
-			       entryhi & PAGE_MASK,
-			       entryhi & asid_mask,
-			       entrylo0 & PAGE_MASK,
-			       (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0,
-			       (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0,
-			       (entrylo0 & R3K_ENTRYLO_V) ? 1 : 0,
-			       (entrylo0 & R3K_ENTRYLO_G) ? 1 : 0);
+			pr_cont("va=%08lx asid=%08lx"
+				"  [pa=%06lx n=%d d=%d v=%d g=%d]",
+				entryhi & PAGE_MASK,
+				entryhi & asid_mask,
+				entrylo0 & PAGE_MASK,
+				(entrylo0 & R3K_ENTRYLO_N) ? 1 : 0,
+				(entrylo0 & R3K_ENTRYLO_D) ? 1 : 0,
+				(entrylo0 & R3K_ENTRYLO_V) ? 1 : 0,
+				(entrylo0 & R3K_ENTRYLO_G) ? 1 : 0);
 		}
 	}
 	printk("\n");

+ 1 - 0
arch/nios2/kernel/time.c

@@ -324,6 +324,7 @@ static int __init nios2_time_init(struct device_node *timer)
 		ret = nios2_clocksource_init(timer);
 		break;
 	default:
+		ret = 0;
 		break;
 	}


+ 2 - 0
arch/openrisc/include/asm/cache.h

@@ -23,6 +23,8 @@
  * they shouldn't be hard-coded!
  */

+#define __ro_after_init __read_mostly
+
 #define L1_CACHE_BYTES 16
 #define L1_CACHE_SHIFT 4


+ 3 - 1
arch/parisc/include/uapi/asm/unistd.h

@@ -368,7 +368,9 @@

 #define __IGNORE_select		/* newselect */
 #define __IGNORE_fadvise64	/* fadvise64_64 */
-
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free

 #define LINUX_GATEWAY_ADDR      0x100


+ 3 - 3
arch/parisc/kernel/drivers.c

@@ -873,11 +873,11 @@ static void print_parisc_device(struct parisc_device *dev)

 	if (dev->num_addrs) {
 		int k;
-		printk(", additional addresses: ");
+		pr_cont(", additional addresses: ");
 		for (k = 0; k < dev->num_addrs; k++)
-			printk("0x%lx ", dev->addr[k]);
+			pr_cont("0x%lx ", dev->addr[k]);
 	}
-	printk("\n");
+	pr_cont("\n");
 }

 /**

+ 34 - 32
arch/parisc/kernel/syscall.S

@@ -100,14 +100,12 @@ set_thread_pointer:
 	.endr

 /* This address must remain fixed at 0x100 for glibc's syscalls to work */
-	.align 256
+	.align LINUX_GATEWAY_ADDR
 linux_gateway_entry:
 	gate	.+8, %r0			/* become privileged */
 	mtsp	%r0,%sr4			/* get kernel space into sr4 */
 	mtsp	%r0,%sr5			/* get kernel space into sr5 */
 	mtsp	%r0,%sr6			/* get kernel space into sr6 */
-	mfsp    %sr7,%r1                        /* save user sr7 */
-	mtsp    %r1,%sr3                        /* and store it in sr3 */

 #ifdef CONFIG_64BIT
 	/* for now we can *always* set the W bit on entry to the syscall
@@ -133,6 +131,14 @@ linux_gateway_entry:
 	depdi	0, 31, 32, %r21
 1:	
 #endif
+
+	/* We use a rsm/ssm pair to prevent sr3 from being clobbered
+	 * by external interrupts.
+	 */
+	mfsp    %sr7,%r1                        /* save user sr7 */
+	rsm	PSW_SM_I, %r0			/* disable interrupts */
+	mtsp    %r1,%sr3                        /* and store it in sr3 */
+
 	mfctl   %cr30,%r1
 	xor     %r1,%r30,%r30                   /* ye olde xor trick */
 	xor     %r1,%r30,%r1
@@ -147,6 +153,7 @@ linux_gateway_entry:
 	 */

 	mtsp	%r0,%sr7			/* get kernel space into sr7 */
+	ssm	PSW_SM_I, %r0			/* enable interrupts */
 	STREGM	%r1,FRAME_SIZE(%r30)		/* save r1 (usp) here for now */
 	mfctl	%cr30,%r1			/* get task ptr in %r1 */
 	LDREG	TI_TASK(%r1),%r1
@@ -474,11 +481,6 @@ lws_start:
 	comiclr,>>	__NR_lws_entries, %r20, %r0
 	b,n	lws_exit_nosys

-	/* WARNING: Trashing sr2 and sr3 */
-	mfsp	%sr7,%r1			/* get userspace into sr3 */
-	mtsp	%r1,%sr3
-	mtsp	%r0,%sr2			/* get kernel space into sr2 */
-
 	/* Load table start */
 	ldil	L%lws_table, %r1
 	ldo	R%lws_table(%r1), %r28	/* Scratch use of r28 */
@@ -627,9 +629,9 @@ cas_action:
 	stw	%r1, 4(%sr2,%r20)
 #endif
 	/* The load and store could fail */
-1:	ldw,ma	0(%sr3,%r26), %r28
+1:	ldw,ma	0(%r26), %r28
 	sub,<>	%r28, %r25, %r0
-2:	stw,ma	%r24, 0(%sr3,%r26)
+2:	stw,ma	%r24, 0(%r26)
 	/* Free lock */
 	stw,ma	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
@@ -706,9 +708,9 @@ lws_compare_and_swap_2:
 	nop

 	/* 8bit load */
-4:	ldb	0(%sr3,%r25), %r25
+4:	ldb	0(%r25), %r25
 	b	cas2_lock_start
-5:	ldb	0(%sr3,%r24), %r24
+5:	ldb	0(%r24), %r24
 	nop
 	nop
 	nop
@@ -716,9 +718,9 @@ lws_compare_and_swap_2:
 	nop

 	/* 16bit load */
-6:	ldh	0(%sr3,%r25), %r25
+6:	ldh	0(%r25), %r25
 	b	cas2_lock_start
-7:	ldh	0(%sr3,%r24), %r24
+7:	ldh	0(%r24), %r24
 	nop
 	nop
 	nop
@@ -726,9 +728,9 @@ lws_compare_and_swap_2:
 	nop

 	/* 32bit load */
-8:	ldw	0(%sr3,%r25), %r25
+8:	ldw	0(%r25), %r25
 	b	cas2_lock_start
-9:	ldw	0(%sr3,%r24), %r24
+9:	ldw	0(%r24), %r24
 	nop
 	nop
 	nop
@@ -737,14 +739,14 @@ lws_compare_and_swap_2:

 	/* 64bit load */
 #ifdef CONFIG_64BIT
-10:	ldd	0(%sr3,%r25), %r25
-11:	ldd	0(%sr3,%r24), %r24
+10:	ldd	0(%r25), %r25
+11:	ldd	0(%r24), %r24
 #else
 	/* Load new value into r22/r23 - high/low */
-10:	ldw	0(%sr3,%r25), %r22
-11:	ldw	4(%sr3,%r25), %r23
+10:	ldw	0(%r25), %r22
+11:	ldw	4(%r25), %r23
 	/* Load new value into fr4 for atomic store later */
-12:	flddx	0(%sr3,%r24), %fr4
+12:	flddx	0(%r24), %fr4
 #endif

 cas2_lock_start:
@@ -794,30 +796,30 @@ cas2_action:
 	ldo	1(%r0),%r28

 	/* 8bit CAS */
-13:	ldb,ma	0(%sr3,%r26), %r29
+13:	ldb,ma	0(%r26), %r29
 	sub,=	%r29, %r25, %r0
 	b,n	cas2_end
-14:	stb,ma	%r24, 0(%sr3,%r26)
+14:	stb,ma	%r24, 0(%r26)
 	b	cas2_end
 	copy	%r0, %r28
 	nop
 	nop

 	/* 16bit CAS */
-15:	ldh,ma	0(%sr3,%r26), %r29
+15:	ldh,ma	0(%r26), %r29
 	sub,=	%r29, %r25, %r0
 	b,n	cas2_end
-16:	sth,ma	%r24, 0(%sr3,%r26)
+16:	sth,ma	%r24, 0(%r26)
 	b	cas2_end
 	copy	%r0, %r28
 	nop
 	nop

 	/* 32bit CAS */
-17:	ldw,ma	0(%sr3,%r26), %r29
+17:	ldw,ma	0(%r26), %r29
 	sub,=	%r29, %r25, %r0
 	b,n	cas2_end
-18:	stw,ma	%r24, 0(%sr3,%r26)
+18:	stw,ma	%r24, 0(%r26)
 	b	cas2_end
 	copy	%r0, %r28
 	nop
@@ -825,22 +827,22 @@ cas2_action:

 	/* 64bit CAS */
 #ifdef CONFIG_64BIT
-19:	ldd,ma	0(%sr3,%r26), %r29
+19:	ldd,ma	0(%r26), %r29
 	sub,*=	%r29, %r25, %r0
 	b,n	cas2_end
-20:	std,ma	%r24, 0(%sr3,%r26)
+20:	std,ma	%r24, 0(%r26)
 	copy	%r0, %r28
 #else
 	/* Compare first word */
-19:	ldw,ma	0(%sr3,%r26), %r29
+19:	ldw,ma	0(%r26), %r29
 	sub,=	%r29, %r22, %r0
 	b,n	cas2_end
 	/* Compare second word */
-20:	ldw,ma	4(%sr3,%r26), %r29
+20:	ldw,ma	4(%r26), %r29
 	sub,=	%r29, %r23, %r0
 	b,n	cas2_end
 	/* Perform the store */
-21:	fstdx	%fr4, 0(%sr3,%r26)
+21:	fstdx	%fr4, 0(%r26)
 	copy	%r0, %r28
 #endif


+ 4 - 8
arch/powerpc/include/asm/checksum.h

@@ -53,10 +53,8 @@ static inline __sum16 csum_fold(__wsum sum)
 	return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
 }

-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-                                     unsigned short len,
-                                     unsigned short proto,
-                                     __wsum sum)
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
 {
 #ifdef __powerpc64__
 	unsigned long s = (__force u32)sum;
@@ -83,10 +81,8 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
  * computes the checksum of the TCP/UDP pseudo-header
  * returns a 16-bit checksum, already complemented
  */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-					unsigned short len,
-					unsigned short proto,
-					__wsum sum)
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
 {
 	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 }
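
A usage sketch for the retyped helpers: proto is a single IP protocol number, so __u8 is its natural width, and len becomes __u32 to match the generic prototype shared across architectures. Only csum_partial(), csum_tcpudp_magic() and IPPROTO_UDP are real kernel API here; the wrapper itself is illustrative:

	#include <linux/in.h>
	#include <net/checksum.h>

	static __sum16 udp_check_example(__be32 saddr, __be32 daddr,
					 const void *payload, __u32 len)
	{
		__wsum csum = csum_partial(payload, len, 0);

		return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, csum);
	}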

+ 3 - 3
arch/s390/hypfs/hypfs_diag.c

@@ -363,11 +363,11 @@ out:
 static int diag224_get_name_table(void)
 {
 	/* memory must be below 2GB */
-	diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+	diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA);
 	if (!diag224_cpu_names)
 		return -ENOMEM;
 	if (diag224(diag224_cpu_names)) {
-		kfree(diag224_cpu_names);
+		free_page((unsigned long) diag224_cpu_names);
 		return -EOPNOTSUPP;
 	}
 	EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
@@ -376,7 +376,7 @@ static int diag224_get_name_table(void)

 static void diag224_delete_name_table(void)
 {
-	kfree(diag224_cpu_names);
+	free_page((unsigned long) diag224_cpu_names);
 }

 static int diag224_idx2name(int index, char *name)
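
The kmalloc() to __get_free_page() switch matters because diag 224 wants a real, page-aligned buffer below 2GB: GFP_DMA bounds the address on s390, and a whole page is aligned by construction, while kmalloc(PAGE_SIZE) may hand back unaligned memory when slab debugging is active. A sketch of the paired alloc/free (hypothetical helper names):

	#include <linux/gfp.h>

	static char *alloc_diag224_buf(void)
	{
		/* one page, addressable below 2GB, page-aligned */
		return (char *)__get_free_page(GFP_KERNEL | GFP_DMA);
	}

	static void free_diag224_buf(char *buf)
	{
		free_page((unsigned long)buf);	/* NULL-safe: address 0 is ignored */
	}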

+ 2 - 0
arch/s390/kernel/vmlinux.lds.S

@@ -62,9 +62,11 @@ SECTIONS

 	. = ALIGN(PAGE_SIZE);
 	__start_ro_after_init = .;
+	__start_data_ro_after_init = .;
 	.data..ro_after_init : {
 		 *(.data..ro_after_init)
 	}
+	__end_data_ro_after_init = .;
 	EXCEPTION_TABLE(16)
 	. = ALIGN(PAGE_SIZE);
 	__end_ro_after_init = .;

+ 2 - 2
arch/s390/kvm/sthyi.c

@@ -315,7 +315,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
 	if (r < 0)
 		goto out;

-	diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+	diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
 	if (!diag224_buf || diag224(diag224_buf))
 		goto out;

@@ -378,7 +378,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
 	sctns->par.infpval1 |= PAR_WGHT_VLD;

 out:
-	kfree(diag224_buf);
+	free_page((unsigned long)diag224_buf);
 	vfree(diag204_buf);
 }


+ 1 - 1
arch/s390/pci/pci_dma.c

@@ -423,7 +423,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	dma_addr_t dma_addr_base, dma_addr;
 	int flags = ZPCI_PTE_VALID;
 	struct scatterlist *s;
-	unsigned long pa;
+	unsigned long pa = 0;
 	int ret;

 	size = PAGE_ALIGN(size);

+ 3 - 2
arch/sparc/include/asm/cpudata_64.h

@@ -24,9 +24,10 @@ typedef struct {
 	unsigned int	icache_line_size;
 	unsigned int	ecache_size;
 	unsigned int	ecache_line_size;
-	unsigned short	sock_id;
+	unsigned short	sock_id;	/* physical package */
 	unsigned short	core_id;
-	int		proc_id;
+	unsigned short  max_cache_id;	/* groupings of highest shared cache */
+	unsigned short	proc_id;	/* strand (aka HW thread) id */
 } cpuinfo_sparc;
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);

+ 1 - 1
arch/sparc/include/asm/spinlock_32.h

@@ -134,7 +134,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 	*(volatile __u32 *)&lp->lock = ~0U;
 }
 
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
 {
 	__asm__ __volatile__(
 "	st		%%g0, [%0]"

+ 6 - 6
arch/sparc/include/asm/spinlock_64.h

@@ -96,7 +96,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla
 
 /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
 
-static void inline arch_read_lock(arch_rwlock_t *lock)
+static inline void arch_read_lock(arch_rwlock_t *lock)
 {
 	unsigned long tmp1, tmp2;
 
@@ -119,7 +119,7 @@ static void inline arch_read_lock(arch_rwlock_t *lock)
 	: "memory");
 }
 
-static int inline arch_read_trylock(arch_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
 	int tmp1, tmp2;
 
@@ -140,7 +140,7 @@ static int inline arch_read_trylock(arch_rwlock_t *lock)
 	return tmp1;
 }
 
-static void inline arch_read_unlock(arch_rwlock_t *lock)
+static inline void arch_read_unlock(arch_rwlock_t *lock)
 {
 	unsigned long tmp1, tmp2;
 
@@ -156,7 +156,7 @@ static void inline arch_read_unlock(arch_rwlock_t *lock)
 	: "memory");
 }
 
-static void inline arch_write_lock(arch_rwlock_t *lock)
+static inline void arch_write_lock(arch_rwlock_t *lock)
 {
 	unsigned long mask, tmp1, tmp2;
 
@@ -181,7 +181,7 @@ static void inline arch_write_lock(arch_rwlock_t *lock)
 	: "memory");
 }
 
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
 {
 	__asm__ __volatile__(
 "	stw		%%g0, [%0]"
@@ -190,7 +190,7 @@ static void inline arch_write_unlock(arch_rwlock_t *lock)
 	: "memory");
 }
 
-static int inline arch_write_trylock(arch_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
 	unsigned long mask, tmp1, tmp2, result;
 

+ 7 - 1
arch/sparc/include/asm/topology_64.h

@@ -44,14 +44,20 @@ int __node_distance(int, int);
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).proc_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).core_id)
 #define topology_core_cpumask(cpu)		(&cpu_core_sib_map[cpu])
+#define topology_core_cache_cpumask(cpu)	(&cpu_core_sib_cache_map[cpu])
 #define topology_sibling_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 #endif /* CONFIG_SMP */
 
 extern cpumask_t cpu_core_map[NR_CPUS];
 extern cpumask_t cpu_core_sib_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_cache_map[NR_CPUS];
+
+/**
+ * Return cores that shares the last level cache.
+ */
 static inline const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-        return &cpu_core_map[cpu];
+	return &cpu_core_sib_cache_map[cpu];
 }
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */

+ 3 - 25
arch/sparc/include/asm/uaccess_64.h

@@ -82,7 +82,6 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si
 	return 1;
 }
 
-void __ret_efault(void);
 void __retl_efault(void);
 
 /* Uh, these should become the main single-value transfer routines..
@@ -189,55 +188,34 @@ int __get_user_bad(void);
 unsigned long __must_check ___copy_from_user(void *to,
 					     const void __user *from,
 					     unsigned long size);
-unsigned long copy_from_user_fixup(void *to, const void __user *from,
-				   unsigned long size);
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long size)
 {
-	unsigned long ret;
-
 	check_object_size(to, size, false);
 
-	ret = ___copy_from_user(to, from, size);
-	if (unlikely(ret))
-		ret = copy_from_user_fixup(to, from, size);
-
-	return ret;
+	return ___copy_from_user(to, from, size);
 }
 #define __copy_from_user copy_from_user
 
 unsigned long __must_check ___copy_to_user(void __user *to,
 					   const void *from,
 					   unsigned long size);
-unsigned long copy_to_user_fixup(void __user *to, const void *from,
-				 unsigned long size);
 static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long size)
 {
-	unsigned long ret;
-
 	check_object_size(from, size, true);
 
-	ret = ___copy_to_user(to, from, size);
-	if (unlikely(ret))
-		ret = copy_to_user_fixup(to, from, size);
-	return ret;
+	return ___copy_to_user(to, from, size);
 }
 #define __copy_to_user copy_to_user
 
 unsigned long __must_check ___copy_in_user(void __user *to,
 					   const void __user *from,
 					   unsigned long size);
-unsigned long copy_in_user_fixup(void __user *to, void __user *from,
-				 unsigned long size);
 static inline unsigned long __must_check
 copy_in_user(void __user *to, void __user *from, unsigned long size)
 {
-	unsigned long ret = ___copy_in_user(to, from, size);
-
-	if (unlikely(ret))
-		ret = copy_in_user_fixup(to, from, size);
-	return ret;
+	return ___copy_in_user(to, from, size);
 }
 #define __copy_in_user copy_in_user
 

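With the copy_*_user_fixup() wrappers gone, ___copy_from_user() and friends must report the residue themselves: 0 on success, otherwise the number of bytes left uncopied. A hedged user-space model of that contract (artificial fault offset, hypothetical names):

#include <stdio.h>
#include <string.h>

/* model: a copier that "faults" at fault_at and reports the residue */
static size_t copy_with_fault(char *dst, const char *src, size_t len,
			      size_t fault_at)
{
	size_t done = fault_at < len ? fault_at : len;

	memcpy(dst, src, done);
	return len - done;	/* 0 on success, bytes-not-copied on fault */
}

int main(void)
{
	char dst[16] = { 0 };

	/* 13-byte copy faulting after 5 bytes -> 8 bytes not copied */
	printf("left: %zu\n", copy_with_fault(dst, "hello, world!", 13, 5));
	return 0;
}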
+ 0 - 37
arch/sparc/kernel/head_64.S

@@ -926,48 +926,11 @@ tlb_type:	.word	0	/* Must NOT end up in BSS */
 EXPORT_SYMBOL(tlb_type)
 	.section	".fixup",#alloc,#execinstr
 
-	.globl	__ret_efault, __retl_efault, __ret_one, __retl_one
-ENTRY(__ret_efault)
-	ret
-	 restore %g0, -EFAULT, %o0
-ENDPROC(__ret_efault)
-EXPORT_SYMBOL(__ret_efault)
-
 ENTRY(__retl_efault)
 	retl
 	 mov	-EFAULT, %o0
 ENDPROC(__retl_efault)
 
-ENTRY(__retl_one)
-	retl
-	 mov	1, %o0
-ENDPROC(__retl_one)
-
-ENTRY(__retl_one_fp)
-	VISExitHalf
-	retl
-	 mov	1, %o0
-ENDPROC(__retl_one_fp)
-
-ENTRY(__ret_one_asi)
-	wr	%g0, ASI_AIUS, %asi
-	ret
-	 restore %g0, 1, %o0
-ENDPROC(__ret_one_asi)
-
-ENTRY(__retl_one_asi)
-	wr	%g0, ASI_AIUS, %asi
-	retl
-	 mov	1, %o0
-ENDPROC(__retl_one_asi)
-
-ENTRY(__retl_one_asi_fp)
-	wr	%g0, ASI_AIUS, %asi
-	VISExitHalf
-	retl
-	 mov	1, %o0
-ENDPROC(__retl_one_asi_fp)
-
 ENTRY(__retl_o1)
 	retl
 	 mov	%o1, %o0

+ 17 - 6
arch/sparc/kernel/jump_label.c

@@ -13,19 +13,30 @@
 void arch_jump_label_transform(struct jump_entry *entry,
 			       enum jump_label_type type)
 {
-	u32 val;
 	u32 *insn = (u32 *) (unsigned long) entry->code;
+	u32 val;
 
 	if (type == JUMP_LABEL_JMP) {
 		s32 off = (s32)entry->target - (s32)entry->code;
+		bool use_v9_branch = false;
+
+		BUG_ON(off & 3);
 
 #ifdef CONFIG_SPARC64
-		/* ba,pt %xcc, . + (off << 2) */
-		val = 0x10680000 | ((u32) off >> 2);
-#else
-		/* ba . + (off << 2) */
-		val = 0x10800000 | ((u32) off >> 2);
+		if (off <= 0xfffff && off >= -0x100000)
+			use_v9_branch = true;
 #endif
+		if (use_v9_branch) {
+			/* WDISP19 - target is . + immed << 2 */
+			/* ba,pt %xcc, . + off */
+			val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
+		} else {
+			/* WDISP22 - target is . + immed << 2 */
+			BUG_ON(off > 0x7fffff);
+			BUG_ON(off < -0x800000);
+			/* ba . + off */
+			val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
+		}
 	} else {
 		val = 0x01000000;
 	}

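The rewritten transform chooses the branch format by displacement: on sparc64 a 19-bit v9 ba,pt when the offset fits, otherwise the 22-bit ba, with the immediate now properly masked either way. A small sketch of the same range check and encoding (hypothetical harness):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t encode_branch(int32_t off)	/* byte offset, word aligned */
{
	assert((off & 3) == 0);
	if (off <= 0xfffff && off >= -0x100000)		/* WDISP19 range */
		return 0x10680000 | (((uint32_t)off >> 2) & 0x7ffff);
	assert(off <= 0x7fffff && off >= -0x800000);	/* WDISP22 range */
	return 0x10800000 | (((uint32_t)off >> 2) & 0x3fffff);
}

int main(void)
{
	printf("near %08x, far %08x\n",
	       encode_branch(0x1000), encode_branch(0x400000));
	return 0;
}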
+ 28 - 18
arch/sparc/kernel/mdesc.c

@@ -645,13 +645,20 @@ static void __mark_core_id(struct mdesc_handle *hp, u64 node,
 		cpu_data(*id).core_id = core_id;
 }
 
-static void __mark_sock_id(struct mdesc_handle *hp, u64 node,
-			   int sock_id)
+static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node,
+				int max_cache_id)
 {
 	const u64 *id = mdesc_get_property(hp, node, "id", NULL);
 
-	if (*id < num_possible_cpus())
-		cpu_data(*id).sock_id = sock_id;
+	if (*id < num_possible_cpus()) {
+		cpu_data(*id).max_cache_id = max_cache_id;
+
+		/**
+		 * On systems without explicit socket descriptions socket
+		 * is max_cache_id
+		 */
+		cpu_data(*id).sock_id = max_cache_id;
+	}
 }
 
 static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
@@ -660,10 +667,11 @@ static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
 	find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
 }
 
-static void mark_sock_ids(struct mdesc_handle *hp, u64 mp,
-			  int sock_id)
+static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp,
+			       int max_cache_id)
 {
-	find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10);
+	find_back_node_value(hp, mp, "cpu", __mark_max_cache_id,
+			     max_cache_id, 10);
 }
 
 static void set_core_ids(struct mdesc_handle *hp)
@@ -694,14 +702,15 @@ static void set_core_ids(struct mdesc_handle *hp)
 	}
 }
 
-static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
+static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level)
 {
 	u64 mp;
 	int idx = 1;
 	int fnd = 0;
 
-	/* Identify unique sockets by looking for cpus backpointed to by
-	 * shared level n caches.
+	/**
+	 * Identify unique highest level of shared cache by looking for cpus
+	 * backpointed to by shared level N caches.
 	 */
 	mdesc_for_each_node_by_name(hp, mp, "cache") {
 		const u64 *cur_lvl;
@@ -709,8 +718,7 @@ static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level)
 		cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
 		if (*cur_lvl != level)
 			continue;
-
-		mark_sock_ids(hp, mp, idx);
+		mark_max_cache_ids(hp, mp, idx);
 		idx++;
 		fnd = 1;
 	}
@@ -745,15 +753,17 @@ static void set_sock_ids(struct mdesc_handle *hp)
 {
 	u64 mp;
 
-	/* If machine description exposes sockets data use it.
-	 * Otherwise fallback to use shared L3 or L2 caches.
+	/**
+	 * Find the highest level of shared cache which pre-T7 is also
+	 * the socket.
 	 */
+	if (!set_max_cache_ids_by_cache(hp, 3))
+		set_max_cache_ids_by_cache(hp, 2);
+
+	/* If machine description exposes sockets data use it.*/
 	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
 	if (mp != MDESC_NODE_NULL)
-		return set_sock_ids_by_socket(hp, mp);
-
-	if (!set_sock_ids_by_cache(hp, 3))
-		set_sock_ids_by_cache(hp, 2);
+		set_sock_ids_by_socket(hp, mp);
 }
 
 static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)

+ 8 - 0
arch/sparc/kernel/smp_64.c

@@ -63,9 +63,13 @@ cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
 cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
 	[0 ... NR_CPUS-1] = CPU_MASK_NONE };
 
+cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
+	[0 ... NR_CPUS - 1] = CPU_MASK_NONE };
+
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_SYMBOL(cpu_core_map);
 EXPORT_SYMBOL(cpu_core_sib_map);
+EXPORT_SYMBOL(cpu_core_sib_cache_map);
 
 static cpumask_t smp_commenced_mask;
 
@@ -1265,6 +1269,10 @@ void smp_fill_in_sib_core_maps(void)
 		unsigned int j;
 
 		for_each_present_cpu(j)  {
+			if (cpu_data(i).max_cache_id ==
+			    cpu_data(j).max_cache_id)
+				cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
+
 			if (cpu_data(i).sock_id == cpu_data(j).sock_id)
 				cpumask_set_cpu(j, &cpu_core_sib_map[i]);
 		}

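The new loop simply puts every pair of CPUs with equal max_cache_id into each other's cpu_core_sib_cache_map. A toy model of that grouping (hypothetical 8-CPU layout, masks as plain bitmaps):

#include <stdint.h>
#include <stdio.h>

#define NCPUS 8

int main(void)
{
	/* two last-level caches, four CPUs each (assumed layout) */
	int max_cache_id[NCPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };
	uint8_t cache_mask[NCPUS] = { 0 };

	for (int i = 0; i < NCPUS; i++)
		for (int j = 0; j < NCPUS; j++)
			if (max_cache_id[i] == max_cache_id[j])
				cache_mask[i] |= 1u << j;

	for (int i = 0; i < NCPUS; i++)
		printf("cpu%d: %02x\n", i, cache_mask[i]);	/* 0f / f0 */
	return 0;
}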
+ 2 - 2
arch/sparc/lib/GENcopy_from_user.S

@@ -3,11 +3,11 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one;	\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 

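The second macro argument becomes the fixup target recorded in __ex_table, so every faulting load or store can name a handler that knows exactly how many bytes remained at that point, instead of the shared "return 1" stub. A hedged user-space model of the table lookup (hypothetical addresses and handlers):

#include <stdint.h>
#include <stdio.h>

struct exent {
	uintptr_t insn;			/* faulting PC (the 98: label) */
	long (*fixup)(long loop_count);	/* per-site residue computation */
};

static long fix_head(long n) { return n + 1; }	/* byte head-align loop */
static long fix_bulk(long n) { return n + 8; }	/* 8-byte bulk loop */

static const struct exent table[] = {
	{ 0x1000, fix_head },
	{ 0x1020, fix_bulk },
};

static long handle_fault(uintptr_t pc, long loop_count)
{
	for (unsigned i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].insn == pc)
			return table[i].fixup(loop_count);
	return -1;		/* no entry: not a recoverable fault */
}

int main(void)
{
	printf("%ld\n", handle_fault(0x1020, 32));	/* 40 bytes left */
	return 0;
}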
+ 2 - 2
arch/sparc/lib/GENcopy_to_user.S

@@ -3,11 +3,11 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one;	\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 

+ 34 - 14
arch/sparc/lib/GENmemcpy.S

@@ -4,21 +4,18 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #define GLOBAL_SPARE	%g7
 #else
 #define GLOBAL_SPARE	%g5
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
+#define EX_ST(x,y)	x
 #endif
 
 #ifndef LOAD
@@ -45,6 +42,29 @@
 	.register	%g3,#scratch
 
 	.text
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+ENTRY(GEN_retl_o4_1)
+	add	%o4, %o2, %o4
+	retl
+	 add	%o4, 1, %o0
+ENDPROC(GEN_retl_o4_1)
+ENTRY(GEN_retl_g1_8)
+	add	%g1, %o2, %g1
+	retl
+	 add	%g1, 8, %o0
+ENDPROC(GEN_retl_g1_8)
+ENTRY(GEN_retl_o2_4)
+	retl
+	 add	%o2, 4, %o0
+ENDPROC(GEN_retl_o2_4)
+ENTRY(GEN_retl_o2_1)
+	retl
+	 add	%o2, 1, %o0
+ENDPROC(GEN_retl_o2_1)
+#endif
+
 	.align		64
 
 	.globl	FUNC_NAME
@@ -73,8 +93,8 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	sub		%g0, %o4, %o4
 	sub		%o2, %o4, %o2
 1:	subcc		%o4, 1, %o4
-	EX_LD(LOAD(ldub, %o1, %g1))
-	EX_ST(STORE(stb, %g1, %o0))
+	EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
+	EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
 	add		%o1, 1, %o1
 	bne,pt		%XCC, 1b
 	add		%o0, 1, %o0
@@ -82,8 +102,8 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	andn		%o2, 0x7, %g1
 	sub		%o2, %g1, %o2
 1:	subcc		%g1, 0x8, %g1
-	EX_LD(LOAD(ldx, %o1, %g2))
-	EX_ST(STORE(stx, %g2, %o0))
+	EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
+	EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
 	add		%o1, 0x8, %o1
 	bne,pt		%XCC, 1b
 	 add		%o0, 0x8, %o0
@@ -100,8 +120,8 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 1:
 	subcc		%o2, 4, %o2
-	EX_LD(LOAD(lduw, %o1, %g1))
-	EX_ST(STORE(stw, %g1, %o1 + %o3))
+	EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
+	EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 4, %o1
 
@@ -111,8 +131,8 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	.align		32
 90:
 	subcc		%o2, 1, %o2
-	EX_LD(LOAD(ldub, %o1, %g1))
-	EX_ST(STORE(stb, %g1, %o1 + %o3))
+	EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
+	EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
 	bgu,pt		%XCC, 90b
 	 add		%o1, 1, %o1
 	retl

+ 1 - 1
arch/sparc/lib/Makefile

@@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) +=  NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
 lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
 lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
 
-lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
+lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
 lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 
 obj-$(CONFIG_SPARC64) += iomap.o

+ 4 - 4
arch/sparc/lib/NG2copy_from_user.S

@@ -3,19 +3,19 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_LD_FP(x)		\
+#define EX_LD_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 

+ 4 - 4
arch/sparc/lib/NG2copy_to_user.S

@@ -3,19 +3,19 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_ST_FP(x)		\
+#define EX_ST_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 

+ 145 - 83
arch/sparc/lib/NG2memcpy.S

@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #define GLOBAL_SPARE	%g7
@@ -32,21 +33,17 @@
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)	x
+#define EX_LD_FP(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
+#define EX_ST(x,y)	x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)	x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
+#define EX_ST_FP(x,y)	x
 #endif
 
 #ifndef LOAD
@@ -140,45 +137,110 @@
 	fsrc2		%x6, %f12; \
 	fsrc2		%x7, %f14;
 #define FREG_LOAD_1(base, x0) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
 #define FREG_LOAD_2(base, x0, x1) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_3(base, x0, x1, x2) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_4(base, x0, x1, x2, x3) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
-	EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
-	EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
-	EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
-	EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
-	EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
-	EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
 
 	.register	%g2,#scratch
 	.register	%g3,#scratch
 
 	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_fp:
+	VISExitHalf
+__restore_asi:
+	retl
+	 wr	%g0, ASI_AIUS, %asi
+ENTRY(NG2_retl_o2)
+	ba,pt	%xcc, __restore_asi
+	 mov	%o2, %o0
+ENDPROC(NG2_retl_o2)
+ENTRY(NG2_retl_o2_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 1, %o0
+ENDPROC(NG2_retl_o2_plus_1)
+ENTRY(NG2_retl_o2_plus_4)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 4, %o0
+ENDPROC(NG2_retl_o2_plus_4)
+ENTRY(NG2_retl_o2_plus_8)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 8, %o0
+ENDPROC(NG2_retl_o2_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_1)
+	add	%o4, 1, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_1)
+ENTRY(NG2_retl_o2_plus_o4_plus_8)
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_16)
+	add	%o4, 16, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_16)
+ENTRY(NG2_retl_o2_plus_g1_fp)
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
+	add	%g1, 64, %g1
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_1)
+ENTRY(NG2_retl_o2_and_7_plus_o4)
+	and	%o2, 7, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4)
+ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
+	and	%o2, 7, %o2
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
+#endif
+
 	.align		64
 
 	.globl	FUNC_NAME
@@ -230,8 +292,8 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	sub		%g0, %o4, %o4	! bytes to align dst
 	sub		%o2, %o4, %o2
 1:	subcc		%o4, 1, %o4
-	EX_LD(LOAD(ldub, %o1, %g1))
-	EX_ST(STORE(stb, %g1, %o0))
+	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
+	EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
 	add		%o1, 1, %o1
 	bne,pt		%XCC, 1b
 	add		%o0, 1, %o0
@@ -281,11 +343,11 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 nop
 	/* fall through for 0 < low bits < 8 */
 110:	sub		%o4, 64, %g2
-	EX_LD_FP(LOAD_BLK(%g2, %f0))
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+	EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -296,10 +358,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 120:	sub		%o4, 56, %g2
 	FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -310,10 +372,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 130:	sub		%o4, 48, %g2
 	FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -324,10 +386,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 140:	sub		%o4, 40, %g2
 	FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_5(f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -338,10 +400,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 150:	sub		%o4, 32, %g2
 	FREG_LOAD_4(%g2, f0, f2, f4, f6)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_4(f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -352,10 +414,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 160:	sub		%o4, 24, %g2
 	FREG_LOAD_3(%g2, f0, f2, f4)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_3(f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -366,10 +428,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 170:	sub		%o4, 16, %g2
 	FREG_LOAD_2(%g2, f0, f2)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_2(f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -380,10 +442,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 180:	sub		%o4, 8, %g2
 	FREG_LOAD_1(%g2, f0)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_1(f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -393,10 +455,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 nop
 
 190:
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	subcc		%g1, 64, %g1
-	EX_LD_FP(LOAD_BLK(%o4, %f0))
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
 	add		%o4, 64, %o4
 	bne,pt		%xcc, 1b
 	 LOAD(prefetch, %o4 + 64, #one_read)
@@ -423,28 +485,28 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	andn		%o2, 0xf, %o4
 	and		%o2, 0xf, %o2
 1:	subcc		%o4, 0x10, %o4
-	EX_LD(LOAD(ldx, %o1, %o5))
+	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
 	add		%o1, 0x08, %o1
-	EX_LD(LOAD(ldx, %o1, %g1))
+	EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
 	sub		%o1, 0x08, %o1
-	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
+	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
 	add		%o1, 0x8, %o1
-	EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
+	EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 0x8, %o1
 73:	andcc		%o2, 0x8, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%o2, 0x8, %o2
-	EX_LD(LOAD(ldx, %o1, %o5))
-	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
+	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
 	add		%o1, 0x8, %o1
 1:	andcc		%o2, 0x4, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%o2, 0x4, %o2
-	EX_LD(LOAD(lduw, %o1, %o5))
-	EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
+	EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
 	add		%o1, 0x4, %o1
 1:	cmp		%o2, 0
 	be,pt		%XCC, 85f
@@ -460,8 +522,8 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	sub		%o2, %g1, %o2
 
 1:	subcc		%g1, 1, %g1
-	EX_LD(LOAD(ldub, %o1, %o5))
-	EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
+	EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
 	bgu,pt		%icc, 1b
 	 add		%o1, 1, %o1
 
@@ -477,16 +539,16 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 8:	mov		64, GLOBAL_SPARE
 	andn		%o1, 0x7, %o1
-	EX_LD(LOAD(ldx, %o1, %g2))
+	EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
 	sub		GLOBAL_SPARE, %g1, GLOBAL_SPARE
 	andn		%o2, 0x7, %o4
 	sllx		%g2, %g1, %g2
 1:	add		%o1, 0x8, %o1
-	EX_LD(LOAD(ldx, %o1, %g3))
+	EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
 	subcc		%o4, 0x8, %o4
 	srlx		%g3, GLOBAL_SPARE, %o5
 	or		%o5, %g2, %o5
-	EX_ST(STORE(stx, %o5, %o0))
+	EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
 	add		%o0, 0x8, %o0
 	bgu,pt		%icc, 1b
 	 sllx		%g3, %g1, %g2
@@ -506,8 +568,8 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 
 1:
 	subcc		%o2, 4, %o2
-	EX_LD(LOAD(lduw, %o1, %g1))
-	EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
+	EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 4, %o1
 
@@ -517,8 +579,8 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	.align		32
 90:
 	subcc		%o2, 1, %o2
-	EX_LD(LOAD(ldub, %o1, %g1))
-	EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
+	EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
 	bgu,pt		%XCC, 90b
 	 add		%o1, 1, %o1
 	retl

+ 4 - 4
arch/sparc/lib/NG4copy_from_user.S

@@ -3,19 +3,19 @@
  * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x, y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_LD_FP(x)		\
+#define EX_LD_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 

+ 4 - 4
arch/sparc/lib/NG4copy_to_user.S

@@ -3,19 +3,19 @@
  * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_ST_FP(x)		\
+#define EX_ST_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 

+ 223 - 71
arch/sparc/lib/NG4memcpy.S

@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #define GLOBAL_SPARE	%g7
@@ -46,22 +47,19 @@
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)	x
+#define EX_LD_FP(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
+#define EX_ST(x,y)	x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)	x
+#define EX_ST_FP(x,y)	x
 #endif
 
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
-#endif
 
 #ifndef LOAD
 #define LOAD(type,addr,dest)	type [addr], dest
@@ -94,6 +92,158 @@
 	.register	%g3,#scratch
 
 	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_asi_fp:
+	VISExitHalf
+__restore_asi:
+	retl
+	 wr	%g0, ASI_AIUS, %asi
+
+ENTRY(NG4_retl_o2)
+	ba,pt	%xcc, __restore_asi
+	 mov	%o2, %o0
+ENDPROC(NG4_retl_o2)
+ENTRY(NG4_retl_o2_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 1, %o0
+ENDPROC(NG4_retl_o2_plus_1)
+ENTRY(NG4_retl_o2_plus_4)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 4, %o0
+ENDPROC(NG4_retl_o2_plus_4)
+ENTRY(NG4_retl_o2_plus_o5)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5)
+ENTRY(NG4_retl_o2_plus_o5_plus_4)
+	add	%o5, 4, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_4)
+ENTRY(NG4_retl_o2_plus_o5_plus_8)
+	add	%o5, 8, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_8)
+ENTRY(NG4_retl_o2_plus_o5_plus_16)
+	add	%o5, 16, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_16)
+ENTRY(NG4_retl_o2_plus_o5_plus_24)
+	add	%o5, 24, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_24)
+ENTRY(NG4_retl_o2_plus_o5_plus_32)
+	add	%o5, 32, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_32)
+ENTRY(NG4_retl_o2_plus_g1)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1)
+ENTRY(NG4_retl_o2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_1)
+ENTRY(NG4_retl_o2_plus_g1_plus_8)
+	add	%g1, 8, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_8)
+ENTRY(NG4_retl_o2_plus_o4)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4)
+ENTRY(NG4_retl_o2_plus_o4_plus_8)
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8)
+ENTRY(NG4_retl_o2_plus_o4_plus_16)
+	add	%o4, 16, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16)
+ENTRY(NG4_retl_o2_plus_o4_plus_24)
+	add	%o4, 24, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24)
+ENTRY(NG4_retl_o2_plus_o4_plus_32)
+	add	%o4, 32, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32)
+ENTRY(NG4_retl_o2_plus_o4_plus_40)
+	add	%o4, 40, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40)
+ENTRY(NG4_retl_o2_plus_o4_plus_48)
+	add	%o4, 48, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48)
+ENTRY(NG4_retl_o2_plus_o4_plus_56)
+	add	%o4, 56, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56)
+ENTRY(NG4_retl_o2_plus_o4_plus_64)
+	add	%o4, 64, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64)
+ENTRY(NG4_retl_o2_plus_o4_fp)
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
+	add	%o4, 16, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
+	add	%o4, 24, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
+	add	%o4, 32, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
+	add	%o4, 40, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
+	add	%o4, 48, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
+	add	%o4, 56, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
+	add	%o4, 64, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
+#endif
 	.align		64
 
 	.globl	FUNC_NAME
@@ -124,12 +274,13 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	brz,pt		%g1, 51f
 	 sub		%o2, %g1, %o2
 
-1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
 	add		%o1, 1, %o1
 	subcc		%g1, 1, %g1
 	add		%o0, 1, %o0
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stb, %g2, %o0 - 0x01))
+	 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
 
 51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
 	LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
@@ -154,43 +305,43 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	brz,pt		%g1, .Llarge_aligned
 	 sub		%o2, %g1, %o2
 
-1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
 	add		%o1, 8, %o1
 	subcc		%g1, 8, %g1
 	add		%o0, 8, %o0
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stx, %g2, %o0 - 0x08))
+	 EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
 
 .Llarge_aligned:
 	/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
 	andn		%o2, 0x3f, %o4
 	sub		%o2, %o4, %o2
 
-1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
 	add		%o1, 0x40, %o1
-	EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
+	EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
 	subcc		%o4, 0x40, %o4
-	EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
-	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
-	EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
-	EX_ST(STORE_INIT(%g1, %o0))
+	EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
+	EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
 	add		%o0, 0x08, %o0
-	EX_ST(STORE_INIT(%g2, %o0))
+	EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
 	add		%o0, 0x08, %o0
-	EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
-	EX_ST(STORE_INIT(%g3, %o0))
+	EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
+	EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
 	add		%o0, 0x08, %o0
-	EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
-	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+	EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
+	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
 	add		%o0, 0x08, %o0
-	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
-	EX_ST(STORE_INIT(%o5, %o0))
+	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
+	EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
 	add		%o0, 0x08, %o0
-	EX_ST(STORE_INIT(%g2, %o0))
+	EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
 	add		%o0, 0x08, %o0
-	EX_ST(STORE_INIT(%g3, %o0))
+	EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
 	add		%o0, 0x08, %o0
-	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
 	add		%o0, 0x08, %o0
 	bne,pt		%icc, 1b
 	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
@@ -216,17 +367,17 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	sub		%o2, %o4, %o2
 	alignaddr	%o1, %g0, %g1
 	add		%o1, %o4, %o1
-	EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
-1:	EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
+1:	EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
 	subcc		%o4, 0x40, %o4
-	EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
 	faligndata	%f0, %f2, %f16
-	EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
 	faligndata	%f2, %f4, %f18
 	add		%g1, 0x40, %g1
 	faligndata	%f4, %f6, %f20
@@ -235,14 +386,14 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	faligndata	%f10, %f12, %f26
 	faligndata	%f12, %f14, %f28
 	faligndata	%f14, %f0, %f30
-	EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
-	EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
-	EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
-	EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
-	EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
-	EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
-	EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
-	EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
+	EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
+	EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
+	EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
+	EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
+	EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
+	EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
+	EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
+	EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
 	add		%o0, 0x40, %o0
 	bne,pt		%icc, 1b
 	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
@@ -270,37 +421,38 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	andncc		%o2, 0x20 - 1, %o5
 	be,pn		%icc, 2f
 	 sub		%o2, %o5, %o2
-1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
-	EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
-	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
-	EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
 	add		%o1, 0x20, %o1
 	subcc		%o5, 0x20, %o5
-	EX_ST(STORE(stx, %g1, %o0 + 0x00))
-	EX_ST(STORE(stx, %g2, %o0 + 0x08))
-	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
-	EX_ST(STORE(stx, %o4, %o0 + 0x18))
+	EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
+	EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
+	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
+	EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
 	bne,pt		%icc, 1b
 	 add		%o0, 0x20, %o0
 2:	andcc		%o2, 0x18, %o5
 	be,pt		%icc, 3f
 	 sub		%o2, %o5, %o2
-1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
 	add		%o1, 0x08, %o1
 	add		%o0, 0x08, %o0
 	subcc		%o5, 0x08, %o5
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stx, %g1, %o0 - 0x08))
+	 EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
 3:	brz,pt		%o2, .Lexit
 	 cmp		%o2, 0x04
 	bl,pn		%icc, .Ltiny
 	 nop
-	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
 	add		%o1, 0x04, %o1
 	add		%o0, 0x04, %o0
 	subcc		%o2, 0x04, %o2
 	bne,pn		%icc, .Ltiny
-	 EX_ST(STORE(stw, %g1, %o0 - 0x04))
+	 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
 	ba,a,pt		%icc, .Lexit
 .Lmedium_unaligned:
 	/* First get dest 8 byte aligned.  */
@@ -309,12 +461,12 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	brz,pt		%g1, 2f
 	 sub		%o2, %g1, %o2
 
-1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
 	add		%o1, 1, %o1
 	subcc		%g1, 1, %g1
 	add		%o0, 1, %o0
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stb, %g2, %o0 - 0x01))
+	 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
 2:
 	and		%o1, 0x7, %g1
 	brz,pn		%g1, .Lmedium_noprefetch
@@ -322,16 +474,16 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	mov		64, %g2
 	sub		%g2, %g1, %g2
 	andn		%o1, 0x7, %o1
-	EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
+	EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
 	sllx		%o4, %g1, %o4
 	andn		%o2, 0x08 - 1, %o5
 	sub		%o2, %o5, %o2
-1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
+1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
 	add		%o1, 0x08, %o1
 	subcc		%o5, 0x08, %o5
 	srlx		%g3, %g2, GLOBAL_SPARE
 	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
-	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
+	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
 	add		%o0, 0x08, %o0
 	bne,pt		%icc, 1b
 	 sllx		%g3, %g1, %o4
@@ -342,17 +494,17 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%icc, .Lsmall_unaligned
 
 .Ltiny:
-	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
 	subcc		%o2, 1, %o2
 	be,pn		%icc, .Lexit
-	 EX_ST(STORE(stb, %g1, %o0 + 0x00))
-	EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
+	 EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
+	EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
 	subcc		%o2, 1, %o2
 	be,pn		%icc, .Lexit
-	 EX_ST(STORE(stb, %g1, %o0 + 0x01))
-	EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
+	 EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
+	EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
 	ba,pt		%icc, .Lexit
-	 EX_ST(STORE(stb, %g1, %o0 + 0x02))
+	 EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
 
 .Lsmall:
 	andcc		%g2, 0x3, %g0
@@ -360,22 +512,22 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 andn		%o2, 0x4 - 1, %o5
 	 andn		%o2, 0x4 - 1, %o5
 	sub		%o2, %o5, %o2
 	sub		%o2, %o5, %o2
 1:
 1:
-	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
 	add		%o1, 0x04, %o1
 	add		%o1, 0x04, %o1
 	subcc		%o5, 0x04, %o5
 	subcc		%o5, 0x04, %o5
 	add		%o0, 0x04, %o0
 	add		%o0, 0x04, %o0
 	bne,pt		%icc, 1b
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stw, %g1, %o0 - 0x04))
+	 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
 	brz,pt		%o2, .Lexit
 	brz,pt		%o2, .Lexit
 	 nop
 	 nop
 	ba,a,pt		%icc, .Ltiny
 	ba,a,pt		%icc, .Ltiny
 
 
 .Lsmall_unaligned:
 .Lsmall_unaligned:
-1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
 	add		%o1, 1, %o1
 	add		%o0, 1, %o0
 	subcc		%o2, 1, %o2
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stb, %g1, %o0 - 0x01))
+	 EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
 	ba,a,pt		%icc, .Lexit
 	.size		FUNC_NAME, .-FUNC_NAME

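The NG4_retl_* names above are fixup stubs defined earlier in NG4memcpy.S, outside the hunks shown here. Each one encodes how to rebuild the not-yet-copied byte count at its particular fault site. A minimal sketch of what one such stub presumably looks like, modeled on the NG_ret_* stubs visible in the NGmemcpy.S section below; the exact NG4 bodies are an assumption in this excerpt:

	/* Hypothetical reconstruction -- not part of these hunks.
	 * In the .Lsmall word loop, %o2 holds the tail bytes and %o5
	 * the bytes still owed to the loop; %o5 was already decremented
	 * by 4 before the store, so a fault on that store must report
	 * %o2 + %o5 + 4 bytes uncopied.
	 */
	ENTRY(NG4_retl_o2_plus_o5_plus_4)
		add	%o5, 4, %o5
		ba,pt	%icc, __restore_asi
		 add	%o2, %o5, %o0
	ENDPROC(NG4_retl_o2_plus_o5_plus_4)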
+ 2 - 2
arch/sparc/lib/NGcopy_from_user.S

@@ -3,11 +3,11 @@
  * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __ret_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 

+ 2 - 2
arch/sparc/lib/NGcopy_to_user.S

@@ -3,11 +3,11 @@
  * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __ret_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 

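Both wrappers make the same change: EX_LD and EX_ST grow a second argument, so each annotated access gets its own exception-table entry instead of sharing the blanket __ret_one_asi handler. As a rough sketch (assuming LOAD(type, addr, dest) expands to the %asi-relative form type##a [addr] %asi, dest, as is conventional in these wrapper files), a call such as

	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_g1)

would now assemble to

	98:	lduba	[%i1] %asi, %g1
		.section __ex_table,"a"
		.align	4
		.word	98b, NG_ret_i2_plus_g1
		.text
		.align	4

so a fault at label 98 branches to NG_ret_i2_plus_g1, which computes the precise residual length for that instruction rather than a one-size-fits-all value.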
+ 158 - 75
arch/sparc/lib/NGmemcpy.S

@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/thread_info.h>
 #define GLOBAL_SPARE	%g7
@@ -27,15 +28,11 @@
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
+#define EX_ST(x,y)	x
 #endif
 
 #ifndef LOAD
@@ -79,6 +76,92 @@
 	.register	%g3,#scratch
 
 	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_asi:
+	ret
+	wr	%g0, ASI_AIUS, %asi
+	 restore
+ENTRY(NG_ret_i2_plus_i4_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i5, %i0
+ENDPROC(NG_ret_i2_plus_i4_plus_1)
+ENTRY(NG_ret_i2_plus_g1)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1)
+ENTRY(NG_ret_i2_plus_g1_minus_8)
+	sub	%g1, 8, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_8)
+ENTRY(NG_ret_i2_plus_g1_minus_16)
+	sub	%g1, 16, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_16)
+ENTRY(NG_ret_i2_plus_g1_minus_24)
+	sub	%g1, 24, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_24)
+ENTRY(NG_ret_i2_plus_g1_minus_32)
+	sub	%g1, 32, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_32)
+ENTRY(NG_ret_i2_plus_g1_minus_40)
+	sub	%g1, 40, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_40)
+ENTRY(NG_ret_i2_plus_g1_minus_48)
+	sub	%g1, 48, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_48)
+ENTRY(NG_ret_i2_plus_g1_minus_56)
+	sub	%g1, 56, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_56)
+ENTRY(NG_ret_i2_plus_i4)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4)
+ENTRY(NG_ret_i2_plus_i4_minus_8)
+	sub	%i4, 8, %i4
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4_minus_8)
+ENTRY(NG_ret_i2_plus_8)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, 8, %i0
+ENDPROC(NG_ret_i2_plus_8)
+ENTRY(NG_ret_i2_plus_4)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, 4, %i0
+ENDPROC(NG_ret_i2_plus_4)
+ENTRY(NG_ret_i2_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, 1, %i0
+ENDPROC(NG_ret_i2_plus_1)
+ENTRY(NG_ret_i2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_plus_1)
+ENTRY(NG_ret_i2)
+	ba,pt	%xcc, __restore_asi
+	 mov	%i2, %i0
+ENDPROC(NG_ret_i2)
+ENTRY(NG_ret_i2_and_7_plus_i4)
+	and	%i2, 7, %i2
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i4, %i0
+ENDPROC(NG_ret_i2_and_7_plus_i4)
+#endif
+
 	.align		64
 
 	.globl	FUNC_NAME
@@ -126,8 +209,8 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	sub		%g0, %i4, %i4	! bytes to align dst
 	sub		%i2, %i4, %i2
 1:	subcc		%i4, 1, %i4
-	EX_LD(LOAD(ldub, %i1, %g1))
-	EX_ST(STORE(stb, %g1, %o0))
+	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
+	EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
 	add		%i1, 1, %i1
 	bne,pt		%XCC, 1b
 	add		%o0, 1, %o0
@@ -160,7 +243,7 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	and		%i4, 0x7, GLOBAL_SPARE
 	sll		GLOBAL_SPARE, 3, GLOBAL_SPARE
 	mov		64, %i5
-	EX_LD(LOAD_TWIN(%i1, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
 	sub		%i5, GLOBAL_SPARE, %i5
 	mov		16, %o4
 	mov		32, %o5
@@ -178,31 +261,31 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	srlx		WORD3, PRE_SHIFT, TMP; \
 	or		WORD2, TMP, WORD2;
 
-8:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+8:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
 	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
 	LOAD(prefetch, %i1 + %i3, #one_read)
 
-	EX_ST(STORE_INIT(%g2, %o0 + 0x00))
-	EX_ST(STORE_INIT(%g3, %o0 + 0x08))
+	EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
+	EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 
-	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
 	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%o2, %o0 + 0x10))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x18))
+	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 
-	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%g2, %o0 + 0x20))
-	EX_ST(STORE_INIT(%g3, %o0 + 0x28))
+	EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 
-	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
 	add		%i1, 64, %i1
 	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%o2, %o0 + 0x30))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x38))
+	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 
 	subcc		%g1, 64, %g1
 	bne,pt		%XCC, 8b
@@ -211,31 +294,31 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	ba,pt		%XCC, 60f
 	 add		%i1, %i4, %i1
 
-9:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+9:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
 	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
 	LOAD(prefetch, %i1 + %i3, #one_read)
 
-	EX_ST(STORE_INIT(%g3, %o0 + 0x00))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x08))
+	EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 
-	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
 	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%o3, %o0 + 0x10))
-	EX_ST(STORE_INIT(%g2, %o0 + 0x18))
+	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 
-	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%g3, %o0 + 0x20))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x28))
+	EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 
-	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
 	add		%i1, 64, %i1
 	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%o3, %o0 + 0x30))
-	EX_ST(STORE_INIT(%g2, %o0 + 0x38))
+	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 
 	subcc		%g1, 64, %g1
 	bne,pt		%XCC, 9b
@@ -249,25 +332,25 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	 * one twin load ahead, then add 8 back into source when
 	 * we finish the loop.
 	 */
-	EX_LD(LOAD_TWIN(%i1, %o4, %o5))
+	EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
 	mov	16, %o7
 	mov	32, %g2
 	mov	48, %g3
 	mov	64, %o1
-1:	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+1:	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
 	LOAD(prefetch, %i1 + %o1, #one_read)
-	EX_ST(STORE_INIT(%o5, %o0 + 0x00))	! initializes cache line
-	EX_ST(STORE_INIT(%o2, %o0 + 0x08))
-	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x10))
-	EX_ST(STORE_INIT(%o4, %o0 + 0x18))
-	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
-	EX_ST(STORE_INIT(%o5, %o0 + 0x20))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x28))
-	EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
+	EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
+	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+	EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
 	add		%i1, 64, %i1
-	EX_ST(STORE_INIT(%o3, %o0 + 0x30))
-	EX_ST(STORE_INIT(%o4, %o0 + 0x38))
+	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 	subcc		%g1, 64, %g1
 	bne,pt		%XCC, 1b
 	 add		%o0, 64, %o0
@@ -282,20 +365,20 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	mov	32, %g2
 	mov	48, %g3
 	mov	64, %o1
-1:	EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
-	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+1:	EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
+	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
 	LOAD(prefetch, %i1 + %o1, #one_read)
-	EX_ST(STORE_INIT(%o4, %o0 + 0x00))	! initializes cache line
-	EX_ST(STORE_INIT(%o5, %o0 + 0x08))
-	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x10))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x18))
-	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
+	EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
+	EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 	add	%i1, 64, %i1
-	EX_ST(STORE_INIT(%o4, %o0 + 0x20))
-	EX_ST(STORE_INIT(%o5, %o0 + 0x28))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x30))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x38))
+	EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 	subcc	%g1, 64, %g1
 	bne,pt	%XCC, 1b
 	 add	%o0, 64, %o0
@@ -321,28 +404,28 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	andn		%i2, 0xf, %i4
 	and		%i2, 0xf, %i2
 1:	subcc		%i4, 0x10, %i4
-	EX_LD(LOAD(ldx, %i1, %o4))
+	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
 	add		%i1, 0x08, %i1
-	EX_LD(LOAD(ldx, %i1, %g1))
+	EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
 	sub		%i1, 0x08, %i1
-	EX_ST(STORE(stx, %o4, %i1 + %i3))
+	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
 	add		%i1, 0x8, %i1
-	EX_ST(STORE(stx, %g1, %i1 + %i3))
+	EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
 	bgu,pt		%XCC, 1b
 	 add		%i1, 0x8, %i1
 73:	andcc		%i2, 0x8, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%i2, 0x8, %i2
-	EX_LD(LOAD(ldx, %i1, %o4))
-	EX_ST(STORE(stx, %o4, %i1 + %i3))
+	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
+	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
 	add		%i1, 0x8, %i1
 1:	andcc		%i2, 0x4, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%i2, 0x4, %i2
-	EX_LD(LOAD(lduw, %i1, %i5))
-	EX_ST(STORE(stw, %i5, %i1 + %i3))
+	EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
+	EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
 	add		%i1, 0x4, %i1
 1:	cmp		%i2, 0
 	be,pt		%XCC, 85f
@@ -358,8 +441,8 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	sub		%i2, %g1, %i2
 
 1:	subcc		%g1, 1, %g1
-	EX_LD(LOAD(ldub, %i1, %i5))
-	EX_ST(STORE(stb, %i5, %i1 + %i3))
+	EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
+	EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
 	bgu,pt		%icc, 1b
 	 add		%i1, 1, %i1
 
@@ -375,16 +458,16 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 
 8:	mov		64, %i3
 	andn		%i1, 0x7, %i1
-	EX_LD(LOAD(ldx, %i1, %g2))
+	EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
 	sub		%i3, %g1, %i3
 	andn		%i2, 0x7, %i4
 	sllx		%g2, %g1, %g2
 1:	add		%i1, 0x8, %i1
-	EX_LD(LOAD(ldx, %i1, %g3))
+	EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
 	subcc		%i4, 0x8, %i4
 	srlx		%g3, %i3, %i5
 	or		%i5, %g2, %i5
-	EX_ST(STORE(stx, %i5, %o0))
+	EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
 	add		%o0, 0x8, %o0
 	bgu,pt		%icc, 1b
 	 sllx		%g3, %g1, %g2
@@ -404,8 +487,8 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 
 1:
 	subcc		%i2, 4, %i2
-	EX_LD(LOAD(lduw, %i1, %g1))
-	EX_ST(STORE(stw, %g1, %i1 + %i3))
+	EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
+	EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
 	bgu,pt		%XCC, 1b
 	 add		%i1, 4, %i1
 
@@ -415,8 +498,8 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	.align		32
 90:
 	subcc		%i2, 1, %i2
-	EX_LD(LOAD(ldub, %i1, %g1))
-	EX_ST(STORE(stb, %g1, %i1 + %i3))
+	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
+	EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
 	bgu,pt		%XCC, 90b
 	 add		%i1, 1, %i1
 	ret

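Reading the NG_ret_* stubs backwards gives the bookkeeping: inside the 64-byte block loops, %g1 holds the bytes still owed to whole blocks and %i2 the tail, so a store that faults after 8 bytes of the current block have already landed must report %i2 + (%g1 - 8), which is exactly what NG_ret_i2_plus_g1_minus_8 computes before branching to __restore_asi. An illustrative trace (the byte counts are made up for the example):

	/* 200-byte copy: %g1 = 192 (whole-block bytes), %i2 = 8 (tail).
	 * Fault on the STORE_INIT at %o0 + 0x10, i.e. after the first
	 * 8 bytes of the first block were written:
	 *   returned %i0 = %i2 + (%g1 - 8) = 8 + 184 = 192
	 * = 200 total - 8 already copied, as required.
	 */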
Some files were not shown because too many files changed in this diff