Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Martin Schwidefsky:
 "The big one is support for fake NUMA, splitting a really large machine
  in more manageable piece improves performance in some cases, e.g. for
  a KVM host.

  The FICON Link Incident handling has been improved; this helps the
  operator to identify degraded or non-operational FICON connections.

  The save and restore of floating point and vector registers has been
  overhauled to allow the future use of vector registers in the kernel.

  A few small enhancements: magic sys-requests for the vt220 console via
  SCLP, some more assembler code converted to C, and improved PCI error
  handling.

  And the usual cleanup and bug fixing"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (59 commits)
  s390/jump_label: Use %*ph to print small buffers
  s390/sclp_vt220: support magic sysrequests
  s390/ctrlchar: improve handling of magic sysrequests
  s390/numa: remove superfluous ARCH_WANT defines
  s390/3270: redraw screen on unsolicited device end
  s390/dcssblk: correct out of bounds array indexes
  s390/mm: simplify page table alloc/free code
  s390/pci: move debug messages to debugfs
  s390/nmi: initialize control register 0 earlier
  s390/zcrypt: use msleep() instead of mdelay()
  s390/hmcdrv: fix interrupt registration
  s390/setup: fix novx parameter
  s390/uaccess: remove uaccess_primary kernel parameter
  s390: remove unneeded sizeof(void *) comparisons
  s390/facilities: remove transactional-execution bits
  s390/numa: re-add DIE sched_domain_topology_level
  s390/dasd: enhance CUIR scope detection
  s390/dasd: fix failing path verification
  s390/vdso: emit a GNU hash
  s390/numa: make core to node mapping data dynamic
  ...
Linus Torvalds 10 years ago
parent
commit 9c6a019c6e
94 changed files with 3,696 additions and 1,368 deletions
  1. MAINTAINERS (+0 -6)
  2. arch/s390/Kbuild (+1 -0)
  3. arch/s390/Kconfig (+75 -0)
  4. arch/s390/Makefile (+2 -0)
  5. arch/s390/configs/default_defconfig (+2 -0)
  6. arch/s390/configs/gcov_defconfig (+2 -0)
  7. arch/s390/configs/performance_defconfig (+3 -0)
  8. arch/s390/crypto/aes_s390.c (+2 -1)
  9. arch/s390/crypto/des_s390.c (+2 -1)
  10. arch/s390/crypto/ghash_s390.c (+2 -1)
  11. arch/s390/crypto/prng.c (+2 -2)
  12. arch/s390/crypto/sha1_s390.c (+2 -1)
  13. arch/s390/crypto/sha256_s390.c (+2 -1)
  14. arch/s390/crypto/sha512_s390.c (+2 -1)
  15. arch/s390/include/asm/cpufeature.h (+29 -0)
  16. arch/s390/include/asm/ctl_reg.h (+2 -0)
  17. arch/s390/include/asm/fpu-internal.h (+110 -0)
  18. arch/s390/include/asm/kvm_host.h (+3 -3)
  19. arch/s390/include/asm/linkage.h (+22 -0)
  20. arch/s390/include/asm/mmzone.h (+16 -0)
  21. arch/s390/include/asm/numa.h (+35 -0)
  22. arch/s390/include/asm/pci.h (+21 -1)
  23. arch/s390/include/asm/pgtable.h (+13 -0)
  24. arch/s390/include/asm/processor.h (+15 -21)
  25. arch/s390/include/asm/sclp.h (+1 -1)
  26. arch/s390/include/asm/switch_to.h (+3 -132)
  27. arch/s390/include/asm/topology.h (+39 -0)
  28. arch/s390/include/asm/unistd.h (+16 -8)
  29. arch/s390/include/asm/vx-insn.h (+480 -0)
  30. arch/s390/include/uapi/asm/unistd.h (+5 -5)
  31. arch/s390/kernel/Makefile (+11 -0)
  32. arch/s390/kernel/asm-offsets.c (+3 -0)
  33. arch/s390/kernel/compat_signal.c (+14 -34)
  34. arch/s390/kernel/entry.S (+449 -165)
  35. arch/s390/kernel/head.S (+3 -2)
  36. arch/s390/kernel/jump_label.c (+3 -6)
  37. arch/s390/kernel/nmi.c (+8 -3)
  38. arch/s390/kernel/perf_cpum_sf.c (+3 -6)
  39. arch/s390/kernel/process.c (+42 -10)
  40. arch/s390/kernel/processor.c (+9 -0)
  41. arch/s390/kernel/ptrace.c (+73 -101)
  42. arch/s390/kernel/s390_ksyms.c (+3 -0)
  43. arch/s390/kernel/sclp.S (+0 -355)
  44. arch/s390/kernel/sclp.c (+160 -0)
  45. arch/s390/kernel/setup.c (+10 -9)
  46. arch/s390/kernel/signal.c (+14 -33)
  47. arch/s390/kernel/smp.c (+2 -2)
  48. arch/s390/kernel/syscalls.S (+5 -5)
  49. arch/s390/kernel/topology.c (+16 -15)
  50. arch/s390/kernel/traps.c (+16 -18)
  51. arch/s390/kernel/vdso32/Makefile (+1 -1)
  52. arch/s390/kernel/vdso64/Makefile (+1 -1)
  53. arch/s390/kernel/vtime.c (+11 -1)
  54. arch/s390/kvm/kvm-s390.c (+97 -33)
  55. arch/s390/lib/delay.c (+1 -0)
  56. arch/s390/lib/uaccess.c (+1 -14)
  57. arch/s390/mm/fault.c (+1 -1)
  58. arch/s390/mm/gup.c (+10 -0)
  59. arch/s390/mm/init.c (+20 -20)
  60. arch/s390/mm/pgtable.c (+89 -136)
  61. arch/s390/numa/Makefile (+3 -0)
  62. arch/s390/numa/mode_emu.c (+530 -0)
  63. arch/s390/numa/numa.c (+184 -0)
  64. arch/s390/numa/numa_mode.h (+24 -0)
  65. arch/s390/numa/toptree.c (+342 -0)
  66. arch/s390/numa/toptree.h (+60 -0)
  67. arch/s390/pci/pci.c (+17 -17)
  68. arch/s390/pci/pci_dma.c (+4 -4)
  69. arch/s390/pci/pci_event.c (+6 -6)
  70. arch/s390/pci/pci_insn.c (+23 -10)
  71. arch/s390/pci/pci_sysfs.c (+12 -5)
  72. drivers/s390/block/dasd_alias.c (+3 -3)
  73. drivers/s390/block/dasd_eckd.c (+252 -81)
  74. drivers/s390/block/dasd_eckd.h (+10 -1)
  75. drivers/s390/block/dasd_int.h (+1 -0)
  76. drivers/s390/block/dcssblk.c (+7 -7)
  77. drivers/s390/char/con3270.c (+4 -0)
  78. drivers/s390/char/ctrlchar.c (+11 -5)
  79. drivers/s390/char/ctrlchar.h (+12 -0)
  80. drivers/s390/char/diag_ftp.c (+2 -2)
  81. drivers/s390/char/monreader.c (+1 -1)
  82. drivers/s390/char/sclp.c (+3 -3)
  83. drivers/s390/char/sclp_cmd.c (+11 -7)
  84. drivers/s390/char/sclp_vt220.c (+51 -1)
  85. drivers/s390/char/tty3270.c (+4 -0)
  86. drivers/s390/cio/chsc.c (+123 -42)
  87. drivers/s390/cio/device_ops.c (+1 -1)
  88. drivers/s390/cio/eadm_sch.c (+1 -2)
  89. drivers/s390/crypto/ap_bus.c (+1 -1)
  90. drivers/s390/crypto/zcrypt_pcixcc.c (+1 -1)
  91. drivers/s390/net/qeth_l2_main.c (+1 -4)
  92. drivers/s390/net/qeth_l3_main.c (+1 -4)
  93. drivers/s390/scsi/zfcp_fsf.c (+1 -1)
  94. include/linux/cpufeature.h (+4 -3)

+ 0 - 6
MAINTAINERS

@@ -5907,7 +5907,6 @@ F:	arch/powerpc/kvm/
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
 M:	Christian Borntraeger <borntraeger@de.ibm.com>
 M:	Cornelia Huck <cornelia.huck@de.ibm.com>
-M:	linux390@de.ibm.com
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -8718,7 +8717,6 @@ F:	drivers/video/fbdev/savage/
 S390
 M:	Martin Schwidefsky <schwidefsky@de.ibm.com>
 M:	Heiko Carstens <heiko.carstens@de.ibm.com>
-M:	linux390@de.ibm.com
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -8746,7 +8744,6 @@ F:	block/partitions/ibm.c
 
 S390 NETWORK DRIVERS
 M:	Ursula Braun <ursula.braun@de.ibm.com>
-M:	linux390@de.ibm.com
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -8763,7 +8760,6 @@ F:	drivers/pci/hotplug/s390_pci_hpc.c
 
 S390 ZCRYPT DRIVER
 M:	Ingo Tuchscherer <ingo.tuchscherer@de.ibm.com>
-M:	linux390@de.ibm.com
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -8771,7 +8767,6 @@ F:	drivers/s390/crypto/
 
 S390 ZFCP DRIVER
 M:	Steffen Maier <maier@linux.vnet.ibm.com>
-M:	linux390@de.ibm.com
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -8779,7 +8774,6 @@ F:	drivers/s390/scsi/zfcp_*
 
 S390 IUCV NETWORK LAYER
 M:	Ursula Braun <ursula.braun@de.ibm.com>
-M:	linux390@de.ibm.com
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported

+ 1 - 0
arch/s390/Kbuild

@@ -6,3 +6,4 @@ obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
 obj-y				+= net/
 obj-$(CONFIG_PCI)		+= pci/
+obj-$(CONFIG_NUMA)		+= numa/

+ 75 - 0
arch/s390/Kconfig

@@ -99,18 +99,22 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_CMPXCHG_LOCKREF
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS2
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_DEVICES if !SMP
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_EARLY_PFN_TO_NID
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
@@ -153,6 +157,7 @@ config S390
 	select VIRT_CPU_ACCOUNTING
 	select VIRT_TO_BUS
 
+
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 
@@ -385,6 +390,76 @@ config HOTPLUG_CPU
 config SCHED_SMT
 	def_bool n
 
+# Some NUMA nodes have memory ranges that span
+# other nodes.	Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node.	See memmap_init_zone()
+# for details.
+config NODES_SPAN_OTHER_NODES
+	def_bool NUMA
+
+config NUMA
+	bool "NUMA support"
+	depends on SMP && 64BIT && SCHED_TOPOLOGY
+	default n
+	help
+	  Enable NUMA support
+
+	  This option adds NUMA support to the kernel.
+
+	  An operation mode can be selected by appending
+	  numa=<method> to the kernel command line.
+
+	  The default behaviour is identical to appending numa=plain to
+	  the command line. This will create just one node with all
+	  available memory and all CPUs in it.
+
+config NODES_SHIFT
+	int "Maximum NUMA nodes (as a power of 2)"
+	range 1 10
+	depends on NUMA
+	default "4"
+	help
+	  Specify the maximum number of NUMA nodes available on the target
+	  system. Increases memory reserved to accommodate various tables.
+
+menu "Select NUMA modes"
+	depends on NUMA
+
+config NUMA_EMU
+	bool "NUMA emulation"
+	default y
+	help
+	  NUMA emulation mode will split the available system memory into
+	  equal chunks which then are distributed over the configured number
+	  of nodes in a round-robin manner.
+
+	  The number of fake nodes is limited by the number of available memory
+	  chunks (i.e. memory size / fake size) and the number of supported
+	  nodes in the kernel.
+
+	  The CPUs are assigned to the nodes in a way that partially respects
+	  the original machine topology (if supported by the machine).
+	  Fair distribution of the CPUs is not guaranteed.
+
+config EMU_SIZE
+	hex "NUMA emulation memory chunk size"
+	default 0x10000000
+	range 0x400000 0x100000000
+	depends on NUMA_EMU
+	help
+	  Select the default size by which the memory is chopped and then
+	  assigned to emulated NUMA nodes.
+
+	  This can be overridden by specifying
+
+	  emu_size=<n>
+
+	  on the kernel command line, where the suffixes K, M, G, and T are
+	  also supported.
+
+endmenu
+
 config SCHED_MC
 	def_bool n
 

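For illustration, the options above combine on the kernel command line along these lines; this is a sketch based only on the help texts (numa=emu as the mode name that selects the NUMA_EMU code is an assumption, since only numa=plain and emu_size= are spelled out above):

	numa=emu emu_size=512M

This would chop memory into 512 MiB chunks and hand them out to the emulated nodes in the round-robin fashion described in the NUMA_EMU help text.
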
+ 2 - 0
arch/s390/Makefile

@@ -33,6 +33,8 @@ mflags-$(CONFIG_MARCH_Z196)   := -march=z196
 mflags-$(CONFIG_MARCH_ZEC12)  := -march=zEC12
 mflags-$(CONFIG_MARCH_Z13)   := -march=z13
 
+export CC_FLAGS_MARCH := $(mflags-y)
+
 aflags-y += $(mflags-y)
 cflags-y += $(mflags-y)
 

+ 2 - 0
arch/s390/configs/default_defconfig

@@ -13,6 +13,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -50,6 +51,7 @@ CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
 CONFIG_PREEMPT=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y

+ 2 - 0
arch/s390/configs/gcov_defconfig

@@ -13,6 +13,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -49,6 +50,7 @@ CONFIG_DEFAULT_DEADLINE=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y

+ 3 - 0
arch/s390/configs/performance_defconfig

@@ -13,6 +13,8 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -48,6 +50,7 @@ CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y

+ 2 - 1
arch/s390/crypto/aes_s390.c

@@ -24,6 +24,7 @@
 #include <crypto/algapi.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include "crypt_s390.h"
@@ -976,7 +977,7 @@ static void __exit aes_s390_fini(void)
 	crypto_unregister_alg(&aes_alg);
 }
 
-module_init(aes_s390_init);
+module_cpu_feature_match(MSA, aes_s390_init);
 module_exit(aes_s390_fini);
 
 MODULE_ALIAS_CRYPTO("aes-all");

+ 2 - 1
arch/s390/crypto/des_s390.c

@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
 #include <crypto/des.h>
@@ -616,7 +617,7 @@ static void __exit des_s390_exit(void)
 	crypto_unregister_alg(&des_alg);
 }
 
-module_init(des_s390_init);
+module_cpu_feature_match(MSA, des_s390_init);
 module_exit(des_s390_exit);
 
 MODULE_ALIAS_CRYPTO("des");

+ 2 - 1
arch/s390/crypto/ghash_s390.c

@@ -9,6 +9,7 @@
 
 #include <crypto/internal/hash.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 
 #include "crypt_s390.h"
 
@@ -158,7 +159,7 @@ static void __exit ghash_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_init(ghash_mod_init);
+module_cpu_feature_match(MSA, ghash_mod_init);
 module_exit(ghash_mod_exit);
 
 MODULE_ALIAS_CRYPTO("ghash");

+ 2 - 2
arch/s390/crypto/prng.c

@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
+#include <linux/cpufeature.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <asm/debug.h>
@@ -914,6 +915,5 @@ static void __exit prng_exit(void)
 	}
 }
 
-
-module_init(prng_init);
+module_cpu_feature_match(MSA, prng_init);
 module_exit(prng_exit);

+ 2 - 1
arch/s390/crypto/sha1_s390.c

@@ -26,6 +26,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
@@ -100,7 +101,7 @@ static void __exit sha1_s390_fini(void)
 	crypto_unregister_shash(&alg);
 }
 
-module_init(sha1_s390_init);
+module_cpu_feature_match(MSA, sha1_s390_init);
 module_exit(sha1_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha1");

+ 2 - 1
arch/s390/crypto/sha256_s390.c

@@ -16,6 +16,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
@@ -140,7 +141,7 @@ static void __exit sha256_s390_fini(void)
 	crypto_unregister_shash(&sha256_alg);
 }
 
-module_init(sha256_s390_init);
+module_cpu_feature_match(MSA, sha256_s390_init);
 module_exit(sha256_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha256");

+ 2 - 1
arch/s390/crypto/sha512_s390.c

@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 
 #include "sha.h"
 #include "crypt_s390.h"
@@ -148,7 +149,7 @@ static void __exit fini(void)
 	crypto_unregister_shash(&sha384_alg);
 }
 
-module_init(init);
+module_cpu_feature_match(MSA, init);
 module_exit(fini);
 
 MODULE_LICENSE("GPL");

+ 29 - 0
arch/s390/include/asm/cpufeature.h

@@ -0,0 +1,29 @@
+/*
+ * Module interface for CPU features
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_CPUFEATURE_H
+#define __ASM_S390_CPUFEATURE_H
+
+#include <asm/elf.h>
+
+/* Hardware features on Linux on z Systems are indicated by facility bits that
+ * are mapped to the so-called machine flags.  Particular machine flags are
+ * then used to define ELF hardware capabilities; most notably hardware flags
+ * that are essential for user space / glibc.
+ *
+ * Restrict the set of exposed CPU features to ELF hardware capabilities for
+ * now.  Additional machine flags can be indicated by values larger than
+ * MAX_ELF_HWCAP_FEATURES.
+ */
+#define MAX_ELF_HWCAP_FEATURES	(8 * sizeof(elf_hwcap))
+#define MAX_CPU_FEATURES	MAX_ELF_HWCAP_FEATURES
+
+#define cpu_feature(feat)	ilog2(HWCAP_S390_ ## feat)
+
+int cpu_have_feature(unsigned int nr);
+
+#endif /* __ASM_S390_CPUFEATURE_H */
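
The crypto hunks above show the consumer side of this header: module_init() becomes module_cpu_feature_match(), so a module is initialized (and can be auto-loaded) only on machines whose elf_hwcap advertises the facility. A minimal self-contained sketch; the demo_* names are illustrative and not part of this series:

	#include <linux/module.h>
	#include <linux/cpufeature.h>

	static int __init demo_init(void)
	{
		/* reached only if the MSA facility bit is set in elf_hwcap */
		return 0;
	}

	static void __exit demo_exit(void)
	{
	}

	/* registers a cpu-feature module alias in addition to the initcall */
	module_cpu_feature_match(MSA, demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");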

+ 2 - 0
arch/s390/include/asm/ctl_reg.h

@@ -46,6 +46,8 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
 	__ctl_load(reg, cr, cr);
 }
 
+void __ctl_set_vx(void);
+
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 

+ 110 - 0
arch/s390/include/asm/fpu-internal.h

@@ -0,0 +1,110 @@
+/*
+ * General floating-point and vector register helpers
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_INTERNAL_H
+#define _ASM_S390_FPU_INTERNAL_H
+
+#define FPU_USE_VX		1	/* Vector extension is active */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <asm/linkage.h>
+#include <asm/ctl_reg.h>
+#include <asm/sigcontext.h>
+
+struct fpu {
+	__u32 fpc;			/* Floating-point control */
+	__u32 flags;
+	union {
+		void *regs;
+		freg_t *fprs;		/* Floating-point register save area */
+		__vector128 *vxrs;	/* Vector register save area */
+	};
+};
+
+void save_fpu_regs(void);
+
+#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
+#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
+
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
+static inline int test_fp_ctl(u32 fpc)
+{
+	u32 orig_fpc;
+	int rc;
+
+	asm volatile(
+		"	efpc    %1\n"
+		"	sfpc	%2\n"
+		"0:	sfpc	%1\n"
+		"	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc), "=d" (orig_fpc)
+		: "d" (fpc), "0" (-EINVAL));
+	return rc;
+}
+
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+	unsigned long cr0, flags;
+
+	flags = arch_local_irq_save();
+	__ctl_store(cr0, 0, 0);
+	__ctl_set_bit(0, 17);
+	__ctl_set_bit(0, 18);
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(struct vx_array *) vxrs) : : "1");
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		fprs[i] = *(freg_t *)(vxrs + i);
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		*(freg_t *)(vxrs + i) = fprs[i];
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpregs->pad = 0;
+	if (is_vx_fpu(fpu))
+		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+	else
+		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	if (is_vx_fpu(fpu))
+		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+	else
+		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+#endif
+
+#endif /* _ASM_S390_FPU_INTERNAL_H */
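
A short sketch of how these helpers compose, mirroring the signal-frame hunks later in this series (the local variables are illustrative):

	struct fpu *fpu = &current->thread.fpu;
	_s390_fp_regs fpregs;

	save_fpu_regs();		/* flush live FP/VX state into current->thread.fpu */
	fpregs_store(&fpregs, fpu);	/* converts VX to FP layout when is_vx_fpu(fpu) */

Because each freg_t aliases the leftmost eight bytes of the corresponding vector register, convert_vx_to_fp() and convert_fp_to_vx() can translate between the two save formats with a plain copy loop.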

+ 3 - 3
arch/s390/include/asm/kvm_host.h

@@ -22,6 +22,7 @@
 #include <linux/kvm.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
+#include <asm/fpu-internal.h>
 #include <asm/isc.h>
 
 #define KVM_MAX_VCPUS 64
@@ -501,10 +502,9 @@ struct kvm_guestdbg_info_arch {
 
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
-	s390_fp_regs      host_fpregs;
 	unsigned int      host_acrs[NUM_ACRS];
-	s390_fp_regs      guest_fpregs;
-	struct kvm_s390_vregs	*host_vregs;
+	struct fpu	  host_fpregs;
+	struct fpu	  guest_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;

+ 22 - 0
arch/s390/include/asm/linkage.h

@@ -6,4 +6,26 @@
 #define __ALIGN .align 4, 0x07
 #define __ALIGN_STR __stringify(__ALIGN)
 
+#ifndef __ASSEMBLY__
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target)	\
+	".section __ex_table,\"a\"\n"	\
+	".align	4\n"			\
+	".long	(" #_fault ") - .\n"	\
+	".long	(" #_target ") - .\n"	\
+	".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target)	\
+	.section __ex_table,"a"	;	\
+	.align	4 ;			\
+	.long	(_fault) - . ;		\
+	.long	(_target) - . ;		\
+	.previous
+
+#endif /* __ASSEMBLY__ */
 #endif

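For a live example of the C flavour, see test_fp_ctl() in the new fpu-internal.h above: loading an invalid floating-point-control value traps, the EX_TABLE(0b,1b) entry steers the fault to label 1:, and skipping the "la %0,0" leaves the preloaded -EINVAL as the return value.
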
+ 16 - 0
arch/s390/include/asm/mmzone.h

@@ -0,0 +1,16 @@
+/*
+ * NUMA support for s390
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_MMZONE_H
+#define _ASM_S390_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[nid])
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_MMZONE_H */

+ 35 - 0
arch/s390/include/asm/numa.h

@@ -0,0 +1,35 @@
+/*
+ * NUMA support for s390
+ *
+ * Declare the NUMA core code structures and functions.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_NUMA_H
+#define _ASM_S390_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/numa.h>
+#include <linux/cpumask.h>
+
+void numa_setup(void);
+int numa_pfn_to_nid(unsigned long pfn);
+int __node_distance(int a, int b);
+void numa_update_cpu_topology(void);
+
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+extern int numa_debug_enabled;
+
+#else
+
+static inline void numa_setup(void) { }
+static inline void numa_update_cpu_topology(void) { }
+static inline int numa_pfn_to_nid(unsigned long pfn)
+{
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_NUMA_H */

+ 21 - 1
arch/s390/include/asm/pci.h

@@ -170,7 +170,11 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
 #endif /* CONFIG_HOTPLUG_PCI_S390 */
 
 /* Helpers */
-struct zpci_dev *get_zdev(struct pci_dev *);
+static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
+{
+	return pdev->sysdata;
+}
+
 struct zpci_dev *get_zdev_by_fid(u32);
 
 /* DMA */
@@ -188,4 +192,20 @@ void zpci_debug_init_device(struct zpci_dev *);
 void zpci_debug_exit_device(struct zpci_dev *);
 void zpci_debug_info(struct zpci_dev *, struct seq_file *);
 
+#ifdef CONFIG_NUMA
+
+/* Returns the node based on PCI bus */
+static inline int __pcibus_to_node(const struct pci_bus *bus)
+{
+	return NUMA_NO_NODE;
+}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+	return cpu_online_mask;
+}
+
+#endif /* CONFIG_NUMA */
+
 #endif

+ 13 - 0
arch/s390/include/asm/pgtable.h

@@ -576,6 +576,19 @@ static inline int pte_same(pte_t a, pte_t b)
 	return pte_val(a) == pte_val(b);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pte_protnone(pte_t pte)
+{
+	return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	/* pmd_large(pmd) implies pmd_present(pmd) */
+	return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+#endif
+
 static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
 	unsigned long new = 0;

+ 15 - 21
arch/s390/include/asm/processor.h

@@ -14,10 +14,12 @@
 #define CIF_MCCK_PENDING	0	/* machine check handling is pending */
 #define CIF_ASCE		1	/* user asce needs fixup / uaccess */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
+#define CIF_FPU			3	/* restore vector registers */
 
 #define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
 #define _CIF_ASCE		(1<<CIF_ASCE)
 #define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
+#define _CIF_FPU		(1<<CIF_FPU)
 
 #ifndef __ASSEMBLY__
 
@@ -28,6 +30,7 @@
 #include <asm/ptrace.h>
 #include <asm/setup.h>
 #include <asm/runtime_instr.h>
+#include <asm/fpu-internal.h>
 
 static inline void set_cpu_flag(int flag)
 {
@@ -85,7 +88,7 @@ typedef struct {
  * Thread structure
  */
 struct thread_struct {
-	s390_fp_regs fp_regs;
+	struct fpu fpu;			/* FP and VX register save area */
 	unsigned int  acrs[NUM_ACRS];
         unsigned long ksp;              /* kernel stack pointer             */
 	mm_segment_t mm_segment;
@@ -101,7 +104,6 @@ struct thread_struct {
 	struct runtime_instr_cb *ri_cb;
 	int ri_signum;
 	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
-	__vector128 *vxrs;		/* Vector register save area */
 };
 
 /* Flag to disable transactions. */
@@ -230,6 +232,17 @@ static inline void __load_psw_mask (unsigned long mask)
 		: "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
 		: "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
 }
 }
 
 
+/*
+ * Extract current PSW mask
+ */
+static inline unsigned long __extract_psw(void)
+{
+	unsigned int reg1, reg2;
+
+	asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2));
+	return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
+}
+
 /*
 /*
  * Rewind PSW instruction address by specified number of bytes.
  * Rewind PSW instruction address by specified number of bytes.
  */
  */
@@ -336,25 +349,6 @@ extern void memcpy_absolute(void *, void *, size_t);
 	memcpy_absolute(&(dest), &__tmp, sizeof(__tmp));	\
 }
 
-/*
- * Helper macro for exception table entries
- */
-#define EX_TABLE(_fault, _target)	\
-	".section __ex_table,\"a\"\n"	\
-	".align	4\n"			\
-	".long	(" #_fault ") - .\n"	\
-	".long	(" #_target ") - .\n"	\
-	".previous\n"
-
-#else /* __ASSEMBLY__ */
-
-#define EX_TABLE(_fault, _target)	\
-	.section __ex_table,"a"	;	\
-	.align	4 ;			\
-	.long	(_fault) - . ;		\
-	.long	(_target) - . ;		\
-	.previous
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_S390_PROCESSOR_H */

+ 1 - 1
arch/s390/include/asm/sclp.h

@@ -79,6 +79,6 @@ int sclp_pci_configure(u32 fid);
 int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 void sclp_early_detect(void);
-long _sclp_print_early(const char *);
+int _sclp_print_early(const char *);
 
 #endif /* _ASM_S390_SCLP_H */

+ 3 - 132
arch/s390/include/asm/switch_to.h

@@ -8,139 +8,12 @@
 #define __ASM_SWITCH_TO_H
 
 #include <linux/thread_info.h>
+#include <asm/fpu-internal.h>
 #include <asm/ptrace.h>
 
 extern struct task_struct *__switch_to(void *, void *);
 extern void update_cr_regs(struct task_struct *task);
 
-static inline int test_fp_ctl(u32 fpc)
-{
-	u32 orig_fpc;
-	int rc;
-
-	asm volatile(
-		"	efpc    %1\n"
-		"	sfpc	%2\n"
-		"0:	sfpc	%1\n"
-		"	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc), "=d" (orig_fpc)
-		: "d" (fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_ctl(u32 *fpc)
-{
-	asm volatile(
-		"       stfpc   %0\n"
-		: "+Q" (*fpc));
-}
-
-static inline int restore_fp_ctl(u32 *fpc)
-{
-	int rc;
-
-	asm volatile(
-		"	lfpc    %1\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_regs(freg_t *fprs)
-{
-	asm volatile("std 0,%0" : "=Q" (fprs[0]));
-	asm volatile("std 2,%0" : "=Q" (fprs[2]));
-	asm volatile("std 4,%0" : "=Q" (fprs[4]));
-	asm volatile("std 6,%0" : "=Q" (fprs[6]));
-	asm volatile("std 1,%0" : "=Q" (fprs[1]));
-	asm volatile("std 3,%0" : "=Q" (fprs[3]));
-	asm volatile("std 5,%0" : "=Q" (fprs[5]));
-	asm volatile("std 7,%0" : "=Q" (fprs[7]));
-	asm volatile("std 8,%0" : "=Q" (fprs[8]));
-	asm volatile("std 9,%0" : "=Q" (fprs[9]));
-	asm volatile("std 10,%0" : "=Q" (fprs[10]));
-	asm volatile("std 11,%0" : "=Q" (fprs[11]));
-	asm volatile("std 12,%0" : "=Q" (fprs[12]));
-	asm volatile("std 13,%0" : "=Q" (fprs[13]));
-	asm volatile("std 14,%0" : "=Q" (fprs[14]));
-	asm volatile("std 15,%0" : "=Q" (fprs[15]));
-}
-
-static inline void restore_fp_regs(freg_t *fprs)
-{
-	asm volatile("ld 0,%0" : : "Q" (fprs[0]));
-	asm volatile("ld 2,%0" : : "Q" (fprs[2]));
-	asm volatile("ld 4,%0" : : "Q" (fprs[4]));
-	asm volatile("ld 6,%0" : : "Q" (fprs[6]));
-	asm volatile("ld 1,%0" : : "Q" (fprs[1]));
-	asm volatile("ld 3,%0" : : "Q" (fprs[3]));
-	asm volatile("ld 5,%0" : : "Q" (fprs[5]));
-	asm volatile("ld 7,%0" : : "Q" (fprs[7]));
-	asm volatile("ld 8,%0" : : "Q" (fprs[8]));
-	asm volatile("ld 9,%0" : : "Q" (fprs[9]));
-	asm volatile("ld 10,%0" : : "Q" (fprs[10]));
-	asm volatile("ld 11,%0" : : "Q" (fprs[11]));
-	asm volatile("ld 12,%0" : : "Q" (fprs[12]));
-	asm volatile("ld 13,%0" : : "Q" (fprs[13]));
-	asm volatile("ld 14,%0" : : "Q" (fprs[14]));
-	asm volatile("ld 15,%0" : : "Q" (fprs[15]));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		: "=Q" (*(addrtype *) vxrs) : : "1");
-}
-
-static inline void save_vx_regs_safe(__vector128 *vxrs)
-{
-	unsigned long cr0, flags;
-
-	flags = arch_local_irq_save();
-	__ctl_store(cr0, 0, 0);
-	__ctl_set_bit(0, 17);
-	__ctl_set_bit(0, 18);
-	save_vx_regs(vxrs);
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
-}
-
-static inline void restore_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		: : "Q" (*(addrtype *) vxrs) : "1");
-}
-
-static inline void save_fp_vx_regs(struct task_struct *task)
-{
-	if (task->thread.vxrs)
-		save_vx_regs(task->thread.vxrs);
-	else
-		save_fp_regs(task->thread.fp_regs.fprs);
-}
-
-static inline void restore_fp_vx_regs(struct task_struct *task)
-{
-	if (task->thread.vxrs)
-		restore_vx_regs(task->thread.vxrs);
-	else
-		restore_fp_regs(task->thread.fp_regs.fprs);
-}
-
 static inline void save_access_regs(unsigned int *acrs)
 {
 	typedef struct { int _[NUM_ACRS]; } acrstype;
@@ -157,15 +30,13 @@ static inline void restore_access_regs(unsigned int *acrs)
 
 #define switch_to(prev,next,last) do {					\
 	if (prev->mm) {							\
-		save_fp_ctl(&prev->thread.fp_regs.fpc);			\
-		save_fp_vx_regs(prev);					\
+		save_fpu_regs();					\
 		save_access_regs(&prev->thread.acrs[0]);		\
 		save_ri_cb(prev->thread.ri_cb);				\
 	}								\
 	if (next->mm) {							\
 		update_cr_regs(next);					\
-		restore_fp_ctl(&next->thread.fp_regs.fpc);		\
-		restore_fp_vx_regs(next);				\
+		set_cpu_flag(CIF_FPU);					\
 		restore_access_regs(&next->thread.acrs[0]);		\
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
 	}								\

+ 39 - 0
arch/s390/include/asm/topology.h

@@ -2,6 +2,7 @@
 #define _ASM_S390_TOPOLOGY_H
 #define _ASM_S390_TOPOLOGY_H
 
 #include <linux/cpumask.h>
+#include <asm/numa.h>
 
 struct sysinfo_15_1_x;
 struct cpu;
 	unsigned short core_id;
 	unsigned short core_id;
 	unsigned short socket_id;
 	unsigned short book_id;
+	unsigned short node_id;
 	cpumask_t thread_mask;
 	cpumask_t core_mask;
 	cpumask_t book_mask;
 #define POLARIZATION_VM		(2)
 #define POLARIZATION_VM		(2)
 #define POLARIZATION_VH		(3)
 
+
+#ifdef CONFIG_NUMA
+
+#define cpu_to_node cpu_to_node
+static inline int cpu_to_node(int cpu)
+{
+	return per_cpu(cpu_topology, cpu).node_id;
+}
+
+/* Returns a pointer to the cpumask of CPUs on node 'node'. */
+#define cpumask_of_node cpumask_of_node
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return node_to_cpumask_map[node];
+}
+
+/*
+ * Returns the number of the node containing node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+
+#define node_distance(a, b) __node_distance(a, b)
+
+#else /* !CONFIG_NUMA */
+
+#define numa_node_id numa_node_id
+static inline int numa_node_id(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_S390_TOPOLOGY_H */

+ 16 - 8
arch/s390/include/asm/unistd.h

@@ -11,16 +11,24 @@
 
 #define __IGNORE_time
 
-/* Ignore NUMA system calls. Not wired up on s390. */
-#define __IGNORE_mbind
-#define __IGNORE_get_mempolicy
-#define __IGNORE_set_mempolicy
-#define __IGNORE_migrate_pages
-#define __IGNORE_move_pages
-
-/* Ignore system calls that are also reachable via sys_socket */
+/* Ignore system calls that are also reachable via sys_socketcall */
 #define __IGNORE_recvmmsg
 #define __IGNORE_sendmmsg
+#define __IGNORE_socket
+#define __IGNORE_socketpair
+#define __IGNORE_bind
+#define __IGNORE_connect
+#define __IGNORE_listen
+#define __IGNORE_accept4
+#define __IGNORE_getsockopt
+#define __IGNORE_setsockopt
+#define __IGNORE_getsockname
+#define __IGNORE_getpeername
+#define __IGNORE_sendto
+#define __IGNORE_sendmsg
+#define __IGNORE_recvfrom
+#define __IGNORE_recvmsg
+#define __IGNORE_shutdown
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM

+ 480 - 0
arch/s390/include/asm/vx-insn.h

@@ -0,0 +1,480 @@
+/*
+ * Support for Vector Instructions
+ *
+ * Assembler macros to generate .byte/.word code for particular
+ * vector instructions that are supported by recent binutils (>= 2.26) only.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_VX_INSN_H
+#define __ASM_S390_VX_INSN_H
+
+#ifdef __ASSEMBLY__
+
+
+/* Macros to generate vector instruction byte code */
+
+#define REG_NUM_INVALID	       255
+
+/* GR_NUM - Retrieve general-purpose register number
+ *
+ * @opd:	Operand to store register number
+ * @gr:		String designation of the register in the format "%rN"
+ */
+.macro	GR_NUM	opd gr
+	\opd = REG_NUM_INVALID
+	.ifc \gr,%r0
+		\opd = 0
+	.endif
+	.ifc \gr,%r1
+		\opd = 1
+	.endif
+	.ifc \gr,%r2
+		\opd = 2
+	.endif
+	.ifc \gr,%r3
+		\opd = 3
+	.endif
+	.ifc \gr,%r4
+		\opd = 4
+	.endif
+	.ifc \gr,%r5
+		\opd = 5
+	.endif
+	.ifc \gr,%r6
+		\opd = 6
+	.endif
+	.ifc \gr,%r7
+		\opd = 7
+	.endif
+	.ifc \gr,%r8
+		\opd = 8
+	.endif
+	.ifc \gr,%r9
+		\opd = 9
+	.endif
+	.ifc \gr,%r10
+		\opd = 10
+	.endif
+	.ifc \gr,%r11
+		\opd = 11
+	.endif
+	.ifc \gr,%r12
+		\opd = 12
+	.endif
+	.ifc \gr,%r13
+		\opd = 13
+	.endif
+	.ifc \gr,%r14
+		\opd = 14
+	.endif
+	.ifc \gr,%r15
+		\opd = 15
+	.endif
+	.if \opd == REG_NUM_INVALID
+		.error "Invalid general-purpose register designation: \gr"
+	.endif
+.endm
+
+/* VX_R() - Macro to encode the VX_NUM into the instruction */
+#define VX_R(v)		(v & 0x0F)
+
+/* VX_NUM - Retrieve vector register number
+ *
+ * @opd:	Operand to store register number
+ * @vxr:	String designation of the register in the format "%vN"
+ *
+ * The vector register number is used both as an input number to the
+ * instruction and to compute the RXB field of the instruction.  To
+ * encode the particular vector register number, use the VX_R(v) macro
+ * to extract the four low-order bits for the opcode.
+ */
+.macro	VX_NUM	opd vxr
+	\opd = REG_NUM_INVALID
+	.ifc \vxr,%v0
+		\opd = 0
+	.endif
+	.ifc \vxr,%v1
+		\opd = 1
+	.endif
+	.ifc \vxr,%v2
+		\opd = 2
+	.endif
+	.ifc \vxr,%v3
+		\opd = 3
+	.endif
+	.ifc \vxr,%v4
+		\opd = 4
+	.endif
+	.ifc \vxr,%v5
+		\opd = 5
+	.endif
+	.ifc \vxr,%v6
+		\opd = 6
+	.endif
+	.ifc \vxr,%v7
+		\opd = 7
+	.endif
+	.ifc \vxr,%v8
+		\opd = 8
+	.endif
+	.ifc \vxr,%v9
+		\opd = 9
+	.endif
+	.ifc \vxr,%v10
+		\opd = 10
+	.endif
+	.ifc \vxr,%v11
+		\opd = 11
+	.endif
+	.ifc \vxr,%v12
+		\opd = 12
+	.endif
+	.ifc \vxr,%v13
+		\opd = 13
+	.endif
+	.ifc \vxr,%v14
+		\opd = 14
+	.endif
+	.ifc \vxr,%v15
+		\opd = 15
+	.endif
+	.ifc \vxr,%v16
+		\opd = 16
+	.endif
+	.ifc \vxr,%v17
+		\opd = 17
+	.endif
+	.ifc \vxr,%v18
+		\opd = 18
+	.endif
+	.ifc \vxr,%v19
+		\opd = 19
+	.endif
+	.ifc \vxr,%v20
+		\opd = 20
+	.endif
+	.ifc \vxr,%v21
+		\opd = 21
+	.endif
+	.ifc \vxr,%v22
+		\opd = 22
+	.endif
+	.ifc \vxr,%v23
+		\opd = 23
+	.endif
+	.ifc \vxr,%v24
+		\opd = 24
+	.endif
+	.ifc \vxr,%v25
+		\opd = 25
+	.endif
+	.ifc \vxr,%v26
+		\opd = 26
+	.endif
+	.ifc \vxr,%v27
+		\opd = 27
+	.endif
+	.ifc \vxr,%v28
+		\opd = 28
+	.endif
+	.ifc \vxr,%v29
+		\opd = 29
+	.endif
+	.ifc \vxr,%v30
+		\opd = 30
+	.endif
+	.ifc \vxr,%v31
+		\opd = 31
+	.endif
+	.if \opd == REG_NUM_INVALID
+		.error "Invalid vector register designation: \vxr"
+	.endif
+.endm
+
+/* RXB - Compute the most significant bits of the used vector registers
+ *
+ * @rxb:	Operand to store computed RXB value
+ * @v1:		First vector register designated operand
+ * @v2:		Second vector register designated operand
+ * @v3:		Third vector register designated operand
+ * @v4:		Fourth vector register designated operand
+ */
+.macro	RXB	rxb v1 v2=0 v3=0 v4=0
+	\rxb = 0
+	.if \v1 & 0x10
+		\rxb = \rxb | 0x08
+	.endif
+	.if \v2 & 0x10
+		\rxb = \rxb | 0x04
+	.endif
+	.if \v3 & 0x10
+		\rxb = \rxb | 0x02
+	.endif
+	.if \v4 & 0x10
+		\rxb = \rxb | 0x01
+	.endif
+.endm
+
+/* MRXB - Generate Element Size Control and RXB value
+ *
+ * @m:		Element size control
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXB	m v1 v2=0 v3=0 v4=0
+	rxb = 0
+	RXB	rxb, \v1, \v2, \v3, \v4
+	.byte	(\m << 4) | rxb
+.endm
+
+/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields
+ *
+ * @m:		Element size control
+ * @opc:	Opcode
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXBOPC	m opc v1 v2=0 v3=0 v4=0
+	MRXB	\m, \v1, \v2, \v3, \v4
+	.byte	\opc
+.endm
+
+/* Vector support instructions */
+
+/* VECTOR GENERATE BYTE MASK */
+.macro	VGBM	vr imm2
+	VX_NUM	v1, \vr
+	.word	(0xE700 | (VX_R(v1) << 4))
+	.word	\imm2
+	MRXBOPC	0, 0x44, v1
+.endm
+.macro	VZERO	vxr
+	VGBM	\vxr, 0
+.endm
+.macro	VONE	vxr
+	VGBM	\vxr, 0xFFFF
+.endm
+
+/* VECTOR LOAD VR ELEMENT FROM GR */
+.macro	VLVG	v, gr, disp, m
+	VX_NUM	v1, \v
+	GR_NUM	b2, "%r0"
+	GR_NUM	r3, \gr
+	.word	0xE700 | (VX_R(v1) << 4) | r3
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x22, v1
+.endm
+.macro	VLVGB	v, gr, index
+	VLVG	\v, \gr, \index, 0
+.endm
+.macro	VLVGH	v, gr, index
+	VLVG	\v, \gr, \index, 1
+.endm
+.macro	VLVGF	v, gr, index
+	VLVG	\v, \gr, \index, 2
+.endm
+.macro	VLVGG	v, gr, index
+	VLVG	\v, \gr, \index, 3
+.endm
+
+/* VECTOR LOAD */
+.macro	VL	v, disp, index="%r0", base
+	VX_NUM	v1, \v
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC 0, 0x06, v1
+.endm
+
+/* VECTOR LOAD ELEMENT */
+.macro	VLEx	vr1, disp, index="%r0", base, m3, opc
+	VX_NUM	v1, \vr1
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEB	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x00
+.endm
+.macro	VLEH	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x01
+.endm
+.macro	VLEF	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x03
+.endm
+.macro	VLEG	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x02
+.endm
+
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+.macro	VLEIx	vr1, imm2, m3, opc
+	VX_NUM	v1, \vr1
+	.word	0xE700 | (VX_R(v1) << 4)
+	.word	\imm2
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEIB	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x40
+.endm
+.macro	VLEIH	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x41
+.endm
+.macro	VLEIF	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x43
+.endm
+.macro	VLEIG	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x42
+.endm
+
+/* VECTOR LOAD GR FROM VR ELEMENT */
+.macro	VLGV	gr, vr, disp, base="%r0", m
+	GR_NUM	r1, \gr
+	GR_NUM	b2, \base
+	VX_NUM	v3, \vr
+	.word	0xE700 | (r1 << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x21, v3
+.endm
+.macro	VLGVB	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 0
+.endm
+.macro	VLGVH	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 1
+.endm
+.macro	VLGVF	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 2
+.endm
+.macro	VLGVG	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 3
+.endm
+
+/* VECTOR LOAD MULTIPLE */
+.macro	VLM	vfrom, vto, disp, base
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base	    /* Base register */
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x36, v1, v3
+.endm
+
+/* VECTOR STORE MULTIPLE */
+.macro	VSTM	vfrom, vto, disp, base
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base	    /* Base register */
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x3E, v1, v3
+.endm
+
+/* VECTOR PERMUTE */
+.macro	VPERM	vr1, vr2, vr3, vr4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	VX_R(v4), 0x8C, v1, v2, v3, v4
+.endm
+
+/* VECTOR UNPACK LOGICAL LOW */
+.macro	VUPLL	vr1, vr2, m3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	0x0000
+	MRXBOPC	\m3, 0xD4, v1, v2
+.endm
+.macro	VUPLLB	vr1, vr2
+	VUPLL	\vr1, \vr2, 0
+.endm
+.macro	VUPLLH	vr1, vr2
+	VUPLL	\vr1, \vr2, 1
+.endm
+.macro	VUPLLF	vr1, vr2
+	VUPLL	\vr1, \vr2, 2
+.endm
+
+
+/* Vector integer instructions */
+
+/* VECTOR EXCLUSIVE OR */
+.macro	VX	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	0, 0x6D, v1, v2, v3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+.macro	VGFM	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	\m4, 0xB4, v1, v2, v3
+.endm
+.macro	VGFMB	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VGFMH	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VGFMF	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VGFMG	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+.macro	VGFMA	vr1, vr2, vr3, vr4, m5
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12) | (\m5 << 8)
+	MRXBOPC	VX_R(v4), 0xBC, v1, v2, v3, v4
+.endm
+.macro	VGFMAB	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 0
+.endm
+.macro	VGFMAH	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 1
+.endm
+.macro	VGFMAF	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 2
+.endm
+.macro	VGFMAG	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 3
+.endm
+
+/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
+.macro	VSRLB	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	0, 0x7D, v1, v2, v3
+.endm
+
+
+#endif	/* __ASSEMBLY__ */
+#endif	/* __ASM_S390_VX_INSN_H */
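
A usage sketch (hypothetical code, not from this series): load two vector registers and XOR them. %v24 and %v26 are numbered above 15, so their most significant designation bits travel in the RXB byte while VX_R() keeps only the four low-order bits in the opcode words:

	VL	%v24, 0, %r0, %r2	/* v24 = *(r2) */
	VL	%v26, 16, %r0, %r2	/* v26 = *(r2 + 16) */
	VX	%v24, %v24, %v26	/* v24 ^= v26 */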

+ 5 - 5
arch/s390/include/uapi/asm/unistd.h

@@ -204,9 +204,9 @@
 #define __NR_statfs64		265
 #define __NR_fstatfs64		266
 #define __NR_remap_file_pages	267
-/* Number 268 is reserved for new sys_mbind */
-/* Number 269 is reserved for new sys_get_mempolicy */
-/* Number 270 is reserved for new sys_set_mempolicy */
+#define __NR_mbind		268
+#define __NR_get_mempolicy	269
+#define __NR_set_mempolicy	270
 #define __NR_mq_open		271
 #define __NR_mq_unlink		272
 #define __NR_mq_timedsend	273
@@ -223,7 +223,7 @@
 #define __NR_inotify_init	284
 #define __NR_inotify_add_watch	285
 #define __NR_inotify_rm_watch	286
-/* Number 287 is reserved for new sys_migrate_pages */
+#define __NR_migrate_pages	287
 #define __NR_openat		288
 #define __NR_mkdirat		289
 #define __NR_mknodat		290
@@ -245,7 +245,7 @@
 #define __NR_sync_file_range	307
 #define __NR_tee		308
 #define __NR_vmsplice		309
-/* Number 310 is reserved for new sys_move_pages */
+#define __NR_move_pages		310
 #define __NR_getcpu		311
 #define __NR_epoll_pwait	312
 #define __NR_utimes		313

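With the reservations replaced by real numbers, the NUMA system calls become reachable from user space; a hedged sketch using the number from the hunk above (a glibc wrapper may not exist, hence raw syscall()):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		unsigned long old_nodes = 1UL << 0;	/* migrate pages away from node 0 */
		unsigned long new_nodes = 1UL << 1;	/* ... onto node 1 */

		/* 287 == __NR_migrate_pages above; pid 0 means the calling process */
		long rc = syscall(287, 0, sizeof(old_nodes) * 8, &old_nodes, &new_nodes);
		if (rc < 0)
			perror("migrate_pages");
		return 0;
	}
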
+ 11 - 0
arch/s390/kernel/Makefile

@@ -28,6 +28,17 @@ CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
 CFLAGS_sysinfo.o += -w
 
+#
+# Use -march=z900 for sclp.c to be able to print an error message if
+# the kernel is started on a machine which is too old
+#
+CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
+ifneq ($(CC_FLAGS_MARCH),-march=z900)
+CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp.o	+= -march=z900
+endif
+GCOV_PROFILE_sclp.o := n
+
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o

+ 3 - 0
arch/s390/kernel/asm-offsets.c

@@ -28,6 +28,9 @@ int main(void)
 	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
 	BLANK();
 	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
+	DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
+	DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
+	DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
 	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
 	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
 	DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));

+ 14 - 34
arch/s390/kernel/compat_signal.c

@@ -153,33 +153,14 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 /* Store registers needed to create the signal frame */
 static void store_sigregs(void)
 {
-	int i;
-
 	save_access_regs(current->thread.acrs);
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		save_vx_regs(current->thread.vxrs);
-		for (i = 0; i < __NUM_FPRS; i++)
-			current->thread.fp_regs.fprs[i] =
-				*(freg_t *)(current->thread.vxrs + i);
-	} else
-		save_fp_regs(current->thread.fp_regs.fprs);
+	save_fpu_regs();
 }
 
 /* Load registers after signal return */
 static void load_sigregs(void)
 {
-	int i;
-
 	restore_access_regs(current->thread.acrs);
-	/* restore_fp_ctl is done in restore_sigregs */
-	if (current->thread.vxrs) {
-		for (i = 0; i < __NUM_FPRS; i++)
-			*(freg_t *)(current->thread.vxrs + i) =
-				current->thread.fp_regs.fprs[i];
-		restore_vx_regs(current->thread.vxrs);
-	} else
-		restore_fp_regs(current->thread.fp_regs.fprs);
 }
 
 static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
@@ -196,8 +177,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
 		user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
-	       sizeof(user_sregs.fpregs));
+	fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
 		return -EFAULT;
 	return 0;
@@ -217,8 +197,8 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
 		return -EINVAL;
 
-	/* Loading the floating-point-control word can fail. Do that first. */
-	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
 		return -EINVAL;
 
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -235,9 +215,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 		regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
 		regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
 	       sizeof(current->thread.acrs));
-
-	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
-	       sizeof(current->thread.fp_regs));
+	fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
 
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
 	return 0;
@@ -258,13 +236,13 @@ static int save_sigregs_ext32(struct pt_regs *regs,
 		return -EFAULT;
 		return -EFAULT;
 
 
 	/* Save vector registers to signal stack */
 	/* Save vector registers to signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 			return -EFAULT;
 	}
 	}
@@ -286,15 +264,15 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
 
 
 	/* Restore vector registers from signal stack */
 	/* Restore vector registers from signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
 	}
 	}
 	return 0;
 	return 0;
 }
 }
@@ -308,6 +286,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
 		goto badframe;
 		goto badframe;
 	set_current_blocked(&set);
 	set_current_blocked(&set);
+	save_fpu_regs();
 	if (restore_sigregs32(regs, &frame->sregs))
 	if (restore_sigregs32(regs, &frame->sregs))
 		goto badframe;
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
 	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -330,6 +309,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	set_current_blocked(&set);
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 		goto badframe;
+	save_fpu_regs();
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
 	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
@@ -472,7 +452,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
 	 */
 	 */
 	uc_flags = UC_GPRS_HIGH;
 	uc_flags = UC_GPRS_HIGH;
 	if (MACHINE_HAS_VX) {
 	if (MACHINE_HAS_VX) {
-		if (current->thread.vxrs)
+		if (is_vx_task(current))
 			uc_flags |= UC_VXRS;
 			uc_flags |= UC_VXRS;
 	} else
 	} else
 		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
 		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +

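Note: the pattern above recurs in every caller the series touches. Instead of copying FP or VX registers by hand, code first calls save_fpu_regs() to flush the live registers into current->thread.fpu, and a user-supplied floating-point-control word is only validated with test_fp_ctl() because the actual reload now happens lazily. A minimal C sketch of that convention (illustrative only, not a hunk from this series):

	/* Validate a user-supplied FPC word, then update the software FPU
	 * state; the real registers are reloaded lazily via the CIF_FPU flag.
	 */
	static int set_user_fpc(u32 fpc)
	{
		if (test_fp_ctl(fpc))		/* reject malformed FPC bits */
			return -EINVAL;
		save_fpu_regs();		/* flush live regs, sets CIF_FPU */
		current->thread.fpu.fpc = fpc;	/* safe: state lives in memory */
		return 0;			/* hardware reload on return */
	}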
+ 449 - 165
arch/s390/kernel/entry.S

@@ -20,6 +20,8 @@
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/irq.h>
+#include <asm/fpu-internal.h>
+#include <asm/vx-insn.h>
 
 __PT_R0      =	__PT_GPRS
 __PT_R1      =	__PT_GPRS + 8
@@ -46,10 +48,10 @@ _TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		   _TIF_UPROBE)
 _TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		   _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE)
+_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
 _PIF_WORK	= (_PIF_PER_TRAP)
 
-#define BASED(name) name-system_call(%r13)
+#define BASED(name) name-cleanup_critical(%r13)
 
 	.macro	TRACE_IRQS_ON
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -73,38 +75,6 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
-	.macro LPP newpp
-#if IS_ENABLED(CONFIG_KVM)
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	.+8
-	.insn	s,0xb2800000,\newpp
-#endif
-	.endm
-
-	.macro	HANDLE_SIE_INTERCEPT scratch,reason
-#if IS_ENABLED(CONFIG_KVM)
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	.+62
-	lgr	\scratch,%r9
-	slg	\scratch,BASED(.Lsie_critical)
-	clg	\scratch,BASED(.Lsie_critical_length)
-	.if	\reason==1
-	# Some program interrupts are suppressing (e.g. protection).
-	# We must also check the instruction after SIE in that case.
-	# do_protection_exception will rewind to .Lrewind_pad
-	jh	.+42
-	.else
-	jhe	.+42
-	.endif
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	larl	%r9,sie_exit			# skip forward to sie_exit
-	mvi	__SF_EMPTY+31(%r15),\reason	# set exit reason
-#endif
-	.endm
-
 	.macro	CHECK_STACK stacksize,savearea
 #ifdef CONFIG_CHECK_STACK
 	tml	%r15,\stacksize - CONFIG_STACK_GUARD
@@ -113,7 +83,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
-	.macro	SWITCH_ASYNC savearea,stack,shift
+	.macro	SWITCH_ASYNC savearea,timer
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	1f
 	lgr	%r14,%r9
@@ -124,26 +94,28 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	brasl	%r14,cleanup_critical
 	tmhh	%r8,0x0001		# retest problem state after cleanup
 	jnz	1f
-0:	lg	%r14,\stack		# are we already on the target stack?
+0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the async stack?
 	slgr	%r14,%r15
-	srag	%r14,%r14,\shift
-	jnz	1f
-	CHECK_STACK 1<<\shift,\savearea
+	srag	%r14,%r14,STACK_SHIFT
+	jnz	2f
+	CHECK_STACK 1<<STACK_SHIFT,\savearea
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	lg	%r15,\stack		# load target stack
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	j	3f
+1:	LAST_BREAK %r14
+	UPDATE_VTIME %r14,%r15,\timer
+2:	lg	%r15,__LC_ASYNC_STACK	# load async stack
+3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	.endm
 
-	.macro UPDATE_VTIME scratch,enter_timer
-	lg	\scratch,__LC_EXIT_TIMER
-	slg	\scratch,\enter_timer
-	alg	\scratch,__LC_USER_TIMER
-	stg	\scratch,__LC_USER_TIMER
-	lg	\scratch,__LC_LAST_UPDATE_TIMER
-	slg	\scratch,__LC_EXIT_TIMER
-	alg	\scratch,__LC_SYSTEM_TIMER
-	stg	\scratch,__LC_SYSTEM_TIMER
+	.macro UPDATE_VTIME w1,w2,enter_timer
+	lg	\w1,__LC_EXIT_TIMER
+	lg	\w2,__LC_LAST_UPDATE_TIMER
+	slg	\w1,\enter_timer
+	slg	\w2,__LC_EXIT_TIMER
+	alg	\w1,__LC_USER_TIMER
+	alg	\w2,__LC_SYSTEM_TIMER
+	stg	\w1,__LC_USER_TIMER
+	stg	\w2,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),\enter_timer
 	.endm
 
@@ -197,6 +169,69 @@ ENTRY(__switch_to)
 	br	%r14
 
 .L__critical_start:
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
+	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
+	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
+	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU	# load guest fp/vx registers ?
+	jno	.Lsie_load_guest_gprs
+	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
+.Lsie_load_guest_gprs:
+	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
+	lg	%r14,__LC_GMAP			# get gmap pointer
+	ltgr	%r14,%r14
+	jz	.Lsie_gmap
+	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
+.Lsie_gmap:
+	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
+	tm	__SIE_PROG20+3(%r14),3		# last exit...
+	jnz	.Lsie_skip
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lsie_skip			# exit if fp/vx regs changed
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.Lsie_enter
+	.insn	s,0xb2800000,__LC_CURRENT_PID	# set guest id to pid
+.Lsie_enter:
+	sie	0(%r14)
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.Lsie_skip
+	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+.Lsie_skip:
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+.Lsie_done:
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also .Lcleanup_sie
+.Lrewind_pad:
+	nop	0
+	.globl sie_exit
+sie_exit:
+	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
+	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
+	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
+	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
+	br	%r14
+.Lsie_fault:
+	lghi	%r14,-EFAULT
+	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
+	j	sie_exit
+
+	EX_TABLE(.Lrewind_pad,.Lsie_fault)
+	EX_TABLE(sie_exit,.Lsie_fault)
+#endif
+
 /*
  * SVC interrupt handler routine. System calls are synchronous events and
  * are executed with interrupts enabled.
@@ -212,9 +247,9 @@ ENTRY(system_call)
 .Lsysc_per:
 	lg	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-.Lsysc_vtime:
-	UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
 	LAST_BREAK %r13
+.Lsysc_vtime:
+	UPDATE_VTIME %r10,%r13,__LC_SYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
@@ -244,8 +279,6 @@ ENTRY(system_call)
 .Lsysc_return:
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
-	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
-	jno	.Lsysc_restore
 	tm	__PT_FLAGS+7(%r11),_PIF_WORK
 	jnz	.Lsysc_work
 	tm	__TI_flags+7(%r12),_TIF_WORK
@@ -280,6 +313,8 @@ ENTRY(system_call)
 	jo	.Lsysc_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lsysc_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lsysc_vxrs
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lsysc_uaccess
 	j	.Lsysc_return		# beware of critical section cleanup
@@ -306,6 +341,13 @@ ENTRY(system_call)
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	j	.Lsysc_return
 
+#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lsysc_vxrs:
+	larl	%r14,.Lsysc_return
+	jg	load_fpu_regs
+
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
@@ -405,28 +447,35 @@ ENTRY(pgm_check_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_PGM_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,1
 	tmhh	%r8,0x0001		# test problem state bit
-	jnz	1f			# -> fault in user space
-	tmhh	%r8,0x4000		# PER bit set in old PSW ?
-	jnz	0f			# -> enabled, can't be a double fault
+	jnz	2f			# -> fault in user space
+#if IS_ENABLED(CONFIG_KVM)
+	# cleanup critical section for sie64a
+	lgr	%r14,%r9
+	slg	%r14,BASED(.Lsie_critical_start)
+	clg	%r14,BASED(.Lsie_critical_length)
+	jhe	0f
+	brasl	%r14,.Lcleanup_sie
+#endif
+0:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
+	jnz	1f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jnz	.Lpgm_svcper		# -> single stepped svc
-0:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+1:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
-	LAST_BREAK %r14
+	j	3f
+2:	LAST_BREAK %r14
+	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
 	lg	%r15,__LC_KERNEL_STACK
 	lg	%r14,__TI_task(%r12)
 	aghi	%r14,__TASK_thread	# pointer to thread_struct
 	lghi	%r13,__LC_PGM_TDB
 	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
-	jz	2f
+	jz	3f
 	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -435,24 +484,28 @@ ENTRY(pgm_check_handler)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	stg	%r10,__PT_ARGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jz	0f
+	jz	4f
 	tmhh	%r8,0x0001		# kernel per event ?
 	jz	.Lpgm_kprobe
 	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
 	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-0:	REENABLE_IRQS
+4:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	larl	%r1,pgm_check_table
 	llgh	%r10,__PT_INT_CODE+2(%r11)
 	nill	%r10,0x007f
 	sll	%r10,2
-	je	.Lsysc_return
+	je	.Lpgm_return
 	lgf	%r1,0(%r10,%r1)		# load address of handler routine
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# branch to interrupt-handler
-	j	.Lsysc_return
+.Lpgm_return:
+	LOCKDEP_SYS_EXIT
+	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
+	jno	.Lsysc_restore
+	j	.Lsysc_tif
 
 #
 # PER event in supervisor state, must be kprobes
@@ -462,7 +515,7 @@ ENTRY(pgm_check_handler)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_per_trap
-	j	.Lsysc_return
+	j	.Lpgm_return
 
 #
 # single stepped system call
@@ -483,15 +536,9 @@ ENTRY(io_int_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_IO_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,2
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user?
-	jz	.Lio_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lio_skip:
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -587,6 +634,8 @@ ENTRY(io_int_handler)
 	jo	.Lio_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lio_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lio_vxrs
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lio_uaccess
 	j	.Lio_return		# beware of critical section cleanup
@@ -608,6 +657,13 @@ ENTRY(io_int_handler)
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	j	.Lio_return
 
+#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lio_vxrs:
+	larl	%r14,.Lio_return
+	jg	load_fpu_regs
+
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
@@ -652,15 +708,9 @@ ENTRY(ext_int_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_EXT_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,3
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jz	.Lext_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lext_skip:
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -690,6 +740,122 @@ ENTRY(psw_idle)
 	br	%r14
 .Lpsw_idle_end:
 
+/* Store floating-point controls and floating-point or vector extension
+ * registers instead.  A critical section cleanup assures that the registers
+ * are stored even if interrupted for some other work.	The register %r2
+ * designates a struct fpu to store register contents.	If the specified
+ * structure does not contain a register save area, the register store is
+ * omitted (see also comments in arch_dup_task_struct()).
+ *
+ * The CIF_FPU flag is set in any case.  The CIF_FPU triggers a lazy restore
+ * of the register contents at system call or io return.
+ */
+ENTRY(save_fpu_regs)
+	lg	%r2,__LC_CURRENT
+	aghi	%r2,__TASK_thread
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bor	%r14
+	stfpc	__THREAD_FPU_fpc(%r2)
+.Lsave_fpu_regs_fpc_end:
+	lg	%r3,__THREAD_FPU_regs(%r2)
+	ltgr	%r3,%r3
+	jz	.Lsave_fpu_regs_done	  # no save area -> set CIF_FPU
+	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	jz	.Lsave_fpu_regs_fp	  # no -> store FP regs
+.Lsave_fpu_regs_vx_low:
+	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
+.Lsave_fpu_regs_vx_high:
+	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
+	j	.Lsave_fpu_regs_done	  # -> set CIF_FPU flag
+.Lsave_fpu_regs_fp:
+	std	0,0(%r3)
+	std	1,8(%r3)
+	std	2,16(%r3)
+	std	3,24(%r3)
+	std	4,32(%r3)
+	std	5,40(%r3)
+	std	6,48(%r3)
+	std	7,56(%r3)
+	std	8,64(%r3)
+	std	9,72(%r3)
+	std	10,80(%r3)
+	std	11,88(%r3)
+	std	12,96(%r3)
+	std	13,104(%r3)
+	std	14,112(%r3)
+	std	15,120(%r3)
+.Lsave_fpu_regs_done:
+	oi	__LC_CPU_FLAGS+7,_CIF_FPU
+	br	%r14
+.Lsave_fpu_regs_end:
+
+/* Load floating-point controls and floating-point or vector extension
+ * registers.  A critical section cleanup assures that the register contents
+ * are loaded even if interrupted for some other work.	Depending on the saved
+ * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
+ *
+ * There are special calling conventions to fit into sysc and io return work:
+ *	%r15:	<kernel stack>
+ * The function requires:
+ *	%r4 and __SF_EMPTY+32(%r15)
+ */
+load_fpu_regs:
+	lg	%r4,__LC_CURRENT
+	aghi	%r4,__TASK_thread
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bnor	%r14
+	lfpc	__THREAD_FPU_fpc(%r4)
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
+	jz	.Lload_fpu_regs_fp_ctl		# -> no VX, load FP regs
+.Lload_fpu_regs_vx_ctl:
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jo	.Lload_fpu_regs_vx
+	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_vx:
+	VLM	%v0,%v15,0,%r4
+.Lload_fpu_regs_vx_high:
+	VLM	%v16,%v31,256,%r4
+	j	.Lload_fpu_regs_done
+.Lload_fpu_regs_fp_ctl:
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jz	.Lload_fpu_regs_fp
+	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_fp:
+	ld	0,0(%r4)
+	ld	1,8(%r4)
+	ld	2,16(%r4)
+	ld	3,24(%r4)
+	ld	4,32(%r4)
+	ld	5,40(%r4)
+	ld	6,48(%r4)
+	ld	7,56(%r4)
+	ld	8,64(%r4)
+	ld	9,72(%r4)
+	ld	10,80(%r4)
+	ld	11,88(%r4)
+	ld	12,96(%r4)
+	ld	13,104(%r4)
+	ld	14,112(%r4)
+	ld	15,120(%r4)
+.Lload_fpu_regs_done:
+	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
+	br	%r14
+.Lload_fpu_regs_end:
+
+/* Test and set the vector enablement control in CR0.46 */
+ENTRY(__ctl_set_vx)
+	stctg	%c0,%c0,__SF_EMPTY(%r15)
+	tm	__SF_EMPTY+5(%r15),2
+	bor	%r14
+	oi	__SF_EMPTY+5(%r15),2
+	lctlg	%c0,%c0,__SF_EMPTY(%r15)
+	br	%r14
+.L__ctl_set_vx_end:
+
 .L__critical_end:
 
 /*
@@ -702,9 +868,8 @@ ENTRY(mcck_int_handler)
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,4
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
 	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
@@ -725,11 +890,7 @@ ENTRY(mcck_int_handler)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	jno	.Lmcck_panic		# no -> skip cleanup critical
-	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
-	tm	%r8,0x0001		# interrupting from user ?
-	jz	.Lmcck_skip
-	UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
-	LAST_BREAK %r14
+	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
 .Lmcck_skip:
 	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -764,12 +925,8 @@ ENTRY(mcck_int_handler)
 	lpswe	__LC_RETURN_MCCK_PSW
 
 .Lmcck_panic:
-	lg	%r14,__LC_PANIC_STACK
-	slgr	%r14,%r15
-	srag	%r14,%r14,PAGE_SHIFT
-	jz	0f
 	lg	%r15,__LC_PANIC_STACK
-0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	.Lmcck_skip
 
 #
@@ -819,20 +976,13 @@ stack_overflow:
 	jg	kernel_stack_overflow
 #endif
 
-	.align	8
-.Lcleanup_table:
-	.quad	system_call
-	.quad	.Lsysc_do_svc
-	.quad	.Lsysc_tif
-	.quad	.Lsysc_restore
-	.quad	.Lsysc_done
-	.quad	.Lio_tif
-	.quad	.Lio_restore
-	.quad	.Lio_done
-	.quad	psw_idle
-	.quad	.Lpsw_idle_end
-
 cleanup_critical:
+#if IS_ENABLED(CONFIG_KVM)
+	clg	%r9,BASED(.Lcleanup_table_sie)	# .Lsie_gmap
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
+	jl	.Lcleanup_sie
+#endif
 	clg	%r9,BASED(.Lcleanup_table)	# system_call
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_do_svc
@@ -853,8 +1003,54 @@ cleanup_critical:
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+72)	# .Lpsw_idle_end
 	jl	.Lcleanup_idle
+	clg	%r9,BASED(.Lcleanup_table+80)	# save_fpu_regs
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+88)	# .Lsave_fpu_regs_end
+	jl	.Lcleanup_save_fpu_regs
+	clg	%r9,BASED(.Lcleanup_table+96)	# load_fpu_regs
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
+	jl	.Lcleanup_load_fpu_regs
+	clg	%r9,BASED(.Lcleanup_table+112)	# __ctl_set_vx
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+120)	# .L__ctl_set_vx_end
+	jl	.Lcleanup___ctl_set_vx
 0:	br	%r14
 0:	br	%r14
 
+	.align	8
+.Lcleanup_table:
+	.quad	system_call
+	.quad	.Lsysc_do_svc
+	.quad	.Lsysc_tif
+	.quad	.Lsysc_restore
+	.quad	.Lsysc_done
+	.quad	.Lio_tif
+	.quad	.Lio_restore
+	.quad	.Lio_done
+	.quad	psw_idle
+	.quad	.Lpsw_idle_end
+	.quad	save_fpu_regs
+	.quad	.Lsave_fpu_regs_end
+	.quad	load_fpu_regs
+	.quad	.Lload_fpu_regs_end
+	.quad	__ctl_set_vx
+	.quad	.L__ctl_set_vx_end
+
+#if IS_ENABLED(CONFIG_KVM)
+.Lcleanup_table_sie:
+	.quad	.Lsie_gmap
+	.quad	.Lsie_done
+
+.Lcleanup_sie:
+	lg	%r9,__SF_EMPTY(%r15)		# get control block pointer
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	0f
+	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+0:	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+	larl	%r9,sie_exit			# skip forward to sie_exit
+	br	%r14
+#endif
 
 .Lcleanup_system_call:
 	# check if stpt has been executed
@@ -915,7 +1111,7 @@ cleanup_critical:
 	.quad	system_call
 	.quad	.Lsysc_stmg
 	.quad	.Lsysc_per
-	.quad	.Lsysc_vtime+18
+	.quad	.Lsysc_vtime+36
 	.quad	.Lsysc_vtime+42
 
 .Lcleanup_sysc_tif:
@@ -981,6 +1177,145 @@ cleanup_critical:
 .Lcleanup_idle_insn:
 	.quad	.Lpsw_idle_lpsw
 
+.Lcleanup_save_fpu_regs:
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bor	%r14
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_done)
+	jhe	5f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_fp)
+	jhe	4f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
+	jhe	3f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
+	jhe	2f
+	clg	%r9,BASED(.Lcleanup_save_fpu_fpc_end)
+	jhe	1f
+	lg	%r2,__LC_CURRENT
+0:	# Store floating-point controls
+	stfpc	__THREAD_FPU_fpc(%r2)
+1:	# Load register save area and check if VX is active
+	lg	%r3,__THREAD_FPU_regs(%r2)
+	ltgr	%r3,%r3
+	jz	5f			  # no save area -> set CIF_FPU
+	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	jz	4f			  # no VX -> store FP regs
+2:	# Store vector registers (V0-V15)
+	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
+3:	# Store vector registers (V16-V31)
+	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
+	j	5f			  # -> done, set CIF_FPU flag
+4:	# Store floating-point registers
+	std	0,0(%r3)
+	std	1,8(%r3)
+	std	2,16(%r3)
+	std	3,24(%r3)
+	std	4,32(%r3)
+	std	5,40(%r3)
+	std	6,48(%r3)
+	std	7,56(%r3)
+	std	8,64(%r3)
+	std	9,72(%r3)
+	std	10,80(%r3)
+	std	11,88(%r3)
+	std	12,96(%r3)
+	std	13,104(%r3)
+	std	14,112(%r3)
+	std	15,120(%r3)
+5:	# Set CIF_FPU flag
+	oi	__LC_CPU_FLAGS+7,_CIF_FPU
+	lg	%r9,48(%r11)		# return from save_fpu_regs
+	br	%r14
+.Lcleanup_save_fpu_fpc_end:
+	.quad	.Lsave_fpu_regs_fpc_end
+.Lcleanup_save_fpu_regs_vx_low:
+	.quad	.Lsave_fpu_regs_vx_low
+.Lcleanup_save_fpu_regs_vx_high:
+	.quad	.Lsave_fpu_regs_vx_high
+.Lcleanup_save_fpu_regs_fp:
+	.quad	.Lsave_fpu_regs_fp
+.Lcleanup_save_fpu_regs_done:
+	.quad	.Lsave_fpu_regs_done
+
+.Lcleanup_load_fpu_regs:
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bnor	%r14
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_done)
+	jhe	1f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp)
+	jhe	2f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
+	jhe	3f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
+	jhe	4f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx)
+	jhe	5f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
+	jhe	6f
+	lg	%r4,__LC_CURRENT
+	lfpc	__THREAD_FPU_fpc(%r4)
+	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
+	jz	3f				# -> no VX, load FP regs
+6:	# Set VX-enablement control
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jo	5f
+	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+5:	# Load V0 ..V15 registers
+	VLM	%v0,%v15,0,%r4
+4:	# Load V16..V31 registers
+	VLM	%v16,%v31,256,%r4
+	j	1f
+3:	# Clear VX-enablement control for FP
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jz	2f
+	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+2:	# Load floating-point registers
+	ld	0,0(%r4)
+	ld	1,8(%r4)
+	ld	2,16(%r4)
+	ld	3,24(%r4)
+	ld	4,32(%r4)
+	ld	5,40(%r4)
+	ld	6,48(%r4)
+	ld	7,56(%r4)
+	ld	8,64(%r4)
+	ld	9,72(%r4)
+	ld	10,80(%r4)
+	ld	11,88(%r4)
+	ld	12,96(%r4)
+	ld	13,104(%r4)
+	ld	14,112(%r4)
+	ld	15,120(%r4)
+1:	# Clear CIF_FPU bit
+	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
+	lg	%r9,48(%r11)		# return from load_fpu_regs
+	br	%r14
+.Lcleanup_load_fpu_regs_vx_ctl:
+	.quad	.Lload_fpu_regs_vx_ctl
+.Lcleanup_load_fpu_regs_vx:
+	.quad	.Lload_fpu_regs_vx
+.Lcleanup_load_fpu_regs_vx_high:
+	.quad	.Lload_fpu_regs_vx_high
+.Lcleanup_load_fpu_regs_fp_ctl:
+	.quad	.Lload_fpu_regs_fp_ctl
+.Lcleanup_load_fpu_regs_fp:
+	.quad	.Lload_fpu_regs_fp
+.Lcleanup_load_fpu_regs_done:
+	.quad	.Lload_fpu_regs_done
+
+.Lcleanup___ctl_set_vx:
+	stctg	%c0,%c0,__SF_EMPTY(%r15)
+	tm	__SF_EMPTY+5(%r15),2
+	bor	%r14
+	oi	__SF_EMPTY+5(%r15),2
+	lctlg	%c0,%c0,__SF_EMPTY(%r15)
+	lg	%r9,48(%r11)		# return from __ctl_set_vx
+	br	%r14
+
 /*
  * Integer constants
  */
@@ -989,62 +1324,11 @@ cleanup_critical:
 	.quad	.L__critical_start
 .Lcritical_length:
 	.quad	.L__critical_end - .L__critical_start
-
-
 #if IS_ENABLED(CONFIG_KVM)
-/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
- */
-ENTRY(sie64a)
-	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
-	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
-	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
-	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
-	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
-	lg	%r14,__LC_GMAP			# get gmap pointer
-	ltgr	%r14,%r14
-	jz	.Lsie_gmap
-	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
-.Lsie_gmap:
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
-	tm	__SIE_PROG20+3(%r14),3		# last exit...
-	jnz	.Lsie_done
-	LPP	__SF_EMPTY(%r15)		# set guest id
-	sie	0(%r14)
-.Lsie_done:
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-.Lrewind_pad:
-	nop	0
-	.globl sie_exit
-sie_exit:
-	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
-	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
-	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
-	br	%r14
-.Lsie_fault:
-	lghi	%r14,-EFAULT
-	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
-	j	sie_exit
-
-	.align	8
-.Lsie_critical:
+.Lsie_critical_start:
 	.quad	.Lsie_gmap
 .Lsie_critical_length:
 	.quad	.Lsie_done - .Lsie_gmap
-
-	EX_TABLE(.Lrewind_pad,.Lsie_fault)
-	EX_TABLE(sie_exit,.Lsie_fault)
 #endif
 
 	.section .rodata, "a"

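Note: save_fpu_regs and load_fpu_regs above implement the lazy FP/VX scheme: a save marks CIF_FPU, the registers are only reloaded in the sysc/io return paths (.Lsysc_vxrs/.Lio_vxrs), and cleanup_critical redoes the work if either routine is interrupted mid-way. As conceptual C — a sketch only; the helpers get_fpc/store_vx_regs/store_fp_regs stand in for the stfpc/vstm/std sequences and are not real kernel APIs:

	void save_fpu_regs_sketch(void)
	{
		struct fpu *fpu = &current->thread.fpu;

		if (test_cpu_flag(CIF_FPU))
			return;			/* already saved */
		fpu->fpc = get_fpc();		/* stfpc */
		if (fpu->regs) {		/* NULL for early kthreads */
			if (fpu->flags & FPU_USE_VX)
				store_vx_regs(fpu->regs);	/* vstm %v0-%v31 */
			else
				store_fp_regs(fpu->regs);	/* std %f0-%f15 */
		}
		set_cpu_flag(CIF_FPU);		/* reload lazily on return */
	}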
+ 3 - 2
arch/s390/kernel/head.S

@@ -370,6 +370,7 @@ ENTRY(startup_kdump)
 	xc	0x200(256),0x200	# partially clear lowcore
 	xc	0x300(256),0x300
 	xc	0xe00(256),0xe00
+	lctlg	%c0,%c15,0x200(%r0)	# initialize control registers
 	stck	__LC_LAST_UPDATE_CLOCK
 	spt	6f-.LPG0(%r13)
 	mvc	__LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
@@ -413,9 +414,9 @@ ENTRY(startup_kdump)
 # followed by the facility words.
 
 #if defined(CONFIG_MARCH_Z13)
-	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+	.long 2, 0xc100eff2, 0xf46cc800
 #elif defined(CONFIG_MARCH_ZEC12)
-	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+	.long 2, 0xc100eff2, 0xf46cc800
 #elif defined(CONFIG_MARCH_Z196)
 	.long 2, 0xc100eff2, 0xf46c0000
 #elif defined(CONFIG_MARCH_Z10)

+ 3 - 6
arch/s390/kernel/jump_label.c

@@ -44,12 +44,9 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
 	unsigned char *ipn = (unsigned char *)new;
 
 	pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
-	pr_emerg("Found:    %02x %02x %02x %02x %02x %02x\n",
-		 ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
-	pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
-		 ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
-	pr_emerg("New:      %02x %02x %02x %02x %02x %02x\n",
-		 ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
+	pr_emerg("Found:    %6ph\n", ipc);
+	pr_emerg("Expected: %6ph\n", ipe);
+	pr_emerg("New:      %6ph\n", ipn);
 	panic("Corrupted kernel text");
 }
 

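Note: %Nph is the printk extension for dumping a small buffer as space-separated hex bytes, which is what makes the three-line version above equivalent to the hand-rolled %02x lists. A quick illustration with a hypothetical buffer:

	u8 insn[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };

	pr_emerg("Found:    %6ph\n", insn);	/* prints "c0 04 00 00 00 00" */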
+ 8 - 3
arch/s390/kernel/nmi.c

@@ -21,6 +21,7 @@
 #include <asm/nmi.h>
 #include <asm/crw.h>
 #include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
 #include <asm/ctl_reg.h>
 
 struct mcck_struct {
@@ -164,8 +165,12 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 		cr0.val = S390_lowcore.cregs_save_area[0];
 		cr0.afp = cr0.vx = 1;
 		__ctl_load(cr0.val, 0, 0);
-		restore_vx_regs((__vector128 *)
-				&S390_lowcore.vector_save_area);
+		asm volatile(
+			"	la	1,%0\n"
+			"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+			"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+			: : "Q" (*(struct vx_array *)
+				 &S390_lowcore.vector_save_area) : "1");
 		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 	}
 	/* Revalidate access registers */
@@ -358,4 +363,4 @@ static int __init machine_check_init(void)
 	ctl_set_bit(14, 24);	/* enable warning MCH */
 	return 0;
 }
-arch_initcall(machine_check_init);
+early_initcall(machine_check_init);

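Note: nmi.c open-codes the vlm instructions as .word constants inside an asm statement because the machine-check revalidation path cannot assume assembler support for the vector facility; entry.S instead uses the VLM/VSTM macros from the new <asm/vx-insn.h>, which expand to the same opcodes, e.g.:

	#include <asm/vx-insn.h>

	VLM	%v0,%v15,0,%r4		# vlm 0,15,0(4)
	VLM	%v16,%v31,256,%r4	# vlm 16,31,256(4)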
+ 3 - 6
arch/s390/kernel/perf_cpum_sf.c

@@ -1019,12 +1019,9 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 		break;
 	}
 
-	/* The host-program-parameter (hpp) contains the sie control
-	 * block that is set by sie64a() in entry64.S.	Check if hpp
-	 * refers to a valid control block and set sde_regs flags
-	 * accordingly.  This would allow to use hpp values for other
-	 * purposes too.
-	 * For now, simply use a non-zero value as guest indicator.
+	/* The host-program-parameter (hpp) contains the pid of
+	 * the CPU thread as set by sie64a() in entry.S.
+	 * If non-zero assume a guest sample.
 	 */
 	if (sfr->basic.hpp)
 		sde_regs->in_guest = 1;

+ 42 - 10
arch/s390/kernel/process.c

@@ -81,8 +81,38 @@ void release_thread(struct task_struct *dead_task)
 
 void arch_release_task_struct(struct task_struct *tsk)
 {
-	if (tsk->thread.vxrs)
-		kfree(tsk->thread.vxrs);
+	/* Free either the floating-point or the vector register save area */
+	kfree(tsk->thread.fpu.regs);
+}
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	*dst = *src;
+
+	/* Set up a new floating-point register save area */
+	dst->thread.fpu.fpc = 0;
+	dst->thread.fpu.flags = 0;	/* Always start with VX disabled */
+	dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+				       GFP_KERNEL|__GFP_REPEAT);
+	if (!dst->thread.fpu.fprs)
+		return -ENOMEM;
+
+	/*
+	 * Save the floating-point or vector register state of the current
+	 * task.  The state is not saved for early kernel threads, for example,
+	 * the init_task, which do not have an allocated save area.
+	 * The CIF_FPU flag is set in any case to lazy clear or restore a saved
+	 * state when switching to a different task or returning to user space.
+	 */
+	save_fpu_regs();
+	dst->thread.fpu.fpc = current->thread.fpu.fpc;
+	if (is_vx_task(current))
+		convert_vx_to_fp(dst->thread.fpu.fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
+		       sizeof(freg_t) * __NUM_FPRS);
+	return 0;
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
@@ -142,11 +172,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	p->thread.ri_signum = 0;
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
 
-	/* Save the fpu registers to new thread structure. */
-	save_fp_ctl(&p->thread.fp_regs.fpc);
-	save_fp_regs(p->thread.fp_regs.fprs);
-	p->thread.fp_regs.pad = 0;
-	p->thread.vxrs = NULL;
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
 		unsigned long tls = frame->childregs.gprs[6];
@@ -162,7 +187,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 
 asmlinkage void execve_tail(void)
 {
-	current->thread.fp_regs.fpc = 0;
+	current->thread.fpu.fpc = 0;
 	asm volatile("sfpc %0" : : "d" (0));
 }
 
@@ -171,8 +196,15 @@ asmlinkage void execve_tail(void)
  */
 int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
 {
-	save_fp_ctl(&fpregs->fpc);
-	save_fp_regs(fpregs->fprs);
+	save_fpu_regs();
+	fpregs->fpc = current->thread.fpu.fpc;
+	fpregs->pad = 0;
+	if (is_vx_task(current))
+		convert_vx_to_fp((freg_t *)&fpregs->fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(&fpregs->fprs, current->thread.fpu.fprs,
+		       sizeof(fpregs->fprs));
 	return 1;
 }
 EXPORT_SYMBOL(dump_fpu);

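Note: arch_dup_task_struct() above always gives the child a plain FP save area, even when the parent uses vector registers; this works because FPR i overlays the leftmost 64 bits of VR i. A sketch of what convert_vx_to_fp() has to do (the function itself is introduced elsewhere in the series; this is not its actual definition):

	/* Copy the high halves of V0..V15 into F0..F15. */
	static inline void convert_vx_to_fp_sketch(freg_t *fprs, __vector128 *vxrs)
	{
		int i;

		for (i = 0; i < __NUM_FPRS; i++)
			fprs[i] = *(freg_t *)(vxrs + i);  /* bits 0..63 of VR i */
	}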
+ 9 - 0
arch/s390/kernel/processor.c

@@ -40,6 +40,15 @@ void cpu_init(void)
 	enter_lazy_tlb(&init_mm, current);
 }
 
+/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+	return elf_hwcap & (1UL << num);
+}
+EXPORT_SYMBOL(cpu_have_feature);
+
 /*
  * show_cpuinfo - Get information on one CPU for use by procfs.
  */

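Note: cpu_have_feature() keys off elf_hwcap so that the generic cpufeature machinery can auto-load modules per CPU facility (see also the new arch/s390/include/asm/cpufeature.h in this series). A hypothetical consumer might look like the following; the MSA feature name and the empty init body are illustrative only:

	#include <linux/cpufeature.h>
	#include <linux/module.h>

	static int __init myalg_init(void)
	{
		/* register algorithms here; only runs if the CPU has MSA */
		return 0;
	}
	module_cpu_feature_match(MSA, myalg_init);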
+ 73 - 101
arch/s390/kernel/ptrace.c

@@ -45,39 +45,27 @@ void update_cr_regs(struct task_struct *task)
 	struct per_regs old, new;
 
 	/* Take care of the enable/disable of transactional execution. */
-	if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
+	if (MACHINE_HAS_TE) {
 		unsigned long cr, cr_new;
 
 		__ctl_store(cr, 0, 0);
-		cr_new = cr;
-		if (MACHINE_HAS_TE) {
-			/* Set or clear transaction execution TXC bit 8. */
-			cr_new |= (1UL << 55);
-			if (task->thread.per_flags & PER_FLAG_NO_TE)
-				cr_new &= ~(1UL << 55);
-		}
-		if (MACHINE_HAS_VX) {
-			/* Enable/disable of vector extension */
-			cr_new &= ~(1UL << 17);
-			if (task->thread.vxrs)
-				cr_new |= (1UL << 17);
-		}
+		/* Set or clear transaction execution TXC bit 8. */
+		cr_new = cr | (1UL << 55);
+		if (task->thread.per_flags & PER_FLAG_NO_TE)
+			cr_new &= ~(1UL << 55);
 		if (cr_new != cr)
 			__ctl_load(cr_new, 0, 0);
-		if (MACHINE_HAS_TE) {
-			/* Set/clear transaction execution TDC bits 62/63. */
-			__ctl_store(cr, 2, 2);
-			cr_new = cr & ~3UL;
-			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
-				if (task->thread.per_flags &
-				    PER_FLAG_TE_ABORT_RAND_TEND)
-					cr_new |= 1UL;
-				else
-					cr_new |= 2UL;
-			}
-			if (cr_new != cr)
-				__ctl_load(cr_new, 2, 2);
+		/* Set or clear transaction execution TDC bits 62 and 63. */
+		__ctl_store(cr, 2, 2);
+		cr_new = cr & ~3UL;
+		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+				cr_new |= 1UL;
+			else
+				cr_new |= 2UL;
 		}
+		if (cr_new != cr)
+			__ctl_load(cr_new, 2, 2);
 	}
 	/* Copy user specified PER registers */
 	new.control = thread->per_user.control;
@@ -242,21 +230,21 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fp_regs.fpc;
+		tmp = child->thread.fpu.fpc;
 		tmp <<= BITS_PER_LONG - 32;
 
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			tmp = *(addr_t *)
-			       ((addr_t) child->thread.vxrs + 2*offset);
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(addr_t *)
-			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+			       ((addr_t) &child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -387,20 +375,20 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		if ((unsigned int) data != 0 ||
 		    test_fp_ctl(data >> (BITS_PER_LONG - 32)))
 			return -EINVAL;
-		child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+		child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
 
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			*(addr_t *)((addr_t)
-				child->thread.vxrs + 2*offset) = data;
+				child->thread.fpu.vxrs + 2*offset) = data;
 		else
 			*(addr_t *)((addr_t)
-				&child->thread.fp_regs.fprs + offset) = data;
+				&child->thread.fpu.fprs + offset) = data;
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -621,20 +609,20 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fp_regs.fpc;
+		tmp = child->thread.fpu.fpc;
 
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			tmp = *(__u32 *)
-			       ((addr_t) child->thread.vxrs + 2*offset);
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(__u32 *)
-			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+			       ((addr_t) &child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -746,20 +734,20 @@ static int __poke_user_compat(struct task_struct *child,
 		 */
 		if (test_fp_ctl(tmp))
 			return -EINVAL;
-		child->thread.fp_regs.fpc = data;
+		child->thread.fpu.fpc = data;
 
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			*(__u32 *)((addr_t)
-				child->thread.vxrs + 2*offset) = tmp;
+				child->thread.fpu.vxrs + 2*offset) = tmp;
 		else
 			*(__u32 *)((addr_t)
-				&child->thread.fp_regs.fprs + offset) = tmp;
+				&child->thread.fpu.fprs + offset) = tmp;
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -952,18 +940,16 @@ static int s390_fpregs_get(struct task_struct *target,
 			   const struct user_regset *regset, unsigned int pos,
 			   unsigned int count, void *kbuf, void __user *ubuf)
 {
-	if (target == current) {
-		save_fp_ctl(&target->thread.fp_regs.fpc);
-		save_fp_regs(target->thread.fp_regs.fprs);
-	} else if (target->thread.vxrs) {
-		int i;
+	_s390_fp_regs fp_regs;
+
+	if (target == current)
+		save_fpu_regs();
+
+	fp_regs.fpc = target->thread.fpu.fpc;
+	fpregs_store(&fp_regs, &target->thread.fpu);
 
-		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			target->thread.fp_regs.fprs[i] =
-				*(freg_t *)(target->thread.vxrs + i);
-	}
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.fp_regs, 0, -1);
+				   &fp_regs, 0, -1);
 }
 
 static int s390_fpregs_set(struct task_struct *target,
@@ -972,41 +958,33 @@ static int s390_fpregs_set(struct task_struct *target,
 			   const void __user *ubuf)
 {
 	int rc = 0;
+	freg_t fprs[__NUM_FPRS];
 
-	if (target == current) {
-		save_fp_ctl(&target->thread.fp_regs.fpc);
-		save_fp_regs(target->thread.fp_regs.fprs);
-	}
+	if (target == current)
+		save_fpu_regs();
 
 	/* If setting FPC, must validate it first. */
 	if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
-		u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+		u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
 		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
 					0, offsetof(s390_fp_regs, fprs));
 		if (rc)
 			return rc;
 		if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
 			return -EINVAL;
-		target->thread.fp_regs.fpc = ufpc[0];
+		target->thread.fpu.fpc = ufpc[0];
 	}
 
 	if (rc == 0 && count > 0)
 		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					target->thread.fp_regs.fprs,
-					offsetof(s390_fp_regs, fprs), -1);
-
-	if (rc == 0) {
-		if (target == current) {
-			restore_fp_ctl(&target->thread.fp_regs.fpc);
-			restore_fp_regs(target->thread.fp_regs.fprs);
-		} else if (target->thread.vxrs) {
-			int i;
-
-			for (i = 0; i < __NUM_VXRS_LOW; i++)
-				*(freg_t *)(target->thread.vxrs + i) =
-					target->thread.fp_regs.fprs[i];
-		}
-	}
+					fprs, offsetof(s390_fp_regs, fprs), -1);
+	if (rc)
+		return rc;
+
+	if (is_vx_task(target))
+		convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
+	else
+		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
 
 	return rc;
 }
@@ -1069,11 +1047,11 @@ static int s390_vxrs_low_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (target->thread.vxrs) {
+	if (is_vx_task(target)) {
 		if (target == current)
-			save_vx_regs(target->thread.vxrs);
+			save_fpu_regs();
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
 	} else
 		memset(vxrs, 0, sizeof(vxrs));
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
@@ -1089,20 +1067,17 @@ static int s390_vxrs_low_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!target->thread.vxrs) {
+	if (!is_vx_task(target)) {
 		rc = alloc_vector_registers(target);
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_vx_regs(target->thread.vxrs);
+		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
-	if (rc == 0) {
+	if (rc == 0)
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
-		if (target == current)
-			restore_vx_regs(target->thread.vxrs);
-	}
+			*((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
 
 	return rc;
 }
@@ -1116,10 +1091,10 @@ static int s390_vxrs_high_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (target->thread.vxrs) {
+	if (is_vx_task(target)) {
 		if (target == current)
-			save_vx_regs(target->thread.vxrs);
-		memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW,
+			save_fpu_regs();
+		memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
 		       sizeof(vxrs));
 	} else
 		memset(vxrs, 0, sizeof(vxrs));
@@ -1135,18 +1110,15 @@ static int s390_vxrs_high_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!target->thread.vxrs) {
+	if (!is_vx_task(target)) {
 		rc = alloc_vector_registers(target);
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_vx_regs(target->thread.vxrs);
+		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				target->thread.vxrs + __NUM_VXRS_LOW, 0, -1);
-	if (rc == 0 && target == current)
-		restore_vx_regs(target->thread.vxrs);
-
+				target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
 	return rc;
 }
 

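Note: the regset changes above only alter how the kernel stores the state; the ptrace ABI (NT_S390_VXRS_LOW and friends) is unchanged. For reference, a debugger reads the low vector halves of a stopped tracee roughly like this (userspace sketch, assuming a recent elf.h):

	#include <elf.h>
	#include <stdint.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	static long read_vxrs_low(pid_t pid, uint64_t vxrs_low[16])
	{
		struct iovec iov = {
			.iov_base = vxrs_low,
			.iov_len  = 16 * sizeof(uint64_t),	/* __NUM_VXRS_LOW */
		};

		return ptrace(PTRACE_GETREGSET, pid, NT_S390_VXRS_LOW, &iov);
	}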
+ 3 - 0
arch/s390/kernel/s390_ksyms.c

@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <linux/kvm_host.h>
+#include <asm/fpu-internal.h>
 #include <asm/ftrace.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -8,6 +9,8 @@ EXPORT_SYMBOL(_mcount);
 #if IS_ENABLED(CONFIG_KVM)
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
+EXPORT_SYMBOL(save_fpu_regs);
+EXPORT_SYMBOL(__ctl_set_vx);
 #endif
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);

+ 0 - 355
arch/s390/kernel/sclp.S

@@ -1,355 +0,0 @@
-/*
- * Mini SCLP driver.
- *
- * Copyright IBM Corp. 2004, 2009
- *
- *   Author(s):	Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
- *		Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/irq.h>
-
-LC_EXT_NEW_PSW		= 0x58			# addr of ext int handler
-LC_EXT_NEW_PSW_64	= 0x1b0			# addr of ext int handler 64 bit
-LC_EXT_INT_PARAM	= 0x80			# addr of ext int parameter
-LC_EXT_INT_CODE		= 0x86			# addr of ext int code
-LC_AR_MODE_ID		= 0xa3
-
-#
-# Subroutine which waits synchronously until either an external interruption
-# or a timeout occurs.
-#
-# Parameters:
-#   R2	= 0 for no timeout, non-zero for timeout in (approximated) seconds
-#
-# Returns:
-#   R2	= 0 on interrupt, 2 on timeout
-#   R3	= external interruption parameter if R2=0
-#
-
-_sclp_wait_int:
-	stm	%r6,%r15,24(%r15)		# save registers
-	basr	%r13,0				# get base register
-.LbaseS1:
-	ahi	%r15,-96			# create stack frame
-	la	%r8,LC_EXT_NEW_PSW		# register int handler
-	la	%r9,.LextpswS1-.LbaseS1(%r13)
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa1
-	la	%r8,LC_EXT_NEW_PSW_64		# register int handler 64 bit
-	la	%r9,.LextpswS1_64-.LbaseS1(%r13)
-.Lesa1:
-	mvc	.LoldpswS1-.LbaseS1(16,%r13),0(%r8)
-	mvc	0(16,%r8),0(%r9)
-	epsw	%r6,%r7				# set current addressing mode
-	nill	%r6,0x1				# in new psw (31 or 64 bit mode)
-	nilh	%r7,0x8000
-	stm	%r6,%r7,0(%r8)
-	lhi	%r6,0x0200			# cr mask for ext int (cr0.54)
-	ltr	%r2,%r2
-	jz	.LsetctS1
-	ahi	%r6,0x0800			# cr mask for clock int (cr0.52)
-	stck	.LtimeS1-.LbaseS1(%r13)		# initiate timeout
-	al	%r2,.LtimeS1-.LbaseS1(%r13)
-	st	%r2,.LtimeS1-.LbaseS1(%r13)
-	sckc	.LtimeS1-.LbaseS1(%r13)
-
-.LsetctS1:
-	stctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# enable required interrupts
-	l	%r0,.LctlS1-.LbaseS1(%r13)
-	lhi	%r1,~(0x200 | 0x800)		# clear old values
-	nr	%r1,%r0
-	or	%r1,%r6				# set new value
-	st	%r1,.LctlS1-.LbaseS1(%r13)
-	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)
-	st	%r0,.LctlS1-.LbaseS1(%r13)
-	lhi	%r2,2				# return code for timeout
-.LloopS1:
-	lpsw	.LwaitpswS1-.LbaseS1(%r13)	# wait until interrupt
-.LwaitS1:
-	lh	%r7,LC_EXT_INT_CODE
-	chi	%r7,EXT_IRQ_CLK_COMP		# timeout?
-	je	.LtimeoutS1
-	chi	%r7,EXT_IRQ_SERVICE_SIG		# service int?
-	jne	.LloopS1
-	sr	%r2,%r2
-	l	%r3,LC_EXT_INT_PARAM
-.LtimeoutS1:
-	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# restore interrupt setting
-	# restore old handler
-	mvc	0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14				# return to caller
-
-	.align	8
-.LoldpswS1:
-	.long	0, 0, 0, 0			# old ext int PSW
-.LextpswS1:
-	.long	0x00080000, 0x80000000+.LwaitS1	# PSW to handle ext int
-.LextpswS1_64:
-	.quad	0, .LwaitS1			# PSW to handle ext int, 64 bit
-.LwaitpswS1:
-	.long	0x010a0000, 0x00000000+.LloopS1	# PSW to wait for ext int
-.LtimeS1:
-	.quad	0				# current time
-.LctlS1:
-	.long	0				# CT0 contents
-
-#
-# Subroutine to synchronously issue a service call.
-#
-# Parameters:
-#   R2	= command word
-#   R3	= sccb address
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#   R3	= sccb response code if R2 = 0
-#
-
-_sclp_servc:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	lr	%r6,%r2				# save command word
-	lr	%r7,%r3				# save sccb address
-.LretryS2:
-	lhi	%r2,1				# error return code
-	.insn	rre,0xb2200000,%r6,%r7		# servc
-	brc	1,.LendS2			# exit if not operational
-	brc	8,.LnotbusyS2			# go on if not busy
-	sr	%r2,%r2				# wait until no longer busy
-	bras	%r14,_sclp_wait_int
-	j	.LretryS2			# retry
-.LnotbusyS2:
-	sr	%r2,%r2				# wait until result
-	bras	%r14,_sclp_wait_int
-	sr	%r2,%r2
-	lh	%r3,6(%r7)
-.LendS2:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-
-#
-# Subroutine to set up the SCLP interface.
-#
-# Parameters:
-#   R2	= 0 to activate, non-zero to deactivate
-#
-# Returns:
-#   R2	= 0 on success, non-zero on failure
-#
-
-_sclp_setup:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	basr	%r13,0				# get base register
-.LbaseS3:
-	l	%r6,.LsccbS0-.LbaseS3(%r13)	# prepare init mask sccb
-	mvc	0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
-	ltr	%r2,%r2				# initialization?
-	jz	.LdoinitS3			# go ahead
-	# clear masks
-	xc	.LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
-.LdoinitS3:
-	l	%r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
-	lr	%r3,%r6				# get sccb address
-	bras	%r14,_sclp_servc		# issue service call
-	ltr	%r2,%r2				# servc successful?
-	jnz	.LerrorS3
-	chi	%r3,0x20			# write mask successful?
-	jne	.LerrorS3
-	# check masks
-	la	%r2,.LinitmaskS3-.LinitsccbS3(%r6)
-	l	%r1,0(%r2)			# receive mask ok?
-	n	%r1,12(%r2)
-	cl	%r1,0(%r2)
-	jne	.LerrorS3
-	l	%r1,4(%r2)			# send mask ok?
-	n	%r1,8(%r2)
-	cl	%r1,4(%r2)
-	sr	%r2,%r2
-	je	.LendS3
-.LerrorS3:
-	lhi	%r2,1				# error return code
-.LendS3:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-.LwritemaskS3:
-	.long	0x00780005			# SCLP command for write mask
-.LinitsccbS3:
-	.word	.LinitendS3-.LinitsccbS3
-	.byte	0,0,0,0
-	.word	0
-	.word	0
-	.word	4
-.LinitmaskS3:
-	.long	0x80000000
-	.long	0x40000000
-	.long	0
-	.long	0
-.LinitendS3:
-
-#
-# Subroutine which prints a given text to the SCLP console.
-#
-# Parameters:
-#   R2	= address of nil-terminated ASCII text
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#
-
-_sclp_print:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	basr	%r13,0				# get base register
-.LbaseS4:
-	l	%r8,.LsccbS0-.LbaseS4(%r13)	# prepare write data sccb
-	mvc	0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
-	la	%r7,.LmtoS4-.LwritesccbS4(%r8)	# current mto addr
-	sr	%r0,%r0
-	l	%r10,.Lascebc-.LbaseS4(%r13)	# address of translation table
-.LinitmtoS4:
-	# initialize mto
-	mvc	0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
-	lhi	%r6,.LmtoendS4-.LmtoS4		# current mto length
-.LloopS4:
-	ic	%r0,0(%r2)			# get character
-	ahi	%r2,1
-	ltr	%r0,%r0				# end of string?
-	jz	.LfinalizemtoS4
-	chi	%r0,0x0a			# end of line (NL)?
-	jz	.LfinalizemtoS4
-	stc	%r0,0(%r6,%r7)			# copy to mto
-	la	%r11,0(%r6,%r7)
-	tr	0(1,%r11),0(%r10)		# translate to EBCDIC
-	ahi	%r6,1
-	j	.LloopS4
-.LfinalizemtoS4:
-	sth	%r6,0(%r7)			# update mto length
-	lh	%r9,.LmdbS4-.LwritesccbS4(%r8)	# update mdb length
-	ar	%r9,%r6
-	sth	%r9,.LmdbS4-.LwritesccbS4(%r8)
-	lh	%r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
-	ar	%r9,%r6
-	sth	%r9,.LevbufS4-.LwritesccbS4(%r8)
-	lh	%r9,0(%r8)			# update sccb length
-	ar	%r9,%r6
-	sth	%r9,0(%r8)
-	ar	%r7,%r6				# update current mto address
-	ltr	%r0,%r0				# more characters?
-	jnz	.LinitmtoS4
-	l	%r2,.LwritedataS4-.LbaseS4(%r13)# write data
-	lr	%r3,%r8
-	bras	%r14,_sclp_servc
-	ltr	%r2,%r2				# servc successful?
-	jnz	.LendS4
-	chi	%r3,0x20			# write data successful?
-	je	.LendS4
-	lhi	%r2,1				# error return code
-.LendS4:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-
-#
-# Function which prints a given text to the SCLP console.
-#
-# Parameters:
-#   R2	= address of nil-terminated ASCII text
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#
-
-ENTRY(_sclp_print_early)
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa2
-	ahi	%r15,-80
-	stmh	%r6,%r15,96(%r15)		# store upper register halves
-	basr	%r13,0
-	lmh	%r0,%r15,.Lzeroes-.(%r13)	# clear upper register halves
-.Lesa2:
-	lr	%r10,%r2			# save string pointer
-	lhi	%r2,0
-	bras	%r14,_sclp_setup		# enable console
-	ltr	%r2,%r2
-	jnz	.LendS5
-	lr	%r2,%r10
-	bras	%r14,_sclp_print		# print string
-	ltr	%r2,%r2
-	jnz	.LendS5
-	lhi	%r2,1
-	bras	%r14,_sclp_setup		# disable console
-.LendS5:
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa3
-	lgfr	%r2,%r2				# sign extend return value
-	lmh	%r6,%r15,96(%r15)		# restore upper register halves
-	ahi	%r15,80
-.Lesa3:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-.Lzeroes:
-	.fill	64,4,0
-
-.LwritedataS4:
-	.long	0x00760005			# SCLP command for write data
-.LwritesccbS4:
-	# sccb
-	.word	.LmtoS4-.LwritesccbS4
-	.byte	0
-	.byte	0,0,0
-	.word	0
-
-	# evbuf
-.LevbufS4:
-	.word	.LmtoS4-.LevbufS4
-	.byte	0x02
-	.byte	0
-	.word	0
-
-.LmdbS4:
-	# mdb
-	.word	.LmtoS4-.LmdbS4
-	.word	1
-	.long	0xd4c4c240
-	.long	1
-
-	# go
-.LgoS4:
-	.word	.LmtoS4-.LgoS4
-	.word	1
-	.long	0
-	.byte	0,0,0,0,0,0,0,0
-	.byte	0,0,0
-	.byte	0
-	.byte	0,0,0,0,0,0,0
-	.byte	0
-	.word	0
-	.byte	0,0,0,0,0,0,0,0,0,0
-	.byte	0,0,0,0,0,0,0,0
-	.byte	0,0,0,0,0,0,0,0
-
-.LmtoS4:
-	.word	.LmtoendS4-.LmtoS4
-	.word	4
-	.word	0x1000
-	.byte	0
-	.byte	0,0,0
-.LmtoendS4:
-
-	# Global constants
-.LsccbS0:
-	.long	_sclp_work_area
-.Lascebc:
-	.long	_ascebc
-
-.section .data,"aw",@progbits
-	.balign 4096
-_sclp_work_area:
-	.fill	4096
-.previous

+ 160 - 0
arch/s390/kernel/sclp.c

@@ -0,0 +1,160 @@
+/*
+ *    Copyright IBM Corp. 2015
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <asm/ebcdic.h>
+#include <asm/irq.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+
+static char _sclp_work_area[4096] __aligned(PAGE_SIZE);
+
+static void _sclp_wait_int(void)
+{
+	unsigned long cr0, cr0_new, psw_mask, addr;
+	psw_t psw_ext_save, psw_wait;
+
+	__ctl_store(cr0, 0, 0);
+	cr0_new = cr0 | 0x200;
+	__ctl_load(cr0_new, 0, 0);
+
+	psw_ext_save = S390_lowcore.external_new_psw;
+	psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA);
+	S390_lowcore.external_new_psw.mask = psw_mask;
+	psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT;
+	S390_lowcore.ext_int_code = 0;
+
+	do {
+		asm volatile(
+			"	larl	%[addr],0f\n"
+			"	stg	%[addr],%[psw_wait_addr]\n"
+			"	stg	%[addr],%[psw_ext_addr]\n"
+			"	lpswe	%[psw_wait]\n"
+			"0:\n"
+			: [addr] "=&d" (addr),
+			  [psw_wait_addr] "=Q" (psw_wait.addr),
+			  [psw_ext_addr] "=Q" (S390_lowcore.external_new_psw.addr)
+			: [psw_wait] "Q" (psw_wait)
+			: "cc", "memory");
+	} while (S390_lowcore.ext_int_code != EXT_IRQ_SERVICE_SIG);
+
+	__ctl_load(cr0, 0, 0);
+	S390_lowcore.external_new_psw = psw_ext_save;
+}
+
+static int _sclp_servc(unsigned int cmd, char *sccb)
+{
+	unsigned int cc;
+
+	do {
+		asm volatile(
+			"	.insn	rre,0xb2200000,%1,%2\n"
+			"	ipm	%0\n"
+			: "=d" (cc) : "d" (cmd), "a" (sccb)
+			: "cc", "memory");
+		cc >>= 28;
+		if (cc == 3)
+			return -EINVAL;
+		_sclp_wait_int();
+	} while (cc != 0);
+	return (*(unsigned short *)(sccb + 6) == 0x20) ? 0 : -EIO;
+}
+
+static int _sclp_setup(int disable)
+{
+	static unsigned char init_sccb[] = {
+		0x00, 0x1c,
+		0x00, 0x00, 0x00, 0x00,	0x00, 0x00, 0x00, 0x00,
+		0x00, 0x04,
+		0x80, 0x00, 0x00, 0x00,	0x40, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,	0x00, 0x00, 0x00, 0x00
+	};
+	unsigned int *masks;
+	int rc;
+
+	memcpy(_sclp_work_area, init_sccb, 28);
+	masks = (unsigned int *)(_sclp_work_area + 12);
+	if (disable)
+		memset(masks, 0, 16);
+	/* SCLP write mask */
+	rc = _sclp_servc(0x00780005, _sclp_work_area);
+	if (rc)
+		return rc;
+	if ((masks[0] & masks[3]) != masks[0] ||
+	    (masks[1] & masks[2]) != masks[1])
+		return -EIO;
+	return 0;
+}
+
+static int _sclp_print(const char *str)
+{
+	static unsigned char write_head[] = {
+		/* sccb header */
+		0x00, 0x52,					/* 0 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 2 */
+		/* evbuf */
+		0x00, 0x4a,					/* 8 */
+		0x02, 0x00, 0x00, 0x00,				/* 10 */
+		/* mdb */
+		0x00, 0x44,					/* 14 */
+		0x00, 0x01,					/* 16 */
+		0xd4, 0xc4, 0xc2, 0x40,				/* 18 */
+		0x00, 0x00, 0x00, 0x01,				/* 22 */
+		/* go */
+		0x00, 0x38,					/* 26 */
+		0x00, 0x01,					/* 28 */
+		0x00, 0x00, 0x00, 0x00,				/* 30 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 34 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 42 */
+		0x00, 0x00, 0x00, 0x00,				/* 50 */
+		0x00, 0x00,					/* 54 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 56 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 64 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 72 */
+		0x00, 0x00,					/* 80 */
+	};
+	static unsigned char write_mto[] = {
+		/* mto	*/
+		0x00, 0x0a,					/* 0 */
+		0x00, 0x04,					/* 2 */
+		0x10, 0x00,					/* 4 */
+		0x00, 0x00, 0x00, 0x00				/* 6 */
+	};
+	unsigned char *ptr, ch;
+	unsigned int count;
+
+	memcpy(_sclp_work_area, write_head, sizeof(write_head));
+	ptr = _sclp_work_area + sizeof(write_head);
+	do {
+		memcpy(ptr, write_mto, sizeof(write_mto));
+		for (count = sizeof(write_mto); (ch = *str++) != 0; count++) {
+			if (ch == 0x0a)
+				break;
+			ptr[count] = _ascebc[ch];
+		}
+		/* Update length fields in mto, mdb, evbuf and sccb */
+		*(unsigned short *) ptr = count;
+		*(unsigned short *)(_sclp_work_area + 14) += count;
+		*(unsigned short *)(_sclp_work_area + 8) += count;
+		*(unsigned short *)(_sclp_work_area + 0) += count;
+		ptr += count;
+	} while (ch != 0);
+
+	/* SCLP write data */
+	return _sclp_servc(0x00760005, _sclp_work_area);
+}
+
+int _sclp_print_early(const char *str)
+{
+	int rc;
+
+	rc = _sclp_setup(0);
+	if (rc)
+		return rc;
+	rc = _sclp_print(str);
+	if (rc)
+		return rc;
+	return _sclp_setup(1);
+}
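The C rewrite keeps the wire format of the deleted assembly: one SCCB holding an event buffer, an mdb, and one MTO per output line, with every appended byte growing four nested length fields at fixed offsets 0 (sccb), 8 (evbuf) and 14 (mdb). A standalone sketch of that bookkeeping, with the EBCDIC translation left out (illustrative model, not the kernel code):

#include <stdio.h>
#include <string.h>

static unsigned char work[4096];

int main(void)
{
	const char *str = "hello\nworld\n";
	unsigned char *ptr = work + 82;		/* sizeof(write_head) */
	unsigned int count;
	unsigned char ch;

	*(unsigned short *)(work + 0) = 82;	/* initial sccb length */
	*(unsigned short *)(work + 8) = 74;	/* initial evbuf length */
	*(unsigned short *)(work + 14) = 68;	/* initial mdb length */

	do {
		count = 10;			/* sizeof(write_mto) */
		while ((ch = *str++) != 0 && ch != '\n')
			ptr[count++] = ch;	/* kernel also converts to EBCDIC */
		/* Update length fields in mto, mdb, evbuf and sccb */
		*(unsigned short *)ptr = count;
		*(unsigned short *)(work + 14) += count;
		*(unsigned short *)(work + 8) += count;
		*(unsigned short *)(work + 0) += count;
		ptr += count;
	} while (ch != 0);	/* like the kernel loop, a final empty MTO is appended */

	printf("sccb length now %u bytes\n", *(unsigned short *)(work + 0));
	return 0;
}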

+ 10 - 9
arch/s390/kernel/setup.c

@@ -62,6 +62,7 @@
 #include <asm/os_info.h>
 #include <asm/sclp.h>
 #include <asm/sysinfo.h>
+#include <asm/numa.h>
 #include "entry.h"
 
 /*
@@ -76,7 +77,7 @@ EXPORT_SYMBOL(console_devno);
 unsigned int console_irq = -1;
 EXPORT_SYMBOL(console_irq);
 
-unsigned long elf_hwcap = 0;
+unsigned long elf_hwcap __read_mostly = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 
 int __initdata memory_end_set;
@@ -688,7 +689,7 @@ static void __init setup_memory(void)
 /*
  * Setup hardware capabilities.
  */
-static void __init setup_hwcaps(void)
+static int __init setup_hwcaps(void)
 {
 	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
 	struct cpuid cpu_id;
@@ -754,9 +755,11 @@ static void __init setup_hwcaps(void)
 		elf_hwcap |= HWCAP_S390_TE;
 
 	/*
-	 * Vector extension HWCAP_S390_VXRS is bit 11.
+	 * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
+	 * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
+	 * instead of facility bit 129.
 	 */
-	if (test_facility(129))
+	if (MACHINE_HAS_VX)
 		elf_hwcap |= HWCAP_S390_VXRS;
 	get_cpu_id(&cpu_id);
 	add_device_randomness(&cpu_id, sizeof(cpu_id));
@@ -793,7 +796,9 @@ static void __init setup_hwcaps(void)
 		strcpy(elf_platform, "z13");
 		break;
 	}
+	return 0;
 }
+arch_initcall(setup_hwcaps);
 
 /*
  * Add system information as device randomness
@@ -879,11 +884,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_lowcore();
 	smp_fill_possible_mask();
         cpu_init();
-
-	/*
-	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
-	 */
-	setup_hwcaps();
+	numa_setup();
 
 	/*
 	 * Create kernel page tables and switch to virtual addressing.
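Because setup_hwcaps() now tests MACHINE_HAS_VX, booting with "novx" reliably hides the vector extension from user space even when facility 129 is installed. A small user-space check of the resulting AT_HWCAP bit (the bit number is s390-specific, per the comment above):

#include <stdio.h>
#include <sys/auxv.h>

#define HWCAP_S390_VXRS (1UL << 11)	/* bit 11, as documented above */

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	printf("vector extension %savailable\n",
	       (hwcap & HWCAP_S390_VXRS) ? "" : "not ");
	return 0;
}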

+ 14 - 33
arch/s390/kernel/signal.c

@@ -105,32 +105,13 @@ struct rt_sigframe
 static void store_sigregs(void)
 {
 	save_access_regs(current->thread.acrs);
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		int i;
-
-		save_vx_regs(current->thread.vxrs);
-		for (i = 0; i < __NUM_FPRS; i++)
-			current->thread.fp_regs.fprs[i] =
-				*(freg_t *)(current->thread.vxrs + i);
-	} else
-		save_fp_regs(current->thread.fp_regs.fprs);
+	save_fpu_regs();
 }
 
 /* Load registers after signal return */
 static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
-	/* restore_fp_ctl is done in restore_sigregs */
-	if (current->thread.vxrs) {
-		int i;
-
-		for (i = 0; i < __NUM_FPRS; i++)
-			*(freg_t *)(current->thread.vxrs + i) =
-				current->thread.fp_regs.fprs[i];
-		restore_vx_regs(current->thread.vxrs);
-	} else
-		restore_fp_regs(current->thread.fp_regs.fprs);
 }
 
 /* Returns non-zero on fault. */
@@ -146,8 +127,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
-	       sizeof(user_sregs.fpregs));
+	fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
 		return -EFAULT;
 	return 0;
@@ -166,8 +146,8 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
 		return -EINVAL;
 
-	/* Loading the floating-point-control word can fail. Do that first. */
-	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
 		return -EINVAL;
 
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -185,8 +165,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
 
-	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
-	       sizeof(current->thread.fp_regs));
+	fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
@@ -200,13 +179,13 @@ static int save_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Save vector registers to signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
@@ -220,15 +199,15 @@ static int restore_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Restore vector registers from signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
 	}
 	return 0;
 }
@@ -243,6 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
 		goto badframe;
 	set_current_blocked(&set);
+	save_fpu_regs();
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -266,6 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
+	save_fpu_regs();
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
@@ -400,7 +381,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	uc_flags = 0;
 	if (MACHINE_HAS_VX) {
 		frame_size += sizeof(_sigregs_ext);
-		if (current->thread.vxrs)
+		if (is_vx_task(current))
 			uc_flags |= UC_VXRS;
 	}
 	frame = get_sigframe(&ksig->ka, regs, frame_size);

+ 2 - 2
arch/s390/kernel/smp.c

@@ -532,8 +532,8 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
 
 #ifdef CONFIG_CRASH_DUMP
 
-static void __smp_store_cpu_state(struct save_area_ext *sa_ext, u16 address,
-				  int is_boot_cpu)
+static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext,
+					 u16 address, int is_boot_cpu)
 {
 	void *lc = (void *)(unsigned long) store_prefix();
 	unsigned long vx_sa;

+ 5 - 5
arch/s390/kernel/syscalls.S

@@ -276,9 +276,9 @@ SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
 SYSCALL(sys_statfs64,compat_sys_statfs64)
 SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
 SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
-NI_SYSCALL						/* 268 sys_mbind */
-NI_SYSCALL						/* 269 sys_get_mempolicy */
-NI_SYSCALL						/* 270 sys_set_mempolicy */
+SYSCALL(sys_mbind,compat_sys_mbind)
+SYSCALL(sys_get_mempolicy,compat_sys_get_mempolicy)
+SYSCALL(sys_set_mempolicy,compat_sys_set_mempolicy)
 SYSCALL(sys_mq_open,compat_sys_mq_open)
 SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
 SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
@@ -295,7 +295,7 @@ SYSCALL(sys_ioprio_get,compat_sys_ioprio_get)
 SYSCALL(sys_inotify_init,sys_inotify_init)
 SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch)	/* 285 */
 SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
-NI_SYSCALL						/* 287 sys_migrate_pages */
+SYSCALL(sys_migrate_pages,compat_sys_migrate_pages)
 SYSCALL(sys_openat,compat_sys_openat)
 SYSCALL(sys_mkdirat,compat_sys_mkdirat)
 SYSCALL(sys_mknodat,compat_sys_mknodat)			/* 290 */
@@ -318,7 +318,7 @@ SYSCALL(sys_splice,compat_sys_splice)
 SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
 SYSCALL(sys_tee,compat_sys_tee)
 SYSCALL(sys_vmsplice,compat_sys_vmsplice)
-NI_SYSCALL						/* 310 sys_move_pages */
+SYSCALL(sys_move_pages,compat_sys_move_pages)
 SYSCALL(sys_getcpu,compat_sys_getcpu)
 SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
 SYSCALL(sys_utimes,compat_sys_utimes)
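With these table entries the NUMA syscalls become reachable from user space on s390. A small sketch invoking one of them directly by number; 269 is the s390 slot shown above and is not portable to other architectures:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	int mode = -1;
	/* get_mempolicy(&mode, NULL, 0, NULL, 0): query the default policy */
	long rc = syscall(269, &mode, NULL, 0, NULL, 0);

	if (rc == 0)
		printf("default NUMA policy mode: %d\n", mode);
	else
		perror("get_mempolicy");
	return 0;
}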

+ 16 - 15
arch/s390/kernel/topology.c

@@ -18,7 +18,10 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/node.h>
 #include <asm/sysinfo.h>
+#include <asm/numa.h>
 
 #define PTF_HORIZONTAL	(0UL)
 #define PTF_VERTICAL	(1UL)
@@ -37,8 +40,10 @@ static struct sysinfo_15_1_x *tl_info;
 static int topology_enabled = 1;
 static DECLARE_WORK(topology_work, topology_work_fn);
 
-/* topology_lock protects the socket and book linked lists */
-static DEFINE_SPINLOCK(topology_lock);
+/*
+ * Socket/Book linked lists and per_cpu(cpu_topology) updates are
+ * protected by "sched_domains_mutex".
+ */
 static struct mask_info socket_info;
 static struct mask_info book_info;
 
@@ -188,7 +193,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
 {
 	struct cpuid cpu_id;
 
-	spin_lock_irq(&topology_lock);
 	get_cpu_id(&cpu_id);
 	clear_masks();
 	switch (cpu_id.machine) {
@@ -199,7 +203,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
 	default:
 		__tl_to_masks_generic(info);
 	}
-	spin_unlock_irq(&topology_lock);
 }
 
 static void topology_update_polarization_simple(void)
@@ -244,10 +247,8 @@ int topology_set_cpu_management(int fc)
 
 static void update_cpu_masks(void)
 {
-	unsigned long flags;
 	int cpu;
 
-	spin_lock_irqsave(&topology_lock, flags);
 	for_each_possible_cpu(cpu) {
 		per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
 		per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
@@ -259,7 +260,7 @@ static void update_cpu_masks(void)
 			per_cpu(cpu_topology, cpu).book_id = cpu;
 		}
 	}
-	spin_unlock_irqrestore(&topology_lock, flags);
+	numa_update_cpu_topology();
 }
 
 void store_topology(struct sysinfo_15_1_x *info)
@@ -274,21 +275,21 @@ int arch_update_cpu_topology(void)
 {
 	struct sysinfo_15_1_x *info = tl_info;
 	struct device *dev;
-	int cpu;
+	int cpu, rc = 0;
 
-	if (!MACHINE_HAS_TOPOLOGY) {
-		update_cpu_masks();
-		topology_update_polarization_simple();
-		return 0;
+	if (MACHINE_HAS_TOPOLOGY) {
+		rc = 1;
+		store_topology(info);
+		tl_to_masks(info);
 	}
-	store_topology(info);
-	tl_to_masks(info);
 	update_cpu_masks();
+	if (!MACHINE_HAS_TOPOLOGY)
+		topology_update_polarization_simple();
 	for_each_online_cpu(cpu) {
 		dev = get_cpu_device(cpu);
 		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
 	}
-	return 1;
+	return rc;
 }
 
 static void topology_work_fn(struct work_struct *work)

+ 16 - 18
arch/s390/kernel/traps.c

@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
 #include "entry.h"
 
 int show_unhandled_signals = 1;
@@ -151,7 +151,7 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
 DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
 	      "transaction constraint exception")
 
-static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
 {
 	int si_code = 0;
 	/* FPC[2] is Data Exception Code */
@@ -227,7 +227,7 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 int alloc_vector_registers(struct task_struct *tsk)
 {
 	__vector128 *vxrs;
-	int i;
+	freg_t *fprs;
 
 	/* Allocate vector register save area. */
 	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
@@ -236,15 +236,13 @@ int alloc_vector_registers(struct task_struct *tsk)
 		return -ENOMEM;
 	preempt_disable();
 	if (tsk == current)
-		save_fp_regs(tsk->thread.fp_regs.fprs);
+		save_fpu_regs();
 	/* Copy the 16 floating point registers */
-	for (i = 0; i < 16; i++)
-		*(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];
-	tsk->thread.vxrs = vxrs;
-	if (tsk == current) {
-		__ctl_set_bit(0, 17);
-		restore_vx_regs(vxrs);
-	}
+	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
+	fprs = tsk->thread.fpu.fprs;
+	tsk->thread.fpu.vxrs = vxrs;
+	tsk->thread.fpu.flags |= FPU_USE_VX;
+	kfree(fprs);
 	preempt_enable();
 	return 0;
 }
@@ -259,8 +257,8 @@ void vector_exception(struct pt_regs *regs)
 	}
 
 	/* get vector interrupt code from fpc */
-	asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
-	vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+	save_fpu_regs();
+	vic = (current->thread.fpu.fpc & 0xf00) >> 8;
 	switch (vic) {
 	case 1: /* invalid vector operation */
 		si_code = FPE_FLTINV;
@@ -297,22 +295,22 @@ void data_exception(struct pt_regs *regs)
 
 	location = get_trap_ip(regs);
 
-	asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
+	save_fpu_regs();
 	/* Check for vector register enablement */
-	if (MACHINE_HAS_VX && !current->thread.vxrs &&
-	    (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
+	if (MACHINE_HAS_VX && !is_vx_task(current) &&
+	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
 		alloc_vector_registers(current);
 		/* Vector data exception is suppressing, rewind psw. */
 		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
 		clear_pt_regs_flag(regs, PIF_PER_TRAP);
 		return;
 	}
-	if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
+	if (current->thread.fpu.fpc & FPC_DXC_MASK)
 		signal = SIGFPE;
 	else
 		signal = SIGILL;
 	if (signal == SIGFPE)
-		do_fp_trap(regs, current->thread.fp_regs.fpc);
+		do_fp_trap(regs, current->thread.fpu.fpc);
 	else if (signal)
 		do_trap(regs, signal, ILL_ILLOPN, "data exception");
 }
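alloc_vector_registers() now promotes a task's save area in place: the 16 floating-point registers become the high doublewords of the first 16 vector registers (convert_fp_to_vx) and the old fprs array is freed. A standalone sketch of that promotion under assumed layouts (not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct { uint64_t high, low; } vector128;	/* assumed layout */

struct fpu {
	uint64_t *fprs;		/* valid before promotion */
	vector128 *vxrs;	/* valid after promotion */
};

static int promote_to_vx(struct fpu *fpu)
{
	vector128 *vxrs = calloc(32, sizeof(*vxrs));
	int i;

	if (!vxrs)
		return -1;
	for (i = 0; i < 16; i++)		/* models convert_fp_to_vx() */
		vxrs[i].high = fpu->fprs[i];
	free(fpu->fprs);			/* old FP-only save area */
	fpu->fprs = NULL;
	fpu->vxrs = vxrs;
	return 0;
}

int main(void)
{
	struct fpu fpu = { .fprs = calloc(16, sizeof(uint64_t)) };

	fpu.fprs[5] = 0x4045000000000000ULL;	/* the double 42.0 */
	promote_to_vx(&fpu);
	printf("vxrs[5].high=%#llx\n", (unsigned long long)fpu.vxrs[5].high);
	free(fpu.vxrs);
	return 0;
}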

+ 1 - 1
arch/s390/kernel/vdso32/Makefile

@@ -13,7 +13,7 @@ KBUILD_AFLAGS_31 += -m31 -s
 KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+			$(call cc-ldoption, -Wl$(comma)--hash-style=both)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)

+ 1 - 1
arch/s390/kernel/vdso64/Makefile

@@ -13,7 +13,7 @@ KBUILD_AFLAGS_64 += -m64 -s
 KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
-			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+			$(call cc-ldoption, -Wl$(comma)--hash-style=both)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)

+ 11 - 1
arch/s390/kernel/vtime.c

@@ -28,6 +28,7 @@ static atomic64_t virt_timer_elapsed;
 static DEFINE_PER_CPU(u64, mt_cycles[32]);
 static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
 static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
 
 static inline u64 get_vtimer(void)
 {
@@ -85,7 +86,8 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 	S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
 
 	/* Do MT utilization calculation */
-	if (smp_cpu_mtid) {
+	if (smp_cpu_mtid &&
+	    time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) {
 		u64 cycles_new[32], *cycles_old;
 		u64 delta, mult, div;
 
@@ -105,6 +107,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 				       sizeof(u64) * (smp_cpu_mtid + 1));
 			}
 		}
+		__this_cpu_write(mt_scaling_jiffies, jiffies_64);
 	}
 
 	user = S390_lowcore.user_timer - ti->user_timer;
@@ -376,4 +379,11 @@ void vtime_init(void)
 {
 	/* set initial cpu timer */
 	set_vtimer(VTIMER_MAX_SLICE);
+	/* Setup initial MT scaling values */
+	if (smp_cpu_mtid) {
+		__this_cpu_write(mt_scaling_jiffies, jiffies);
+		__this_cpu_write(mt_scaling_mult, 1);
+		__this_cpu_write(mt_scaling_div, 1);
+		stcctm5(smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
+	}
 }
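The per-CPU mt_scaling_jiffies timestamp throttles the MT utilization math to at most once per jiffy. A standalone model (not kernel code) of the time_after64() guard:

#include <stdint.h>
#include <stdio.h>

static uint64_t jiffies_64 = 1;
static uint64_t mt_scaling_jiffies;	/* per-CPU in the kernel */

/* Equivalent of time_after64(a, b) for wrapping 64-bit tick counters */
static int time_after64(uint64_t a, uint64_t b)
{
	return (int64_t)(b - a) < 0;
}

static void account_tick(void)
{
	if (time_after64(jiffies_64, mt_scaling_jiffies)) {
		printf("jiffy %llu: recompute MT scaling\n",
		       (unsigned long long)jiffies_64);
		mt_scaling_jiffies = jiffies_64;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++) {
		account_tick();		/* several calls per jiffy ... */
		account_tick();		/* ... only the first recomputes */
		jiffies_64++;
	}
	return 0;
}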

+ 97 - 33
arch/s390/kvm/kvm-s390.c

@@ -1283,21 +1283,54 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+/*
+ * Backs up the current FP/VX register save area on a particular
+ * destination.  Used to switch between different register save
+ * areas.
+ */
+static inline void save_fpu_to(struct fpu *dst)
+{
+	dst->fpc = current->thread.fpu.fpc;
+	dst->flags = current->thread.fpu.flags;
+	dst->regs = current->thread.fpu.regs;
+}
+
+/*
+ * Switches the FP/VX register save area from which to lazy
+ * restore register contents.
+ */
+static inline void load_fpu_from(struct fpu *from)
+{
+	current->thread.fpu.fpc = from->fpc;
+	current->thread.fpu.flags = from->flags;
+	current->thread.fpu.regs = from->regs;
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	if (test_kvm_facility(vcpu->kvm, 129))
-		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
-	else
-		save_fp_regs(vcpu->arch.host_fpregs.fprs);
-	save_access_regs(vcpu->arch.host_acrs);
+	/* Save host register state */
+	save_fpu_regs();
+	save_fpu_to(&vcpu->arch.host_fpregs);
+
 	if (test_kvm_facility(vcpu->kvm, 129)) {
-		restore_fp_ctl(&vcpu->run->s.regs.fpc);
-		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
+		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+		current->thread.fpu.flags = FPU_USE_VX;
+		/*
+		 * Use the register save area in the SIE-control block
+		 * for register restore and save in kvm_arch_vcpu_put()
+		 */
+		current->thread.fpu.vxrs =
+			(__vector128 *)&vcpu->run->s.regs.vrs;
+		/* Always enable the vector extension for KVM */
+		__ctl_set_vx();
+	} else
+		load_fpu_from(&vcpu->arch.guest_fpregs);
+
+	if (test_fp_ctl(current->thread.fpu.fpc))
+		/* User space provided an invalid FPC, let's clear it */
+		current->thread.fpu.fpc = 0;
+
+	save_access_regs(vcpu->arch.host_acrs);
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1307,19 +1340,22 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		save_fp_ctl(&vcpu->run->s.regs.fpc);
-		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
-	save_access_regs(vcpu->run->s.regs.acrs);
-	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+
+	save_fpu_regs();
+
 	if (test_kvm_facility(vcpu->kvm, 129))
-		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+		/*
+		 * kvm_arch_vcpu_load() set up the register save area to
+		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
+		 * are already saved.  Only the floating-point control must be
+		 * copied.
+		 */
+		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 	else
-		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+		save_fpu_to(&vcpu->arch.guest_fpregs);
+	load_fpu_from(&vcpu->arch.host_fpregs);
+
+	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
 }
 
@@ -1464,7 +1500,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
-	vcpu->arch.host_vregs = &sie_page->vregs;
 
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
@@ -1486,6 +1521,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
+	/*
+	 * Allocate a save area for floating-point registers.  If the vector
+	 * extension is available, register contents are saved in the SIE
+	 * control block.  The allocated save area is still required in
+	 * particular places, for example, in kvm_s390_vcpu_store_status().
+	 */
+	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+					       GFP_KERNEL);
+	if (!vcpu->arch.guest_fpregs.fprs) {
+		rc = -ENOMEM;
+		goto out_free_sie_block;
+	}
+
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
 		goto out_free_sie_block;
@@ -1708,16 +1756,16 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	if (test_fp_ctl(fpu->fpc))
 		return -EINVAL;
-	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs();
+	load_fpu_from(&vcpu->arch.guest_fpregs);
 	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
 	return 0;
 }
@@ -2268,8 +2316,21 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	 * copying in vcpu load/put. Lets update our copies before we save
 	 * it into the save area
 	 */
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs();
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		/*
+		 * If the vector extension is available, the vector registers
+		 * which overlaps with floating-point registers are saved in
+		 * the SIE-control block.  Hence, extract the floating-point
+		 * registers and the FPC value and store them in the
+		 * guest_fpregs structure.
+		 */
+		WARN_ON(!is_vx_task(current));	  /* XXX remove later */
+		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
+		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
+				 current->thread.fpu.vxrs);
+	} else
+		save_fpu_to(&vcpu->arch.guest_fpregs);
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	return kvm_s390_store_status_unloaded(vcpu, addr);
@@ -2296,10 +2357,13 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
 
 	/*
 	 * The guest VXRS are in the host VXRs due to the lazy
-	 * copying in vcpu load/put. Let's update our copies before we save
-	 * it into the save area.
+	 * copying in vcpu load/put. We can simply call save_fpu_regs()
+	 * to save the current register state because we are in the
+	 * middle of a load/put cycle.
+	 *
+	 * Let's update our copies before we save it into the save area.
 	 */
-	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	save_fpu_regs();
 
 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
 }
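The vcpu load/put paths stop copying register contents; they only re-point current's FPU save area, letting one lazy save_fpu_regs() spill into whichever context is active. A standalone sketch of that pointer switching with assumed types (not the kernel structures):

#include <stdio.h>

struct fpu {
	unsigned int fpc;
	unsigned long long *regs;	/* FP or VX save area */
};

static struct fpu task_fpu;		/* stands in for current->thread.fpu */

static void save_fpu_to(struct fpu *dst)   { *dst = task_fpu; }
static void load_fpu_from(struct fpu *src) { task_fpu = *src; }

int main(void)
{
	unsigned long long host_area[16], guest_area[16];
	struct fpu host = { .fpc = 0, .regs = host_area };
	struct fpu guest = { .fpc = 0x80, .regs = guest_area };

	task_fpu = host;

	/* vcpu_load: back up the host area, switch to the guest's */
	save_fpu_to(&host);
	load_fpu_from(&guest);
	printf("running guest: regs at %p\n", (void *)task_fpu.regs);

	/* vcpu_put: any lazy spill went into guest_area; switch back */
	save_fpu_to(&guest);
	load_fpu_from(&host);
	printf("back on host: regs at %p\n", (void *)task_fpu.regs);
	return 0;
}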

+ 1 - 0
arch/s390/lib/delay.c

@@ -26,6 +26,7 @@ void __delay(unsigned long loops)
         */
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
+EXPORT_SYMBOL(__delay);
 
 static void __udelay_disabled(unsigned long long usecs)
 {

+ 1 - 14
arch/s390/lib/uaccess.c

@@ -370,22 +370,9 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
 }
 EXPORT_SYMBOL(__strncpy_from_user);
 
-/*
- * The "old" uaccess variant without mvcos can be enforced with the
- * uaccess_primary kernel parameter. This is mainly for debugging purposes.
- */
-static int uaccess_primary __initdata;
-
-static int __init parse_uaccess_pt(char *__unused)
-{
-	uaccess_primary = 1;
-	return 0;
-}
-early_param("uaccess_primary", parse_uaccess_pt);
-
 static int __init uaccess_init(void)
 {
-	if (!uaccess_primary && test_facility(27))
+	if (test_facility(27))
 		static_key_slow_inc(&have_mvcos);
 	return 0;
 }

+ 1 - 1
arch/s390/mm/fault.c

@@ -646,7 +646,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 		return;
 	inc_irq_stat(IRQEXT_PFL);
 	/* Get the token (= pid of the affected task). */
-	pid = sizeof(void *) == 4 ? param32 : param64;
+	pid = param64;
 	rcu_read_lock();
 	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
 	if (tsk)

+ 10 - 0
arch/s390/mm/gup.c

@@ -30,6 +30,9 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	do {
 		pte = *ptep;
 		barrier();
+		/* Similar to the PMD case, NUMA hinting must take slow path */
+		if (pte_protnone(pte))
+			return 0;
 		if ((pte_val(pte) & mask) != 0)
 			return 0;
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -125,6 +128,13 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_large(pmd))) {
+			/*
+			 * NUMA hinting faults need to be handled in the GUP
+			 * slowpath for accounting purposes and so that they
+			 * can be serialised against THP migration.
+			 */
+			if (pmd_protnone(pmd))
+				return 0;
 			if (!gup_huge_pmd(pmdp, pmd, addr, next,
 					  write, pages, nr))
 				return 0;

+ 20 - 20
arch/s390/mm/init.c

@@ -27,6 +27,7 @@
 #include <linux/initrd.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
+#include <linux/memblock.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -138,7 +139,7 @@ void __init mem_init(void)
 	cpumask_set_cpu(0, mm_cpumask(&init_mm));
 	atomic_set(&init_mm.context.attach_count, 1);
 
-        max_mapnr = max_low_pfn;
+	set_max_mapnr(max_low_pfn);
         high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 
 	/* Setup guest page hinting */
@@ -170,37 +171,36 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size)
 {
-	unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+	unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+	unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
-	struct zone *zone;
-	int rc;
+	unsigned long nr_pages;
+	int rc, zone_enum;
 
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
-	for_each_zone(zone) {
-		if (zone_idx(zone) != ZONE_MOVABLE) {
-			/* Add range within existing zone limits */
-			zone_start_pfn = zone->zone_start_pfn;
-			zone_end_pfn = zone->zone_start_pfn +
-				       zone->spanned_pages;
+
+	while (size_pages > 0) {
+		if (start_pfn < dma_end_pfn) {
+			nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
+				   dma_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_DMA;
+		} else if (start_pfn < normal_end_pfn) {
+			nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
+				   normal_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_NORMAL;
 		} else {
-			/* Add remaining range to ZONE_MOVABLE */
-			zone_start_pfn = start_pfn;
-			zone_end_pfn = start_pfn + size_pages;
+			nr_pages = size_pages;
+			zone_enum = ZONE_MOVABLE;
 		}
-		if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
-			continue;
-		nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
-			   zone_end_pfn - start_pfn : size_pages;
-		rc = __add_pages(nid, zone, start_pfn, nr_pages);
+		rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
+				 start_pfn, size_pages);
 		if (rc)
 			break;
 		start_pfn += nr_pages;
 		size_pages -= nr_pages;
-		if (!size_pages)
-			break;
 	}
 	if (rc)
 		vmem_remove_mapping(start, size);
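The rewritten loop carves the hotplugged pfn range into at most three chunks at the DMA and NORMAL zone boundaries, with anything beyond DRAM going to MOVABLE. A standalone model of that splitting (the boundary values here are made up for the demo):

#include <stdio.h>

int main(void)
{
	unsigned long dma_end_pfn = 0x80000;	/* 2 GiB with 4 KiB pages */
	unsigned long normal_end_pfn = 0x100000;
	unsigned long start_pfn = 0x7ff00, size_pages = 0x80300;
	unsigned long nr_pages;
	const char *zone;

	while (size_pages > 0) {
		if (start_pfn < dma_end_pfn) {
			nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
				   dma_end_pfn - start_pfn : size_pages;
			zone = "DMA";
		} else if (start_pfn < normal_end_pfn) {
			nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
				   normal_end_pfn - start_pfn : size_pages;
			zone = "NORMAL";
		} else {
			nr_pages = size_pages;
			zone = "MOVABLE";
		}
		printf("add %#lx pages at pfn %#lx to ZONE_%s\n",
		       nr_pages, start_pfn, zone);
		start_pfn += nr_pages;
		size_pages -= nr_pages;
	}
	return 0;
}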

+ 89 - 136
arch/s390/mm/pgtable.c

@@ -10,11 +10,7 @@
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
 #include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
 #include <linux/spinlock.h>
 #include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/quicklist.h>
 #include <linux/rcupdate.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
 #include <linux/swapops.h>
@@ -28,12 +24,9 @@
 #include <asm/tlbflush.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/mmu_context.h>
 
 
-#define ALLOC_ORDER	2
-#define FRAG_MASK	0x03
-
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
 {
-	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	struct page *page = alloc_pages(GFP_KERNEL, 2);
 
 
 	if (!page)
 	if (!page)
 		return NULL;
 		return NULL;
@@ -42,7 +35,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
 
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
 {
-	free_pages((unsigned long) table, ALLOC_ORDER);
+	free_pages((unsigned long) table, 2);
 }
 }
 
 
 static void __crst_table_upgrade(void *arg)
 static void __crst_table_upgrade(void *arg)
@@ -176,7 +169,7 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
 	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
 	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
 	spin_lock_init(&gmap->guest_table_lock);
 	spin_lock_init(&gmap->guest_table_lock);
 	gmap->mm = mm;
 	gmap->mm = mm;
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 	if (!page)
 		goto out_free;
 		goto out_free;
 	page->index = 0;
 	page->index = 0;
@@ -247,7 +240,7 @@ void gmap_free(struct gmap *gmap)
 
 
 	/* Free all segment & region tables. */
 	/* Free all segment & region tables. */
 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->host_to_guest);
 	gmap_radix_tree_free(&gmap->host_to_guest);
 	down_write(&gmap->mm->mmap_sem);
 	down_write(&gmap->mm->mmap_sem);
@@ -287,7 +280,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	unsigned long *new;
 	unsigned long *new;
 
 
 	/* since we dont free the gmap table until gmap_free we can unlock */
 	/* since we dont free the gmap table until gmap_free we can unlock */
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 	if (!page)
 		return -ENOMEM;
 		return -ENOMEM;
 	new = (unsigned long *) page_to_phys(page);
 	new = (unsigned long *) page_to_phys(page);
@@ -302,7 +295,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	}
 	}
 	spin_unlock(&gmap->mm->page_table_lock);
 	spin_unlock(&gmap->mm->page_table_lock);
 	if (page)
 	if (page)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -795,40 +788,6 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
 }
 }
 EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
 EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
 
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return atomic_read(&page->_mapcount) == 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	struct page *page;
-	unsigned long *table;
-
-	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!page)
-		return NULL;
-	if (!pgtable_page_ctor(page)) {
-		__free_page(page);
-		return NULL;
-	}
-	atomic_set(&page->_mapcount, 0);
-	table = (unsigned long *) page_to_phys(page);
-	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
-	return table;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-	struct page *page;
-
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	pgtable_page_dtor(page);
-	atomic_set(&page->_mapcount, -1);
-	__free_page(page);
-}
-
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq)
 			  unsigned long key, bool nq)
 {
 {
@@ -957,20 +916,6 @@ __initcall(page_table_register_sysctl);
 
 
 #else /* CONFIG_PGSTE */
 #else /* CONFIG_PGSTE */
 
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-}
-
 static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
 static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
 			unsigned long vmaddr)
 			unsigned long vmaddr)
 {
 {
@@ -994,44 +939,55 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
  */
  */
 unsigned long *page_table_alloc(struct mm_struct *mm)
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
 {
-	unsigned long *uninitialized_var(table);
-	struct page *uninitialized_var(page);
+	unsigned long *table;
+	struct page *page;
 	unsigned int mask, bit;
 	unsigned int mask, bit;
 
 
-	if (mm_alloc_pgste(mm))
-		return page_table_alloc_pgste(mm);
-	/* Allocate fragments of a 4K page as 1K/2K page table */
-	spin_lock_bh(&mm->context.list_lock);
-	mask = FRAG_MASK;
-	if (!list_empty(&mm->context.pgtable_list)) {
-		page = list_first_entry(&mm->context.pgtable_list,
-					struct page, lru);
-		table = (unsigned long *) page_to_phys(page);
-		mask = atomic_read(&page->_mapcount);
-		mask = mask | (mask >> 4);
-	}
-	if ((mask & FRAG_MASK) == FRAG_MASK) {
-		spin_unlock_bh(&mm->context.list_lock);
-		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-		if (!page)
-			return NULL;
-		if (!pgtable_page_ctor(page)) {
-			__free_page(page);
-			return NULL;
+	/* Try to get a fragment of a 4K page as a 2K page table */
+	if (!mm_alloc_pgste(mm)) {
+		table = NULL;
+		spin_lock_bh(&mm->context.list_lock);
+		if (!list_empty(&mm->context.pgtable_list)) {
+			page = list_first_entry(&mm->context.pgtable_list,
+						struct page, lru);
+			mask = atomic_read(&page->_mapcount);
+			mask = (mask | (mask >> 4)) & 3;
+			if (mask != 3) {
+				table = (unsigned long *) page_to_phys(page);
+				bit = mask & 1;		/* =1 -> second 2K */
+				if (bit)
+					table += PTRS_PER_PTE;
+				atomic_xor_bits(&page->_mapcount, 1U << bit);
+				list_del(&page->lru);
+			}
 		}
 		}
+		spin_unlock_bh(&mm->context.list_lock);
+		if (table)
+			return table;
+	}
+	/* Allocate a fresh page */
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	/* Initialize page table */
+	table = (unsigned long *) page_to_phys(page);
+	if (mm_alloc_pgste(mm)) {
+		/* Return 4K page table with PGSTEs */
+		atomic_set(&page->_mapcount, 3);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+		clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	} else {
+		/* Return the first 2K fragment of the page */
 		atomic_set(&page->_mapcount, 1);
-		table = (unsigned long *) page_to_phys(page);
 		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
 		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
-	} else {
-		for (bit = 1; mask & bit; bit <<= 1)
-			table += PTRS_PER_PTE;
-		mask = atomic_xor_bits(&page->_mapcount, bit);
-		if ((mask & FRAG_MASK) == FRAG_MASK)
-			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
 	}
-	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
@@ -1041,37 +997,23 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	unsigned int bit, mask;
 
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page))
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
-	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit);
-	if (mask & FRAG_MASK)
-		list_add(&page->lru, &mm->context.pgtable_list);
-	spin_unlock_bh(&mm->context.list_lock);
-	if (mask == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
+	if (!mm_alloc_pgste(mm)) {
+		/* Free 2K page table fragment of a 4K page */
+		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+		spin_lock_bh(&mm->context.list_lock);
+		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+		if (mask & 3)
+			list_add(&page->lru, &mm->context.pgtable_list);
+		else
+			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
+		if (mask != 0)
+			return;
 	}
-}
-
-static void __page_table_free_rcu(void *table, unsigned bit)
-{
-	struct page *page;
 
-	if (bit == FRAG_MASK)
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
-	}
+	pgtable_page_dtor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
 }
 
 void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
@@ -1083,34 +1025,45 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 
 	mm = tlb->mm;
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page)) {
+	if (mm_alloc_pgste(mm)) {
 		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) (__pa(table) | FRAG_MASK);
+		table = (unsigned long *) (__pa(table) | 3);
 		tlb_remove_table(tlb, table);
 		return;
 	}
-	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
-	if (mask & FRAG_MASK)
+	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+	if (mask & 3)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
+	else
+		list_del(&page->lru);
 	spin_unlock_bh(&mm->context.list_lock);
-	table = (unsigned long *) (__pa(table) | (bit << 4));
+	table = (unsigned long *) (__pa(table) | (1U << bit));
 	tlb_remove_table(tlb, table);
 }
 
 static void __tlb_remove_table(void *_table)
 {
-	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
-	void *table = (void *)((unsigned long) _table & ~mask);
-	unsigned type = (unsigned long) _table & mask;
-
-	if (type)
-		__page_table_free_rcu(table, type);
-	else
-		free_pages((unsigned long) table, ALLOC_ORDER);
+	unsigned int mask = (unsigned long) _table & 3;
+	void *table = (void *)((unsigned long) _table ^ mask);
+	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+	switch (mask) {
+	case 0:		/* pmd or pud */
+		free_pages((unsigned long) table, 2);
+		break;
+	case 1:		/* lower 2K of a 4K page table */
+	case 2:		/* higher 2K of a 4K page table */
+		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+			break;
+		/* fallthrough */
+	case 3:		/* 4K page table with pgstes */
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
+		break;
+	}
 }
 
 static void tlb_remove_table_smp_sync(void *arg)

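An aside on the fragment bookkeeping introduced above: page_table_alloc()/page_table_free() now encode the state of the two 2K halves of a 4K page in bits 0-1 of page->_mapcount, and __tlb_remove_table() reuses the same bits shifted left by four for halves pending RCU removal. The following is a minimal user-space sketch of that encoding, not kernel code; xor_bits() merely stands in for the kernel's atomic_xor_bits():

#include <stdio.h>

#define FRAG_LOWER 1U	/* bit 0: lower 2K half of the 4K page in use */
#define FRAG_UPPER 2U	/* bit 1: upper 2K half of the 4K page in use */

/* stand-in for atomic_xor_bits(&page->_mapcount, bits) */
static unsigned int xor_bits(unsigned int *v, unsigned int bits)
{
	*v ^= bits;
	return *v;
}

int main(void)
{
	unsigned int mapcount = 0;	/* fresh page: both halves free */
	unsigned int mask;

	xor_bits(&mapcount, FRAG_LOWER);	/* hand out the lower half */
	xor_bits(&mapcount, FRAG_UPPER);	/* hand out the upper half */
	printf("both halves in use: mapcount=%u\n", mapcount);	/* prints 3 */

	/* free the lower half: a non-zero (mask & 3) means the page still
	   has a used half and stays on mm->context.pgtable_list */
	mask = xor_bits(&mapcount, FRAG_LOWER);
	printf("after freeing one half: %s\n",
	       mask & 3 ? "page kept on list" : "page can be released");
	return 0;
}

The mask value 3 is also what the pgste case and __tlb_remove_table() above use to tag a full 4K table, which is why a single switch on the low two bits can distinguish all cases.
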
+ 3 - 0
arch/s390/numa/Makefile

@@ -0,0 +1,3 @@
+obj-y			+= numa.o
+obj-y			+= toptree.o
+obj-$(CONFIG_NUMA_EMU)	+= mode_emu.o

+ 530 - 0
arch/s390/numa/mode_emu.c

@@ -0,0 +1,530 @@
+/*
+ * NUMA support for s390
+ *
+ * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
+ * without using real topology information about the physical memory of the
+ * machine.
+ *
+ * It distributes the available CPUs to nodes while respecting the original
+ * machine topology information. This is done by trying not to separate
+ * CPUs which reside on the same book or even on the same MC.
+ *
+ * Because the current Linux scheduler code requires a stable cpu to node
+ * mapping, cores are pinned to nodes when the first CPU thread is set online.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa_emu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <linux/memory.h>
+#include <linux/slab.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+#include "numa_mode.h"
+#include "toptree.h"
+
+/* Distances between the different system components */
+#define DIST_EMPTY	0
+#define DIST_CORE	1
+#define DIST_MC		2
+#define DIST_BOOK	3
+#define DIST_MAX	4
+
+/* Node distance reported to common code */
+#define EMU_NODE_DIST	10
+
+/* Node ID for free (not yet pinned) cores */
+#define NODE_ID_FREE	-1
+
+/* Different levels of toptree */
+enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
+
+/* The two toptree IDs */
+enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
+
+/* Number of NUMA nodes */
+static int emu_nodes = 1;
+/* NUMA stripe size */
+static unsigned long emu_size;
+
+/*
+ * Node to core pinning information updates are protected by
+ * "sched_domains_mutex".
+ */
+static struct {
+	s32 to_node_id[CONFIG_NR_CPUS];	/* Pinned core to node mapping */
+	int total;			/* Total number of pinned cores */
+	int per_node_target;		/* Cores per node without extra cores */
+	int per_node[MAX_NUMNODES];	/* Number of cores pinned to node */
+} *emu_cores;
+
+/*
+ * Pin a core to a node
+ */
+static void pin_core_to_node(int core_id, int node_id)
+{
+	if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
+		emu_cores->per_node[node_id]++;
+		emu_cores->to_node_id[core_id] = node_id;
+		emu_cores->total++;
+	} else {
+		WARN_ON(emu_cores->to_node_id[core_id] != node_id);
+	}
+}
+
+/*
+ * Number of pinned cores of a node
+ */
+static int cores_pinned(struct toptree *node)
+{
+	return emu_cores->per_node[node->id];
+}
+
+/*
+ * ID of the node where the core is pinned (or NODE_ID_FREE)
+ */
+static int core_pinned_to_node_id(struct toptree *core)
+{
+	return emu_cores->to_node_id[core->id];
+}
+
+/*
+ * Number of cores in the tree that are not yet pinned
+ */
+static int cores_free(struct toptree *tree)
+{
+	struct toptree *core;
+	int count = 0;
+
+	toptree_for_each(core, tree, CORE) {
+		if (core_pinned_to_node_id(core) == NODE_ID_FREE)
+			count++;
+	}
+	return count;
+}
+
+/*
+ * Return node of core
+ */
+static struct toptree *core_node(struct toptree *core)
+{
+	return core->parent->parent->parent;
+}
+
+/*
+ * Return book of core
+ */
+static struct toptree *core_book(struct toptree *core)
+{
+	return core->parent->parent;
+}
+
+/*
+ * Return mc of core
+ */
+static struct toptree *core_mc(struct toptree *core)
+{
+	return core->parent;
+}
+
+/*
+ * Distance between two cores
+ */
+static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
+{
+	if (core_book(core1)->id != core_book(core2)->id)
+		return DIST_BOOK;
+	if (core_mc(core1)->id != core_mc(core2)->id)
+		return DIST_MC;
+	/* Same core or sibling on same MC */
+	return DIST_CORE;
+}
+
+/*
+ * Distance of a node to a core
+ */
+static int dist_node_to_core(struct toptree *node, struct toptree *core)
+{
+	struct toptree *core_node;
+	int dist_min = DIST_MAX;
+
+	toptree_for_each(core_node, node, CORE)
+		dist_min = min(dist_min, dist_core_to_core(core_node, core));
+	return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
+}
+
+/*
+ * Unify will delete empty nodes, therefore recreate nodes.
+ */
+static void toptree_unify_tree(struct toptree *tree)
+{
+	int nid;
+
+	toptree_unify(tree);
+	for (nid = 0; nid < emu_nodes; nid++)
+		toptree_get_child(tree, nid);
+}
+
+/*
+ * Find the best/nearest node for a given core and ensure that no node
+ * gets more than "emu_cores->per_node_target + extra" cores.
+ */
+static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
+				     int extra)
+{
+	struct toptree *node, *node_best = NULL;
+	int dist_cur, dist_best, cores_target;
+
+	cores_target = emu_cores->per_node_target + extra;
+	dist_best = DIST_MAX;
+	node_best = NULL;
+	toptree_for_each(node, numa, NODE) {
+		/* Already pinned cores must use their nodes */
+		if (core_pinned_to_node_id(core) == node->id) {
+			node_best = node;
+			break;
+		}
+		/* Skip nodes that already have enough cores */
+		if (cores_pinned(node) >= cores_target)
+			continue;
+		dist_cur = dist_node_to_core(node, core);
+		if (dist_cur < dist_best) {
+			dist_best = dist_cur;
+			node_best = node;
+		}
+	}
+	return node_best;
+}
+
+/*
+ * Find the best node for each core with respect to "extra" core count
+ */
+static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
+				   int extra)
+{
+	struct toptree *node, *core, *tmp;
+
+	toptree_for_each_safe(core, tmp, phys, CORE) {
+		node = node_for_core(numa, core, extra);
+		if (!node)
+			return;
+		toptree_move(core, node);
+		pin_core_to_node(core->id, node->id);
+	}
+}
+
+/*
+ * Move structures of given level to specified NUMA node
+ */
+static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
+				    enum toptree_level level, bool perfect)
+{
+	int cores_free, cores_target = emu_cores->per_node_target;
+	struct toptree *cur, *tmp;
+
+	toptree_for_each_safe(cur, tmp, phys, level) {
+		cores_free = cores_target - toptree_count(node, CORE);
+		if (perfect) {
+			if (cores_free == toptree_count(cur, CORE))
+				toptree_move(cur, node);
+		} else {
+			if (cores_free >= toptree_count(cur, CORE))
+				toptree_move(cur, node);
+		}
+	}
+}
+
+/*
+ * Move structures of a given level to NUMA nodes. If "perfect" is specified
+ * move only perfectly fitting structures. Otherwise move also smaller
+ * than needed structures.
+ */
+static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
+			       enum toptree_level level, bool perfect)
+{
+	struct toptree *node;
+
+	toptree_for_each(node, numa, NODE)
+		move_level_to_numa_node(node, phys, level, perfect);
+}
+
+/*
+ * For the first run try to move the big structures
+ */
+static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
+{
+	struct toptree *core;
+
+	/* Always try to move perfectly fitting structures first */
+	move_level_to_numa(numa, phys, BOOK, true);
+	move_level_to_numa(numa, phys, BOOK, false);
+	move_level_to_numa(numa, phys, MC, true);
+	move_level_to_numa(numa, phys, MC, false);
+	/* Now pin all the moved cores */
+	toptree_for_each(core, numa, CORE)
+		pin_core_to_node(core->id, core_node(core)->id);
+}
+
+/*
+ * Allocate new topology and create required nodes
+ */
+static struct toptree *toptree_new(int id, int nodes)
+{
+	struct toptree *tree;
+	int nid;
+
+	tree = toptree_alloc(TOPOLOGY, id);
+	if (!tree)
+		goto fail;
+	for (nid = 0; nid < nodes; nid++) {
+		if (!toptree_get_child(tree, nid))
+			goto fail;
+	}
+	return tree;
+fail:
+	panic("NUMA emulation could not allocate topology");
+}
+
+/*
+ * Allocate and initialize core to node mapping
+ */
+static void create_core_to_node_map(void)
+{
+	int i;
+
+	emu_cores = kzalloc(sizeof(*emu_cores), GFP_KERNEL);
+	if (emu_cores == NULL)
+		panic("Could not allocate cores to node memory");
+	for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
+		emu_cores->to_node_id[i] = NODE_ID_FREE;
+}
+
+/*
+ * Move cores from physical topology into NUMA target topology
+ * and try to keep as much of the physical topology as possible.
+ */
+static struct toptree *toptree_to_numa(struct toptree *phys)
+{
+	static int first = 1;
+	struct toptree *numa;
+	int cores_total;
+
+	cores_total = emu_cores->total + cores_free(phys);
+	emu_cores->per_node_target = cores_total / emu_nodes;
+	numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
+	if (first) {
+		toptree_to_numa_first(numa, phys);
+		first = 0;
+	}
+	toptree_to_numa_single(numa, phys, 0);
+	toptree_to_numa_single(numa, phys, 1);
+	toptree_unify_tree(numa);
+
+	WARN_ON(cpumask_weight(&phys->mask));
+	return numa;
+}
+
+/*
+ * Create a toptree out of the physical topology that we got from the hypervisor
+ */
+static struct toptree *toptree_from_topology(void)
+{
+	struct toptree *phys, *node, *book, *mc, *core;
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	phys = toptree_new(TOPTREE_ID_PHYS, 1);
+
+	for_each_online_cpu(cpu) {
+		top = &per_cpu(cpu_topology, cpu);
+		node = toptree_get_child(phys, 0);
+		book = toptree_get_child(node, top->book_id);
+		mc = toptree_get_child(book, top->socket_id);
+		core = toptree_get_child(mc, top->core_id);
+		if (!book || !mc || !core)
+			panic("NUMA emulation could not allocate memory");
+		cpumask_set_cpu(cpu, &core->mask);
+		toptree_update_mask(mc);
+	}
+	return phys;
+}
+
+/*
+ * Add toptree core to topology and create correct CPU masks
+ */
+static void topology_add_core(struct toptree *core)
+{
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	for_each_cpu(cpu, &core->mask) {
+		top = &per_cpu(cpu_topology, cpu);
+		cpumask_copy(&top->thread_mask, &core->mask);
+		cpumask_copy(&top->core_mask, &core_mc(core)->mask);
+		cpumask_copy(&top->book_mask, &core_book(core)->mask);
+		cpumask_set_cpu(cpu, node_to_cpumask_map[core_node(core)->id]);
+		top->node_id = core_node(core)->id;
+	}
+}
+
+/*
+ * Apply toptree to topology and create CPU masks
+ */
+static void toptree_to_topology(struct toptree *numa)
+{
+	struct toptree *core;
+	int i;
+
+	/* Clear all node masks */
+	for (i = 0; i < MAX_NUMNODES; i++)
+		cpumask_clear(node_to_cpumask_map[i]);
+
+	/* Rebuild all masks */
+	toptree_for_each(core, numa, CORE)
+		topology_add_core(core);
+}
+
+/*
+ * Show the node to core mapping
+ */
+static void print_node_to_core_map(void)
+{
+	int nid, cid;
+
+	if (!numa_debug_enabled)
+		return;
+	printk(KERN_DEBUG "NUMA node to core mapping\n");
+	for (nid = 0; nid < emu_nodes; nid++) {
+		printk(KERN_DEBUG "  node %3d: ", nid);
+		for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
+			if (emu_cores->to_node_id[cid] == nid)
+				printk(KERN_CONT "%d ", cid);
+		}
+		printk(KERN_CONT "\n");
+	}
+}
+
+/*
+ * Transfer physical topology into a NUMA topology and modify CPU masks
+ * according to the NUMA topology.
+ *
+ * Must be called with "sched_domains_mutex" lock held.
+ */
+static void emu_update_cpu_topology(void)
+{
+	struct toptree *phys, *numa;
+
+	if (emu_cores == NULL)
+		create_core_to_node_map();
+	phys = toptree_from_topology();
+	numa = toptree_to_numa(phys);
+	toptree_free(phys);
+	toptree_to_topology(numa);
+	toptree_free(numa);
+	print_node_to_core_map();
+}
+
+/*
+ * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
+ * alignment (needed for memory hotplug).
+ */
+static unsigned long emu_setup_size_adjust(unsigned long size)
+{
+	size = size ? : CONFIG_EMU_SIZE;
+	size = roundup(size, memory_block_size_bytes());
+	return size;
+}
+
+/*
+ * If there is not enough memory for the specified nodes, reduce the node count.
+ */
+static int emu_setup_nodes_adjust(int nodes)
+{
+	int nodes_max;
+
+	nodes_max = memblock.memory.total_size / emu_size;
+	nodes_max = max(nodes_max, 1);
+	if (nodes_max >= nodes)
+		return nodes;
+	pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
+	return nodes_max;
+}
+
+/*
+ * Early emu setup
+ */
+static void emu_setup(void)
+{
+	emu_size = emu_setup_size_adjust(emu_size);
+	emu_nodes = emu_setup_nodes_adjust(emu_nodes);
+	pr_info("Creating %d nodes with memory stripe size %ld MB\n",
+		emu_nodes, emu_size >> 20);
+}
+
+/*
+ * Return node id for given page number
+ */
+static int emu_pfn_to_nid(unsigned long pfn)
+{
+	return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
+}
+
+/*
+ * Return stripe size
+ */
+static unsigned long emu_align(void)
+{
+	return emu_size;
+}
+
+/*
+ * Return distance between two nodes
+ */
+static int emu_distance(int node1, int node2)
+{
+	return (node1 != node2) * EMU_NODE_DIST;
+}
+
+/*
+ * Define callbacks for generic s390 NUMA infrastructure
+ */
+const struct numa_mode numa_mode_emu = {
+	.name = "emu",
+	.setup = emu_setup,
+	.update_cpu_topology = emu_update_cpu_topology,
+	.__pfn_to_nid = emu_pfn_to_nid,
+	.align = emu_align,
+	.distance = emu_distance,
+};
+
+/*
+ * Kernel parameter: emu_nodes=<n>
+ */
+static int __init early_parse_emu_nodes(char *p)
+{
+	int count;
+
+	if (kstrtoint(p, 0, &count) != 0 || count <= 0)
+		return 0;
+	emu_nodes = min(count, MAX_NUMNODES);
+	return 0;
+}
+early_param("emu_nodes", early_parse_emu_nodes);
+
+/*
+ * Kernel parameter: emu_size=[<n>[k|M|G|T]]
+ */
+static int __init early_parse_emu_size(char *p)
+{
+	emu_size = memparse(p, NULL);
+	return 0;
+}
+early_param("emu_size", early_parse_emu_size);

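For illustration only: emu_pfn_to_nid() above stripes memory across the emulated nodes round-robin. A standalone sketch with assumed values (512 MB stripes, four nodes, 4K pages; none of these numbers come from the patch):

#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long emu_size = 512UL << 20;	/* assumed 512 MB stripe */
static int emu_nodes = 4;			/* assumed node count */

/* same formula as emu_pfn_to_nid() above */
static int emu_pfn_to_nid(unsigned long pfn)
{
	return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
}

int main(void)
{
	unsigned long addr;

	/* stripes are dealt out to the nodes 0,1,2,3,0,1,... */
	for (addr = 0; addr < (4UL << 30); addr += emu_size)
		printf("%4lu MB -> node %d\n", addr >> 20,
		       emu_pfn_to_nid(addr >> PAGE_SHIFT));
	return 0;
}
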
+ 184 - 0
arch/s390/numa/numa.c

@@ -0,0 +1,184 @@
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/node.h>
+
+#include <asm/numa.h>
+#include "numa_mode.h"
+
+pg_data_t *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+const struct numa_mode numa_mode_plain = {
+	.name = "plain",
+};
+
+static const struct numa_mode *mode = &numa_mode_plain;
+
+int numa_pfn_to_nid(unsigned long pfn)
+{
+	return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
+}
+
+void numa_update_cpu_topology(void)
+{
+	if (mode->update_cpu_topology)
+		mode->update_cpu_topology();
+}
+
+int __node_distance(int a, int b)
+{
+	return mode->distance ? mode->distance(a, b) : 0;
+}
+
+int numa_debug_enabled;
+
+/*
+ * alloc_node_data() - Allocate node data
+ */
+static __init pg_data_t *alloc_node_data(void)
+{
+	pg_data_t *res;
+
+	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
+	if (!res)
+		panic("Could not allocate memory for node data!\n");
+	memset(res, 0, sizeof(pg_data_t));
+	return res;
+}
+
+/*
+ * numa_setup_memory() - Assign bootmem to nodes
+ *
+ * The memory is first added to memblock without any respect to nodes.
+ * This is fixed before remaining memblock memory is handed over to the
+ * buddy allocator.
+ * An important side effect is that large bootmem allocations might easily
+ * cross node boundaries, which can be necessary for large allocations when
+ * the memory stripes in each node are small (i.e. when using NUMA emulation).
+ *
+ * Memory defines nodes:
+ * Therefore this routine also sets the nodes online with memory.
+ */
+static void __init numa_setup_memory(void)
+{
+	unsigned long cur_base, align, end_of_dram;
+	int nid = 0;
+
+	end_of_dram = memblock_end_of_DRAM();
+	align = mode->align ? mode->align() : ULONG_MAX;
+
+	/*
+	 * Step through all available memory and assign it to the nodes
+	 * indicated by the mode implementation.
+	 * All nodes which are seen here will be set online.
+	 */
+	cur_base = 0;
+	do {
+		nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
+		node_set_online(nid);
+		memblock_set_node(cur_base, align, &memblock.memory, nid);
+		cur_base += align;
+	} while (cur_base < end_of_dram);
+
+	/* Allocate and fill out node_data */
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		NODE_DATA(nid) = alloc_node_data();
+
+	for_each_online_node(nid) {
+		unsigned long start_pfn, end_pfn;
+		unsigned long t_start, t_end;
+		int i;
+
+		start_pfn = ULONG_MAX;
+		end_pfn = 0;
+		for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
+			if (t_start < start_pfn)
+				start_pfn = t_start;
+			if (t_end > end_pfn)
+				end_pfn = t_end;
+		}
+		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+		NODE_DATA(nid)->node_id = nid;
+	}
+}
+
+/*
+ * numa_setup() - Earliest initialization
+ *
+ * Assign the mode and call the mode's setup routine.
+ */
+void __init numa_setup(void)
+{
+	pr_info("NUMA mode: %s\n", mode->name);
+	if (mode->setup)
+		mode->setup();
+	numa_setup_memory();
+	memblock_dump_all();
+}
+
+/*
+ * numa_init_early() - Initialization initcall
+ *
+ * This runs when only one CPU is online and before the first
+ * topology update is called for by the scheduler.
+ */
+static int __init numa_init_early(void)
+{
+	/* Attach all possible CPUs to node 0 for now. */
+	cpumask_copy(node_to_cpumask_map[0], cpu_possible_mask);
+	return 0;
+}
+early_initcall(numa_init_early);
+
+/*
+ * numa_init_late() - Initialization initcall
+ *
+ * Register NUMA nodes.
+ */
+static int __init numa_init_late(void)
+{
+	int nid;
+
+	for_each_online_node(nid)
+		register_one_node(nid);
+	return 0;
+}
+device_initcall(numa_init_late);
+
+static int __init parse_debug(char *parm)
+{
+	numa_debug_enabled = 1;
+	return 0;
+}
+early_param("numa_debug", parse_debug);
+
+static int __init parse_numa(char *parm)
+{
+	if (strcmp(parm, numa_mode_plain.name) == 0)
+		mode = &numa_mode_plain;
+#ifdef CONFIG_NUMA_EMU
+	if (strcmp(parm, numa_mode_emu.name) == 0)
+		mode = &numa_mode_emu;
+#endif
+	return 0;
+}
+early_param("numa", parse_numa);

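A sketch of the dispatch pattern numa.c uses: every hook in struct numa_mode is optional, and the exported wrappers fall back to a plain default when a mode leaves a hook NULL. Reduced here to two hooks for illustration; this is not the kernel code itself:

#include <stdio.h>

struct numa_mode {
	const char *name;
	int (*__pfn_to_nid)(unsigned long pfn);	/* optional hook */
	int (*distance)(int a, int b);		/* optional hook */
};

static const struct numa_mode numa_mode_plain = { .name = "plain" };
static const struct numa_mode *mode = &numa_mode_plain;

/* NULL hook -> everything lives on node 0 */
static int numa_pfn_to_nid(unsigned long pfn)
{
	return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
}

/* NULL hook -> all nodes are equally close */
static int __node_distance(int a, int b)
{
	return mode->distance ? mode->distance(a, b) : 0;
}

int main(void)
{
	printf("%s: nid=%d dist=%d\n", mode->name,
	       numa_pfn_to_nid(0x1000), __node_distance(0, 1));
	return 0;
}

This keeps the "plain" mode essentially free of code: it is just a named struct with every hook unset, while "emu" fills in all five callbacks.
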
+ 24 - 0
arch/s390/numa/numa_mode.h

@@ -0,0 +1,24 @@
+/*
+ * NUMA support for s390
+ *
+ * Define declarations used for communication between NUMA mode
+ * implementations and NUMA core functionality.
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef __S390_NUMA_MODE_H
+#define __S390_NUMA_MODE_H
+
+struct numa_mode {
+	char *name;				/* Name of mode */
+	void (*setup)(void);			/* Initialize mode */
+	void (*update_cpu_topology)(void);	/* Called by topology code */
+	int (*__pfn_to_nid)(unsigned long pfn);	/* PFN to node ID */
+	unsigned long (*align)(void);		/* Minimum node alignment */
+	int (*distance)(int a, int b);		/* Distance between two nodes */
+};
+
+extern const struct numa_mode numa_mode_plain;
+extern const struct numa_mode numa_mode_emu;
+
+#endif /* __S390_NUMA_MODE_H */

+ 342 - 0
arch/s390/numa/toptree.c

@@ -0,0 +1,342 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <asm/numa.h>
+
+#include "toptree.h"
+
+/**
+ * toptree_alloc - Allocate and initialize a new tree node.
+ * @level: The node's vertical level; level 0 contains the leaves.
+ * @id: ID number, explicitly not unique beyond scope of node's siblings
+ *
+ * Allocate a new tree node and initialize it.
+ *
+ * RETURNS:
+ * Pointer to the new tree node or NULL on error
+ */
+struct toptree *toptree_alloc(int level, int id)
+{
+	struct toptree *res = kzalloc(sizeof(struct toptree), GFP_KERNEL);
+
+	if (!res)
+		return res;
+
+	INIT_LIST_HEAD(&res->children);
+	INIT_LIST_HEAD(&res->sibling);
+	cpumask_clear(&res->mask);
+	res->level = level;
+	res->id = id;
+	return res;
+}
+
+/**
+ * toptree_remove - Remove a tree node from a tree
+ * @cand: Pointer to the node to remove
+ *
+ * The node is detached from its parent node. The parent node's
+ * masks will be updated to reflect the loss of the child.
+ */
+static void toptree_remove(struct toptree *cand)
+{
+	struct toptree *oldparent;
+
+	list_del_init(&cand->sibling);
+	oldparent = cand->parent;
+	cand->parent = NULL;
+	toptree_update_mask(oldparent);
+}
+
+/**
+ * toptree_free - discard a tree node
+ * @cand: Pointer to the tree node to discard
+ *
+ * Checks if @cand is attached to a parent node. Detaches it
+ * cleanly using toptree_remove. Possible children are freed
+ * recursively. In the end @cand itself is freed.
+ */
+void toptree_free(struct toptree *cand)
+{
+	struct toptree *child, *tmp;
+
+	if (cand->parent)
+		toptree_remove(cand);
+	toptree_for_each_child_safe(child, tmp, cand)
+		toptree_free(child);
+	kfree(cand);
+}
+
+/**
+ * toptree_update_mask - Update node bitmasks
+ * @cand: Pointer to a tree node
+ *
+ * The node's cpumask will be updated by combining all children's
+ * masks. Then toptree_update_mask is called recursively for the
+ * parent if applicable.
+ *
+ * NOTE:
+ * This must not be called on leaves. If called on a leaf, its
+ * CPU mask is cleared and lost.
+ */
+void toptree_update_mask(struct toptree *cand)
+{
+	struct toptree *child;
+
+	cpumask_clear(&cand->mask);
+	list_for_each_entry(child, &cand->children, sibling)
+		cpumask_or(&cand->mask, &cand->mask, &child->mask);
+	if (cand->parent)
+		toptree_update_mask(cand->parent);
+}
+
+/**
+ * toptree_insert - Insert a tree node into tree
+ * @cand: Pointer to the node to insert
+ * @target: Pointer to the node to which @cand will added as a child
+ *
+ * Insert a tree node into a tree. Masks will be updated automatically.
+ *
+ * RETURNS:
+ * 0 on success, -1 if NULL is passed as argument or the node levels
+ * don't fit.
+ */
+static int toptree_insert(struct toptree *cand, struct toptree *target)
+{
+	if (!cand || !target)
+		return -1;
+	if (target->level != (cand->level + 1))
+		return -1;
+	list_add_tail(&cand->sibling, &target->children);
+	cand->parent = target;
+	toptree_update_mask(target);
+	return 0;
+}
+
+/**
+ * toptree_move_children - Move all child nodes of a node to a new place
+ * @cand: Pointer to the node whose children are to be moved
+ * @target: Pointer to the node to which @cand's children will be attached
+ *
+ * Take all child nodes of @cand and move them using toptree_move.
+ */
+static void toptree_move_children(struct toptree *cand, struct toptree *target)
+{
+	struct toptree *child, *tmp;
+
+	toptree_for_each_child_safe(child, tmp, cand)
+		toptree_move(child, target);
+}
+
+/**
+ * toptree_unify - Merge children with same ID
+ * @cand: Pointer to node whose direct children should be made unique
+ *
+ * When mangling the tree it is possible that a node has two or more children
+ * which have the same ID. This routine merges these children into one and
+ * moves all children of the merged nodes into the unified node.
+ */
+void toptree_unify(struct toptree *cand)
+{
+	struct toptree *child, *tmp, *cand_copy;
+
+	/* Threads cannot be split, cores are not split */
+	if (cand->level < 2)
+		return;
+
+	cand_copy = toptree_alloc(cand->level, 0);
+	toptree_for_each_child_safe(child, tmp, cand) {
+		struct toptree *tmpchild;
+
+		if (!cpumask_empty(&child->mask)) {
+			tmpchild = toptree_get_child(cand_copy, child->id);
+			toptree_move_children(child, tmpchild);
+		}
+		toptree_free(child);
+	}
+	toptree_move_children(cand_copy, cand);
+	toptree_free(cand_copy);
+
+	toptree_for_each_child(child, cand)
+		toptree_unify(child);
+}
+
+/**
+ * toptree_move - Move a node to another context
+ * @cand: Pointer to the node to move
+ * @target: Pointer to the node where @cand should go
+ *
+ * In the easiest case @cand is exactly on the level below @target
+ * and will be immediately moved to the target.
+ *
+ * If @target's level is not the direct parent level of @cand,
+ * nodes for the missing levels are created and put between
+ * @cand and @target. The "stacking" nodes' IDs are taken from
+ * @cand's parents.
+ *
+ * After this it is likely to have redundant nodes in the tree
+ * which are addressed by means of toptree_unify.
+ */
+void toptree_move(struct toptree *cand, struct toptree *target)
+{
+	struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
+
+	if (cand->level + 1 == target->level) {
+		toptree_remove(cand);
+		toptree_insert(cand, target);
+		return;
+	}
+
+	real_insert_point = NULL;
+	ptr = cand;
+	stack_target = NULL;
+
+	do {
+		tmp = stack_target;
+		stack_target = toptree_alloc(ptr->level + 1,
+					     ptr->parent->id);
+		toptree_insert(tmp, stack_target);
+		if (!real_insert_point)
+			real_insert_point = stack_target;
+		ptr = ptr->parent;
+	} while (stack_target->level < (target->level - 1));
+
+	toptree_remove(cand);
+	toptree_insert(cand, real_insert_point);
+	toptree_insert(stack_target, target);
+}
+
+/**
+ * toptree_get_child - Access a tree node's child by its ID
+ * @cand: Pointer to tree node whose child is to access
+ * @id: The desired child's ID
+ *
+ * @cand's children are searched for a child with matching ID.
+ * If no match can be found, a new child with the desired ID
+ * is created and returned.
+ */
+struct toptree *toptree_get_child(struct toptree *cand, int id)
+{
+	struct toptree *child;
+
+	toptree_for_each_child(child, cand)
+		if (child->id == id)
+			return child;
+	child = toptree_alloc(cand->level-1, id);
+	toptree_insert(child, cand);
+	return child;
+}
+
+/**
+ * toptree_first - Find the first descendant on specified level
+ * @context: Pointer to tree node whose descendants are to be used
+ * @level: The level of interest
+ *
+ * RETURNS:
+ * @context's first descendant on the specified level, or NULL
+ * if there is no matching descendant
+ */
+struct toptree *toptree_first(struct toptree *context, int level)
+{
+	struct toptree *child, *tmp;
+
+	if (context->level == level)
+		return context;
+
+	if (!list_empty(&context->children)) {
+		list_for_each_entry(child, &context->children, sibling) {
+			tmp = toptree_first(child, level);
+			if (tmp)
+				return tmp;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * toptree_next_sibling - Return next sibling
+ * @cur: Pointer to a tree node
+ *
+ * RETURNS:
+ * If @cur has a parent and is not the last in the parent's children list,
+ * the next sibling is returned. Or NULL when there are no siblings left.
+ */
+static struct toptree *toptree_next_sibling(struct toptree *cur)
+{
+	if (cur->parent == NULL)
+		return NULL;
+
+	if (cur == list_last_entry(&cur->parent->children,
+				   struct toptree, sibling))
+		return NULL;
+	return (struct toptree *) list_next_entry(cur, sibling);
+}
+
+/**
+ * toptree_next - Tree traversal function
+ * @cur: Pointer to current element
+ * @context: Pointer to the root node of the tree or subtree to
+ * be traversed.
+ * @level: The level of interest.
+ *
+ * RETURNS:
+ * Pointer to the next node on level @level
+ * or NULL when there is no next node.
+ */
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+			     int level)
+{
+	struct toptree *cur_context, *tmp;
+
+	if (!cur)
+		return NULL;
+
+	if (context->level == level)
+		return NULL;
+
+	tmp = toptree_next_sibling(cur);
+	if (tmp != NULL)
+		return tmp;
+
+	cur_context = cur;
+	while (cur_context->level < context->level - 1) {
+		/* Step up */
+		cur_context = cur_context->parent;
+		/* Step aside */
+		tmp = toptree_next_sibling(cur_context);
+		if (tmp != NULL) {
+			/* Step down */
+			tmp = toptree_first(tmp, level);
+			if (tmp != NULL)
+				return tmp;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * toptree_count - Count descendants on specified level
+ * @context: Pointer to node whose descendants are to be considered
+ * @level: Only descendants on the specified level will be counted
+ *
+ * RETURNS:
+ * Number of descendants on the specified level
+ */
+int toptree_count(struct toptree *context, int level)
+{
+	struct toptree *cur;
+	int cnt = 0;
+
+	toptree_for_each(cur, context, level)
+		cnt++;
+	return cnt;
+}

+ 60 - 0
arch/s390/numa/toptree.h

@@ -0,0 +1,60 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef S390_TOPTREE_H
+#define S390_TOPTREE_H
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+struct toptree {
+	int level;
+	int id;
+	cpumask_t mask;
+	struct toptree *parent;
+	struct list_head sibling;
+	struct list_head children;
+};
+
+struct toptree *toptree_alloc(int level, int id);
+void toptree_free(struct toptree *cand);
+void toptree_update_mask(struct toptree *cand);
+void toptree_unify(struct toptree *cand);
+struct toptree *toptree_get_child(struct toptree *cand, int id);
+void toptree_move(struct toptree *cand, struct toptree *target);
+int toptree_count(struct toptree *context, int level);
+
+struct toptree *toptree_first(struct toptree *context, int level);
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+			     int level);
+
+#define toptree_for_each_child(child, ptree)				\
+	list_for_each_entry(child,  &ptree->children, sibling)
+
+#define toptree_for_each_child_safe(child, ptmp, ptree)			\
+	list_for_each_entry_safe(child, ptmp, &ptree->children, sibling)
+
+#define toptree_is_last(ptree)					\
+	((ptree->parent == NULL) ||				\
+	 (ptree->parent->children.prev == &ptree->sibling))
+
+#define toptree_for_each(ptree, cont, ttype)		\
+	for (ptree = toptree_first(cont, ttype);	\
+	     ptree != NULL;				\
+	     ptree = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_safe(ptree, tmp, cont, ttype)		\
+	for (ptree = toptree_first(cont, ttype),		\
+		     tmp = toptree_next(ptree, cont, ttype);	\
+	     ptree != NULL;					\
+	     ptree = tmp,					\
+		     tmp = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_sibling(ptree, start)			\
+	toptree_for_each(ptree, start->parent, start->level)
+
+#endif /* S390_TOPTREE_H */

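A self-contained miniature of the toptree traversal contract above: toptree_first() finds the left-most descendant on a given level and toptree_next() steps to the next one, which is what the toptree_for_each() macro iterates with. The mock below uses fixed-size child arrays instead of list_head and invented helper names, purely to illustrate the first/next walk:

#include <stdio.h>
#include <stddef.h>

struct node {
	int level, id;
	struct node *parent;
	struct node *child[4];
	int nr;
};

/* left-most descendant on the requested level, cf. toptree_first() */
static struct node *first(struct node *ctx, int level)
{
	if (ctx->level == level)
		return ctx;
	for (int i = 0; i < ctx->nr; i++) {
		struct node *t = first(ctx->child[i], level);
		if (t)
			return t;
	}
	return NULL;
}

static struct node *next_sibling(struct node *cur)
{
	struct node *p = cur->parent;

	if (!p)
		return NULL;
	for (int i = 0; i < p->nr - 1; i++)
		if (p->child[i] == cur)
			return p->child[i + 1];
	return NULL;
}

/* step aside, else step up/aside/down again, cf. toptree_next() */
static struct node *next(struct node *cur, struct node *ctx, int level)
{
	struct node *t = next_sibling(cur);

	if (t)
		return t;
	for (struct node *up = cur->parent; up && up != ctx; up = up->parent) {
		t = next_sibling(up);
		if (t)
			return first(t, level);
	}
	return NULL;
}

static void attach(struct node *p, struct node *c)
{
	c->parent = p;
	p->child[p->nr++] = c;
}

int main(void)
{
	struct node root = { .level = 2 };
	struct node b0 = { .level = 1, .id = 0 }, b1 = { .level = 1, .id = 1 };
	struct node c0 = { .level = 0, .id = 0 }, c1 = { .level = 0, .id = 1 },
		    c2 = { .level = 0, .id = 2 };

	attach(&root, &b0);
	attach(&root, &b1);
	attach(&b0, &c0);
	attach(&b0, &c1);
	attach(&b1, &c2);

	/* equivalent of toptree_for_each(cur, &root, 0): prints cores 0,1,2 */
	for (struct node *cur = first(&root, 0); cur; cur = next(cur, &root, 0))
		printf("core %d\n", cur->id);
	return 0;
}
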
+ 17 - 17
arch/s390/pci/pci.c

@@ -76,11 +76,6 @@ EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
 static struct kmem_cache *zdev_fmb_cache;
 
-struct zpci_dev *get_zdev(struct pci_dev *pdev)
-{
-	return (struct zpci_dev *) pdev->sysdata;
-}
-
 struct zpci_dev *get_zdev_by_fid(u32 fid)
 {
 	struct zpci_dev *tmp, *zdev = NULL;
@@ -269,7 +264,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
 			      unsigned long offset,
 			      unsigned long max)
 {
-	struct zpci_dev *zdev =	get_zdev(pdev);
+	struct zpci_dev *zdev =	to_zpci(pdev);
 	u64 addr;
 	int idx;
 
@@ -385,7 +380,7 @@ static void zpci_irq_handler(struct airq_struct *airq)
 
 int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	unsigned int hwirq, msi_vecs;
 	unsigned long aisb;
 	struct msi_desc *msi;
@@ -460,7 +455,7 @@ out:
 
 void arch_teardown_msi_irqs(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	struct msi_desc *msi;
 	int rc;
 
@@ -637,7 +632,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 	int i;
 
 	for (i = 0; i < PCI_BAR_COUNT; i++) {
-		if (!zdev->bars[i].size)
+		if (!zdev->bars[i].size || !zdev->bars[i].res)
 			continue;
 
 		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
@@ -648,7 +643,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 
 int pcibios_add_device(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	struct resource *res;
 	int i;
 
@@ -673,7 +668,7 @@ void pcibios_release_device(struct pci_dev *pdev)
 
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	zdev->pdev = pdev;
 	zpci_debug_init_device(zdev);
@@ -684,7 +679,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
 
 void pcibios_disable_device(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	zpci_fmb_disable_device(zdev);
 	zpci_debug_exit_device(zdev);
@@ -695,7 +690,7 @@ void pcibios_disable_device(struct pci_dev *pdev)
 static int zpci_restore(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	int ret = 0;
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
@@ -717,7 +712,7 @@ out:
 static int zpci_freeze(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
 		return 0;
@@ -777,17 +772,22 @@ static int zpci_scan_bus(struct zpci_dev *zdev)
 
 	ret = zpci_setup_bus_resources(zdev, &resources);
 	if (ret)
-		return ret;
+		goto error;
 
 	zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
 				      zdev, &resources);
 	if (!zdev->bus) {
-		zpci_cleanup_bus_resources(zdev);
-		return -EIO;
+		ret = -EIO;
+		goto error;
 	}
 	zdev->bus->max_bus_speed = zdev->max_bus_speed;
 	pci_bus_add_devices(zdev->bus);
 	return 0;
+
+error:
+	zpci_cleanup_bus_resources(zdev);
+	pci_free_resource_list(&resources);
+	return ret;
 }
 
 int zpci_enable_device(struct zpci_dev *zdev)

+ 4 - 4
arch/s390/pci/pci_dma.c

@@ -277,7 +277,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 				     enum dma_data_direction direction,
 				     struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long nr_pages, iommu_page_index;
 	unsigned long pa = page_to_phys(page) + offset;
 	int flags = ZPCI_PTE_VALID;
@@ -316,7 +316,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 				 size_t size, enum dma_data_direction direction,
 				 struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long iommu_page_index;
 	int npages;
 
@@ -337,7 +337,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 			    dma_addr_t *dma_handle, gfp_t flag,
 			    struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	struct page *page;
 	unsigned long pa;
 	dma_addr_t map;
@@ -367,7 +367,7 @@ static void s390_dma_free(struct device *dev, size_t size,
 			  void *pa, dma_addr_t dma_handle,
 			  struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 
 	size = PAGE_ALIGN(size);
 	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);

+ 6 - 6
arch/s390/pci/pci_event.c

@@ -46,15 +46,13 @@ struct zpci_ccdf_avail {
 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
 
 	zpci_err("error CCDF:\n");
 	zpci_err_hex(ccdf, sizeof(*ccdf));
 
-	if (!zdev)
-		return;
-
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
-	       pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
+	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
 }
 
 void zpci_event_error(void *data)
@@ -89,7 +87,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		ret = zpci_enable_device(zdev);
 		if (ret)
 			break;
+		pci_lock_rescan_remove();
 		pci_rescan_bus(zdev->bus);
+		pci_unlock_rescan_remove();
 		break;
 	case 0x0302: /* Reserved -> Standby */
 		if (!zdev)
@@ -97,7 +97,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		break;
 	case 0x0303: /* Deconfiguration requested */
 		if (pdev)
-			pci_stop_and_remove_bus_device(pdev);
+			pci_stop_and_remove_bus_device_locked(pdev);
 
 		ret = zpci_disable_device(zdev);
 		if (ret)
@@ -114,7 +114,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 			/* Give the driver a hint that the function is
 			 * already unusable. */
 			pdev->error_state = pci_channel_io_perm_failure;
-			pci_stop_and_remove_bus_device(pdev);
+			pci_stop_and_remove_bus_device_locked(pdev);
 		}
 
 		zdev->fh = ccdf->fh;

+ 23 - 10
arch/s390/pci/pci_insn.c

@@ -8,10 +8,23 @@
 #include <linux/errno.h>
 #include <linux/delay.h>
 #include <asm/pci_insn.h>
+#include <asm/pci_debug.h>
 #include <asm/processor.h>
 
 #define ZPCI_INSN_BUSY_DELAY	1	/* 1 microsecond */
 
+static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+{
+	struct {
+		u8 cc;
+		u8 status;
+		u64 req;
+		u64 offset;
+	} data = {cc, status, req, offset};
+
+	zpci_err_hex(&data, sizeof(data));
+}
+
 /* Modify PCI Function Controls */
 static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
 {
@@ -38,8 +51,8 @@ int zpci_mod_fc(u64 req, struct zpci_fib *fib)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d\n",
-			     __func__, cc, status);
+		zpci_err_insn(cc, status, req, 0);
+
 	return (cc) ? -EIO : 0;
 }
 
@@ -72,8 +85,8 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  dma_addr: %Lx  size: %Lx\n",
-			    __func__, cc, status, addr, range);
+		zpci_err_insn(cc, status, addr, range);
+
 	return (cc) ? -EIO : 0;
 }
 
@@ -121,8 +134,8 @@ int zpci_load(u64 *data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			    __func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_load);
@@ -159,8 +172,8 @@ int zpci_store(u64 data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			__func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_store);
@@ -195,8 +208,8 @@ int zpci_store_block(const u64 *data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			    __func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_store_block);

+ 12 - 5
arch/s390/pci/pci_sysfs.c

@@ -16,7 +16,7 @@
 static ssize_t name##_show(struct device *dev,				\
 			   struct device_attribute *attr, char *buf)	\
 {									\
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));		\
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));		\
 									\
 	return sprintf(buf, fmt, zdev->member);				\
 }									\
@@ -38,23 +38,30 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	int ret;
 
 	if (!device_remove_file_self(dev, attr))
 		return count;
 
+	pci_lock_rescan_remove();
 	pci_stop_and_remove_bus_device(pdev);
 	ret = zpci_disable_device(zdev);
 	if (ret)
-		return ret;
+		goto error;
 
 	ret = zpci_enable_device(zdev);
 	if (ret)
-		return ret;
+		goto error;
 
 	pci_rescan_bus(zdev->bus);
+	pci_unlock_rescan_remove();
+
 	return count;
+
+error:
+	pci_unlock_rescan_remove();
+	return ret;
 }
 static DEVICE_ATTR_WO(recover);
 
@@ -64,7 +71,7 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
 {
 	struct device *dev = kobj_to_dev(kobj);
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	return memory_read_from_buffer(buf, count, &off, zdev->util_str,
 				       sizeof(zdev->util_str));

+ 3 - 3
drivers/s390/block/dasd_alias.c

@@ -58,7 +58,7 @@ static struct alias_server *_find_server(struct dasd_uid *uid)
 		    && !strncmp(pos->uid.serial, uid->serial,
 				sizeof(uid->serial)))
 			return pos;
-	};
+	}
 	return NULL;
 }
 
@@ -69,7 +69,7 @@ static struct alias_lcu *_find_lcu(struct alias_server *server,
 	list_for_each_entry(pos, &server->lculist, lcu) {
 		if (pos->uid.ssid == uid->ssid)
 			return pos;
-	};
+	}
 	return NULL;
 }
 
@@ -97,7 +97,7 @@ static struct alias_pav_group *_find_group(struct alias_lcu *lcu,
 		if (pos->uid.base_unit_addr == search_unit_addr &&
 		    !strncmp(pos->uid.vduit, uid->vduit, sizeof(uid->vduit)))
 			return pos;
-	};
+	}
 	return NULL;
 }
 

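The dasd_alias.c hunks above only drop stray semicolons after the closing braces of list_for_each_entry() loops; the `};` form is legal C (the extra semicolon is an empty statement) but not idiomatic and attracts static-checker warnings. A contrived illustration of the pattern being cleaned up:

#include <stdio.h>

int main(void)
{
	int i;

	for (i = 0; i < 3; i++) {
		printf("%d\n", i);
	};	/* stray ';' - a harmless empty statement */

	for (i = 0; i < 3; i++) {
		printf("%d\n", i);
	}	/* the cleaned-up form the patch converts to */
	return 0;
}
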
+ 252 - 81
drivers/s390/block/dasd_eckd.c

@@ -1036,7 +1036,7 @@ static int dasd_eckd_read_conf(struct dasd_device *device)
 {
 	void *conf_data;
 	int conf_len, conf_data_saved;
-	int rc, path_err;
+	int rc, path_err, pos;
 	__u8 lpm, opm;
 	struct dasd_eckd_private *private, path_private;
 	struct dasd_path *path_data;
@@ -1068,6 +1068,17 @@ static int dasd_eckd_read_conf(struct dasd_device *device)
 			path_data->opm |= lpm;
 			continue;	/* no error */
 		}
+		/* translate path mask to position in mask */
+		pos = 8 - ffs(lpm);
+		kfree(private->path_conf_data[pos]);
+		if ((__u8 *)private->path_conf_data[pos] ==
+		    private->conf_data) {
+			private->conf_data = NULL;
+			private->conf_len = 0;
+			conf_data_saved = 0;
+		}
+		private->path_conf_data[pos] =
+			(struct dasd_conf_data *) conf_data;
 		/* save first valid configuration data */
 		if (!conf_data_saved) {
 			kfree(private->conf_data);
@@ -1095,7 +1106,6 @@ static int dasd_eckd_read_conf(struct dasd_device *device)
 				kfree(conf_data);
 				continue;
 			}
-
 			if (dasd_eckd_compare_path_uid(
 				    device, &path_private)) {
 				uid = &path_private.uid;
@@ -1157,9 +1167,6 @@ static int dasd_eckd_read_conf(struct dasd_device *device)
 		path_data->cablepm &= ~lpm;
 		path_data->hpfpm &= ~lpm;
 		path_data->cuirpm &= ~lpm;
-
-		if (conf_data != private->conf_data)
-			kfree(conf_data);
 	}
 
 	return path_err;
@@ -1259,7 +1266,11 @@ static void do_path_verification_work(struct work_struct *work)
 		schedule_work(work);
 		return;
 	}
-
+	/* check if path verification already running and delay if so */
+	if (test_and_set_bit(DASD_FLAG_PATH_VERIFY, &device->flags)) {
+		schedule_work(work);
+		return;
+	}
 	opm = 0;
 	npm = 0;
 	ppm = 0;
@@ -1402,7 +1413,7 @@ static void do_path_verification_work(struct work_struct *work)
 		device->path_data.hpfpm |= hpfpm;
 		spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
 	}
-
+	clear_bit(DASD_FLAG_PATH_VERIFY, &device->flags);
 	dasd_put_device(device);
 	if (data->isglobal)
 		mutex_unlock(&dasd_path_verification_mutex);
@@ -1810,6 +1821,7 @@ out_err1:
 static void dasd_eckd_uncheck_device(struct dasd_device *device)
 {
 	struct dasd_eckd_private *private;
+	int i;
 
 	private = (struct dasd_eckd_private *) device->private;
 	dasd_alias_disconnect_device_from_lcu(device);
@@ -1818,6 +1830,15 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device)
 	private->vdsneq = NULL;
 	private->gneq = NULL;
 	private->conf_len = 0;
+	for (i = 0; i < 8; i++) {
+		kfree(private->path_conf_data[i]);
+		if ((__u8 *)private->path_conf_data[i] ==
+		    private->conf_data) {
+			private->conf_data = NULL;
+			private->conf_len = 0;
+		}
+		private->path_conf_data[i] = NULL;
+	}
 	kfree(private->conf_data);
 	private->conf_data = NULL;
 }
@@ -3968,7 +3989,7 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp)
 	rc = -EFAULT;
 	if (copy_from_user(&usrparm, argp, sizeof(usrparm)))
 		goto out;
-	if (is_compat_task() || sizeof(long) == 4) {
+	if (is_compat_task()) {
 		/* Make sure pointers are sane even on 31 bit. */
 		rc = -EINVAL;
 		if ((usrparm.psf_data >> 32) != 0)
@@ -4525,12 +4546,13 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device,
 	cqr->startdev = device;
 	cqr->memdev = device;
 	cqr->block = NULL;
-	cqr->retries = 256;
 	cqr->expires = 10 * HZ;
-
-	/* we need to check for messages on exactly this path */
 	set_bit(DASD_CQR_VERIFY_PATH, &cqr->flags);
-	cqr->lpm = lpum;
+	/* dasd_sleep_on_immediatly does not do complex error
+	 * recovery so clear erp flag and set retry counter to
+	 * do basic erp */
+	clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
+	cqr->retries = 256;
 
 	/* Prepare for Read Subsystem Data */
 	prssdp = (struct dasd_psf_prssd_data *) cqr->data;
@@ -4605,10 +4627,10 @@ dasd_eckd_psf_cuir_response(struct dasd_device *device, int response,
 	psf_cuir->message_id = message_id;
 	psf_cuir->cssid = sch_id.cssid;
 	psf_cuir->ssid = sch_id.ssid;
-
 	ccw = cqr->cpaddr;
 	ccw->cmd_code = DASD_ECKD_CCW_PSF;
 	ccw->cda = (__u32)(addr_t)psf_cuir;
+	ccw->flags = CCW_FLAG_SLI;
 	ccw->count = sizeof(struct dasd_psf_cuir_response);
 
 	cqr->startdev = device;
@@ -4618,6 +4640,7 @@ dasd_eckd_psf_cuir_response(struct dasd_device *device, int response,
 	cqr->expires = 10*HZ;
 	cqr->buildclk = get_tod_clock();
 	cqr->status = DASD_CQR_FILLED;
+	set_bit(DASD_CQR_VERIFY_PATH, &cqr->flags);
 
 	rc = dasd_sleep_on(cqr);
 
@@ -4625,118 +4648,252 @@ dasd_eckd_psf_cuir_response(struct dasd_device *device, int response,
 	return rc;
 }
 
-static int dasd_eckd_cuir_change_state(struct dasd_device *device, __u8 lpum)
+/*
+ * return configuration data that is referenced by record selector
+ * if a record selector is specified or per default return the
+ * conf_data pointer for the path specified by lpum
+ */
+static struct dasd_conf_data *dasd_eckd_get_ref_conf(struct dasd_device *device,
+						     __u8 lpum,
+						     struct dasd_cuir_message *cuir)
 {
-	unsigned long flags;
-	__u8 tbcpm;
+	struct dasd_eckd_private *private;
+	struct dasd_conf_data *conf_data;
+	int path, pos;
 
-	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
-	tbcpm = device->path_data.opm & ~lpum;
-	if (tbcpm) {
-		device->path_data.opm = tbcpm;
-		device->path_data.cuirpm |= lpum;
+	private = (struct dasd_eckd_private *) device->private;
+	if (cuir->record_selector == 0)
+		goto out;
+	for (path = 0x80, pos = 0; path; path >>= 1, pos++) {
+		conf_data = private->path_conf_data[pos];
+		if (conf_data->gneq.record_selector ==
+		    cuir->record_selector)
+			return conf_data;
+	}
+out:
+	return private->path_conf_data[8 - ffs(lpum)];
+}
+
+/*
+ * This function determines the scope of a reconfiguration request by
+ * analysing the path and device selection data provided in the CUIR request.
+ * Returns a path mask containing CUIR affected paths for the given device.
+ *
+ * If the CUIR request does not contain the required information return the
+ * path mask of the path the attention message for the CUIR request was received
+ * on.
+ */
+static int dasd_eckd_cuir_scope(struct dasd_device *device, __u8 lpum,
+				struct dasd_cuir_message *cuir)
+{
+	struct dasd_conf_data *ref_conf_data;
+	unsigned long bitmask = 0, mask = 0;
+	struct dasd_eckd_private *private;
+	struct dasd_conf_data *conf_data;
+	unsigned int pos, path;
+	char *ref_gneq, *gneq;
+	char *ref_ned, *ned;
+	int tbcpm = 0;
+
+	/* if CUIR request does not specify the scope use the path
+	   the attention message was presented on */
+	if (!cuir->ned_map ||
+	    !(cuir->neq_map[0] | cuir->neq_map[1] | cuir->neq_map[2]))
+		return lpum;
+
+	private = (struct dasd_eckd_private *) device->private;
+	/* get reference conf data */
+	ref_conf_data = dasd_eckd_get_ref_conf(device, lpum, cuir);
+	/* reference ned is determined by ned_map field */
+	pos = 8 - ffs(cuir->ned_map);
+	ref_ned = (char *)&ref_conf_data->neds[pos];
+	ref_gneq = (char *)&ref_conf_data->gneq;
+	/* transfer 24 bit neq_map to mask */
+	mask = cuir->neq_map[2];
+	mask |= cuir->neq_map[1] << 8;
+	mask |= cuir->neq_map[0] << 16;
+
+	for (path = 0x80; path; path >>= 1) {
+		/* initialise data per path */
+		bitmask = mask;
+		pos = 8 - ffs(path);
+		conf_data = private->path_conf_data[pos];
+		pos = 8 - ffs(cuir->ned_map);
+		ned = (char *) &conf_data->neds[pos];
+		/* compare reference ned and per path ned */
+		if (memcmp(ref_ned, ned, sizeof(*ned)) != 0)
+			continue;
+		gneq = (char *)&conf_data->gneq;
+		/* compare reference gneq and per_path gneq under
+		   24 bit mask where mask bit 0 equals byte 7 of
+		   the gneq and mask bit 24 equals byte 31 */
+		while (bitmask) {
+			pos = ffs(bitmask) - 1;
+			if (memcmp(&ref_gneq[31 - pos], &gneq[31 - pos], 1)
+			    != 0)
+				break;
+			clear_bit(pos, &bitmask);
+		}
+		if (bitmask)
+			continue;
+		/* device and path match the reference values
+		   add path to CUIR scope */
+		tbcpm |= path;
+	}
+	return tbcpm;
+}
+
+static void dasd_eckd_cuir_notify_user(struct dasd_device *device,
+				       unsigned long paths,
+				       struct subchannel_id sch_id, int action)
+{
+	struct channel_path_desc *desc;
+	int pos;
+
+	while (paths) {
+		/* get position of bit in mask */
+		pos = ffs(paths) - 1;
+		/* get channel path descriptor from this position */
+		desc = ccw_device_get_chp_desc(device->cdev, 7 - pos);
+		if (action == CUIR_QUIESCE)
+			pr_warn("Service on the storage server caused path "
+				"%x.%02x to go offline", sch_id.cssid,
+				desc ? desc->chpid : 0);
+		else if (action == CUIR_RESUME)
+			pr_info("Path %x.%02x is back online after service "
+				"on the storage server", sch_id.cssid,
+				desc ? desc->chpid : 0);
+		kfree(desc);
+		clear_bit(pos, &paths);
 	}
-	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
-	return tbcpm ? 0 : PSF_CUIR_LAST_PATH;
+}
+
+static int dasd_eckd_cuir_remove_path(struct dasd_device *device, __u8 lpum,
+				      struct dasd_cuir_message *cuir)
+{
+	unsigned long tbcpm;
+
+	tbcpm = dasd_eckd_cuir_scope(device, lpum, cuir);
+	/* nothing to do if path is not in use */
+	if (!(device->path_data.opm & tbcpm))
+		return 0;
+	if (!(device->path_data.opm & ~tbcpm)) {
+		/* no path would be left if the CUIR action is taken
+		   return error */
+		return -EINVAL;
+	}
+	/* remove device from operational path mask */
+	device->path_data.opm &= ~tbcpm;
+	device->path_data.cuirpm |= tbcpm;
+	return tbcpm;
 }
 }
 
 /*
- * walk through all devices and quiesce them
- * if it is the last path return error
+ * walk through all devices and build a path mask to quiesce them
+ * return an error if the last path to a device would be removed
  *
  * if only part of the devices are quiesced and an error
  * occurs no onlining necessary, the storage server will
  * notify the already set offline devices again
  */
 static int dasd_eckd_cuir_quiesce(struct dasd_device *device, __u8 lpum,
-				 struct channel_path_desc *desc,
-				 struct subchannel_id sch_id)
+				  struct subchannel_id sch_id,
+				  struct dasd_cuir_message *cuir)
 {
 	struct alias_pav_group *pavgroup, *tempgroup;
 	struct dasd_eckd_private *private;
 	struct dasd_device *dev, *n;
-	int rc;
+	unsigned long paths = 0;
+	unsigned long flags;
+	int tbcpm;
 
 	private = (struct dasd_eckd_private *) device->private;
-	rc = 0;
-
 	/* active devices */
-	list_for_each_entry_safe(dev, n,
-				 &private->lcu->active_devices,
+	list_for_each_entry_safe(dev, n, &private->lcu->active_devices,
 				 alias_list) {
-		if (rc)
-			goto out;
+		spin_lock_irqsave(get_ccwdev_lock(dev->cdev), flags);
+		tbcpm = dasd_eckd_cuir_remove_path(dev, lpum, cuir);
+		spin_unlock_irqrestore(get_ccwdev_lock(dev->cdev), flags);
+		if (tbcpm < 0)
+			goto out_err;
+		paths |= tbcpm;
 	}
 	}
-
 	/* inactive devices */
-	list_for_each_entry_safe(dev, n,
-				 &private->lcu->inactive_devices,
+	list_for_each_entry_safe(dev, n, &private->lcu->inactive_devices,
 				 alias_list) {
-		if (rc)
-			goto out;
+		spin_lock_irqsave(get_ccwdev_lock(dev->cdev), flags);
+		tbcpm = dasd_eckd_cuir_remove_path(dev, lpum, cuir);
+		spin_unlock_irqrestore(get_ccwdev_lock(dev->cdev), flags);
+		if (tbcpm < 0)
+			goto out_err;
+		paths |= tbcpm;
 	}
 	}
-
 	/* devices in PAV groups */
 	list_for_each_entry_safe(pavgroup, tempgroup,
 				 &private->lcu->grouplist, group) {
 		list_for_each_entry_safe(dev, n, &pavgroup->baselist,
 					 alias_list) {
-			if (rc)
-				goto out;
+			spin_lock_irqsave(get_ccwdev_lock(dev->cdev), flags);
+			tbcpm = dasd_eckd_cuir_remove_path(dev, lpum, cuir);
+			spin_unlock_irqrestore(
+				get_ccwdev_lock(dev->cdev), flags);
+			if (tbcpm < 0)
+				goto out_err;
+			paths |= tbcpm;
 		}
 		}
 		list_for_each_entry_safe(dev, n, &pavgroup->aliaslist,
 					 alias_list) {
-			if (rc)
-				goto out;
+			spin_lock_irqsave(get_ccwdev_lock(dev->cdev), flags);
+			tbcpm = dasd_eckd_cuir_remove_path(dev, lpum, cuir);
+			spin_unlock_irqrestore(
+				get_ccwdev_lock(dev->cdev), flags);
+			if (tbcpm < 0)
+				goto out_err;
+			paths |= tbcpm;
 		}
 		}
 	}
-	pr_warn("Service on the storage server caused path %x.%02x to go offline",
-		sch_id.cssid, desc ? desc->chpid : 0);
-	rc = PSF_CUIR_COMPLETED;
-out:
-	return rc;
+	/* notify user about all paths affected by CUIR action */
+	dasd_eckd_cuir_notify_user(device, paths, sch_id, CUIR_QUIESCE);
+	return 0;
+out_err:
+	return tbcpm;
 }
 
 static int dasd_eckd_cuir_resume(struct dasd_device *device, __u8 lpum,
-				 struct channel_path_desc *desc,
-				 struct subchannel_id sch_id)
+				 struct subchannel_id sch_id,
+				 struct dasd_cuir_message *cuir)
 {
 	struct alias_pav_group *pavgroup, *tempgroup;
 	struct dasd_eckd_private *private;
 	struct dasd_device *dev, *n;
+	unsigned long paths = 0;
+	int tbcpm;
 
-	pr_info("Path %x.%02x is back online after service on the storage server",
-		sch_id.cssid, desc ? desc->chpid : 0);
 	private = (struct dasd_eckd_private *) device->private;
-
 	/*
 	 * the path may have been added through a generic path event before
 	 * only trigger path verification if the path is not already in use
 	 */
-
 	list_for_each_entry_safe(dev, n,
 				 &private->lcu->active_devices,
 				 alias_list) {
-		if (!(dev->path_data.opm & lpum)) {
-			dev->path_data.tbvpm |= lpum;
+		tbcpm = dasd_eckd_cuir_scope(dev, lpum, cuir);
+		paths |= tbcpm;
+		if (!(dev->path_data.opm & tbcpm)) {
+			dev->path_data.tbvpm |= tbcpm;
 			dasd_schedule_device_bh(dev);
 		}
 	}
-
 	list_for_each_entry_safe(dev, n,
 				 &private->lcu->inactive_devices,
 				 alias_list) {
-		if (!(dev->path_data.opm & lpum)) {
-			dev->path_data.tbvpm |= lpum;
+		tbcpm = dasd_eckd_cuir_scope(dev, lpum, cuir);
+		paths |= tbcpm;
+		if (!(dev->path_data.opm & tbcpm)) {
+			dev->path_data.tbvpm |= tbcpm;
 			dasd_schedule_device_bh(dev);
 		}
 	}
-
 	/* devices in PAV groups */
 	list_for_each_entry_safe(pavgroup, tempgroup,
 				 &private->lcu->grouplist,
@@ -4744,21 +4901,27 @@ static int dasd_eckd_cuir_resume(struct dasd_device *device, __u8 lpum,
 		list_for_each_entry_safe(dev, n,
 					 &pavgroup->baselist,
 					 alias_list) {
-			if (!(dev->path_data.opm & lpum)) {
-				dev->path_data.tbvpm |= lpum;
+			tbcpm = dasd_eckd_cuir_scope(dev, lpum, cuir);
+			paths |= tbcpm;
+			if (!(dev->path_data.opm & tbcpm)) {
+				dev->path_data.tbvpm |= tbcpm;
 				dasd_schedule_device_bh(dev);
 			}
 		}
 		list_for_each_entry_safe(dev, n,
 					 &pavgroup->aliaslist,
 					 alias_list) {
-			if (!(dev->path_data.opm & lpum)) {
-				dev->path_data.tbvpm |= lpum;
+			tbcpm = dasd_eckd_cuir_scope(dev, lpum, cuir);
+			paths |= tbcpm;
+			if (!(dev->path_data.opm & tbcpm)) {
+				dev->path_data.tbvpm |= tbcpm;
 				dasd_schedule_device_bh(dev);
 			}
 		}
 	}
-	return PSF_CUIR_COMPLETED;
+	/* notify user about all paths affected by CUIR action */
+	dasd_eckd_cuir_notify_user(device, paths, sch_id, CUIR_RESUME);
+	return 0;
 }
 
 static void dasd_eckd_handle_cuir(struct dasd_device *device, void *messages,
@@ -4768,8 +4931,12 @@ static void dasd_eckd_handle_cuir(struct dasd_device *device, void *messages,
 	struct channel_path_desc *desc;
 	struct subchannel_id sch_id;
 	int pos, response;
-	ccw_device_get_schid(device->cdev, &sch_id);
 
+	DBF_DEV_EVENT(DBF_WARNING, device,
+		      "CUIR request: %016llx %016llx %016llx %08x",
+		      ((u64 *)cuir)[0], ((u64 *)cuir)[1], ((u64 *)cuir)[2],
+		      ((u32 *)cuir)[3]);
+	ccw_device_get_schid(device->cdev, &sch_id);
 	/* get position of path in mask */
 	pos = 8 - ffs(lpum);
 	/* get channel path descriptor from this position */
@@ -4777,18 +4944,26 @@ static void dasd_eckd_handle_cuir(struct dasd_device *device, void *messages,
 
 	if (cuir->code == CUIR_QUIESCE) {
 		/* quiesce */
-		response = dasd_eckd_cuir_quiesce(device, lpum, desc, sch_id);
+		if (dasd_eckd_cuir_quiesce(device, lpum, sch_id, cuir))
+			response = PSF_CUIR_LAST_PATH;
+		else
+			response = PSF_CUIR_COMPLETED;
 	} else if (cuir->code == CUIR_RESUME) {
 		/* resume */
-		response = dasd_eckd_cuir_resume(device, lpum, desc, sch_id);
+		dasd_eckd_cuir_resume(device, lpum, sch_id, cuir);
+		response = PSF_CUIR_COMPLETED;
 	} else
 		response = PSF_CUIR_NOT_SUPPORTED;
 
-	dasd_eckd_psf_cuir_response(device, response, cuir->message_id,
-				    desc, sch_id);
-
+	dasd_eckd_psf_cuir_response(device, response,
+				    cuir->message_id, desc, sch_id);
+	DBF_DEV_EVENT(DBF_WARNING, device,
+		      "CUIR response: %d on message ID %08x", response,
+		      cuir->message_id);
 	/* free descriptor copy */
 	kfree(desc);
+	/* to make sure there is no attention left schedule work again */
+	device->discipline->check_attention(device, lpum);
 }
 
 static void dasd_eckd_check_attention_work(struct work_struct *work)
@@ -4800,22 +4975,18 @@ static void dasd_eckd_check_attention_work(struct work_struct *work)
 
 	data = container_of(work, struct check_attention_work_data, worker);
 	device = data->device;
-
 	messages = kzalloc(sizeof(*messages), GFP_KERNEL);
 	if (!messages) {
 		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
 			      "Could not allocate attention message buffer");
 		goto out;
 	}
-
 	rc = dasd_eckd_read_message_buffer(device, messages, data->lpum);
 	if (rc)
 		goto out;
-
 	if (messages->length == ATTENTION_LENGTH_CUIR &&
 	    messages->format == ATTENTION_FORMAT_CUIR)
 		dasd_eckd_handle_cuir(device, messages, data->lpum);
-
 out:
 	dasd_put_device(device);
 	kfree(messages);
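Reviewer note on the CUIR scope detection above: two bit conventions carry the whole hunk. A path mask is big-endian within the byte (bit 0x80 is the first path), so the per-path array slot falls out of 8 - ffs(mask), and the 24-bit neq_map is folded from three bytes before it is walked bit by bit against the gneq bytes. A standalone sketch of both mappings, with made-up values:

#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	unsigned char lpum = 0x20;	/* hypothetical path mask, one bit set */
	unsigned char neq_map[3] = { 0x01, 0x00, 0x80 };
	unsigned long mask;
	int pos;

	/* bit 0x80 is path 0, bit 0x01 is path 7: ffs(0x20) == 6 -> index 2 */
	pos = 8 - ffs(lpum);
	printf("lpum 0x%02x -> path_conf_data[%d]\n", lpum, pos);

	/* fold the three neq_map bytes into one 24 bit mask, byte 0 on top */
	mask = neq_map[2];
	mask |= neq_map[1] << 8;
	mask |= neq_map[0] << 16;
	printf("neq_map -> 0x%06lx\n", mask);

	/* bit position pos selects gneq byte 31 - pos, as in the kernel loop */
	while (mask) {
		pos = ffs(mask) - 1;
		printf("compare gneq byte %d\n", 31 - pos);
		mask &= ~(1UL << pos);
	}
	return 0;
}

Walking set bits with ffs() keeps the comparison loop proportional to the number of selected bytes rather than a fixed 24 iterations.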

+ 10 - 1
drivers/s390/block/dasd_eckd.h

@@ -355,7 +355,8 @@ struct dasd_gneq {
 		__u8 identifier:2;
 		__u8 reserved:6;
 	} __attribute__ ((packed)) flags;
-	__u8 reserved[5];
+	__u8 record_selector;
+	__u8 reserved[4];
 	struct {
 		__u8 value:2;
 		__u8 number:6;
@@ -492,10 +493,18 @@ struct alias_pav_group {
 	struct dasd_device *next;
 };
 
+struct dasd_conf_data {
+	struct dasd_ned neds[5];
+	u8 reserved[64];
+	struct dasd_gneq gneq;
+} __packed;
+
 struct dasd_eckd_private {
 	struct dasd_eckd_characteristics rdc_data;
 	u8 *conf_data;
 	int conf_len;
+	/* per path configuration data */
+	struct dasd_conf_data *path_conf_data[8];
 	/* pointers to specific parts in the conf_data */
 	struct dasd_ned *ned;
 	struct dasd_sneq *sneq;
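The new dasd_conf_data struct overlays one read-configuration-data record per path. If, as the neds[5]/reserved[64] split suggests, each NED and the GNEQ are 32-byte records, the whole struct spans a 256-byte record. A compile-time check of that assumption, using simplified stand-in types since the real dasd_ned/dasd_gneq definitions are not part of this hunk:

#include <assert.h>

/* stand-ins; the real structs live in dasd_eckd.h and are assumed
 * here to be 32 bytes each */
struct ned_stub  { unsigned char b[32]; };
struct gneq_stub { unsigned char b[32]; };

struct conf_data_stub {
	struct ned_stub neds[5];
	unsigned char reserved[64];
	struct gneq_stub gneq;
} __attribute__((packed));

/* 5 * 32 + 64 + 32 == 256 bytes: one configuration record per path */
static_assert(sizeof(struct conf_data_stub) == 256,
	      "per-path conf data is expected to span a 256 byte record");

int main(void) { return 0; }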

+ 1 - 0
drivers/s390/block/dasd_int.h

@@ -534,6 +534,7 @@ struct dasd_attention_data {
 #define DASD_FLAG_SAFE_OFFLINE	10	/* safe offline processing requested*/
 #define DASD_FLAG_SAFE_OFFLINE_RUNNING	11	/* safe offline running */
 #define DASD_FLAG_ABORTALL	12	/* Abort all noretry requests */
+#define DASD_FLAG_PATH_VERIFY	13	/* Path verification worker running */
 
 #define DASD_SLEEPON_START_TAG	((void *) 1)
 #define DASD_SLEEPON_END_TAG	((void *) 2)
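DASD_FLAG_PATH_VERIFY pairs with the clear_bit() added to the path verification worker in dasd_eckd.c above: the flag marks a device whose verification worker is still in flight. The set side is not part of this excerpt; the usual shape for such a guard is a test_and_set_bit() before scheduling, roughly (sketch only, the work item name is hypothetical):

/* sketch: the real scheduling site lives in dasd.c, outside this diff */
static void example_schedule_path_verification(struct dasd_device *device)
{
	/* admit only one path verification worker per device */
	if (test_and_set_bit(DASD_FLAG_PATH_VERIFY, &device->flags))
		return;
	dasd_get_device(device);
	schedule_work(&device->example_verify_work);
	/* the worker clears the flag and drops the reference, as seen above */
}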

+ 7 - 7
drivers/s390/block/dcssblk.c

@@ -548,10 +548,10 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	 */
 	num_of_segments = 0;
 	for (i = 0; (i < count && (buf[i] != '\0') && (buf[i] != '\n')); i++) {
-		for (j = i; (buf[j] != ':') &&
+		for (j = i; j < count &&
+			(buf[j] != ':') &&
 			(buf[j] != '\0') &&
-			(buf[j] != '\n') &&
-			j < count; j++) {
+			(buf[j] != '\n'); j++) {
 			local_buf[j-i] = toupper(buf[j]);
 		}
 		local_buf[j-i] = '\0';
@@ -723,7 +723,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
 	/*
 	 * parse input
 	 */
-	for (i = 0; ((*(buf+i)!='\0') && (*(buf+i)!='\n') && i < count); i++) {
+	for (i = 0; (i < count && (*(buf+i)!='\0') && (*(buf+i)!='\n')); i++) {
 		local_buf[i] = toupper(buf[i]);
 	}
 	local_buf[i] = '\0';
@@ -904,10 +904,10 @@ dcssblk_check_params(void)
 
 	for (i = 0; (i < DCSSBLK_PARM_LEN) && (dcssblk_segments[i] != '\0');
 	     i++) {
-		for (j = i; (dcssblk_segments[j] != ',')  &&
+		for (j = i; (j < DCSSBLK_PARM_LEN) &&
+			    (dcssblk_segments[j] != ',')  &&
 			    (dcssblk_segments[j] != '\0') &&
-			    (dcssblk_segments[j] != '(')  &&
-			    (j < DCSSBLK_PARM_LEN); j++)
+			    (dcssblk_segments[j] != '('); j++)
 		{
 			buf[j-i] = dcssblk_segments[j];
 		}
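All three dcssblk loops get the same fix: the bounds check is moved in front of the buffer dereferences, so C's left-to-right short-circuit evaluation never reads buf[j] once j reaches count. A tiny standalone demonstration of why the operand order matters:

#include <stdio.h>

int main(void)
{
	char buf[4] = { 'A', 'B', 'C', 'D' };	/* note: not NUL-terminated */
	int count = sizeof(buf);
	int j;

	/* safe: j < count is evaluated first, buf[j] is only read in bounds */
	for (j = 0; j < count && buf[j] != ':' && buf[j] != '\n'; j++)
		;
	printf("stopped at %d\n", j);

	/*
	 * The old order, buf[j] != ':' && ... && j < count, would read
	 * buf[4] (one past the end) before noticing that j == count.
	 */
	return 0;
}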

+ 4 - 0
drivers/s390/char/con3270.c

@@ -413,6 +413,10 @@ con3270_irq(struct con3270 *cp, struct raw3270_request *rq, struct irb *irb)
 		else
 			/* Normal end. Copy residual count. */
 			rq->rescnt = irb->scsw.cmd.count;
+	} else if (irb->scsw.cmd.dstat & DEV_STAT_DEV_END) {
+		/* Interrupt without an outstanding request -> update all */
+		cp->update_flags = CON_UPDATE_ALL;
+		con3270_set_timer(cp, 1);
 	}
 	return RAW3270_IO_DONE;
 }

+ 11 - 5
drivers/s390/char/ctrlchar.c

@@ -14,15 +14,21 @@
 #include "ctrlchar.h"
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static int ctrlchar_sysrq_key;
+static struct sysrq_work ctrlchar_sysrq;
 
 static void
 ctrlchar_handle_sysrq(struct work_struct *work)
 {
-	handle_sysrq(ctrlchar_sysrq_key);
+	struct sysrq_work *sysrq = container_of(work, struct sysrq_work, work);
+
+	handle_sysrq(sysrq->key);
 }
 
-static DECLARE_WORK(ctrlchar_work, ctrlchar_handle_sysrq);
+void schedule_sysrq_work(struct sysrq_work *sw)
+{
+	INIT_WORK(&sw->work, ctrlchar_handle_sysrq);
+	schedule_work(&sw->work);
+}
 #endif
 
 
@@ -51,8 +57,8 @@ ctrlchar_handle(const unsigned char *buf, int len, struct tty_struct *tty)
 #ifdef CONFIG_MAGIC_SYSRQ
 	/* racy */
 	if (len == 3 && buf[1] == '-') {
-		ctrlchar_sysrq_key = buf[2];
-		schedule_work(&ctrlchar_work);
+		ctrlchar_sysrq.key = buf[2];
+		schedule_sysrq_work(&ctrlchar_sysrq);
 		return CTRLCHAR_SYSRQ;
 	}
 #endif

+ 12 - 0
drivers/s390/char/ctrlchar.h

@@ -7,6 +7,8 @@
  */
 
 #include <linux/tty.h>
+#include <linux/sysrq.h>
+#include <linux/workqueue.h>
 
 extern unsigned int
 ctrlchar_handle(const unsigned char *buf, int len, struct tty_struct *tty);
@@ -17,3 +19,13 @@ ctrlchar_handle(const unsigned char *buf, int len, struct tty_struct *tty);
 #define CTRLCHAR_SYSRQ (3 << 8)
 
 #define CTRLCHAR_MASK (~0xffu)
+
+
+#ifdef CONFIG_MAGIC_SYSRQ
+struct sysrq_work {
+	int key;
+	struct work_struct work;
+};
+
+void schedule_sysrq_work(struct sysrq_work *sw);
+#endif
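Moving the key into a sysrq_work wrapper lets each caller own its work item and payload instead of funnelling everything through one shared static key, and the handler recovers its context with container_of(). A userspace reduction of the pattern, with stand-ins for the kernel's work type and container_of macro:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };	/* stand-in for the kernel type */

struct sysrq_work {
	int key;
	struct work_struct work;
};

static void handle(struct work_struct *work)
{
	/* recover the embedding sysrq_work from the member pointer */
	struct sysrq_work *sysrq = container_of(work, struct sysrq_work, work);

	printf("sysrq key '%c'\n", sysrq->key);
}

int main(void)
{
	struct sysrq_work sw = { .key = 't' };

	handle(&sw.work);	/* what the workqueue would do asynchronously */
	return 0;
}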

+ 2 - 2
drivers/s390/char/diag_ftp.c

@@ -223,7 +223,7 @@ int diag_ftp_startup(void)
 	if (rc)
 		return rc;
 
-	ctl_set_bit(0, 63 - 22);
+	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
 	return 0;
 }
 
@@ -232,6 +232,6 @@ int diag_ftp_startup(void)
  */
 void diag_ftp_shutdown(void)
 {
-	ctl_clear_bit(0, 63 - 22);
+	irq_subclass_unregister(IRQ_SUBCLASS_SERVICE_SIGNAL);
 	unregister_external_irq(EXT_IRQ_CP_SERVICE, diag_ftp_handler);
 }

+ 1 - 1
drivers/s390/char/monreader.c

@@ -95,7 +95,7 @@ static void dcss_mkname(char *ascii_name, char *ebcdic_name)
 		if (ascii_name[i] == '\0')
 			break;
 		ebcdic_name[i] = toupper(ascii_name[i]);
-	};
+	}
 	for (; i < 8; i++)
 		ebcdic_name[i] = ' ';
 	ASCEBC(ebcdic_name, 8);

+ 3 - 3
drivers/s390/char/sclp.c

@@ -53,7 +53,7 @@ static DECLARE_COMPLETION(sclp_request_queue_flushed);
 /* Number of console pages to allocate, used by sclp_con.c and sclp_vt220.c */
 int sclp_console_pages = SCLP_CONSOLE_PAGES;
 /* Flag to indicate if buffer pages are dropped on buffer full condition */
-int sclp_console_drop = 0;
+int sclp_console_drop = 1;
 /* Number of times the console dropped buffer pages */
 unsigned long sclp_console_full;
 
@@ -79,8 +79,8 @@ static int __init sclp_setup_console_drop(char *str)
 	int drop, rc;
 
 	rc = kstrtoint(str, 0, &drop);
-	if (!rc && drop)
-		sclp_console_drop = 1;
+	if (!rc)
+		sclp_console_drop = drop;
 	return 1;
 }
 

+ 11 - 7
drivers/s390/char/sclp_cmd.c

@@ -25,6 +25,7 @@
 #include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/sclp.h>
+#include <asm/numa.h>
 
 #include "sclp.h"
 
@@ -388,11 +389,11 @@ static struct notifier_block sclp_mem_nb = {
 };
 
 static void __init align_to_block_size(unsigned long long *start,
-				       unsigned long long *size)
+				       unsigned long long *size,
+				       unsigned long long alignment)
 {
-	unsigned long long start_align, size_align, alignment;
+	unsigned long long start_align, size_align;
 
-	alignment = memory_block_size_bytes();
 	start_align = roundup(*start, alignment);
 	size_align = rounddown(*start + *size, alignment) - start_align;
 
@@ -404,8 +405,8 @@ static void __init align_to_block_size(unsigned long long *start,
 
 static void __init add_memory_merged(u16 rn)
 {
+	unsigned long long start, size, addr, block_size;
 	static u16 first_rn, num;
-	unsigned long long start, size;
 
 	if (rn && first_rn && (first_rn + num == rn)) {
 		num++;
@@ -423,9 +424,12 @@ static void __init add_memory_merged(u16 rn)
 		goto skip_add;
 	if (memory_end_set && (start + size > memory_end))
 		size = memory_end - start;
-	align_to_block_size(&start, &size);
-	if (size)
-		add_memory(0, start, size);
+	block_size = memory_block_size_bytes();
+	align_to_block_size(&start, &size, block_size);
+	if (!size)
+		goto skip_add;
+	for (addr = start; addr < start + size; addr += block_size)
+		add_memory(numa_pfn_to_nid(PFN_DOWN(addr)), addr, block_size);
 skip_add:
 	first_rn = rn;
 	num = 1;
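With fake NUMA, one merged memory range can straddle node boundaries, so add_memory_merged() now hands memory to the kernel one memory block at a time and asks numa_pfn_to_nid() for each block's node instead of hard-coding node 0. The arithmetic reduces to a roundup/rounddown pair plus a per-block loop; a sketch with hypothetical numbers (256 MiB blocks, a deliberately unaligned range):

#include <stdio.h>

int main(void)
{
	unsigned long long start = 0x18000000ULL;
	unsigned long long size  = 0x60000000ULL;
	unsigned long long block = 0x10000000ULL; /* memory_block_size_bytes() */
	unsigned long long start_align, size_align, addr;

	/* align_to_block_size(): keep only whole blocks inside the range */
	start_align = (start + block - 1) / block * block;	    /* roundup   */
	size_align = (start + size) / block * block - start_align; /* rounddown */

	/* add_memory_merged(): one add_memory() per block, each with its node */
	for (addr = start_align; addr < start_align + size_align; addr += block)
		printf("add_memory(numa_pfn_to_nid(0x%llx), 0x%llx, 0x%llx)\n",
		       addr >> 12, addr, block);	/* PFN_DOWN(addr) */
	return 0;
}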

+ 51 - 1
drivers/s390/char/sclp_vt220.c

@@ -12,6 +12,7 @@
 #include <linux/wait.h>
 #include <linux/timer.h>
 #include <linux/kernel.h>
+#include <linux/sysrq.h>
 #include <linux/tty.h>
 #include <linux/tty_driver.h>
 #include <linux/tty_flip.h>
@@ -27,6 +28,7 @@
 
 #include <asm/uaccess.h>
 #include "sclp.h"
+#include "ctrlchar.h"
 
 #define SCLP_VT220_MAJOR		TTY_MAJOR
 #define SCLP_VT220_MINOR		65
@@ -477,6 +479,53 @@ sclp_vt220_write(struct tty_struct *tty, const unsigned char *buf, int count)
 #define	SCLP_VT220_SESSION_STARTED	0x80
 #define SCLP_VT220_SESSION_DATA		0x00
 
+#ifdef CONFIG_MAGIC_SYSRQ
+
+static int sysrq_pressed;
+static struct sysrq_work sysrq;
+
+static void sclp_vt220_reset_session(void)
+{
+	sysrq_pressed = 0;
+}
+
+static void sclp_vt220_handle_input(const char *buffer, unsigned int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		/* Handle magic sys request */
+		if (buffer[i] == ('O' ^ 0100)) { /* CTRL-O */
+			/*
+			 * If pressed again, reset sysrq_pressed
+			 * and flip CTRL-O character
+			 */
+			sysrq_pressed = !sysrq_pressed;
+			if (sysrq_pressed)
+				continue;
+		} else if (sysrq_pressed) {
+			sysrq.key = buffer[i];
+			schedule_sysrq_work(&sysrq);
+			sysrq_pressed = 0;
+			continue;
+		}
+		tty_insert_flip_char(&sclp_vt220_port, buffer[i], 0);
+	}
+}
+
+#else
+
+static void sclp_vt220_reset_session(void)
+{
+}
+
+static void sclp_vt220_handle_input(const char *buffer, unsigned int count)
+{
+	tty_insert_flip_string(&sclp_vt220_port, buffer, count);
+}
+
+#endif
+
 /*
  * Called by the SCLP to report incoming event buffers.
  */
@@ -492,12 +541,13 @@ sclp_vt220_receiver_fn(struct evbuf_header *evbuf)
 	switch (*buffer) {
 	case SCLP_VT220_SESSION_ENDED:
 	case SCLP_VT220_SESSION_STARTED:
+		sclp_vt220_reset_session();
 		break;
 	case SCLP_VT220_SESSION_DATA:
 		/* Send input to line discipline */
 		buffer++;
 		count--;
-		tty_insert_flip_string(&sclp_vt220_port, buffer, count);
+		sclp_vt220_handle_input(buffer, count);
 		tty_flip_buffer_push(&sclp_vt220_port);
 		break;
 	}
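The magic sysrq trigger on the SCLP VT220 console is CTRL-O: XOR-ing an upper-case letter with 0100 (octal, i.e. bit 6) maps it onto its C0 control character, so 'O' ^ 0100 yields 0x0f. A first CTRL-O arms sysrq, the next byte is taken as the sysrq key, and a doubled CTRL-O disarms and falls through as a literal character. The mapping itself:

#include <stdio.h>

int main(void)
{
	/* clearing bit 6 turns an upper-case letter into its control char */
	printf("'O' ^ 0100 = 0x%02x (CTRL-O, SI)\n", 'O' ^ 0100);
	printf("'C' ^ 0100 = 0x%02x (CTRL-C, ETX)\n", 'C' ^ 0100);
	return 0;
}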

+ 4 - 0
drivers/s390/char/tty3270.c

@@ -659,6 +659,10 @@ tty3270_irq(struct tty3270 *tp, struct raw3270_request *rq, struct irb *irb)
 		else
 			/* Normal end. Copy residual count. */
 			rq->rescnt = irb->scsw.cmd.count;
+	} else if (irb->scsw.cmd.dstat & DEV_STAT_DEV_END) {
+		/* Interrupt without an outstanding request -> update all */
+		tp->update_flags = TTY_UPDATE_ALL;
+		tty3270_set_timer(tp, 1);
 	}
 	return RAW3270_IO_DONE;
 }

+ 123 - 42
drivers/s390/cio/chsc.c

@@ -21,6 +21,7 @@
 #include <asm/chsc.h>
 #include <asm/crw.h>
 #include <asm/isc.h>
+#include <asm/ebcdic.h>
 
 #include "css.h"
 #include "cio.h"
@@ -272,36 +273,6 @@ static void s390_process_res_acc(struct chp_link *link)
 	css_schedule_reprobe();
 }
 
-static int
-__get_chpid_from_lir(void *data)
-{
-	struct lir {
-		u8  iq;
-		u8  ic;
-		u16 sci;
-		/* incident-node descriptor */
-		u32 indesc[28];
-		/* attached-node descriptor */
-		u32 andesc[28];
-		/* incident-specific information */
-		u32 isinfo[28];
-	} __attribute__ ((packed)) *lir;
-
-	lir = data;
-	if (!(lir->iq&0x80))
-		/* NULL link incident record */
-		return -EINVAL;
-	if (!(lir->indesc[0]&0xc0000000))
-		/* node descriptor not valid */
-		return -EINVAL;
-	if (!(lir->indesc[0]&0x10000000))
-		/* don't handle device-type nodes - FIXME */
-		return -EINVAL;
-	/* Byte 3 contains the chpid. Could also be CTCA, but we don't care */
-
-	return (u16) (lir->indesc[0]&0x000000ff);
-}
-
 struct chsc_sei_nt0_area {
 	u8  flags;
 	u8  vf;				/* validity flags */
@@ -341,22 +312,132 @@ struct chsc_sei {
 	} u;
 } __packed;
 
+/*
+ * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
+ */
+
+#define ND_VALIDITY_VALID	0
+#define ND_VALIDITY_OUTDATED	1
+#define ND_VALIDITY_INVALID	2
+
+struct node_descriptor {
+	/* Flags. */
+	union {
+		struct {
+			u32 validity:3;
+			u32 reserved:5;
+		} __packed;
+		u8 byte0;
+	} __packed;
+
+	/* Node parameters. */
+	u32 params:24;
+
+	/* Node ID. */
+	char type[6];
+	char model[3];
+	char manufacturer[3];
+	char plant[2];
+	char seq[12];
+	u16 tag;
+} __packed;
+
+/*
+ * Link Incident Record as defined in SA22-7202, "ESCON I/O Interface"
+ */
+
+#define LIR_IQ_CLASS_INFO		0
+#define LIR_IQ_CLASS_DEGRADED		1
+#define LIR_IQ_CLASS_NOT_OPERATIONAL	2
+
+struct lir {
+	struct {
+		u32 null:1;
+		u32 reserved:3;
+		u32 class:2;
+		u32 reserved2:2;
+	} __packed iq;
+	u32 ic:8;
+	u32 reserved:16;
+	struct node_descriptor incident_node;
+	struct node_descriptor attached_node;
+	u8 reserved2[32];
+} __packed;
+
+#define PARAMS_LEN	10	/* PARAMS=xx,xxxxxx */
+#define NODEID_LEN	35	/* NODEID=tttttt/mdl,mmm.ppssssssssssss,xxxx */
+
+/* Copy EBCDIC text, convert to ASCII and optionally add delimiter. */
+static char *store_ebcdic(char *dest, const char *src, unsigned long len,
+			  char delim)
+{
+	memcpy(dest, src, len);
+	EBCASC(dest, len);
+
+	if (delim)
+		dest[len++] = delim;
+
+	return dest + len;
+}
+
+/* Format node ID and parameters for output in LIR log message. */
+static void format_node_data(char *params, char *id, struct node_descriptor *nd)
+{
+	memset(params, 0, PARAMS_LEN);
+	memset(id, 0, NODEID_LEN);
+
+	if (nd->validity != ND_VALIDITY_VALID) {
+		strncpy(params, "n/a", PARAMS_LEN - 1);
+		strncpy(id, "n/a", NODEID_LEN - 1);
+		return;
+	}
+
+	/* PARAMS=xx,xxxxxx */
+	snprintf(params, PARAMS_LEN, "%02x,%06x", nd->byte0, nd->params);
+	/* NODEID=tttttt/mdl,mmm.ppssssssssssss,xxxx */
+	id = store_ebcdic(id, nd->type, sizeof(nd->type), '/');
+	id = store_ebcdic(id, nd->model, sizeof(nd->model), ',');
+	id = store_ebcdic(id, nd->manufacturer, sizeof(nd->manufacturer), '.');
+	id = store_ebcdic(id, nd->plant, sizeof(nd->plant), 0);
+	id = store_ebcdic(id, nd->seq, sizeof(nd->seq), ',');
+	sprintf(id, "%04X", nd->tag);
+}
+
 static void chsc_process_sei_link_incident(struct chsc_sei_nt0_area *sei_area)
 {
-	struct chp_id chpid;
-	int id;
+	struct lir *lir = (struct lir *) &sei_area->ccdf;
+	char iuparams[PARAMS_LEN], iunodeid[NODEID_LEN], auparams[PARAMS_LEN],
+	     aunodeid[NODEID_LEN];
 
-	CIO_CRW_EVENT(4, "chsc: link incident (rs=%02x, rs_id=%04x)\n",
-		      sei_area->rs, sei_area->rsid);
-	if (sei_area->rs != 4)
+	CIO_CRW_EVENT(4, "chsc: link incident (rs=%02x, rs_id=%04x, iq=%02x)\n",
+		      sei_area->rs, sei_area->rsid, sei_area->ccdf[0]);
+
+	/* Ignore NULL Link Incident Records. */
+	if (lir->iq.null)
 		return;
-	id = __get_chpid_from_lir(sei_area->ccdf);
-	if (id < 0)
-		CIO_CRW_EVENT(4, "chsc: link incident - invalid LIR\n");
-	else {
-		chp_id_init(&chpid);
-		chpid.id = id;
-		chsc_chp_offline(chpid);
+
+	/* Inform user that a link requires maintenance actions because it has
+	 * become degraded or not operational. Note that this log message is
+	 * the primary intention behind a Link Incident Record. */
+
+	format_node_data(iuparams, iunodeid, &lir->incident_node);
+	format_node_data(auparams, aunodeid, &lir->attached_node);
+
+	switch (lir->iq.class) {
+	case LIR_IQ_CLASS_DEGRADED:
+		pr_warn("Link degraded: RS=%02x RSID=%04x IC=%02x "
+			"IUPARAMS=%s IUNODEID=%s AUPARAMS=%s AUNODEID=%s\n",
+			sei_area->rs, sei_area->rsid, lir->ic, iuparams,
+			iunodeid, auparams, aunodeid);
+		break;
+	case LIR_IQ_CLASS_NOT_OPERATIONAL:
+		pr_err("Link stopped: RS=%02x RSID=%04x IC=%02x "
+		       "IUPARAMS=%s IUNODEID=%s AUPARAMS=%s AUNODEID=%s\n",
+		       sei_area->rs, sei_area->rsid, lir->ic, iuparams,
+		       iunodeid, auparams, aunodeid);
+		break;
+	default:
+		break;
 	}
 }
 
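format_node_data() builds the NODEID string by chaining store_ebcdic() calls: each converts one fixed-width EBCDIC field, appends an optional delimiter, and returns the advanced destination pointer. A userspace sketch of that pointer-chaining idea, with the EBCDIC-to-ASCII step stubbed out (EBCASC() is a kernel helper) and all field values invented for illustration:

#include <stdio.h>
#include <string.h>

/* stand-in for the kernel's EBCASC(): here the input is already ASCII */
static void fake_ebcasc(char *buf, unsigned long len) { (void)buf; (void)len; }

static char *store_field(char *dest, const char *src, unsigned long len,
			 char delim)
{
	memcpy(dest, src, len);
	fake_ebcasc(dest, len);
	if (delim)
		dest[len++] = delim;
	return dest + len;	/* the caller continues writing here */
}

int main(void)
{
	char id[64] = { 0 };
	char *p = id;

	/* NODEID=tttttt/mdl,mmm.ppssssssssssss,xxxx */
	p = store_field(p, "001750", 6, '/');
	p = store_field(p, "XP8", 3, ',');
	p = store_field(p, "HTC", 3, '.');
	p = store_field(p, "02", 2, 0);		/* plant, no delimiter */
	p = store_field(p, "000000012345", 12, ',');
	sprintf(p, "%04X", 0x4711);		/* trailing tag */
	printf("NODEID=%s\n", id);
	return 0;
}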

+ 1 - 1
drivers/s390/cio/device_ops.c

@@ -540,7 +540,7 @@ int ccw_device_stlck(struct ccw_device *cdev)
 	if (rc)
 		goto out_unlock;
 	/* Perform operation. */
-	cdev->private->state = DEV_STATE_STEAL_LOCK,
+	cdev->private->state = DEV_STATE_STEAL_LOCK;
 	ccw_device_stlck_start(cdev, &data, &buffer[0], &buffer[32]);
 	spin_unlock_irq(sch->lock);
 	/* Wait for operation to finish. */

+ 1 - 2
drivers/s390/cio/eadm_sch.c

@@ -336,7 +336,6 @@ static int eadm_subchannel_sch_event(struct subchannel *sch, int process)
 {
 	struct eadm_private *private;
 	unsigned long flags;
-	int ret = 0;
 
 	spin_lock_irqsave(sch->lock, flags);
 	if (!device_is_registered(&sch->dev))
@@ -356,7 +355,7 @@ static int eadm_subchannel_sch_event(struct subchannel *sch, int process)
 out_unlock:
 	spin_unlock_irqrestore(sch->lock, flags);
 
-	return ret;
+	return 0;
 }
 
 static struct css_device_id eadm_subchannel_ids[] = {

+ 1 - 1
drivers/s390/crypto/ap_bus.c

@@ -1372,7 +1372,7 @@ static int ap_probe_device_type(struct ap_device *ap_dev)
 
 	/* Wait for the test message to complete. */
 	for (i = 0; i < 6; i++) {
-		mdelay(300);
+		msleep(300);
 		status = __ap_recv(ap_dev->qid, &psmid, reply, 4096);
 		if (status.response_code == AP_RESPONSE_NORMAL &&
 		    psmid == 0x0102030405060708ULL)

+ 1 - 1
drivers/s390/crypto/zcrypt_pcixcc.c

@@ -182,7 +182,7 @@ static int zcrypt_pcixcc_mcl(struct ap_device *ap_dev)
 
 	/* Wait for the test message to complete. */
 	for (i = 0; i < 6; i++) {
-		mdelay(300);
+		msleep(300);
 		rc = ap_recv(ap_dev->qid, &psmid, reply, 4096);
 		if (rc == 0 && psmid == 0x0102030405060708ULL)
 			break;
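Both crypto drivers poll the same way, and both switch from mdelay() to msleep(): mdelay() busy-waits, burning a CPU for the full 300 ms of every iteration, while msleep() puts the task to sleep and lets the scheduler run something else, which is fine here because these probe paths run in process context. A userspace analogue of the resulting loop shape:

#include <stdio.h>
#include <time.h>

/* like msleep(): the task is descheduled instead of spinning */
static void sleep_ms(long ms)
{
	struct timespec ts = { .tv_sec = ms / 1000,
			       .tv_nsec = (ms % 1000) * 1000000L };
	nanosleep(&ts, NULL);
}

int main(void)
{
	int i;

	for (i = 0; i < 6; i++) {
		sleep_ms(300);
		/* poll for the reply here; give up after 6 * 300 ms */
	}
	printf("polled %d times\n", i);
	return 0;
}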

+ 1 - 4
drivers/s390/net/qeth_l2_main.c

@@ -390,10 +390,8 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev,
 	return rc;
 }
 
-static int qeth_l2_stop_card(struct qeth_card *card, int recovery_mode)
+static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode)
 {
-	int rc = 0;
-
 	QETH_DBF_TEXT(SETUP , 2, "stopcard");
 	QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
 
@@ -427,7 +425,6 @@ static int qeth_l2_stop_card(struct qeth_card *card, int recovery_mode)
 		qeth_clear_cmd_buffers(&card->read);
 		qeth_clear_cmd_buffers(&card->write);
 	}
-	return rc;
 }
 
 static int qeth_l2_process_inbound_buffer(struct qeth_card *card,

+ 1 - 4
drivers/s390/net/qeth_l3_main.c

@@ -2158,10 +2158,8 @@ static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
 	return card ;
 }
 
-static int qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
+static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
 {
-	int rc = 0;
-
 	QETH_DBF_TEXT(SETUP, 2, "stopcard");
 	QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
 
@@ -2196,7 +2194,6 @@ static int qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
 		qeth_clear_cmd_buffers(&card->read);
 		qeth_clear_cmd_buffers(&card->write);
 	}
-	return rc;
 }
 
 /*

+ 1 - 1
drivers/s390/scsi/zfcp_fsf.c

@@ -204,7 +204,7 @@ static void zfcp_fsf_status_read_link_down(struct zfcp_fsf_req *req)
 		break;
 	case FSF_STATUS_READ_SUB_FIRMWARE_UPDATE:
 		zfcp_fsf_link_down_info_eval(req, NULL);
-	};
+	}
 }
 
 static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req)

+ 4 - 3
include/linux/cpufeature.h

@@ -11,6 +11,7 @@
 
 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE
 
+#include <linux/init.h>
 #include <linux/mod_devicetable.h>
 #include <asm/cpufeature.h>
 
@@ -43,16 +44,16 @@
  * For a list of legal values for 'feature', please consult the file
  * 'asm/cpufeature.h' of your favorite architecture.
  */
-#define module_cpu_feature_match(x, __init)			\
+#define module_cpu_feature_match(x, __initfunc)			\
 static struct cpu_feature const cpu_feature_match_ ## x[] =	\
 	{ { .feature = cpu_feature(x) }, { } };			\
 MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x);		\
 								\
-static int cpu_feature_match_ ## x ## _init(void)		\
+static int __init cpu_feature_match_ ## x ## _init(void)	\
 {								\
 	if (!cpu_have_feature(cpu_feature(x)))			\
 		return -ENODEV;					\
-	return __init();					\
+	return __initfunc();					\
 }								\
 module_init(cpu_feature_match_ ## x ## _init)
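The parameter rename here matters because __init is itself a kernel annotation: with a macro parameter named __init, the macro body could never apply the real __init section attribute to the generated initcall without it being substituted by the caller's function name. Renaming the parameter to __initfunc frees the identifier, which is why the generated function can now be declared "static int __init ...". A typical caller, sketched after the way the s390 crypto modules in this pull adopt the macro (the module body is illustrative):

#include <linux/module.h>
#include <linux/cpufeature.h>

static int __init demo_init(void)
{
	pr_info("CPU feature present, demo loaded\n");
	return 0;
}

/*
 * Registers demo_init() as the module initcall, but only runs it (and
 * auto-loads the module via the device table) when the CPU advertises
 * the named feature; MSA is used here purely as an illustrative name.
 */
module_cpu_feature_match(MSA, demo_init);

MODULE_LICENSE("GPL");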