
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Martin Schwidefsky:
 "There are a couple of new things for s390 with this merge request:

   - a new scheduling domain "drawer" is added to reflect the unusual
     topology found on z13 machines.  Performance tests showed up to 8
     percent gain with the additional domain.

   - the new crc-32 checksum crypto module uses the vector-galois-field
     multiply and sum SIMD instruction to speed up crc-32 and crc-32c.

   - proper __ro_after_init support, this requires RO_AFTER_INIT_DATA in
     the generic vmlinux.lds linker script definitions.

   - kcov instrumentation support.  A prerequisite for that is the
     inline assembly basic block cleanup, which is the reason for the
     net/iucv/iucv.c change.

   - support for 2GB pages is added to the hugetlbfs backend.

  Then there are two removals:

   - the oprofile hardware sampling support is dead code and is removed.
     The oprofile user space uses the perf interface nowadays.

   - the ETR clock synchronization is removed, this has been superseded
     by the STP clock synchronization.  And it always has been
     "interesting" code..

  And the usual bug fixes and cleanups"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (82 commits)
  s390/pci: Delete an unnecessary check before the function call "pci_dev_put"
  s390/smp: clean up a condition
  s390/cio/chp : Remove deprecated create_singlethread_workqueue
  s390/chsc: improve channel path descriptor determination
  s390/chsc: sanitize fmt check for chp_desc determination
  s390/cio: make fmt1 channel path descriptor optional
  s390/chsc: fix ioctl CHSC_INFO_CU command
  s390/cio/device_ops: fix kernel doc
  s390/cio: allow to reset channel measurement block
  s390/console: Make preferred console handling more consistent
  s390/mm: fix gmap tlb flush issues
  s390/mm: add support for 2GB hugepages
  s390: have unique symbol for __switch_to address
  s390/cpuinfo: show maximum thread id
  s390/ptrace: clarify bits in the per_struct
  s390: stack address vs thread_info
  s390: remove pointless load within __switch_to
  s390: enable kcov support
  s390/cpumf: use basic block for ecctr inline assembly
  s390/hypfs: use basic block for diag inline assembly
  ...
Linus Torvalds · 9 years ago · commit 015cd867e5
100 changed files with 2642 additions and 3883 deletions
  1. +32 -8	Documentation/cputopology.txt
  2. +0 -2	Documentation/kernel-parameters.txt
  3. +1 -1	Documentation/s390/s390dbf.txt
  4. +6 -10	arch/s390/Kconfig
  5. +2 -0	arch/s390/boot/compressed/Makefile
  6. +1 -0	arch/s390/configs/default_defconfig
  7. +1 -0	arch/s390/configs/gcov_defconfig
  8. +1 -0	arch/s390/configs/performance_defconfig
  9. +3 -0	arch/s390/crypto/Makefile
  10. +310 -0	arch/s390/crypto/crc32-vx.c
  11. +207 -0	arch/s390/crypto/crc32be-vx.S
  12. +268 -0	arch/s390/crypto/crc32le-vx.S
  13. +4 -0	arch/s390/defconfig
  14. +8 -6	arch/s390/hypfs/hypfs_diag.c
  15. +1 -1	arch/s390/hypfs/hypfs_vm.c
  16. +1 -4	arch/s390/include/asm/cache.h
  17. +1 -1	arch/s390/include/asm/cio.h
  18. +14 -3	arch/s390/include/asm/cpu_mf.h
  19. +1 -1	arch/s390/include/asm/diag.h
  20. +0 -261	arch/s390/include/asm/etr.h
  21. +1 -1	arch/s390/include/asm/fcx.h
  22. +75 -0	arch/s390/include/asm/fpu/api.h
  23. +10 -0	arch/s390/include/asm/fpu/types.h
  24. +4 -1	arch/s390/include/asm/hugetlb.h
  25. +5 -5	arch/s390/include/asm/ipl.h
  26. +2 -5	arch/s390/include/asm/irq.h
  27. +1 -0	arch/s390/include/asm/jump_label.h
  28. +2 -2	arch/s390/include/asm/kprobes.h
  29. +0 -28	arch/s390/include/asm/mathemu.h
  30. +1 -1	arch/s390/include/asm/mmu.h
  31. +5 -10	arch/s390/include/asm/mmu_context.h
  32. +5 -3	arch/s390/include/asm/page.h
  33. +0 -12	arch/s390/include/asm/perf_event.h
  34. +223 -49	arch/s390/include/asm/pgtable.h
  35. +16 -1	arch/s390/include/asm/processor.h
  36. +1 -0	arch/s390/include/asm/sections.h
  37. +4 -0	arch/s390/include/asm/setup.h
  38. +0 -142	arch/s390/include/asm/sfp-machine.h
  39. +0 -67	arch/s390/include/asm/sfp-util.h
  40. +14 -3	arch/s390/include/asm/sigp.h
  41. +51 -0	arch/s390/include/asm/stp.h
  42. +65 -1	arch/s390/include/asm/timex.h
  43. +11 -20	arch/s390/include/asm/tlbflush.h
  44. +4 -0	arch/s390/include/asm/topology.h
  45. +61 -4	arch/s390/include/asm/uaccess.h
  46. +3 -3	arch/s390/include/uapi/asm/ptrace.h
  47. +4 -1	arch/s390/kernel/Makefile
  48. +1 -6	arch/s390/kernel/cache.c
  49. +0 -1	arch/s390/kernel/dis.c
  50. +4 -8	arch/s390/kernel/dumpstack.c
  51. +22 -0	arch/s390/kernel/early.c
  52. +10 -1	arch/s390/kernel/entry.S
  53. +249 -0	arch/s390/kernel/fpu.c
  54. +14 -11	arch/s390/kernel/ipl.c
  55. +4 -3	arch/s390/kernel/irq.c
  56. +26 -29	arch/s390/kernel/machine_kexec.c
  57. +2 -11	arch/s390/kernel/nmi.c
  58. +0 -5	arch/s390/kernel/perf_cpum_sf.c
  59. +0 -30	arch/s390/kernel/perf_event.c
  60. +83 -31	arch/s390/kernel/processor.c
  61. +30 -10	arch/s390/kernel/setup.c
  62. +4 -14	arch/s390/kernel/smp.c
  63. +38 -25	arch/s390/kernel/sysinfo.c
  64. +54 -999	arch/s390/kernel/time.c
  65. +31 -58	arch/s390/kernel/topology.c
  66. +2 -0	arch/s390/kernel/vdso32/Makefile
  67. +2 -0	arch/s390/kernel/vdso64/Makefile
  68. +19 -2	arch/s390/kernel/vmlinux.lds.S
  69. +1 -1	arch/s390/kvm/kvm-s390.c
  70. +26 -24	arch/s390/lib/string.c
  71. +3 -3	arch/s390/lib/uaccess.c
  72. +1 -1	arch/s390/mm/dump_pagetables.c
  73. +1 -1	arch/s390/mm/fault.c
  74. +5 -2	arch/s390/mm/gmap.c
  75. +44 -1	arch/s390/mm/gup.c
  76. +90 -39	arch/s390/mm/hugetlbpage.c
  77. +6 -7	arch/s390/mm/init.c
  78. +9 -4	arch/s390/mm/page-states.c
  79. +233 -34	arch/s390/mm/pageattr.c
  80. +71 -22	arch/s390/mm/pgtable.c
  81. +33 -40	arch/s390/mm/vmem.c
  82. +20 -5	arch/s390/numa/mode_emu.c
  83. +0 -1	arch/s390/oprofile/Makefile
  84. +0 -1178	arch/s390/oprofile/hwsampler.c
  85. +0 -63	arch/s390/oprofile/hwsampler.h
  86. +0 -489	arch/s390/oprofile/init.c
  87. +0 -21	arch/s390/oprofile/op_counter.h
  88. +3 -1	arch/s390/pci/pci_dma.c
  89. +1 -2	arch/s390/pci/pci_event.c
  90. +11 -1	arch/s390/pci/pci_insn.c
  91. +13 -0	drivers/base/topology.c
  92. +13 -0	drivers/crypto/Kconfig
  93. +2 -2	drivers/s390/block/dasd_eckd.c
  94. +3 -12	drivers/s390/char/keyboard.c
  95. +2 -1	drivers/s390/char/sclp_con.c
  96. +1 -1	drivers/s390/char/sclp_config.c
  97. +1 -1	drivers/s390/char/zcore.c
  98. +9 -13	drivers/s390/cio/chp.c
  99. +1 -1	drivers/s390/cio/chp.h
  100. +14 -11	drivers/s390/cio/chsc.c

+ 32 - 8
Documentation/cputopology.txt

@@ -20,48 +20,70 @@ to /proc/cpuinfo output of some architectures:
 	identifier (rather than the kernel's).	The actual value is
 	architecture and platform dependent.
 
-4) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
+4) /sys/devices/system/cpu/cpuX/topology/drawer_id:
+
+	the drawer ID of cpuX. Typically it is the hardware platform's
+	identifier (rather than the kernel's).	The actual value is
+	architecture and platform dependent.
+
+5) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
 
 	internal kernel map of cpuX's hardware threads within the same
 	core as cpuX.
 
-5) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
+6) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
 
 	human-readable list of cpuX's hardware threads within the same
 	core as cpuX.
 
-6) /sys/devices/system/cpu/cpuX/topology/core_siblings:
+7) /sys/devices/system/cpu/cpuX/topology/core_siblings:
 
 	internal kernel map of cpuX's hardware threads within the same
 	physical_package_id.
 
-7) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
+8) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
 
 	human-readable list of cpuX's hardware threads within the same
 	physical_package_id.
 
-8) /sys/devices/system/cpu/cpuX/topology/book_siblings:
+9) /sys/devices/system/cpu/cpuX/topology/book_siblings:
 
 	internal kernel map of cpuX's hardware threads within the same
 	book_id.
 
-9) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
+10) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
 
 	human-readable list of cpuX's hardware threads within the same
 	book_id.
 
+11) /sys/devices/system/cpu/cpuX/topology/drawer_siblings:
+
+	internal kernel map of cpuX's hardware threads within the same
+	drawer_id.
+
+12) /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list:
+
+	human-readable list of cpuX's hardware threads within the same
+	drawer_id.
+
 To implement it in an architecture-neutral way, a new source file,
-drivers/base/topology.c, is to export the 6 or 9 attributes. The three book
-related sysfs files will only be created if CONFIG_SCHED_BOOK is selected.
+drivers/base/topology.c, is to export the 6 to 12 attributes. The book
+and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK
+and CONFIG_SCHED_DRAWER are selected.
+
+CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390,
+where they reflect the cpu and cache hierarchy.
 
 For an architecture to support this feature, it must define some of
 these macros in include/asm-XXX/topology.h:
 #define topology_physical_package_id(cpu)
 #define topology_core_id(cpu)
 #define topology_book_id(cpu)
+#define topology_drawer_id(cpu)
 #define topology_sibling_cpumask(cpu)
 #define topology_core_cpumask(cpu)
 #define topology_book_cpumask(cpu)
+#define topology_drawer_cpumask(cpu)
 
 The type of **_id macros is int.
 The type of **_cpumask macros is (const) struct cpumask *. The latter
@@ -78,6 +100,8 @@ not defined by include/asm-XXX/topology.h:
 
 For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
 default definitions for topology_book_id() and topology_book_cpumask().
+For architectures that don't support drawers (CONFIG_SCHED_DRAWER) there are
+no default definitions for topology_drawer_id() and topology_drawer_cpumask().
 
 Additionally, CPU topology information is provided under
 /sys/devices/system/cpu and includes these files.  The internal
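For a quick user-space check of the attributes documented above, a minimal C sketch (an illustration, not part of this commit; the helper name is invented) can read the new files:

#include <stdio.h>

/* Read one topology attribute of a cpu from sysfs; returns -1 if the
 * attribute does not exist, e.g. drawer_id without CONFIG_SCHED_DRAWER. */
static int read_topology_id(int cpu, const char *attr)
{
	char path[128];
	int id = -1;
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/topology/%s", cpu, attr);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%d", &id) != 1)
		id = -1;
	fclose(f);
	return id;
}

int main(void)
{
	printf("cpu0 book_id:   %d\n", read_topology_id(0, "book_id"));
	printf("cpu0 drawer_id: %d\n", read_topology_id(0, "drawer_id"));
	return 0;
}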

+ 0 - 2
Documentation/kernel-parameters.txt

@@ -2794,8 +2794,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			timer: [X86] Force use of architectural NMI
 				timer mode (see also oprofile.timer
 				for generic hr timer mode)
-				[s390] Force legacy basic mode sampling
-                                (report cpu_type "timer")
 
 	oops=panic	Always panic on oopses. Default is to just kill the
 			process, but there is a small probability of

+ 1 - 1
Documentation/s390/s390dbf.txt

@@ -405,7 +405,7 @@ Example:
 
 > ls /sys/kernel/debug/s390dbf/dasd
 flush  hex_ascii  level pages raw
-> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort +1
+> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
 00 00974733272:680099 2 - 02 0006ad7e  07 ea 4a 90 | ....
 00 00974733272:682210 2 - 02 0006ade6  46 52 45 45 | FREE
 00 00974733272:682213 2 - 02 0006adf6  07 ea 4a 90 | ....

+ 6 - 10
arch/s390/Kconfig

@@ -72,6 +72,7 @@ config S390
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_KCOV
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK
@@ -163,6 +164,7 @@ config S390
 	select NO_BOOTMEM
 	select OLD_SIGACTION
 	select OLD_SIGSUSPEND3
+	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
 	select TTY
 	select VIRT_CPU_ACCOUNTING
@@ -477,6 +479,9 @@ config SCHED_MC
 config SCHED_BOOK
 	def_bool n
 
+config SCHED_DRAWER
+	def_bool n
+
 config SCHED_TOPOLOGY
 	def_bool y
 	prompt "Topology scheduler support"
@@ -484,6 +489,7 @@ config SCHED_TOPOLOGY
 	select SCHED_SMT
 	select SCHED_MC
 	select SCHED_BOOK
+	select SCHED_DRAWER
 	help
 	  Topology scheduler support improves the CPU scheduler's decision
 	  making when dealing with machines that have multi-threading,
@@ -605,16 +611,6 @@ config PCI_NR_FUNCTIONS
 	  This allows you to specify the maximum number of PCI functions which
 	  this kernel will support.
 
-config PCI_NR_MSI
-	int "Maximum number of MSI interrupts (64-32768)"
-	range 64 32768
-	default "256"
-	help
-	  This defines the number of virtual interrupts the kernel will
-	  provide for MSI interrupts. If you configure your system to have
-	  too few drivers will fail to allocate MSI interrupts for all
-	  PCI devices.
-
 source "drivers/pci/Kconfig"
 
 endif	# PCI

+ 2 - 0
arch/s390/boot/compressed/Makefile

@@ -4,6 +4,8 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
+KCOV_INSTRUMENT := n
+
 targets	:= vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
 targets += misc.o piggy.o sizes.h head.o

+ 1 - 0
arch/s390/configs/default_defconfig

@@ -678,6 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_CRC32_S390=m
 CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m

+ 1 - 0
arch/s390/configs/gcov_defconfig

@@ -616,6 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_CRC32_S390=m
 CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m

+ 1 - 0
arch/s390/configs/performance_defconfig

@@ -615,6 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_CRC32_S390=m
 CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m

+ 3 - 0
arch/s390/crypto/Makefile

@@ -9,3 +9,6 @@ obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
 obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
 obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
+obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
+
+crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o

+ 310 - 0
arch/s390/crypto/crc32-vx.c

@@ -0,0 +1,310 @@
+/*
+ * Crypto-API module for CRC-32 algorithms implemented with the
+ * z/Architecture Vector Extension Facility.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT	"crc32-vx"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+#include <asm/fpu/api.h>
+
+
+#define CRC32_BLOCK_SIZE	1
+#define CRC32_DIGEST_SIZE	4
+
+#define VX_MIN_LEN		64
+#define VX_ALIGNMENT		16L
+#define VX_ALIGN_MASK		(VX_ALIGNMENT - 1)
+
+struct crc_ctx {
+	u32 key;
+};
+
+struct crc_desc_ctx {
+	u32 crc;
+};
+
+/* Prototypes for functions in assembly files */
+u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+
+/*
+ * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
+ *
+ * Creates a function to perform a particular CRC-32 computation. Depending
+ * on the message buffer, the hardware-accelerated or software implementation
+ * is used.   Note that the message buffer is aligned to improve fetch
+ * operations of VECTOR LOAD MULTIPLE instructions.
+ *
+ */
+#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw)		    \
+	static u32 __pure ___fname(u32 crc,				    \
+				unsigned char const *data, size_t datalen)  \
+	{								    \
+		struct kernel_fpu vxstate;				    \
+		unsigned long prealign, aligned, remaining;		    \
+									    \
+		if ((unsigned long)data & VX_ALIGN_MASK) {		    \
+			prealign = VX_ALIGNMENT -			    \
+				  ((unsigned long)data & VX_ALIGN_MASK);    \
+			datalen -= prealign;				    \
+			crc = ___crc32_sw(crc, data, prealign);		    \
+			data = (void *)((unsigned long)data + prealign);    \
+		}							    \
+									    \
+		if (datalen < VX_MIN_LEN)				    \
+			return ___crc32_sw(crc, data, datalen);		    \
+									    \
+		aligned = datalen & ~VX_ALIGN_MASK;			    \
+		remaining = datalen & VX_ALIGN_MASK;			    \
+									    \
+		kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);		    \
+		crc = ___crc32_vx(crc, data, aligned);			    \
+		kernel_fpu_end(&vxstate);				    \
+									    \
+		if (remaining)						    \
+			crc = ___crc32_sw(crc, data + aligned, remaining);  \
+									    \
+		return crc;						    \
+	}
+
+DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le)
+DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be)
+DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le)
+
+
+static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm)
+{
+	struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = 0;
+	return 0;
+}
+
+static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm)
+{
+	struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = ~0;
+	return 0;
+}
+
+static int crc32_vx_init(struct shash_desc *desc)
+{
+	struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
+	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = mctx->key;
+	return 0;
+}
+
+static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
+			   unsigned int newkeylen)
+{
+	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
+
+	if (newkeylen != sizeof(mctx->key)) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	mctx->key = le32_to_cpu(*(__le32 *)newkey);
+	return 0;
+}
+
+static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
+			     unsigned int newkeylen)
+{
+	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
+
+	if (newkeylen != sizeof(mctx->key)) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	mctx->key = be32_to_cpu(*(__be32 *)newkey);
+	return 0;
+}
+
+static int crc32le_vx_final(struct shash_desc *desc, u8 *out)
+{
+	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	*(__le32 *)out = cpu_to_le32p(&ctx->crc);
+	return 0;
+}
+
+static int crc32be_vx_final(struct shash_desc *desc, u8 *out)
+{
+	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	*(__be32 *)out = cpu_to_be32p(&ctx->crc);
+	return 0;
+}
+
+static int crc32c_vx_final(struct shash_desc *desc, u8 *out)
+{
+	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	/*
+	 * Perform a final XOR with 0xFFFFFFFF to be in sync
+	 * with the generic crc32c shash implementation.
+	 */
+	*(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
+	return 0;
+}
+
+static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len,
+			      u8 *out)
+{
+	*(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len));
+	return 0;
+}
+
+static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len,
+			      u8 *out)
+{
+	*(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len));
+	return 0;
+}
+
+static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len,
+			     u8 *out)
+{
+	/*
+	 * Perform a final XOR with 0xFFFFFFFF to be in sync
+	 * with the generic crc32c shash implementation.
+	 */
+	*(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len));
+	return 0;
+}
+
+
+#define CRC32_VX_FINUP(alg, func)					      \
+	static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data,  \
+				   unsigned int datalen, u8 *out)	      \
+	{								      \
+		return __ ## alg ## _vx_finup(shash_desc_ctx(desc),	      \
+					      data, datalen, out);	      \
+	}
+
+CRC32_VX_FINUP(crc32le, crc32_le_vx)
+CRC32_VX_FINUP(crc32be, crc32_be_vx)
+CRC32_VX_FINUP(crc32c, crc32c_le_vx)
+
+#define CRC32_VX_DIGEST(alg, func)					      \
+	static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \
+				     unsigned int len, u8 *out)		      \
+	{								      \
+		return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm),    \
+					      data, len, out);		      \
+	}
+
+CRC32_VX_DIGEST(crc32le, crc32_le_vx)
+CRC32_VX_DIGEST(crc32be, crc32_be_vx)
+CRC32_VX_DIGEST(crc32c, crc32c_le_vx)
+
+#define CRC32_VX_UPDATE(alg, func)					      \
+	static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \
+				     unsigned int datalen)		      \
+	{								      \
+		struct crc_desc_ctx *ctx = shash_desc_ctx(desc);	      \
+		ctx->crc = func(ctx->crc, data, datalen);		      \
+		return 0;						      \
+	}
+
+CRC32_VX_UPDATE(crc32le, crc32_le_vx)
+CRC32_VX_UPDATE(crc32be, crc32_be_vx)
+CRC32_VX_UPDATE(crc32c, crc32c_le_vx)
+
+
+static struct shash_alg crc32_vx_algs[] = {
+	/* CRC-32 LE */
+	{
+		.init		=	crc32_vx_init,
+		.setkey		=	crc32_vx_setkey,
+		.update		=	crc32le_vx_update,
+		.final		=	crc32le_vx_final,
+		.finup		=	crc32le_vx_finup,
+		.digest		=	crc32le_vx_digest,
+		.descsize	=	sizeof(struct crc_desc_ctx),
+		.digestsize	=	CRC32_DIGEST_SIZE,
+		.base		=	{
+			.cra_name	 = "crc32",
+			.cra_driver_name = "crc32-vx",
+			.cra_priority	 = 200,
+			.cra_blocksize	 = CRC32_BLOCK_SIZE,
+			.cra_ctxsize	 = sizeof(struct crc_ctx),
+			.cra_module	 = THIS_MODULE,
+			.cra_init	 = crc32_vx_cra_init_zero,
+		},
+	},
+	/* CRC-32 BE */
+	{
+		.init		=	crc32_vx_init,
+		.setkey		=	crc32be_vx_setkey,
+		.update		=	crc32be_vx_update,
+		.final		=	crc32be_vx_final,
+		.finup		=	crc32be_vx_finup,
+		.digest		=	crc32be_vx_digest,
+		.descsize	=	sizeof(struct crc_desc_ctx),
+		.digestsize	=	CRC32_DIGEST_SIZE,
+		.base		=	{
+			.cra_name	 = "crc32be",
+			.cra_driver_name = "crc32be-vx",
+			.cra_priority	 = 200,
+			.cra_blocksize	 = CRC32_BLOCK_SIZE,
+			.cra_ctxsize	 = sizeof(struct crc_ctx),
+			.cra_module	 = THIS_MODULE,
+			.cra_init	 = crc32_vx_cra_init_zero,
+		},
+	},
+	/* CRC-32C LE */
+	{
+		.init		=	crc32_vx_init,
+		.setkey		=	crc32_vx_setkey,
+		.update		=	crc32c_vx_update,
+		.final		=	crc32c_vx_final,
+		.finup		=	crc32c_vx_finup,
+		.digest		=	crc32c_vx_digest,
+		.descsize	=	sizeof(struct crc_desc_ctx),
+		.digestsize	=	CRC32_DIGEST_SIZE,
+		.base		=	{
+			.cra_name	 = "crc32c",
+			.cra_driver_name = "crc32c-vx",
+			.cra_priority	 = 200,
+			.cra_blocksize	 = CRC32_BLOCK_SIZE,
+			.cra_ctxsize	 = sizeof(struct crc_ctx),
+			.cra_module	 = THIS_MODULE,
+			.cra_init	 = crc32_vx_cra_init_invert,
+		},
+	},
+};
+
+
+static int __init crc_vx_mod_init(void)
+{
+	return crypto_register_shashes(crc32_vx_algs,
+				       ARRAY_SIZE(crc32_vx_algs));
+}
+
+static void __exit crc_vx_mod_exit(void)
+{
+	crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
+}
+
+module_cpu_feature_match(VXRS, crc_vx_mod_init);
+module_exit(crc_vx_mod_exit);
+
+MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS_CRYPTO("crc32");
+MODULE_ALIAS_CRYPTO("crc32-vx");
+MODULE_ALIAS_CRYPTO("crc32c");
+MODULE_ALIAS_CRYPTO("crc32c-vx");
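The module registers plain shash algorithms, so any kernel code can reach the accelerated CRC through the usual crypto API. A hedged in-kernel sketch, not from this commit (note that shash_desc still carries a flags field in this kernel generation):

#include <crypto/hash.h>
#include <linux/err.h>

/* Compute a seeded CRC-32 via the shash API; with this module loaded,
 * "crc32" resolves to the "crc32-vx" driver (cra_priority 200). */
static int crc32_via_shash(const u8 *data, unsigned int len, u32 *out)
{
	struct crypto_shash *tfm;
	u32 seed = ~0;
	int ret;

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	/* the 4-byte "key" is the CRC seed, little-endian per setkey above */
	ret = crypto_shash_setkey(tfm, (u8 *)&seed, sizeof(seed));
	if (!ret) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		ret = crypto_shash_digest(desc, data, len, (u8 *)out);
	}
	crypto_free_shash(tfm);
	return ret;
}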

+ 207 - 0
arch/s390/crypto/crc32be-vx.S

@@ -0,0 +1,207 @@
+/*
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
+ *
+ * Use the z/Architecture Vector Extension Facility to accelerate the
+ * computing of CRC-32 checksums.
+ *
+ * This CRC-32 implementation algorithm processes the most-significant
+ * bit first (BE).
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/vx-insn.h>
+
+/* Vector register range containing CRC-32 constants */
+#define CONST_R1R2		%v9
+#define CONST_R3R4		%v10
+#define CONST_R5		%v11
+#define CONST_R6		%v12
+#define CONST_RU_POLY		%v13
+#define CONST_CRC_POLY		%v14
+
+.data
+.align 8
+
+/*
+ * The CRC-32 constant block contains reduction constants to fold and
+ * process particular chunks of the input data stream in parallel.
+ *
+ * For the CRC-32 variants, the constants are precomputed according to
+ * these definitions:
+ *
+ *	R1 = x4*128+64 mod P(x)
+ *	R2 = x4*128    mod P(x)
+ *	R3 = x128+64   mod P(x)
+ *	R4 = x128      mod P(x)
+ *	R5 = x96       mod P(x)
+ *	R6 = x64       mod P(x)
+ *
+ *	Barret reduction constant, u, is defined as floor(x**64 / P(x)).
+ *
+ *	where P(x) is the polynomial in the normal domain and the P'(x) is the
+ *	polynomial in the reversed (bitreflected) domain.
+ *
+ * Note that the constant definitions below are extended in order to compute
+ * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
+ * The rightmost doubleword can be 0 to prevent contribution to the result or
+ * can be multiplied by 1 to perform an XOR without the need for a separate
+ * VECTOR EXCLUSIVE OR instruction.
+ *
+ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
+ *
+ *	P(x)  = 0x04C11DB7
+ *	P'(x) = 0xEDB88320
+ */
+
+.Lconstants_CRC_32_BE:
+	.quad		0x08833794c, 0x0e6228b11	# R1, R2
+	.quad		0x0c5b9cd4c, 0x0e8a45605	# R3, R4
+	.quad		0x0f200aa66, 1 << 32		# R5, x32
+	.quad		0x0490d678d, 1			# R6, 1
+	.quad		0x104d101df, 0			# u
+	.quad		0x104C11DB7, 0			# P(x)
+
+.previous
+
+.text
+/*
+ * The CRC-32 function(s) use these calling conventions:
+ *
+ * Parameters:
+ *
+ *	%r2:	Initial CRC value, typically ~0; and final CRC (return) value.
+ *	%r3:	Input buffer pointer, performance might be improved if the
+ *		buffer is on a doubleword boundary.
+ *	%r4:	Length of the buffer, must be 64 bytes or greater.
+ *
+ * Register usage:
+ *
+ *	%r5:	CRC-32 constant pool base pointer.
+ *	V0:	Initial CRC value and intermediate constants and results.
+ *	V1..V4:	Data for CRC computation.
+ *	V5..V8:	Next data chunks that are fetched from the input buffer.
+ *
+ *	V9..V14: CRC-32 constants.
+ */
+ENTRY(crc32_be_vgfm_16)
+	/* Load CRC-32 constants */
+	larl	%r5,.Lconstants_CRC_32_BE
+	VLM	CONST_R1R2,CONST_CRC_POLY,0,%r5
+
+	/* Load the initial CRC value into the leftmost word of V0. */
+	VZERO	%v0
+	VLVGF	%v0,%r2,0
+
+	/* Load a 64-byte data chunk and XOR with CRC */
+	VLM	%v1,%v4,0,%r3		/* 64-bytes into V1..V4 */
+	VX	%v1,%v0,%v1		/* V1 ^= CRC */
+	aghi	%r3,64			/* BUF = BUF + 64 */
+	aghi	%r4,-64			/* LEN = LEN - 64 */
+
+	/* Check remaining buffer size and jump to proper folding method */
+	cghi	%r4,64
+	jl	.Lless_than_64bytes
+
+.Lfold_64bytes_loop:
+	/* Load the next 64-byte data chunk into V5 to V8 */
+	VLM	%v5,%v8,0,%r3
+
+	/*
+	 * Perform a GF(2) multiplication of the doublewords in V1 with
+	 * the reduction constants in V0.  The intermediate result is
+	 * then folded (accumulated) with the next data chunk in V5 and
+	 * stored in V1.  Repeat this step for the register contents
+	 * in V2, V3, and V4 respectively.
+	 */
+	VGFMAG	%v1,CONST_R1R2,%v1,%v5
+	VGFMAG	%v2,CONST_R1R2,%v2,%v6
+	VGFMAG	%v3,CONST_R1R2,%v3,%v7
+	VGFMAG	%v4,CONST_R1R2,%v4,%v8
+
+	/* Adjust buffer pointer and length for next loop */
+	aghi	%r3,64			/* BUF = BUF + 64 */
+	aghi	%r4,-64			/* LEN = LEN - 64 */
+
+	cghi	%r4,64
+	jnl	.Lfold_64bytes_loop
+
+.Lless_than_64bytes:
+	/* Fold V1 to V4 into a single 128-bit value in V1 */
+	VGFMAG	%v1,CONST_R3R4,%v1,%v2
+	VGFMAG	%v1,CONST_R3R4,%v1,%v3
+	VGFMAG	%v1,CONST_R3R4,%v1,%v4
+
+	/* Check whether to continue with 64-bit folding */
+	cghi	%r4,16
+	jl	.Lfinal_fold
+
+.Lfold_16bytes_loop:
+
+	VL	%v2,0,,%r3		/* Load next data chunk */
+	VGFMAG	%v1,CONST_R3R4,%v1,%v2	/* Fold next data chunk */
+
+	/* Adjust buffer pointer and size for folding next data chunk */
+	aghi	%r3,16
+	aghi	%r4,-16
+
+	/* Process remaining data chunks */
+	cghi	%r4,16
+	jnl	.Lfold_16bytes_loop
+
+.Lfinal_fold:
+	/*
+	 * The R5 constant is used to fold a 128-bit value into a 96-bit value
+	 * that is XORed with the next 96-bit input data chunk.  To use a single
+	 * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to
+	 * form an intermediate 96-bit value (with appended zeros) which is then
+	 * XORed with the intermediate reduction result.
+	 */
+	VGFMG	%v1,CONST_R5,%v1
+
+	/*
+	 * Further reduce the remaining 96-bit value to a 64-bit value using a
+	 * single VGFMG, the rightmost doubleword is multiplied with 0x1. The
+	 * intermediate result is then XORed with the product of the leftmost
+	 * doubleword with R6.	The result is a 64-bit value and is subject to
+	 * the Barret reduction.
+	 */
+	VGFMG	%v1,CONST_R6,%v1
+
+	/*
+	 * The input values to the Barret reduction are the degree-63 polynomial
+	 * in V1 (R(x)), degree-32 generator polynomial, and the reduction
+	 * constant u.	The Barret reduction result is the CRC value of R(x) mod
+	 * P(x).
+	 *
+	 * The Barret reduction algorithm is defined as:
+	 *
+	 *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
+	 *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
+	 *    3. C(x)  = R(x) XOR T2(x) mod x^32
+	 *
+	 * Note: To compensate the division by x^32, use the vector unpack
+	 * instruction to move the leftmost word into the leftmost doubleword
+	 * of the vector register.  The rightmost doubleword is multiplied
+	 * with zero to not contribute to the intermediate results.
+	 */
+
+	/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
+	VUPLLF	%v2,%v1
+	VGFMG	%v2,CONST_RU_POLY,%v2
+
+	/*
+	 * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in
+	 * V2 and XOR the intermediate result, T2(x),  with the value in V1.
+	 * The final result is in the rightmost word of V2.
+	 */
+	VUPLLF	%v2,%v2
+	VGFMAG	%v2,CONST_CRC_POLY,%v2,%v1
+
+.Ldone:
+	VLGVF	%r2,%v2,3
+	br	%r14
+
+.previous
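The vector routine is easiest to validate against a bit-serial reference; this C version (added here for illustration, not commit code) computes the same MSB-first CRC with P(x) = 0x04C11DB7:

#include <stdint.h>
#include <stddef.h>

/* Bit-serial CRC-32, most-significant bit first, P(x) = 0x04C11DB7.
 * For aligned buffers of 64 bytes or more this must agree with
 * crc32_be_vgfm_16(). */
static uint32_t crc32_be_ref(uint32_t crc, const unsigned char *buf,
			     size_t len)
{
	while (len--) {
		crc ^= (uint32_t)*buf++ << 24;
		for (int i = 0; i < 8; i++)
			crc = (crc & 0x80000000) ?
			      (crc << 1) ^ 0x04C11DB7 : crc << 1;
	}
	return crc;
}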

+ 268 - 0
arch/s390/crypto/crc32le-vx.S

@@ -0,0 +1,268 @@
+/*
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
+ *
+ * Use the z/Architecture Vector Extension Facility to accelerate the
+ * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet
+ * and Castagnoli.
+ *
+ * This CRC-32 implementation algorithm is bitreflected and processes
+ * the least-significant bit first (Little-Endian).
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/vx-insn.h>
+
+/* Vector register range containing CRC-32 constants */
+#define CONST_PERM_LE2BE	%v9
+#define CONST_R2R1		%v10
+#define CONST_R4R3		%v11
+#define CONST_R5		%v12
+#define CONST_RU_POLY		%v13
+#define CONST_CRC_POLY		%v14
+
+.data
+.align 8
+
+/*
+ * The CRC-32 constant block contains reduction constants to fold and
+ * process particular chunks of the input data stream in parallel.
+ *
+ * For the CRC-32 variants, the constants are precomputed according to
+ * these definitions:
+ *
+ *	R1 = [(x4*128+32 mod P'(x) << 32)]' << 1
+ *	R2 = [(x4*128-32 mod P'(x) << 32)]' << 1
+ *	R3 = [(x128+32 mod P'(x) << 32)]'   << 1
+ *	R4 = [(x128-32 mod P'(x) << 32)]'   << 1
+ *	R5 = [(x64 mod P'(x) << 32)]'	    << 1
+ *	R6 = [(x32 mod P'(x) << 32)]'	    << 1
+ *
+ *	The bitreflected Barret reduction constant, u', is defined as
+ *	the bit reversal of floor(x**64 / P(x)).
+ *
+ *	where P(x) is the polynomial in the normal domain and the P'(x) is the
+ *	polynomial in the reversed (bitreflected) domain.
+ *
+ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
+ *
+ *	P(x)  = 0x04C11DB7
+ *	P'(x) = 0xEDB88320
+ *
+ * CRC-32C (Castagnoli) polynomials:
+ *
+ *	P(x)  = 0x1EDC6F41
+ *	P'(x) = 0x82F63B78
+ */
+
+.Lconstants_CRC_32_LE:
+	.octa		0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
+	.quad		0x1c6e41596, 0x154442bd4		# R2, R1
+	.quad		0x0ccaa009e, 0x1751997d0		# R4, R3
+	.octa		0x163cd6124				# R5
+	.octa		0x1F7011641				# u'
+	.octa		0x1DB710641				# P'(x) << 1
+
+.Lconstants_CRC_32C_LE:
+	.octa		0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
+	.quad		0x09e4addf8, 0x740eef02			# R2, R1
+	.quad		0x14cd00bd6, 0xf20c0dfe			# R4, R3
+	.octa		0x0dd45aab8				# R5
+	.octa		0x0dea713f1				# u'
+	.octa		0x105ec76f0				# P'(x) << 1
+
+.previous
+
+
+.text
+
+/*
+ * The CRC-32 functions use these calling conventions:
+ *
+ * Parameters:
+ *
+ *	%r2:	Initial CRC value, typically ~0; and final CRC (return) value.
+ *	%r3:	Input buffer pointer, performance might be improved if the
+ *		buffer is on a doubleword boundary.
+ *	%r4:	Length of the buffer, must be 64 bytes or greater.
+ *
+ * Register usage:
+ *
+ *	%r5:	CRC-32 constant pool base pointer.
+ *	V0:	Initial CRC value and intermediate constants and results.
+ *	V1..V4:	Data for CRC computation.
+ *	V5..V8:	Next data chunks that are fetched from the input buffer.
+ *	V9:	Constant for BE->LE conversion and shift operations
+ *
+ *	V10..V14: CRC-32 constants.
+ */
+
+ENTRY(crc32_le_vgfm_16)
+	larl	%r5,.Lconstants_CRC_32_LE
+	j	crc32_le_vgfm_generic
+
+ENTRY(crc32c_le_vgfm_16)
+	larl	%r5,.Lconstants_CRC_32C_LE
+	j	crc32_le_vgfm_generic
+
+
+crc32_le_vgfm_generic:
+	/* Load CRC-32 constants */
+	VLM	CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5
+
+	/*
+	 * Load the initial CRC value.
+	 *
+	 * The CRC value is loaded into the rightmost word of the
+	 * vector register and is later XORed with the LSB portion
+	 * of the loaded input data.
+	 */
+	VZERO	%v0			/* Clear V0 */
+	VLVGF	%v0,%r2,3		/* Load CRC into rightmost word */
+
+	/* Load a 64-byte data chunk and XOR with CRC */
+	VLM	%v1,%v4,0,%r3		/* 64-bytes into V1..V4 */
+	VPERM	%v1,%v1,%v1,CONST_PERM_LE2BE
+	VPERM	%v2,%v2,%v2,CONST_PERM_LE2BE
+	VPERM	%v3,%v3,%v3,CONST_PERM_LE2BE
+	VPERM	%v4,%v4,%v4,CONST_PERM_LE2BE
+
+	VX	%v1,%v0,%v1		/* V1 ^= CRC */
+	aghi	%r3,64			/* BUF = BUF + 64 */
+	aghi	%r4,-64			/* LEN = LEN - 64 */
+
+	cghi	%r4,64
+	jl	.Lless_than_64bytes
+
+.Lfold_64bytes_loop:
+	/* Load the next 64-byte data chunk into V5 to V8 */
+	VLM	%v5,%v8,0,%r3
+	VPERM	%v5,%v5,%v5,CONST_PERM_LE2BE
+	VPERM	%v6,%v6,%v6,CONST_PERM_LE2BE
+	VPERM	%v7,%v7,%v7,CONST_PERM_LE2BE
+	VPERM	%v8,%v8,%v8,CONST_PERM_LE2BE
+
+	/*
+	 * Perform a GF(2) multiplication of the doublewords in V1 with
+	 * the R1 and R2 reduction constants in V0.  The intermediate result
+	 * is then folded (accumulated) with the next data chunk in V5 and
+	 * stored in V1. Repeat this step for the register contents
+	 * in V2, V3, and V4 respectively.
+	 */
+	VGFMAG	%v1,CONST_R2R1,%v1,%v5
+	VGFMAG	%v2,CONST_R2R1,%v2,%v6
+	VGFMAG	%v3,CONST_R2R1,%v3,%v7
+	VGFMAG	%v4,CONST_R2R1,%v4,%v8
+
+	aghi	%r3,64			/* BUF = BUF + 64 */
+	aghi	%r4,-64			/* LEN = LEN - 64 */
+
+	cghi	%r4,64
+	jnl	.Lfold_64bytes_loop
+
+.Lless_than_64bytes:
+	/*
+	 * Fold V1 to V4 into a single 128-bit value in V1.  Multiply V1 with R3
+	 * and R4 and accumulate the next 128-bit chunk until a single 128-bit
+	 * value remains.
+	 */
+	VGFMAG	%v1,CONST_R4R3,%v1,%v2
+	VGFMAG	%v1,CONST_R4R3,%v1,%v3
+	VGFMAG	%v1,CONST_R4R3,%v1,%v4
+
+	cghi	%r4,16
+	jl	.Lfinal_fold
+
+.Lfold_16bytes_loop:
+
+	VL	%v2,0,,%r3		/* Load next data chunk */
+	VPERM	%v2,%v2,%v2,CONST_PERM_LE2BE
+	VGFMAG	%v1,CONST_R4R3,%v1,%v2	/* Fold next data chunk */
+
+	aghi	%r3,16
+	aghi	%r4,-16
+
+	cghi	%r4,16
+	jnl	.Lfold_16bytes_loop
+
+.Lfinal_fold:
+	/*
+	 * Set up a vector register for byte shifts.  The shift value must
+	 * be loaded in bits 1-4 in byte element 7 of a vector register.
+	 * Shift by 8 bytes: 0x40
+	 * Shift by 4 bytes: 0x20
+	 */
+	VLEIB	%v9,0x40,7
+
+	/*
+	 * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
+	 * to move R4 into the rightmost doubleword and set the leftmost
+	 * doubleword to 0x1.
+	 */
+	VSRLB	%v0,CONST_R4R3,%v9
+	VLEIG	%v0,1,0
+
+	/*
+	 * Compute GF(2) product of V1 and V0.	The rightmost doubleword
+	 * of V1 is multiplied with R4.  The leftmost doubleword of V1 is
+	 * multiplied by 0x1 and is then XORed with rightmost product.
+	 * Implicitly, the intermediate leftmost product becomes padded
+	 */
+	VGFMG	%v1,%v0,%v1
+
+	/*
+	 * Now do the final 32-bit fold by multiplying the rightmost word
+	 * in V1 with R5 and XOR the result with the remaining bits in V1.
+	 *
+	 * To achieve this by a single VGFMAG, right shift V1 by a word
+	 * and store the result in V2 which is then accumulated.  Use the
+	 * vector unpack instruction to load the rightmost half of the
+	 * doubleword into the rightmost doubleword element of V1; the other
+	 * half is loaded in the leftmost doubleword.
+	 * The vector register with CONST_R5 contains the R5 constant in the
+	 * rightmost doubleword and the leftmost doubleword is zero to ignore
+	 * the leftmost product of V1.
+	 */
+	VLEIB	%v9,0x20,7		  /* Shift by words */
+	VSRLB	%v2,%v1,%v9		  /* Store remaining bits in V2 */
+	VUPLLF	%v1,%v1			  /* Split rightmost doubleword */
+	VGFMAG	%v1,CONST_R5,%v1,%v2	  /* V1 = (V1 * R5) XOR V2 */
+
+	/*
+	 * Apply a Barret reduction to compute the final 32-bit CRC value.
+	 *
+	 * The input values to the Barret reduction are the degree-63 polynomial
+	 * in V1 (R(x)), degree-32 generator polynomial, and the reduction
+	 * constant u.	The Barret reduction result is the CRC value of R(x) mod
+	 * P(x).
+	 *
+	 * The Barret reduction algorithm is defined as:
+	 *
+	 *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
+	 *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
+	 *    3. C(x)  = R(x) XOR T2(x) mod x^32
+	 *
+	 *  Note: The leftmost doubleword of vector register containing
+	 *  CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
+	 *  is zero and does not contribute to the final result.
+	 */
+
+	/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
+	VUPLLF	%v2,%v1
+	VGFMG	%v2,CONST_RU_POLY,%v2
+
+	/*
+	 * Compute the GF(2) product of the CRC polynomial with T1(x) in
+	 * V2 and XOR the intermediate result, T2(x), with the value in V1.
+	 * The final result is stored in word element 2 of V2.
+	 */
+	VUPLLF	%v2,%v2
+	VGFMAG	%v2,CONST_CRC_POLY,%v2,%v1
+
+.Ldone:
+	VLGVF	%r2,%v2,2
+	br	%r14
+
+.previous
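A matching bit-serial reference for the reflected variants (again an illustration, not commit code) takes the reflected polynomial as a parameter: 0xEDB88320 for CRC-32 and 0x82F63B78 for CRC-32C.

#include <stdint.h>
#include <stddef.h>

/* Bit-serial reflected CRC-32; poly is P'(x). For buffers meeting the
 * length and alignment rules above this must agree with
 * crc32_le_vgfm_16() / crc32c_le_vgfm_16(). */
static uint32_t crc32_le_ref(uint32_t crc, const unsigned char *buf,
			     size_t len, uint32_t poly)
{
	while (len--) {
		crc ^= *buf++;
		for (int i = 0; i < 8; i++)
			crc = (crc & 1) ? (crc >> 1) ^ poly : crc >> 1;
	}
	return crc;
}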

+ 4 - 0
arch/s390/defconfig

@@ -225,12 +225,16 @@ CONFIG_CRYPTO_DEFLATE=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_ZCRYPT=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_CRC32_S390=m
 CONFIG_CRC7=m
 # CONFIG_XZ_DEC_X86 is not set
 # CONFIG_XZ_DEC_POWERPC is not set

+ 8 - 6
arch/s390/hypfs/hypfs_diag.c

@@ -337,25 +337,27 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
 
 /* Diagnose 204 functions */
 
-static inline int __diag204(unsigned long subcode, unsigned long size, void *addr)
+static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
 {
-	register unsigned long _subcode asm("0") = subcode;
+	register unsigned long _subcode asm("0") = *subcode;
 	register unsigned long _size asm("1") = size;
 
 	asm volatile(
 		"	diag	%2,%0,0x204\n"
-		"0:\n"
+		"0:	nopr	%%r7\n"
 		EX_TABLE(0b,0b)
 		: "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
-	if (_subcode)
-		return -1;
+	*subcode = _subcode;
 	return _size;
 }
 
 static int diag204(unsigned long subcode, unsigned long size, void *addr)
 {
 	diag_stat_inc(DIAG_STAT_X204);
-	return __diag204(subcode, size, addr);
+	size = __diag204(&subcode, size, addr);
+	if (subcode)
+		return -1;
+	return size;
 }
 
 /*
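This hunk is one instance of the inline-assembly basic-block cleanup named in the merge message: kcov instrumentation lets the compiler insert calls between C statements, and a "register asm" binding is not guaranteed to survive such a call, so each asm is moved into a trivial helper where the bindings and the asm form a single basic block. A schematic sketch of the pattern (invented names; lgr stands in for the real instruction):

/* Fragile: instrumentation may land between the register variable
 * assignment and the asm that consumes it. */
static inline int op_fragile(unsigned long cmd)
{
	register unsigned long r0 asm("0") = cmd;

	if (!cmd)		/* a coverage callback can be inserted here */
		return -1;
	asm volatile("lgr %0,%0" : "+d" (r0) : : "cc", "memory");
	return r0;
}

/* Robust: the asm lives alone in one basic block ... */
static inline unsigned long __op(unsigned long cmd)
{
	register unsigned long r0 asm("0") = cmd;

	asm volatile("lgr %0,%0" : "+d" (r0) : : "cc", "memory");
	return r0;
}

/* ... and the C-level logic stays in a wrapper. */
static inline int op(unsigned long cmd)
{
	if (!cmd)
		return -1;
	return __op(cmd);
}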

+ 1 - 1
arch/s390/hypfs/hypfs_vm.c

@@ -70,7 +70,7 @@ static int diag2fc(int size, char* query, void *addr)
 	diag_stat_inc(DIAG_STAT_X2FC);
 	asm volatile(
 		"	diag    %0,%1,0x2fc\n"
-		"0:\n"
+		"0:	nopr	%%r7\n"
 		EX_TABLE(0b,0b)
 		: "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory");
 

+ 1 - 4
arch/s390/include/asm/cache.h

@@ -13,9 +13,6 @@
 #define L1_CACHE_SHIFT     8
 #define NET_SKB_PAD	   32
 
-#define __read_mostly __attribute__((__section__(".data..read_mostly")))
-
-/* Read-only memory is marked before mark_rodata_ro() is called. */
-#define __ro_after_init __read_mostly
+#define __read_mostly __section(.data..read_mostly)
 
 #endif
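With the private definition gone, s390 picks up __ro_after_init from the generic headers, backed by the RO_AFTER_INIT_DATA section mentioned in the merge message. A hypothetical use (illustrative names, not from this commit):

#include <linux/cache.h>
#include <linux/init.h>

/* Written exactly once during boot, then mapped read-only. */
static unsigned long boot_feature_mask __ro_after_init;

static int __init feature_setup(void)
{
	boot_feature_mask = 0x2a;	/* illustrative value */
	return 0;
}
early_initcall(feature_setup);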

+ 1 - 1
arch/s390/include/asm/cio.h

@@ -320,7 +320,7 @@ struct cio_iplinfo {
 extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo);
 
 /* Function from drivers/s390/cio/chsc.c */
-int chsc_sstpc(void *page, unsigned int op, u16 ctrl);
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
 int chsc_sstpi(void *page, void *result, size_t size);
 
 #endif

+ 14 - 3
arch/s390/include/asm/cpu_mf.h

@@ -169,16 +169,27 @@ static inline int lcctl(u64 ctl)
 }
 
 /* Extract CPU counter */
-static inline int ecctr(u64 ctr, u64 *val)
+static inline int __ecctr(u64 ctr, u64 *content)
 {
-	register u64 content asm("4") = 0;
+	register u64 _content asm("4") = 0;
 	int cc;
 
 	asm volatile (
 		"	.insn	rre,0xb2e40000,%0,%2\n"
 		"	ipm	%1\n"
 		"	srl	%1,28\n"
-		: "=d" (content), "=d" (cc) : "d" (ctr) : "cc");
+		: "=d" (_content), "=d" (cc) : "d" (ctr) : "cc");
+	*content = _content;
+	return cc;
+}
+
+/* Extract CPU counter */
+static inline int ecctr(u64 ctr, u64 *val)
+{
+	u64 content;
+	int cc;
+
+	cc = __ecctr(ctr, &content);
 	if (!cc)
 		*val = content;
 	return cc;

+ 1 - 1
arch/s390/include/asm/diag.h

@@ -49,7 +49,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
 	diag_stat_inc(DIAG_STAT_X010);
 	asm volatile(
 		"0:	diag	%0,%1,0x10\n"
-		"1:\n"
+		"1:	nopr	%%r7\n"
 		EX_TABLE(0b, 1b)
 		EX_TABLE(1b, 1b)
 		: : "a" (start_addr), "a" (end_addr));

+ 0 - 261
arch/s390/include/asm/etr.h

@@ -1,261 +0,0 @@
-/*
- *  Copyright IBM Corp. 2006
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#ifndef __S390_ETR_H
-#define __S390_ETR_H
-
-/* ETR attachment control register */
-struct etr_eacr {
-	unsigned int e0		: 1;	/* port 0 stepping control */
-	unsigned int e1		: 1;	/* port 1 stepping control */
-	unsigned int _pad0	: 5;	/* must be 00100 */
-	unsigned int dp		: 1;	/* data port control */
-	unsigned int p0		: 1;	/* port 0 change recognition control */
-	unsigned int p1		: 1;	/* port 1 change recognition control */
-	unsigned int _pad1	: 3;	/* must be 000 */
-	unsigned int ea		: 1;	/* ETR alert control */
-	unsigned int es		: 1;	/* ETR sync check control */
-	unsigned int sl		: 1;	/* switch to local control */
-} __attribute__ ((packed));
-
-/* Port state returned by steai */
-enum etr_psc {
-	etr_psc_operational = 0,
-	etr_psc_semi_operational = 1,
-	etr_psc_protocol_error =  4,
-	etr_psc_no_symbols = 8,
-	etr_psc_no_signal = 12,
-	etr_psc_pps_mode = 13
-};
-
-/* Logical port state returned by stetr */
-enum etr_lpsc {
-	etr_lpsc_operational_step = 0,
-	etr_lpsc_operational_alt = 1,
-	etr_lpsc_semi_operational = 2,
-	etr_lpsc_protocol_error =  4,
-	etr_lpsc_no_symbol_sync = 8,
-	etr_lpsc_no_signal = 12,
-	etr_lpsc_pps_mode = 13
-};
-
-/* ETR status words */
-struct etr_esw {
-	struct etr_eacr eacr;		/* attachment control register */
-	unsigned int y		: 1;	/* stepping mode */
-	unsigned int _pad0	: 5;	/* must be 00000 */
-	unsigned int p		: 1;	/* stepping port number */
-	unsigned int q		: 1;	/* data port number */
-	unsigned int psc0	: 4;	/* port 0 state code */
-	unsigned int psc1	: 4;	/* port 1 state code */
-} __attribute__ ((packed));
-
-/* Second level data register status word */
-struct etr_slsw {
-	unsigned int vv1	: 1;	/* copy of validity bit data frame 1 */
-	unsigned int vv2	: 1;	/* copy of validity bit data frame 2 */
-	unsigned int vv3	: 1;	/* copy of validity bit data frame 3 */
-	unsigned int vv4	: 1;	/* copy of validity bit data frame 4 */
-	unsigned int _pad0	: 19;	/* must by all zeroes */
-	unsigned int n		: 1;	/* EAF port number */
-	unsigned int v1		: 1;	/* validity bit ETR data frame 1 */
-	unsigned int v2		: 1;	/* validity bit ETR data frame 2 */
-	unsigned int v3		: 1;	/* validity bit ETR data frame 3 */
-	unsigned int v4		: 1;	/* validity bit ETR data frame 4 */
-	unsigned int _pad1	: 4;	/* must be 0000 */
-} __attribute__ ((packed));
-
-/* ETR data frames */
-struct etr_edf1 {
-	unsigned int u		: 1;	/* untuned bit */
-	unsigned int _pad0	: 1;	/* must be 0 */
-	unsigned int r		: 1;	/* service request bit */
-	unsigned int _pad1	: 4;	/* must be 0000 */
-	unsigned int a		: 1;	/* time adjustment bit */
-	unsigned int net_id	: 8;	/* ETR network id */
-	unsigned int etr_id	: 8;	/* id of ETR which sends data frames */
-	unsigned int etr_pn	: 8;	/* port number of ETR output port */
-} __attribute__ ((packed));
-
-struct etr_edf2 {
-	unsigned int etv	: 32;	/* Upper 32 bits of TOD. */
-} __attribute__ ((packed));
-
-struct etr_edf3 {
-	unsigned int rc		: 8;	/* failure reason code */
-	unsigned int _pad0	: 3;	/* must be 000 */
-	unsigned int c		: 1;	/* ETR coupled bit */
-	unsigned int tc		: 4;	/* ETR type code */
-	unsigned int blto	: 8;	/* biased local time offset */
-					/* (blto - 128) * 15 = minutes */
-	unsigned int buo	: 8;	/* biased utc offset */
-					/* (buo - 128) = leap seconds */
-} __attribute__ ((packed));
-
-struct etr_edf4 {
-	unsigned int ed		: 8;	/* ETS device dependent data */
-	unsigned int _pad0	: 1;	/* must be 0 */
-	unsigned int buc	: 5;	/* biased ut1 correction */
-					/* (buc - 16) * 0.1 seconds */
-	unsigned int em		: 6;	/* ETS error magnitude */
-	unsigned int dc		: 6;	/* ETS drift code */
-	unsigned int sc		: 6;	/* ETS steering code */
-} __attribute__ ((packed));
-
-/*
- * ETR attachment information block, two formats
- * format 1 has 4 reserved words with a size of 64 bytes
- * format 2 has 16 reserved words with a size of 96 bytes
- */
-struct etr_aib {
-	struct etr_esw esw;
-	struct etr_slsw slsw;
-	unsigned long long tsp;
-	struct etr_edf1 edf1;
-	struct etr_edf2 edf2;
-	struct etr_edf3 edf3;
-	struct etr_edf4 edf4;
-	unsigned int reserved[16];
-} __attribute__ ((packed,aligned(8)));
-
-/* ETR interruption parameter */
-struct etr_irq_parm {
-	unsigned int _pad0	: 8;
-	unsigned int pc0	: 1;	/* port 0 state change */
-	unsigned int pc1	: 1;	/* port 1 state change */
-	unsigned int _pad1	: 3;
-	unsigned int eai	: 1;	/* ETR alert indication */
-	unsigned int _pad2	: 18;
-} __attribute__ ((packed));
-
-/* Query TOD offset result */
-struct etr_ptff_qto {
-	unsigned long long physical_clock;
-	unsigned long long tod_offset;
-	unsigned long long logical_tod_offset;
-	unsigned long long tod_epoch_difference;
-} __attribute__ ((packed));
-
-/* Inline assembly helper functions */
-static inline int etr_setr(struct etr_eacr *ctrl)
-{
-	int rc = -EOPNOTSUPP;
-
-	asm volatile(
-		"	.insn	s,0xb2160000,%1\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (rc) : "Q" (*ctrl));
-	return rc;
-}
-
-/* Stores a format 1 aib with 64 bytes */
-static inline int etr_stetr(struct etr_aib *aib)
-{
-	int rc = -EOPNOTSUPP;
-
-	asm volatile(
-		"	.insn	s,0xb2170000,%1\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (rc) : "Q" (*aib));
-	return rc;
-}
-
-/* Stores a format 2 aib with 96 bytes for specified port */
-static inline int etr_steai(struct etr_aib *aib, unsigned int func)
-{
-	register unsigned int reg0 asm("0") = func;
-	int rc = -EOPNOTSUPP;
-
-	asm volatile(
-		"	.insn	s,0xb2b30000,%1\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (rc) : "Q" (*aib), "d" (reg0));
-	return rc;
-}
-
-/* Function codes for the steai instruction. */
-#define ETR_STEAI_STEPPING_PORT		0x10
-#define ETR_STEAI_ALTERNATE_PORT	0x11
-#define ETR_STEAI_PORT_0		0x12
-#define ETR_STEAI_PORT_1		0x13
-
-static inline int etr_ptff(void *ptff_block, unsigned int func)
-{
-	register unsigned int reg0 asm("0") = func;
-	register unsigned long reg1 asm("1") = (unsigned long) ptff_block;
-	int rc = -EOPNOTSUPP;
-
-	asm volatile(
-		"	.word	0x0104\n"
-		"	ipm	%0\n"
-		"	srl	%0,28\n"
-		: "=d" (rc), "=m" (ptff_block)
-		: "d" (reg0), "d" (reg1), "m" (ptff_block) : "cc");
-	return rc;
-}
-
-/* Function codes for the ptff instruction. */
-#define ETR_PTFF_QAF	0x00	/* query available functions */
-#define ETR_PTFF_QTO	0x01	/* query tod offset */
-#define ETR_PTFF_QSI	0x02	/* query steering information */
-#define ETR_PTFF_ATO	0x40	/* adjust tod offset */
-#define ETR_PTFF_STO	0x41	/* set tod offset */
-#define ETR_PTFF_SFS	0x42	/* set fine steering rate */
-#define ETR_PTFF_SGS	0x43	/* set gross steering rate */
-
-/* Functions needed by the machine check handler */
-int etr_switch_to_local(void);
-int etr_sync_check(void);
-void etr_queue_work(void);
-
-/* notifier for syncs */
-extern struct atomic_notifier_head s390_epoch_delta_notifier;
-
-/* STP interruption parameter */
-struct stp_irq_parm {
-	unsigned int _pad0	: 14;
-	unsigned int tsc	: 1;	/* Timing status change */
-	unsigned int lac	: 1;	/* Link availability change */
-	unsigned int tcpc	: 1;	/* Time control parameter change */
-	unsigned int _pad2	: 15;
-} __attribute__ ((packed));
-
-#define STP_OP_SYNC	1
-#define STP_OP_CTRL	3
-
-struct stp_sstpi {
-	unsigned int rsvd0;
-	unsigned int rsvd1 : 8;
-	unsigned int stratum : 8;
-	unsigned int vbits : 16;
-	unsigned int leaps : 16;
-	unsigned int tmd : 4;
-	unsigned int ctn : 4;
-	unsigned int rsvd2 : 3;
-	unsigned int c : 1;
-	unsigned int tst : 4;
-	unsigned int tzo : 16;
-	unsigned int dsto : 16;
-	unsigned int ctrl : 16;
-	unsigned int rsvd3 : 16;
-	unsigned int tto;
-	unsigned int rsvd4;
-	unsigned int ctnid[3];
-	unsigned int rsvd5;
-	unsigned int todoff[4];
-	unsigned int rsvd6[48];
-} __attribute__ ((packed));
-
-/* Functions needed by the machine check handler */
-int stp_sync_check(void);
-int stp_island_check(void);
-void stp_queue_work(void);
-
-#endif /* __S390_ETR_H */

+ 1 - 1
arch/s390/include/asm/fcx.h

@@ -6,7 +6,7 @@
  */
 
 #ifndef _ASM_S390_FCX_H
-#define _ASM_S390_FCX_H _ASM_S390_FCX_H
+#define _ASM_S390_FCX_H
 
 #include <linux/types.h>
 

+ 75 - 0
arch/s390/include/asm/fpu/api.h

@@ -1,6 +1,41 @@
 /*
 /*
  * In-kernel FPU support functions
  * In-kernel FPU support functions
  *
  *
+ *
+ * Consider these guidelines before using in-kernel FPU functions:
+ *
+ *  1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
+ *     use of floating-point or vector registers and instructions.
+ *
+ *  2. For kernel_fpu_begin(), specify the vector register range you want to
+ *     use with the KERNEL_VXR_* constants. Consider these usage guidelines:
+ *
+ *     a) If your function typically runs in process-context, use the lower
+ *	  half of the vector registers, for example, specify KERNEL_VXR_LOW.
+ *     b) If your function typically runs in soft-irq or hard-irq context,
+ *	  prefer using the upper half of the vector registers, for example,
+ *	  specify KERNEL_VXR_HIGH.
+ *
+ *     If you adhere to these guidelines, an interrupted process context
+ *     does not require to save and restore vector registers because of
+ *     disjoint register ranges.
+ *
+ *     Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
+ *     includes logic to save and restore up to 16 vector registers at once.
+ *
+ *  3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
+ *     struct kernel_fpu states.  Vector registers that are in use by outer
+ *     levels are saved and restored.  You can minimize the save and restore
+ *     effort by choosing disjoint vector register ranges.
+ *
+ *  5. To use vector floating-point instructions, specify the KERNEL_FPC
+ *     flag to save and restore floating-point controls in addition to any
+ *     vector register range.
+ *
+ *  6. To use floating-point registers and instructions only, specify the
+ *     KERNEL_FPR flag.  This flag triggers a save and restore of vector
+ *     registers V0 to V15 and floating-point controls.
+ *
  * Copyright IBM Corp. 2015
  * Copyright IBM Corp. 2015
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
  */
@@ -8,6 +43,8 @@
 #ifndef _ASM_S390_FPU_API_H
 #ifndef _ASM_S390_FPU_API_H
 #define _ASM_S390_FPU_API_H
 #define _ASM_S390_FPU_API_H
 
 
+#include <linux/preempt.h>
+
 void save_fpu_regs(void);
 
 static inline int test_fp_ctl(u32 fpc)
@@ -27,4 +64,42 @@ static inline int test_fp_ctl(u32 fpc)
 	return rc;
 }
 
+#define KERNEL_VXR_V0V7		1
+#define KERNEL_VXR_V8V15	2
+#define KERNEL_VXR_V16V23	4
+#define KERNEL_VXR_V24V31	8
+#define KERNEL_FPR		16
+#define KERNEL_FPC		256
+
+#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
+#define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
+#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
+
+#define KERNEL_FPU_MASK		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR)
+
+struct kernel_fpu;
+
+/*
+ * Note that the functions below must be called with preemption disabled.
+ * Do not enable preemption before calling __kernel_fpu_end() to prevent
+ * corruption of an existing kernel FPU state.
+ *
+ * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
+ */
+void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
+void __kernel_fpu_end(struct kernel_fpu *state);
+
+static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+{
+	preempt_disable();
+	__kernel_fpu_begin(state, flags);
+}
+
+static inline void kernel_fpu_end(struct kernel_fpu *state)
+{
+	__kernel_fpu_end(state);
+	preempt_enable();
+}
+
 #endif /* _ASM_S390_FPU_API_H */
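
For illustration, a minimal usage sketch of this API (the helper name and its
arguments are hypothetical; struct kernel_fpu and the KERNEL_* constants come
from the hunks above):

/*
 * Sketch: guard in-kernel vector code with kernel_fpu_begin()/kernel_fpu_end().
 * kernel_fpu_begin() disables preemption and saves any registers of the
 * requested range that are already in use; kernel_fpu_end() restores them.
 */
static void example_xor_vx(void *dst, const void *src, size_t len)
{
	struct kernel_fpu vxstate;

	kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);
	/* ... vector instructions clobbering only V0..V15 ... */
	kernel_fpu_end(&vxstate);
}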

+ 10 - 0
arch/s390/include/asm/fpu/types.h

@@ -24,4 +24,14 @@ struct fpu {
 /* VX array structure for address operand constraints in inline assemblies */
 struct vx_array { __vector128 _[__NUM_VXRS]; };
 
+/* In-kernel FPU state structure */
+struct kernel_fpu {
+	u32	    mask;
+	u32	    fpc;
+	union {
+		freg_t fprs[__NUM_FPRS];
+		__vector128 vxrs[__NUM_VXRS];
+	};
+};
+
 #endif /* _ASM_S390_FPU_TYPES_H */

+ 4 - 1
arch/s390/include/asm/hugetlb.h

@@ -41,7 +41,10 @@ static inline int prepare_hugepage_range(struct file *file,
 static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 				  pte_t *ptep)
 {
-	pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
+	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		pte_val(*ptep) = _REGION3_ENTRY_EMPTY;
+	else
+		pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
 }
 
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,

+ 5 - 5
arch/s390/include/asm/ipl.h

@@ -141,11 +141,11 @@ extern void setup_ipl(void);
  * DIAG 308 support
  */
 enum diag308_subcode  {
-	DIAG308_REL_HSA	= 2,
-	DIAG308_IPL	= 3,
-	DIAG308_DUMP	= 4,
-	DIAG308_SET	= 5,
-	DIAG308_STORE	= 6,
+	DIAG308_REL_HSA = 2,
+	DIAG308_LOAD_CLEAR = 3,
+	DIAG308_LOAD_NORMAL_DUMP = 4,
+	DIAG308_SET = 5,
+	DIAG308_STORE = 6,
 };
 
 enum diag308_ipl_type {

+ 2 - 5
arch/s390/include/asm/irq.h

@@ -7,11 +7,8 @@
 
 #define NR_IRQS_BASE	3
 
-#ifdef CONFIG_PCI_NR_MSI
-# define NR_IRQS	(NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
-#else
-# define NR_IRQS	NR_IRQS_BASE
-#endif
+#define NR_IRQS	NR_IRQS_BASE
+#define NR_IRQS_LEGACY NR_IRQS_BASE
 
 /* External interruption codes */
 #define EXT_IRQ_INTERRUPT_KEY	0x0040

+ 1 - 0
arch/s390/include/asm/jump_label.h

@@ -4,6 +4,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <linux/stringify.h>
 
 #define JUMP_LABEL_NOP_SIZE 6
 #define JUMP_LABEL_NOP_OFFSET 2

+ 2 - 2
arch/s390/include/asm/kprobes.h

@@ -43,9 +43,9 @@ typedef u16 kprobe_opcode_t;
 #define MAX_INSN_SIZE		0x0003
 #define MAX_STACK_SIZE		64
 #define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
-	(((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
+	(((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) \
 	? (MAX_STACK_SIZE) \
-	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
+	: (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR)))
 
 #define kretprobe_blacklist_size 0
 

+ 0 - 28
arch/s390/include/asm/mathemu.h

@@ -1,28 +0,0 @@
-/*
- *    IEEE floating point emulation.
- *
- *  S390 version
- *    Copyright IBM Corp. 1999
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-
-#ifndef __MATHEMU__
-#define __MATHEMU__
-
-extern int math_emu_b3(__u8 *, struct pt_regs *);
-extern int math_emu_ed(__u8 *, struct pt_regs *);
-extern int math_emu_ldr(__u8 *);
-extern int math_emu_ler(__u8 *);
-extern int math_emu_std(__u8 *, struct pt_regs *);
-extern int math_emu_ld(__u8 *, struct pt_regs *);
-extern int math_emu_ste(__u8 *, struct pt_regs *);
-extern int math_emu_le(__u8 *, struct pt_regs *);
-extern int math_emu_lfpc(__u8 *, struct pt_regs *);
-extern int math_emu_stfpc(__u8 *, struct pt_regs *);
-extern int math_emu_srnm(__u8 *, struct pt_regs *);
-
-#endif                                 /* __MATHEMU__                      */
-
-
-
-

+ 1 - 1
arch/s390/include/asm/mmu.h

@@ -6,7 +6,7 @@
 
 typedef struct {
 	cpumask_t cpu_attach_mask;
-	atomic_t attach_count;
+	atomic_t flush_count;
 	unsigned int flush_mm;
 	spinlock_t list_lock;
 	struct list_head pgtable_list;

+ 5 - 10
arch/s390/include/asm/mmu_context.h

@@ -19,7 +19,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	INIT_LIST_HEAD(&mm->context.pgtable_list);
 	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
-	atomic_set(&mm->context.attach_count, 0);
+	atomic_set(&mm->context.flush_count, 0);
 	mm->context.flush_mm = 0;
 #ifdef CONFIG_PGSTE
 	mm->context.alloc_pgste = page_table_allocate_pgste;
@@ -90,15 +90,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	S390_lowcore.user_asce = next->context.asce;
 	if (prev == next)
 		return;
-	if (MACHINE_HAS_TLB_LC)
-		cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+	cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+	cpumask_set_cpu(cpu, mm_cpumask(next));
 	/* Clear old ASCE by loading the kernel ASCE. */
 	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
 	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
-	atomic_inc(&next->context.attach_count);
-	atomic_dec(&prev->context.attach_count);
-	if (MACHINE_HAS_TLB_LC)
-		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
+	cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
 }
 
 #define finish_arch_post_lock_switch finish_arch_post_lock_switch
@@ -110,10 +107,9 @@ static inline void finish_arch_post_lock_switch(void)
 	load_kernel_asce();
 	if (mm) {
 		preempt_disable();
-		while (atomic_read(&mm->context.attach_count) >> 16)
+		while (atomic_read(&mm->context.flush_count))
 			cpu_relax();
 
-		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
 		if (mm->context.flush_mm)
 			__tlb_flush_mm(mm);
 		preempt_enable();
@@ -128,7 +124,6 @@ static inline void activate_mm(struct mm_struct *prev,
                                struct mm_struct *next)
 {
 	switch_mm(prev, next, current);
-	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
 	set_user_asce(next);
 }
 

+ 5 - 3
arch/s390/include/asm/page.h

@@ -21,6 +21,7 @@
 #define HPAGE_SIZE	(1UL << HPAGE_SHIFT)
 #define HPAGE_MASK	(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+#define HUGE_MAX_HSTATE		2
 
 #define ARCH_HAS_SETCLEAR_HUGE_PTE
 #define ARCH_HAS_HUGE_PTE_TYPE
@@ -30,11 +31,12 @@
 #include <asm/setup.h>
 #ifndef __ASSEMBLY__
 
+void __storage_key_init_range(unsigned long start, unsigned long end);
+
 static inline void storage_key_init_range(unsigned long start, unsigned long end)
 {
-#if PAGE_DEFAULT_KEY
-	__storage_key_init_range(start, end);
-#endif
+	if (PAGE_DEFAULT_KEY)
+		__storage_key_init_range(start, end);
 }
 
 #define clear_page(page)	memset((page), 0, PAGE_SIZE)

+ 0 - 12
arch/s390/include/asm/perf_event.h

@@ -86,16 +86,4 @@ struct sf_raw_sample {
 	u8		    padding[];	  /* Padding to next multiple of 8 */
 } __packed;
 
-/* Perf hardware reserve and release functions */
-#ifdef CONFIG_PERF_EVENTS
-int perf_reserve_sampling(void);
-void perf_release_sampling(void);
-#else /* CONFIG_PERF_EVENTS */
-static inline int perf_reserve_sampling(void)
-{
-	return 0;
-}
-static inline void perf_release_sampling(void) {}
-#endif /* CONFIG_PERF_EVENTS */
-
 #endif /* _ASM_S390_PERF_EVENT_H */

+ 223 - 49
arch/s390/include/asm/pgtable.h

@@ -28,12 +28,33 @@
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
 #include <linux/radix-tree.h>
+#include <linux/atomic.h>
 #include <asm/bug.h>
 #include <asm/page.h>
 
-extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
+extern pgd_t swapper_pg_dir[];
 extern void paging_init(void);
 extern void vmem_map_init(void);
+pmd_t *vmem_pmd_alloc(void);
+pte_t *vmem_pte_alloc(void);
+
+enum {
+	PG_DIRECT_MAP_4K = 0,
+	PG_DIRECT_MAP_1M,
+	PG_DIRECT_MAP_2G,
+	PG_DIRECT_MAP_MAX
+};
+
+extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+
+static inline void update_page_count(int level, long count)
+{
+	if (IS_ENABLED(CONFIG_PROC_FS))
+		atomic_long_add(count, &direct_pages_count[level]);
+}
+
+struct seq_file;
+void arch_report_meminfo(struct seq_file *m);
 
 /*
  * The S390 doesn't have any external MMU info: the kernel page
@@ -270,8 +291,23 @@ static inline int is_module_addr(void *addr)
 #define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
 #define _REGION3_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
 
-#define _REGION3_ENTRY_LARGE	0x400	/* RTTE-format control, large page  */
-#define _REGION3_ENTRY_RO	0x200	/* page protection bit		    */
+#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address	     */
+#define _REGION3_ENTRY_ORIGIN  ~0x7ffUL/* region third table origin	     */
+
+#define _REGION3_ENTRY_DIRTY	0x2000	/* SW region dirty bit */
+#define _REGION3_ENTRY_YOUNG	0x1000	/* SW region young bit */
+#define _REGION3_ENTRY_LARGE	0x0400	/* RTTE-format control, large page  */
+#define _REGION3_ENTRY_READ	0x0002	/* SW region read bit */
+#define _REGION3_ENTRY_WRITE	0x0001	/* SW region write bit */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */
+#else
+#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
+#endif
+
+#define _REGION_ENTRY_BITS	 0xfffffffffffff227UL
+#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL
 
 /* Bits in the segment table entry */
 #define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL
@@ -297,7 +333,8 @@ static inline int is_module_addr(void *addr)
 #endif
 
 /*
- * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
+ * Segment table and region3 table entry encoding
+ * (R = read-only, I = invalid, y = young bit):
  *				dy..R...I...rw
  * prot-none, clean, old	00..1...1...00
  * prot-none, clean, young	01..1...1...00
@@ -391,6 +428,33 @@ static inline int is_module_addr(void *addr)
 				 _SEGMENT_ENTRY_READ)
 #define SEGMENT_WRITE	__pgprot(_SEGMENT_ENTRY_READ | \
 				 _SEGMENT_ENTRY_WRITE)
+#define SEGMENT_KERNEL	__pgprot(_SEGMENT_ENTRY |	\
+				 _SEGMENT_ENTRY_LARGE |	\
+				 _SEGMENT_ENTRY_READ |	\
+				 _SEGMENT_ENTRY_WRITE | \
+				 _SEGMENT_ENTRY_YOUNG | \
+				 _SEGMENT_ENTRY_DIRTY)
+#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY |	\
+				 _SEGMENT_ENTRY_LARGE |	\
+				 _SEGMENT_ENTRY_READ |	\
+				 _SEGMENT_ENTRY_YOUNG |	\
+				 _SEGMENT_ENTRY_PROTECT)
+
+/*
+ * Region3 entry (large page) protection definitions.
+ */
+
+#define REGION3_KERNEL	__pgprot(_REGION_ENTRY_TYPE_R3 | \
+				 _REGION3_ENTRY_LARGE |	 \
+				 _REGION3_ENTRY_READ |	 \
+				 _REGION3_ENTRY_WRITE |	 \
+				 _REGION3_ENTRY_YOUNG |	 \
+				 _REGION3_ENTRY_DIRTY)
+#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
+				   _REGION3_ENTRY_LARGE |  \
+				   _REGION3_ENTRY_READ |   \
+				   _REGION3_ENTRY_YOUNG |  \
+				   _REGION_ENTRY_PROTECT)
 
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
@@ -424,6 +488,53 @@ static inline int mm_use_skey(struct mm_struct *mm)
 	return 0;
 }
 
+static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new)
+{
+	register unsigned long reg2 asm("2") = old;
+	register unsigned long reg3 asm("3") = new;
+	unsigned long address = (unsigned long)ptr | 1;
+
+	asm volatile(
+		"	csp	%0,%3"
+		: "+d" (reg2), "+m" (*ptr)
+		: "d" (reg3), "d" (address)
+		: "cc");
+}
+
+static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new)
+{
+	register unsigned long reg2 asm("2") = old;
+	register unsigned long reg3 asm("3") = new;
+	unsigned long address = (unsigned long)ptr | 1;
+
+	asm volatile(
+		"	.insn	rre,0xb98a0000,%0,%3"
+		: "+d" (reg2), "+m" (*ptr)
+		: "d" (reg3), "d" (address)
+		: "cc");
+}
+
+#define CRDTE_DTT_PAGE		0x00UL
+#define CRDTE_DTT_SEGMENT	0x10UL
+#define CRDTE_DTT_REGION3	0x14UL
+#define CRDTE_DTT_REGION2	0x18UL
+#define CRDTE_DTT_REGION1	0x1cUL
+
+static inline void crdte(unsigned long old, unsigned long new,
+			 unsigned long table, unsigned long dtt,
+			 unsigned long address, unsigned long asce)
+{
+	register unsigned long reg2 asm("2") = old;
+	register unsigned long reg3 asm("3") = new;
+	register unsigned long reg4 asm("4") = table | dtt;
+	register unsigned long reg5 asm("5") = address;
+
+	asm volatile(".insn rrf,0xb98f0000,%0,%2,%4,0"
+		     : "+d" (reg2)
+		     : "d" (reg3), "d" (reg4), "d" (reg5), "a" (asce)
+		     : "memory", "cc");
+}
+
 /*
  * pgd/pmd/pte query functions
  */
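
The csp(), cspg() and crdte() helpers added above wrap the COMPARE AND SWAP
AND PURGE and COMPARE AND REPLACE DAT TABLE ENTRY instructions. A minimal
sketch of the csp() pattern, mirroring the __tlb_flush_global() rewrite in
the tlbflush.h hunk further down:

/*
 * Sketch: a csp on a dummy word (the helper sets bit 0 of the operand
 * address) purges the TLBs of all CPUs in the configuration.
 */
static inline void example_flush_all_tlbs(void)
{
	unsigned int dummy = 0;

	csp(&dummy, 0, 0);
}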
@@ -465,7 +576,7 @@ static inline int pud_none(pud_t pud)
 {
 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
 		return 0;
-	return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL;
+	return pud_val(pud) == _REGION3_ENTRY_EMPTY;
 }
 
 static inline int pud_large(pud_t pud)
@@ -475,17 +586,35 @@ static inline int pud_large(pud_t pud)
 	return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
 }
 
+static inline unsigned long pud_pfn(pud_t pud)
+{
+	unsigned long origin_mask;
+
+	origin_mask = _REGION3_ENTRY_ORIGIN;
+	if (pud_large(pud))
+		origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
+	return (pud_val(pud) & origin_mask) >> PAGE_SHIFT;
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+	if (pmd_large(pmd))
+		return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
+	return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
+}
+
 static inline int pud_bad(pud_t pud)
 {
-	/*
-	 * With dynamic page table levels the pud can be a region table
-	 * entry or a segment table entry. Check for the bit that are
-	 * invalid for either table entry.
-	 */
-	unsigned long mask =
-		~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
-		~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
-	return (pud_val(pud) & mask) != 0;
+	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
+		return pmd_bad(__pmd(pud_val(pud)));
+	if (pud_large(pud))
+		return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0;
+	return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
 }
 
 static inline int pmd_present(pmd_t pmd)
@@ -498,11 +627,6 @@ static inline int pmd_none(pmd_t pmd)
 	return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID;
 }
 
-static inline int pmd_large(pmd_t pmd)
-{
-	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
-}
-
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
 	unsigned long origin_mask;
@@ -513,13 +637,6 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
 }
 
-static inline int pmd_bad(pmd_t pmd)
-{
-	if (pmd_large(pmd))
-		return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
-	return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
-}
-
 #define __HAVE_ARCH_PMD_WRITE
 static inline int pmd_write(pmd_t pmd)
 {
@@ -963,6 +1080,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
 #define pte_page(x) pfn_to_page(pte_pfn(x))
 
 #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
+#define pud_page(pud) pfn_to_page(pud_pfn(pud))
 
 /* Find an entry in the lowest level page table.. */
 #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
@@ -970,20 +1088,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
 #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
 #define pte_unmap(pte) do { } while (0)
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
-static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
-{
-	/*
-	 * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
-	 * Convert to segment table entry format.
-	 */
-	if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
-		return pgprot_val(SEGMENT_NONE);
-	if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
-		return pgprot_val(SEGMENT_READ);
-	return pgprot_val(SEGMENT_WRITE);
-}
-
 static inline pmd_t pmd_wrprotect(pmd_t pmd)
 {
 	pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
@@ -1020,6 +1124,56 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
 	return pmd;
 }
 
+static inline pud_t pud_wrprotect(pud_t pud)
+{
+	pud_val(pud) &= ~_REGION3_ENTRY_WRITE;
+	pud_val(pud) |= _REGION_ENTRY_PROTECT;
+	return pud;
+}
+
+static inline pud_t pud_mkwrite(pud_t pud)
+{
+	pud_val(pud) |= _REGION3_ENTRY_WRITE;
+	if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY))
+		return pud;
+	pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+	return pud;
+}
+
+static inline pud_t pud_mkclean(pud_t pud)
+{
+	if (pud_large(pud)) {
+		pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
+		pud_val(pud) |= _REGION_ENTRY_PROTECT;
+	}
+	return pud;
+}
+
+static inline pud_t pud_mkdirty(pud_t pud)
+{
+	if (pud_large(pud)) {
+		pud_val(pud) |= _REGION3_ENTRY_DIRTY |
+				_REGION3_ENTRY_SOFT_DIRTY;
+		if (pud_val(pud) & _REGION3_ENTRY_WRITE)
+			pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+	}
+	return pud;
+}
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+{
+	/*
+	 * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
+	 * Convert to segment table entry format.
+	 */
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
+		return pgprot_val(SEGMENT_NONE);
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
+		return pgprot_val(SEGMENT_READ);
+	return pgprot_val(SEGMENT_WRITE);
+}
+
 static inline pmd_t pmd_mkyoung(pmd_t pmd)
 {
 	if (pmd_large(pmd)) {
@@ -1068,15 +1222,8 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
 
 static inline void __pmdp_csp(pmd_t *pmdp)
 {
-	register unsigned long reg2 asm("2") = pmd_val(*pmdp);
-	register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
-					       _SEGMENT_ENTRY_INVALID;
-	register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
-
-	asm volatile(
-		"	csp %1,%3"
-		: "=m" (*pmdp)
-		: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+	csp((unsigned int *)pmdp + 1, pmd_val(*pmdp),
+	    pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
 }
 
 static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
@@ -1091,6 +1238,19 @@ static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
 		: "cc" );
 }
 
+static inline void __pudp_idte(unsigned long address, pud_t *pudp)
+{
+	unsigned long r3o;
+
+	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
+	r3o |= _ASCE_TYPE_REGION3;
+	asm volatile(
+		"	.insn	rrf,0xb98e0000,%2,%3,0,0"
+		: "=m" (*pudp)
+		: "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK))
+		: "cc");
+}
+
 static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
 {
 	unsigned long sto;
@@ -1103,8 +1263,22 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
 		: "cc" );
 }
 
+static inline void __pudp_idte_local(unsigned long address, pud_t *pudp)
+{
+	unsigned long r3o;
+
+	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
+	r3o |= _ASCE_TYPE_REGION3;
+	asm volatile(
+		"	.insn	rrf,0xb98e0000,%2,%3,0,1"
+		: "=m" (*pudp)
+		: "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK))
+		: "cc");
+}
+
 pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
 pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
+pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 

+ 16 - 1
arch/s390/include/asm/processor.h

@@ -77,7 +77,10 @@ static inline void get_cpu_id(struct cpuid *ptr)
 	asm volatile("stidp %0" : "=Q" (*ptr));
 }
 
-extern void s390_adjust_jiffies(void);
+void s390_adjust_jiffies(void);
+void s390_update_cpu_mhz(void);
+void cpu_detect_mhz_feature(void);
+
 extern const struct seq_operations cpuinfo_op;
 extern int sysctl_ieee_emulation_warnings;
 extern void execve_tail(void);
@@ -233,6 +236,18 @@ void cpu_relax(void);
 
 #define cpu_relax_lowlatency()  barrier()
 
+#define ECAG_CACHE_ATTRIBUTE	0
+#define ECAG_CPU_ATTRIBUTE	1
+
+static inline unsigned long __ecag(unsigned int asi, unsigned char parm)
+{
+	unsigned long val;
+
+	asm volatile(".insn	rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
+		     : "=d" (val) : "a" (asi << 8 | parm));
+	return val;
+}
+
 static inline void psw_set_key(unsigned int key)
 {
 	asm volatile("spka 0(%0)" : : "d" (key));
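
A short sketch of the new __ecag() wrapper in use; the cache.c hunk below
queries ECAG_CACHE_ATTRIBUTE the same way, and the parameter value 0 here is
purely illustrative:

/* Sketch: extract an attribute via the EXTRACT CPU ATTRIBUTE instruction. */
static inline unsigned long example_cpu_attribute(void)
{
	return __ecag(ECAG_CPU_ATTRIBUTE, 0);
}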

+ 1 - 0
arch/s390/include/asm/sections.h

@@ -4,5 +4,6 @@
 #include <asm-generic/sections.h>
 
 extern char _eshared[], _ehead[];
+extern char __start_ro_after_init[], __end_ro_after_init[];
 
 #endif

+ 4 - 0
arch/s390/include/asm/setup.h

@@ -86,9 +86,13 @@ extern char vmpoff_cmd[];
 #define CONSOLE_IS_SCLP		(console_mode == 1)
 #define CONSOLE_IS_3215		(console_mode == 2)
 #define CONSOLE_IS_3270		(console_mode == 3)
+#define CONSOLE_IS_VT220	(console_mode == 4)
+#define CONSOLE_IS_HVC		(console_mode == 5)
 #define SET_CONSOLE_SCLP	do { console_mode = 1; } while (0)
 #define SET_CONSOLE_3215	do { console_mode = 2; } while (0)
 #define SET_CONSOLE_3270	do { console_mode = 3; } while (0)
+#define SET_CONSOLE_VT220	do { console_mode = 4; } while (0)
+#define SET_CONSOLE_HVC		do { console_mode = 5; } while (0)
 
 #define NSS_NAME_SIZE	8
 extern char kernel_nss_name[];

+ 0 - 142
arch/s390/include/asm/sfp-machine.h

@@ -1,142 +0,0 @@
-/* Machine-dependent software floating-point definitions.
-   S/390 kernel version.
-   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Richard Henderson (rth@cygnus.com),
-		  Jakub Jelinek (jj@ultra.linux.cz),
-		  David S. Miller (davem@redhat.com) and
-		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If
-   not, write to the Free Software Foundation, Inc.,
-   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-
-#ifndef _SFP_MACHINE_H
-#define _SFP_MACHINE_H
-   
-
-#define _FP_W_TYPE_SIZE		32
-#define _FP_W_TYPE		unsigned int
-#define _FP_WS_TYPE		signed int
-#define _FP_I_TYPE		int
-
-#define _FP_MUL_MEAT_S(R,X,Y)					\
-  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
-#define _FP_MUL_MEAT_D(R,X,Y)					\
-  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
-#define _FP_MUL_MEAT_Q(R,X,Y)					\
-  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
-
-#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_udiv(S,R,X,Y)
-#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
-#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_4_udiv(Q,R,X,Y)
-
-#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
-#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1), -1
-#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
-#define _FP_NANSIGN_S		0
-#define _FP_NANSIGN_D		0
-#define _FP_NANSIGN_Q		0
-
-#define _FP_KEEPNANFRACP 1
-
-/*
- * If one NaN is signaling and the other is not,
- * we choose that one, otherwise we choose X.
- */
-#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)                      \
-  do {                                                          \
-    if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)          \
-        && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs))     \
-      {                                                         \
-        R##_s = Y##_s;                                          \
-        _FP_FRAC_COPY_##wc(R,Y);                                \
-      }                                                         \
-    else                                                        \
-      {                                                         \
-        R##_s = X##_s;                                          \
-        _FP_FRAC_COPY_##wc(R,X);                                \
-      }                                                         \
-    R##_c = FP_CLS_NAN;                                         \
-  } while (0)
-
-/* Some assembly to speed things up. */
-#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({		\
-	unsigned int __r2 = (x2) + (y2);			\
-	unsigned int __r1 = (x1);				\
-	unsigned int __r0 = (x0);				\
-	asm volatile(						\
-		"	alr	%2,%3\n"			\
-		"	brc	12,0f\n"			\
-		"	lhi	0,1\n"				\
-		"	alr	%1,0\n"				\
-		"	brc	12,0f\n"			\
-		"	alr	%0,0\n"				\
-		"0:"						\
-		: "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
-		: "d" (y0), "i" (1) : "cc", "0" );		\
-	asm volatile(						\
-		"	alr	%1,%2\n"			\
-		"	brc	12,0f\n"			\
-		"	ahi	%0,1\n"				\
-		"0:"						\
-		: "+&d" (__r2), "+&d" (__r1)			\
-		: "d" (y1) : "cc");				\
-	(r2) = __r2;						\
-	(r1) = __r1;						\
-	(r0) = __r0;						\
-})
-
-#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({		\
-	unsigned int __r2 = (x2) - (y2);			\
-	unsigned int __r1 = (x1);				\
-	unsigned int __r0 = (x0);				\
-	asm volatile(						\
-		"	slr   %2,%3\n"				\
-		"	brc	3,0f\n"				\
-		"	lhi	0,1\n"				\
-		"	slr	%1,0\n"				\
-		"	brc	3,0f\n"				\
-		"	slr	%0,0\n"				\
-		"0:"						\
-		: "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
-		: "d" (y0) : "cc", "0");			\
-	asm volatile(						\
-		"	slr	%1,%2\n"			\
-		"	brc	3,0f\n"				\
-		"	ahi	%0,-1\n"			\
-		"0:"						\
-		: "+&d" (__r2), "+&d" (__r1)			\
-		: "d" (y1) : "cc");				\
-	(r2) = __r2;						\
-	(r1) = __r1;						\
-	(r0) = __r0;						\
-})
-
-#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0)
-
-/* Obtain the current rounding mode. */
-#define FP_ROUNDMODE	mode
-
-/* Exception flags. */
-#define FP_EX_INVALID		0x800000
-#define FP_EX_DIVZERO		0x400000
-#define FP_EX_OVERFLOW		0x200000
-#define FP_EX_UNDERFLOW		0x100000
-#define FP_EX_INEXACT		0x080000
-
-/* We write the results always */
-#define FP_INHIBIT_RESULTS 0
-
-#endif

+ 0 - 67
arch/s390/include/asm/sfp-util.h

@@ -1,67 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
-
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({		\
-	unsigned int __sh = (ah);			\
-	unsigned int __sl = (al);			\
-	asm volatile(					\
-		"	alr	%1,%3\n"		\
-		"	brc	12,0f\n"		\
-		"	ahi	%0,1\n"			\
-		"0:	alr  %0,%2"			\
-		: "+&d" (__sh), "+d" (__sl)		\
-		: "d" (bh), "d" (bl) : "cc");		\
-	(sh) = __sh;					\
-	(sl) = __sl;					\
-})
-
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({		\
-	unsigned int __sh = (ah);			\
-	unsigned int __sl = (al);			\
-	asm volatile(					\
-		"	slr	%1,%3\n"		\
-		"	brc	3,0f\n"			\
-		"	ahi	%0,-1\n"		\
-		"0:	slr	%0,%2"			\
-		: "+&d" (__sh), "+d" (__sl)		\
-		: "d" (bh), "d" (bl) : "cc");		\
-	(sh) = __sh;					\
-	(sl) = __sl;					\
-})
-
-/* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? a<<32:0 */
-#define umul_ppmm(wh, wl, u, v) ({			\
-	unsigned int __wh = u;				\
-	unsigned int __wl = v;				\
-	asm volatile(					\
-		"	ltr	1,%0\n"			\
-		"	mr	0,%1\n"			\
-		"	jnm	0f\n"				\
-		"	alr	0,%1\n"			\
-		"0:	ltr	%1,%1\n"			\
-		"	jnm	1f\n"				\
-		"	alr	0,%0\n"			\
-		"1:	lr	%0,0\n"			\
-		"	lr	%1,1\n"			\
-		: "+d" (__wh), "+d" (__wl)		\
-		: : "0", "1", "cc");			\
-	wh = __wh;					\
-	wl = __wl;					\
-})
-
-#define udiv_qrnnd(q, r, n1, n0, d)			\
-  do { unsigned long __n;				\
-       unsigned int __r, __d;				\
-    __n = ((unsigned long)(n1) << 32) + n0;		\
-    __d = (d);						\
-    (q) = __n / __d;					\
-    (r) = __n % __d;					\
-  } while (0)
-
-#define UDIV_NEEDS_NORMALIZATION 0
-
-#define abort() BUG()
-
-#define __BYTE_ORDER __BIG_ENDIAN

+ 14 - 3
arch/s390/include/asm/sigp.h

@@ -37,8 +37,8 @@
 
 #ifndef __ASSEMBLY__
 
-static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
-			      u32 *status)
+static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm,
+				u32 *status)
 {
 	register unsigned long reg1 asm ("1") = parm;
 	int cc;
@@ -48,8 +48,19 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
 		"	ipm	%0\n"
 		"	srl	%0,28\n"
 		: "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+	*status = reg1;
+	return cc;
+}
+
+static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
+			      u32 *status)
+{
+	u32 _status;
+	int cc;
+
+	cc = ____pcpu_sigp(addr, order, parm, &_status);
 	if (status && cc == 1)
-		*status = reg1;
+		*status = _status;
 	return cc;
 }
 
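
To illustrate the split: ____pcpu_sigp() now always stores the status word,
while __pcpu_sigp() keeps the old contract of storing it only for condition
code 1. A hedged sketch, assuming the SIGP_SENSE order code defined earlier
in this header:

/* Sketch: sense a CPU; on condition code 1 the status word is valid. */
static int example_sense_cpu(u16 addr)
{
	u32 status;
	int cc;

	cc = __pcpu_sigp(addr, SIGP_SENSE, 0, &status);
	return (cc == 1) ? (int) status : cc;
}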

+ 51 - 0
arch/s390/include/asm/stp.h

@@ -0,0 +1,51 @@
+/*
+ *  Copyright IBM Corp. 2006
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#ifndef __S390_STP_H
+#define __S390_STP_H
+
+/* notifier for syncs */
+extern struct atomic_notifier_head s390_epoch_delta_notifier;
+
+/* STP interruption parameter */
+struct stp_irq_parm {
+	unsigned int _pad0	: 14;
+	unsigned int tsc	: 1;	/* Timing status change */
+	unsigned int lac	: 1;	/* Link availability change */
+	unsigned int tcpc	: 1;	/* Time control parameter change */
+	unsigned int _pad2	: 15;
+} __attribute__ ((packed));
+
+#define STP_OP_SYNC	1
+#define STP_OP_CTRL	3
+
+struct stp_sstpi {
+	unsigned int rsvd0;
+	unsigned int rsvd1 : 8;
+	unsigned int stratum : 8;
+	unsigned int vbits : 16;
+	unsigned int leaps : 16;
+	unsigned int tmd : 4;
+	unsigned int ctn : 4;
+	unsigned int rsvd2 : 3;
+	unsigned int c : 1;
+	unsigned int tst : 4;
+	unsigned int tzo : 16;
+	unsigned int dsto : 16;
+	unsigned int ctrl : 16;
+	unsigned int rsvd3 : 16;
+	unsigned int tto;
+	unsigned int rsvd4;
+	unsigned int ctnid[3];
+	unsigned int rsvd5;
+	unsigned int todoff[4];
+	unsigned int rsvd6[48];
+} __attribute__ ((packed));
+
+/* Functions needed by the machine check handler */
+int stp_sync_check(void);
+int stp_island_check(void);
+void stp_queue_work(void);
+
+#endif /* __S390_STP_H */

+ 65 - 1
arch/s390/include/asm/timex.h

@@ -52,6 +52,70 @@ static inline void store_clock_comparator(__u64 *time)
 
 void clock_comparator_work(void);
 
+void __init ptff_init(void);
+
+extern unsigned char ptff_function_mask[16];
+extern unsigned long lpar_offset;
+extern unsigned long initial_leap_seconds;
+
+/* Function codes for the ptff instruction. */
+#define PTFF_QAF	0x00	/* query available functions */
+#define PTFF_QTO	0x01	/* query tod offset */
+#define PTFF_QSI	0x02	/* query steering information */
+#define PTFF_QUI	0x04	/* query UTC information */
+#define PTFF_ATO	0x40	/* adjust tod offset */
+#define PTFF_STO	0x41	/* set tod offset */
+#define PTFF_SFS	0x42	/* set fine steering rate */
+#define PTFF_SGS	0x43	/* set gross steering rate */
+
+/* Query TOD offset result */
+struct ptff_qto {
+	unsigned long long physical_clock;
+	unsigned long long tod_offset;
+	unsigned long long logical_tod_offset;
+	unsigned long long tod_epoch_difference;
+} __packed;
+
+static inline int ptff_query(unsigned int nr)
+{
+	unsigned char *ptr;
+
+	ptr = ptff_function_mask + (nr >> 3);
+	return (*ptr & (0x80 >> (nr & 7))) != 0;
+}
+
+/* Query UTC information result */
+struct ptff_qui {
+	unsigned int tm : 2;
+	unsigned int ts : 2;
+	unsigned int : 28;
+	unsigned int pad_0x04;
+	unsigned long leap_event;
+	short old_leap;
+	short new_leap;
+	unsigned int pad_0x14;
+	unsigned long prt[5];
+	unsigned long cst[3];
+	unsigned int skew;
+	unsigned int pad_0x5c[41];
+} __packed;
+
+static inline int ptff(void *ptff_block, size_t len, unsigned int func)
+{
+	typedef struct { char _[len]; } addrtype;
+	register unsigned int reg0 asm("0") = func;
+	register unsigned long reg1 asm("1") = (unsigned long) ptff_block;
+	int rc;
+
+	asm volatile(
+		"	.word	0x0104\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (rc), "+m" (*(addrtype *) ptff_block)
+		: "d" (reg0), "d" (reg1) : "cc");
+	return rc;
+}
+
 static inline unsigned long long local_tick_disable(void)
 {
 	unsigned long long old;
@@ -105,7 +169,7 @@ static inline cycles_t get_cycles(void)
 	return (cycles_t) get_tod_clock() >> 2;
 }
 
-int get_sync_clock(unsigned long long *clock);
+int get_phys_clock(unsigned long long *clock);
 void init_cpu_timer(void);
 unsigned long long monotonic_clock(void);
 
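
To illustrate the new ptff interface, a sketch of reading the physical clock
via PTFF_QTO, roughly what the renamed get_phys_clock() builds on (the error
codes are illustrative):

/* Sketch: query the TOD offset block and pick out the physical clock. */
static int example_phys_clock(unsigned long long *clock)
{
	struct ptff_qto qto;

	if (!ptff_query(PTFF_QTO))
		return -EOPNOTSUPP;	/* function not installed */
	if (ptff(&qto, sizeof(qto), PTFF_QTO) != 0)
		return -EIO;		/* condition code != 0 */
	*clock = qto.physical_clock;
	return 0;
}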

+ 11 - 20
arch/s390/include/asm/tlbflush.h

@@ -5,6 +5,7 @@
 #include <linux/sched.h>
 #include <asm/processor.h>
 #include <asm/pgalloc.h>
+#include <asm/pgtable.h>
 
 /*
  * Flush all TLB entries on the local CPU.
@@ -44,17 +45,9 @@ void smp_ptlb_all(void);
  */
 static inline void __tlb_flush_global(void)
 {
-	register unsigned long reg2 asm("2");
-	register unsigned long reg3 asm("3");
-	register unsigned long reg4 asm("4");
-	long dummy;
-
-	dummy = 0;
-	reg2 = reg3 = 0;
-	reg4 = ((unsigned long) &dummy) + 1;
-	asm volatile(
-		"	csp	%0,%2"
-		: : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" );
+	unsigned int dummy = 0;
+
+	csp(&dummy, 0, 0);
 }
 
 /*
@@ -64,7 +57,7 @@ static inline void __tlb_flush_global(void)
 static inline void __tlb_flush_full(struct mm_struct *mm)
 {
 	preempt_disable();
-	atomic_add(0x10000, &mm->context.attach_count);
+	atomic_inc(&mm->context.flush_count);
 	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
 		/* Local TLB flush */
 		__tlb_flush_local();
@@ -76,21 +69,19 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
 			cpumask_copy(mm_cpumask(mm),
 				     &mm->context.cpu_attach_mask);
 	}
-	atomic_sub(0x10000, &mm->context.attach_count);
+	atomic_dec(&mm->context.flush_count);
 	preempt_enable();
 }
 
 /*
- * Flush TLB entries for a specific ASCE on all CPUs.
+ * Flush TLB entries for a specific ASCE on all CPUs. Should never be used
+ * when more than one asce (e.g. gmap) ran on this mm.
  */
 static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
 {
-	int active, count;
-
 	preempt_disable();
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	atomic_inc(&mm->context.flush_count);
+	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
 		__tlb_flush_idte_local(asce);
 	} else {
@@ -103,7 +94,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
 			cpumask_copy(mm_cpumask(mm),
 				     &mm->context.cpu_attach_mask);
 	}
-	atomic_sub(0x10000, &mm->context.attach_count);
+	atomic_dec(&mm->context.flush_count);
 	preempt_enable();
 }
 

+ 4 - 0
arch/s390/include/asm/topology.h

@@ -14,10 +14,12 @@ struct cpu_topology_s390 {
 	unsigned short core_id;
 	unsigned short socket_id;
 	unsigned short book_id;
+	unsigned short drawer_id;
 	unsigned short node_id;
 	cpumask_t thread_mask;
 	cpumask_t core_mask;
 	cpumask_t book_mask;
+	cpumask_t drawer_mask;
 };
 
 DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
@@ -30,6 +32,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
 #define topology_core_cpumask(cpu)	  (&per_cpu(cpu_topology, cpu).core_mask)
 #define topology_book_id(cpu)		  (per_cpu(cpu_topology, cpu).book_id)
 #define topology_book_cpumask(cpu)	  (&per_cpu(cpu_topology, cpu).book_mask)
+#define topology_drawer_id(cpu)		  (per_cpu(cpu_topology, cpu).drawer_id)
+#define topology_drawer_cpumask(cpu)	  (&per_cpu(cpu_topology, cpu).drawer_mask)
 
 #define mc_capable() 1
 
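
With the new drawer accessors a consumer could, for example, inspect drawer
sharing (hypothetical helper, built on the standard cpumask API):

/* Sketch: number of CPUs sharing a drawer with @cpu. */
static unsigned int example_cpus_in_drawer(int cpu)
{
	return cpumask_weight(topology_drawer_cpumask(cpu));
}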

+ 61 - 4
arch/s390/include/asm/uaccess.h

@@ -151,8 +151,65 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from,
 	__rc;							\
 })
 
-#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL)
-#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL)
+static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
+{
+	unsigned long spec = 0x810000UL;
+	int rc;
+
+	switch (size) {
+	case 1:
+		rc = __put_get_user_asm((unsigned char __user *)ptr,
+					(unsigned char *)x,
+					size, spec);
+		break;
+	case 2:
+		rc = __put_get_user_asm((unsigned short __user *)ptr,
+					(unsigned short *)x,
+					size, spec);
+		break;
+	case 4:
+		rc = __put_get_user_asm((unsigned int __user *)ptr,
+					(unsigned int *)x,
+					size, spec);
+		break;
+	case 8:
+		rc = __put_get_user_asm((unsigned long __user *)ptr,
+					(unsigned long *)x,
+					size, spec);
+		break;
+	}
+	return rc;
+}
+
+static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
+{
+	unsigned long spec = 0x81UL;
+	int rc;
+
+	switch (size) {
+	case 1:
+		rc = __put_get_user_asm((unsigned char *)x,
+					(unsigned char __user *)ptr,
+					size, spec);
+		break;
+	case 2:
+		rc = __put_get_user_asm((unsigned short *)x,
+					(unsigned short __user *)ptr,
+					size, spec);
+		break;
+	case 4:
+		rc = __put_get_user_asm((unsigned int *)x,
+					(unsigned int __user *)ptr,
+					size, spec);
+		break;
+	case 8:
+		rc = __put_get_user_asm((unsigned long *)x,
+					(unsigned long __user *)ptr,
+					size, spec);
+		break;
+	}
+	return rc;
+}
 
 #else /* CONFIG_HAVE_MARCH_Z10_FEATURES */
 
@@ -191,7 +248,7 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s
 		__put_user_bad();				\
 		break;						\
 	 }							\
-	__pu_err;						\
+	__builtin_expect(__pu_err, 0);				\
 })
 
 #define put_user(x, ptr)					\
@@ -240,7 +297,7 @@ int __put_user_bad(void) __attribute__((noreturn));
 		__get_user_bad();				\
 		break;						\
 	}							\
-	__gu_err;						\
+	__builtin_expect(__gu_err, 0);				\
 })
 
 #define get_user(x, ptr)					\

+ 3 - 3
arch/s390/include/uapi/asm/ptrace.h

@@ -359,9 +359,9 @@ typedef struct
 		per_cr_bits    bits;
 	} control_regs;
 	/*
-	 * Use these flags instead of setting em_instruction_fetch
-	 * directly they are used so that single stepping can be
-	 * switched on & off while not affecting other tracing
+	 * The single_step and instruction_fetch bits are obsolete,
+	 * the kernel always sets them to zero. To enable single
+	 * stepping use ptrace(PTRACE_SINGLESTEP) instead.
 	 */
 	unsigned  single_step       : 1;
 	unsigned  instruction_fetch : 1;

+ 4 - 1
arch/s390/kernel/Makefile

@@ -2,6 +2,9 @@
 # Makefile for the linux kernel.
 #
 
+KCOV_INSTRUMENT_early.o := n
+KCOV_INSTRUMENT_sclp.o := n
+
 ifdef CONFIG_FUNCTION_TRACER
 # Don't trace early setup code and tracing code
 CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
@@ -45,7 +48,7 @@ obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
 obj-y	+= sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y	+= runtime_instr.o cache.o dumpstack.o
+obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o
 obj-y	+= entry.o reipl.o relocate_kernel.o
 
 extra-y				+= head.o head64.o vmlinux.lds

+ 1 - 6
arch/s390/kernel/cache.c

@@ -99,12 +99,7 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
 
 static inline unsigned long ecag(int ai, int li, int ti)
 {
-	unsigned long cmd, val;
-
-	cmd = ai << 4 | li << 1 | ti;
-	asm volatile(".insn	rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
-		     : "=d" (val) : "a" (cmd));
-	return val;
+	return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti);
 }
 
 static void ci_leaf_init(struct cacheinfo *this_leaf, int private,

+ 0 - 1
arch/s390/kernel/dis.c

@@ -26,7 +26,6 @@
 #include <asm/dis.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
-#include <asm/mathemu.h>
 #include <asm/cpcmd.h>
 #include <asm/lowcore.h>
 #include <asm/debug.h>

+ 4 - 8
arch/s390/kernel/dumpstack.c

@@ -78,14 +78,10 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
 	sp = __dump_trace(func, data, sp,
 			  S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
 			  S390_lowcore.async_stack + frame_size);
-	if (task)
-		__dump_trace(func, data, sp,
-			     (unsigned long)task_stack_page(task),
-			     (unsigned long)task_stack_page(task) + THREAD_SIZE);
-	else
-		__dump_trace(func, data, sp,
-			     S390_lowcore.thread_info,
-			     S390_lowcore.thread_info + THREAD_SIZE);
+	task = task ?: current;
+	__dump_trace(func, data, sp,
+		     (unsigned long)task_stack_page(task),
+		     (unsigned long)task_stack_page(task) + THREAD_SIZE);
 }
 EXPORT_SYMBOL_GPL(dump_trace);
 

+ 22 - 0
arch/s390/kernel/early.c

@@ -231,6 +231,26 @@ static noinline __init void detect_machine_type(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
 }
 
+static noinline __init void setup_arch_string(void)
+{
+	struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
+
+	if (stsi(mach, 1, 1, 1))
+		return;
+	EBCASC(mach->manufacturer, sizeof(mach->manufacturer));
+	EBCASC(mach->type, sizeof(mach->type));
+	EBCASC(mach->model, sizeof(mach->model));
+	EBCASC(mach->model_capacity, sizeof(mach->model_capacity));
+	dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)",
+				 mach->manufacturer,
+				 mach->type,
+				 mach->model,
+				 mach->model_capacity,
+				 MACHINE_IS_LPAR ? "LPAR" :
+				 MACHINE_IS_VM ? "z/VM" :
+				 MACHINE_IS_KVM ? "KVM" : "unknown");
+}
+
 static __init void setup_topology(void)
 {
 	int max_mnest;
@@ -447,11 +467,13 @@ void __init startup_init(void)
 	ipl_save_parameters();
 	rescue_initrd();
 	clear_bss_section();
+	ptff_init();
 	init_kernel_storage_key();
 	lockdep_off();
 	setup_lowcore_early();
 	setup_facility_list();
 	detect_machine_type();
+	setup_arch_string();
 	ipl_update_parameters();
 	setup_boot_command_line();
 	create_kernel_nss();

+ 10 - 1
arch/s390/kernel/entry.S

@@ -163,6 +163,16 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	.endm
 
 	.section .kprobes.text, "ax"
+.Ldummy:
+	/*
+	 * This nop exists only in order to avoid that __switch_to starts at
+	 * the beginning of the kprobes text section. In that case we would
+	 * have several symbols at the same address. E.g. objdump would take
+	 * an arbitrary symbol name when disassembling this code.
+	 * With the added nop in between the __switch_to symbol is unique
+	 * again.
+	 */
+	nop	0
 
 /*
  * Scheduler resume function, called by switch_to
@@ -175,7 +185,6 @@ ENTRY(__switch_to)
 	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
 	lgr	%r1,%r2
 	aghi	%r1,__TASK_thread		# thread_struct of prev task
-	lg	%r4,__TASK_thread_info(%r2)	# get thread_info of prev
 	lg	%r5,__TASK_thread_info(%r3)	# get thread_info of next
 	stg	%r15,__THREAD_ksp(%r1)		# store kernel stack of prev
 	lgr	%r1,%r3

+ 249 - 0
arch/s390/kernel/fpu.c

@@ -0,0 +1,249 @@
+/*
+ * In-kernel vector facility support functions
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <asm/fpu/types.h>
+#include <asm/fpu/api.h>
+
+/*
+ * Per-CPU variable to maintain FPU register ranges that are in use
+ * by the kernel.
+ */
+static DEFINE_PER_CPU(u32, kernel_fpu_state);
+
+#define KERNEL_FPU_STATE_MASK	(KERNEL_FPU_MASK|KERNEL_FPC)
+
+void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+{
+	if (!__this_cpu_read(kernel_fpu_state)) {
+		/*
+		 * Save user space FPU state and register contents.  Multiple
+		 * calls because of interruptions do not matter and return
+		 * immediately.  This also sets CIF_FPU to lazy restore FP/VX
+		 * register contents when returning to user space.
+		 */
+		save_fpu_regs();
+	}
+
+	/* Update flags to use the vector facility for KERNEL_FPR */
+	if (MACHINE_HAS_VX && (flags & KERNEL_FPR)) {
+		flags |= KERNEL_VXR_LOW | KERNEL_FPC;
+		flags &= ~KERNEL_FPR;
+	}
+
+	/* Save and update current kernel VX state */
+	state->mask = __this_cpu_read(kernel_fpu_state);
+	__this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK);
+
+	/*
+	 * If this is the first call to __kernel_fpu_begin(), no additional
+	 * work is required.
+	 */
+	if (!(state->mask & KERNEL_FPU_STATE_MASK))
+		return;
+
+	/*
+	 * If KERNEL_FPR is still set, the vector facility is not available
+	 * and, thus, save floating-point control and registers only.
+	 */
+	if (state->mask & KERNEL_FPR) {
+		asm volatile("stfpc %0" : "=Q" (state->fpc));
+		asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
+		asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
+		asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
+		asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
+		asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
+		asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
+		asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
+		asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
+		asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
+		asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
+		asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
+		asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
+		asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
+		asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
+		asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
+		asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+		return;
+	}
+
+	/*
+	 * If this is a nested call to __kernel_fpu_begin(), check the saved
+	 * state mask to save and later restore the vector registers that
+	 * are already in use.	Let's start with checking floating-point
+	 * controls.
+	 */
+	if (state->mask & KERNEL_FPC)
+		asm volatile("stfpc %0" : "=m" (state->fpc));
+
+	/* Test and save vector registers */
+	asm volatile (
+		/*
+		 * Test if any vector register must be saved and, if so,
+		 * test if all register can be saved.
+		 */
+		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
+		"	jz	20f\n"		/* no work -> done */
+		"	la	1,%[vxrs]\n"	/* load save area */
+		"	jo	18f\n"		/* -> save V0..V31 */
+
+		/*
+		 * Test if V8..V23 can be saved at once... this speeds up
+		 * for KERNEL_VXR_MID only. Otherwise continue to split the
+		 * range of vector registers into two halves and test them
+		 * separately.
+		 */
+		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
+		"	jo	17f\n"		/* -> save V8..V23 */
+
+		/* Test and save the first half of 16 vector registers */
+		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
+		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> save V0..V15 */
+		"	brc	4,3f\n"		/* 01 -> save V0..V7  */
+		"	brc	2,4f\n"		/* 10 -> save V8..V15 */
+
+		/* Test and save the second half of 16 vector registers */
+		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
+		"	jo	19f\n"		/* 11 -> save V16..V31 */
+		"	brc	4,11f\n"	/* 01 -> save V16..V23	*/
+		"	brc	2,12f\n"	/* 10 -> save V24..V31 */
+		"	j	20f\n"		/* 00 -> done */
+
+		/*
+		 * Below are the vstm combinations to save multiple vector
+		 * registers at once.
+		 */
+		"2:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"3:	.word	0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"4:	.word	0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"\n"
+		"11:	.word	0xe707,0x1100,0x0c3e\n"	/* vstm 16,23,256(1) */
+		"	j	20f\n"			/* -> done */
+		"12:	.word	0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */
+		"	j	20f\n"			/* -> done */
+		"\n"
+		"17:	.word	0xe787,0x1080,0x043e\n"	/* vstm 8,23,128(1) */
+		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
+		"	j	1b\n"			/* -> VXR_LOW */
+		"\n"
+		"18:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"19:	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		"20:"
+		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+		: [m] "d" (state->mask)
+		: "1", "cc");
+}
+EXPORT_SYMBOL(__kernel_fpu_begin);
+
+void __kernel_fpu_end(struct kernel_fpu *state)
+{
+	/* Just update the per-CPU state if there is nothing to restore */
+	if (!(state->mask & KERNEL_FPU_STATE_MASK))
+		goto update_fpu_state;
+
+	/*
+	 * If KERNEL_FPR is specified, the vector facility is not available
+	 * and, thus, restore floating-point control and registers only.
+	 */
+	if (state->mask & KERNEL_FPR) {
+		asm volatile("lfpc %0" : : "Q" (state->fpc));
+		asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
+		asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
+		asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
+		asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
+		asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
+		asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
+		asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
+		asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
+		asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
+		asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
+		asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
+		asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
+		asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
+		asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
+		asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
+		asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
+		goto update_fpu_state;
+	}
+
+	/* Test and restore floating-point controls */
+	if (state->mask & KERNEL_FPC)
+		asm volatile("lfpc %0" : : "Q" (state->fpc));
+
+	/* Test and restore (load) vector registers */
+	asm volatile (
+		/*
+		 * Test if any vector registers must be loaded and, if so,
+		 * test if all registers can be loaded at once.
+		 */
+		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
+		"	jz	20f\n"		/* no work -> done */
+		"	la	1,%[vxrs]\n"	/* load restore area */
+		"	jo	18f\n"		/* -> load V0..V31 */
+
+		/*
+		 * Test if V8..V23 can be restored at once... this speeds up
+		 * for KERNEL_VXR_MID only. Otherwise continue to split the
+		 * range of vector registers into two halves and test them
+		 * separately.
+		 */
+		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
+		"	jo	17f\n"		/* -> load V8..V23 */
+
+		/* Test and load the first half of 16 vector registers */
+		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
+		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> load V0..V15 */
+		"	brc	4,3f\n"		/* 01 -> load V0..V7  */
+		"	brc	2,4f\n"		/* 10 -> load V8..V15 */
+
+		/* Test and load the second half of 16 vector registers */
+		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
+		"	jo	19f\n"		/* 11 -> load V16..V31 */
+		"	brc	4,11f\n"	/* 01 -> load V16..V23	*/
+		"	brc	2,12f\n"	/* 10 -> load V24..V31 */
+		"	j	20f\n"		/* 00 -> done */
+
+		/*
+		 * Below are the vlm combinations to load multiple vector
+		 * registers at once.
+		 */
+		"2:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"3:	.word	0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"4:	.word	0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"\n"
+		"11:	.word	0xe707,0x1100,0x0c36\n"	/* vlm 16,23,256(1) */
+		"	j	20f\n"			/* -> done */
+		"12:	.word	0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */
+		"	j	20f\n"			/* -> done */
+		"\n"
+		"17:	.word	0xe787,0x1080,0x0436\n"	/* vlm 8,23,128(1) */
+		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
+		"	j	1b\n"			/* -> VXR_LOW */
+		"\n"
+		"18:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+		"19:	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+		"20:"
+		:
+		: [vxrs] "Q" (*(struct vx_array *) &state->vxrs),
+		  [m] "d" (state->mask)
+		: "1", "cc");
+
+update_fpu_state:
+	/* Update current kernel VX state */
+	__this_cpu_write(kernel_fpu_state, state->mask);
+}
+EXPORT_SYMBOL(__kernel_fpu_end);
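
The per-CPU kernel_fpu_state tracking above makes nesting work by masks: on entry, __kernel_fpu_begin() saves only the FPU/vector registers the previous context had flagged as in use, and __kernel_fpu_end() restores exactly that set. A minimal caller-side sketch, assuming the preempt-handling kernel_fpu_begin()/kernel_fpu_end() wrappers this series adds to asm/fpu/api.h (the same API the sysinfo.c hunk further down uses):

	struct kernel_fpu vxstate;

	kernel_fpu_begin(&vxstate, KERNEL_FPC | KERNEL_VXR_LOW);
	/* ... clobber the floating-point control and V0..V15 here ... */
	kernel_fpu_end(&vxstate);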

+ 14 - 11
arch/s390/kernel/ipl.c

@@ -121,9 +121,9 @@ static char *dump_type_str(enum dump_type type)
  * Must be in data section since the bss section
  * is not cleared when these are accessed.
  */
-static u8 ipl_ssid __attribute__((__section__(".data"))) = 0;
-static u16 ipl_devno __attribute__((__section__(".data"))) = 0;
-u32 ipl_flags __attribute__((__section__(".data"))) = 0;
+static u8 ipl_ssid __section(.data) = 0;
+static u16 ipl_devno __section(.data) = 0;
+u32 ipl_flags __section(.data) = 0;
 
 enum ipl_method {
 	REIPL_METHOD_CCW_CIO,
@@ -174,7 +174,7 @@ static inline int __diag308(unsigned long subcode, void *addr)
 
 	asm volatile(
 		"	diag	%0,%2,0x308\n"
-		"0:\n"
+		"0:	nopr	%%r7\n"
 		EX_TABLE(0b,0b)
 		: "+d" (_addr), "+d" (_rc)
 		: "d" (subcode) : "cc", "memory");
@@ -563,7 +563,7 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
-	diag308(DIAG308_IPL, NULL);
+	diag308(DIAG308_LOAD_CLEAR, NULL);
 	if (MACHINE_IS_VM)
 		__cpcmd("IPL", NULL, 0, NULL);
 	else if (ipl_info.type == IPL_TYPE_CCW)
@@ -1085,21 +1085,24 @@ static void __reipl_run(void *unused)
 		break;
 	case REIPL_METHOD_CCW_DIAG:
 		diag308(DIAG308_SET, reipl_block_ccw);
-		diag308(DIAG308_IPL, NULL);
+		if (MACHINE_IS_LPAR)
+			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
+		else
+			diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RW_DIAG:
 		diag308(DIAG308_SET, reipl_block_fcp);
-		diag308(DIAG308_IPL, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RO_DIAG:
-		diag308(DIAG308_IPL, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RO_VM:
 		__cpcmd("IPL", NULL, 0, NULL);
 		break;
 	case REIPL_METHOD_NSS_DIAG:
 		diag308(DIAG308_SET, reipl_block_nss);
-		diag308(DIAG308_IPL, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_NSS:
 		get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
@@ -1108,7 +1111,7 @@ static void __reipl_run(void *unused)
 	case REIPL_METHOD_DEFAULT:
 		if (MACHINE_IS_VM)
 			__cpcmd("IPL", NULL, 0, NULL);
-		diag308(DIAG308_IPL, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_DUMP:
 		break;
@@ -1423,7 +1426,7 @@ static void diag308_dump(void *dump_block)
 {
 	diag308(DIAG308_SET, dump_block);
 	while (1) {
-		if (diag308(DIAG308_DUMP, NULL) != 0x302)
+		if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302)
 			break;
 		udelay_simple(USEC_PER_SEC);
 	}
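
The DIAG308_IPL and DIAG308_DUMP subcodes are renamed to DIAG308_LOAD_CLEAR and DIAG308_LOAD_NORMAL_DUMP, matching the architected LOAD CLEAR and LOAD NORMAL semantics. A sketch of the renamed enum, assuming the subcode values from asm/ipl.h (reproduced from memory, not part of this diff):

	enum diag308_subcode {
		DIAG308_REL_HSA          = 2,
		DIAG308_LOAD_CLEAR       = 3,	/* was DIAG308_IPL */
		DIAG308_LOAD_NORMAL_DUMP = 4,	/* was DIAG308_DUMP */
		DIAG308_SET              = 5,
		DIAG308_STORE            = 6,
	};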

+ 4 - 3
arch/s390/kernel/irq.c

@@ -127,9 +127,7 @@ int show_interrupts(struct seq_file *p, void *v)
 			seq_printf(p, "CPU%d       ", cpu);
 			seq_printf(p, "CPU%d       ", cpu);
 		seq_putc(p, '\n');
 		seq_putc(p, '\n');
 	}
 	}
-	if (index < NR_IRQS) {
-		if (index >= NR_IRQS_BASE)
-			goto out;
+	if (index < NR_IRQS_BASE) {
 		seq_printf(p, "%s: ", irqclass_main_desc[index].name);
 		seq_printf(p, "%s: ", irqclass_main_desc[index].name);
 		irq = irqclass_main_desc[index].irq;
 		irq = irqclass_main_desc[index].irq;
 		for_each_online_cpu(cpu)
 		for_each_online_cpu(cpu)
@@ -137,6 +135,9 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 		seq_putc(p, '\n');
 		goto out;
 		goto out;
 	}
 	}
+	if (index > NR_IRQS_BASE)
+		goto out;
+
 	for (index = 0; index < NR_ARCH_IRQS; index++) {
 	for (index = 0; index < NR_ARCH_IRQS; index++) {
 		seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
 		seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
 		irq = irqclass_sub_desc[index].irq;
 		irq = irqclass_sub_desc[index].irq;

+ 26 - 29
arch/s390/kernel/machine_kexec.c

@@ -24,6 +24,7 @@
 #include <asm/diag.h>
 #include <asm/elf.h>
 #include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
 #include <asm/os_info.h>
 #include <asm/switch_to.h>
 
@@ -60,8 +61,6 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
 static int __init machine_kdump_pm_init(void)
 {
 	pm_notifier(machine_kdump_pm_cb, 0);
-	/* Create initial mapping for crashkernel memory */
-	arch_kexec_unprotect_crashkres();
 	return 0;
 }
 arch_initcall(machine_kdump_pm_init);
@@ -150,42 +149,40 @@ static int kdump_csum_valid(struct kimage *image)
 
 #ifdef CONFIG_CRASH_DUMP
 
-/*
- * Map or unmap crashkernel memory
- */
-static void crash_map_pages(int enable)
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
 {
-	unsigned long size = resource_size(&crashk_res);
-
-	BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN ||
-	       size % KEXEC_CRASH_MEM_ALIGN);
-	if (enable)
-		vmem_add_mapping(crashk_res.start, size);
-	else {
-		vmem_remove_mapping(crashk_res.start, size);
-		if (size)
-			os_info_crashkernel_add(crashk_res.start, size);
-		else
-			os_info_crashkernel_add(0, 0);
-	}
+	unsigned long addr, size;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE)
+		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+	size = begin - crashk_res.start;
+	if (size)
+		os_info_crashkernel_add(crashk_res.start, size);
+	else
+		os_info_crashkernel_add(0, 0);
+}
+
+static void crash_protect_pages(int protect)
+{
+	unsigned long size;
+
+	if (!crashk_res.end)
+		return;
+	size = resource_size(&crashk_res);
+	if (protect)
+		set_memory_ro(crashk_res.start, size >> PAGE_SHIFT);
+	else
+		set_memory_rw(crashk_res.start, size >> PAGE_SHIFT);
 }
 
-/*
- * Unmap crashkernel memory
- */
 void arch_kexec_protect_crashkres(void)
 {
-	if (crashk_res.end)
-		crash_map_pages(0);
+	crash_protect_pages(1);
 }
 
-/*
- * Map crashkernel memory
- */
 void arch_kexec_unprotect_crashkres(void)
 {
-	if (crashk_res.end)
-		crash_map_pages(1);
+	crash_protect_pages(0);
 }
 
 #endif
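
With this change the crashkernel region stays permanently mapped and is merely write-protected while no kexec load is in progress; crash_free_reserved_phys_range() overrides the generic weak implementation that the kexec core calls when the region is shrunk via /sys/kernel/kexec_crash_size. A rough sketch of the call pattern expected from the core (the core-side sequence is an assumption, not part of this diff):

	/* kexec crash-image load path, sketched */
	arch_kexec_unprotect_crashkres();	/* set_memory_rw() on crashk_res */
	/* ...copy the crash image segments into the crashkernel region... */
	arch_kexec_protect_crashkres();		/* set_memory_ro() until a crash */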

+ 2 - 11
arch/s390/kernel/nmi.c

@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <asm/lowcore.h>
 #include <asm/smp.h>
-#include <asm/etr.h>
+#include <asm/stp.h>
 #include <asm/cputime.h>
 #include <asm/nmi.h>
 #include <asm/crw.h>
@@ -27,7 +27,6 @@ struct mcck_struct {
 	unsigned int kill_task : 1;
 	unsigned int channel_report : 1;
 	unsigned int warning : 1;
-	unsigned int etr_queue : 1;
 	unsigned int stp_queue : 1;
 	unsigned long mcck_code;
 };
@@ -82,8 +81,6 @@ void s390_handle_mcck(void)
 		if (xchg(&mchchk_wng_posted, 1) == 0)
 			kill_cad_pid(SIGPWR, 1);
 	}
-	if (mcck.etr_queue)
-		etr_queue_work();
 	if (mcck.stp_queue)
 		stp_queue_work();
 	if (mcck.kill_task) {
@@ -241,8 +238,6 @@ static int notrace s390_validate_registers(union mci mci)
 
 #define ED_STP_ISLAND	6	/* External damage STP island check */
 #define ED_STP_SYNC	7	/* External damage STP sync check */
-#define ED_ETR_SYNC	12	/* External damage ETR sync check */
-#define ED_ETR_SWITCH	13	/* External damage ETR switch to local */
 
 /*
  * machine check handler.
@@ -325,15 +320,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 	}
 	if (mci.ed && mci.ec) {
 		/* External damage */
-		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
-			mcck->etr_queue |= etr_sync_check();
-		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
-			mcck->etr_queue |= etr_switch_to_local();
 		if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
 			mcck->stp_queue |= stp_sync_check();
 		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
 			mcck->stp_queue |= stp_island_check();
-		if (mcck->etr_queue || mcck->stp_queue)
+		if (mcck->stp_queue)
 			set_cpu_flag(CIF_MCCK_PENDING);
 	}
 	if (mci.se)

+ 0 - 5
arch/s390/kernel/perf_cpum_sf.c

@@ -601,17 +601,12 @@ static void release_pmc_hardware(void)
 
 	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
-	perf_release_sampling();
 }
 
 static int reserve_pmc_hardware(void)
 {
 	int flags = PMC_INIT;
-	int err;
 
-	err = perf_reserve_sampling();
-	if (err)
-		return err;
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
 	if (flags & PMC_FAILURE) {
 		release_pmc_hardware();

+ 0 - 30
arch/s390/kernel/perf_event.c

@@ -248,33 +248,3 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
 	return sprintf(page, "event=0x%04llx,name=%s\n",
 		       pmu_attr->id, attr->attr.name);
 }
-
-/* Reserve/release functions for sharing perf hardware */
-static DEFINE_SPINLOCK(perf_hw_owner_lock);
-static void *perf_sampling_owner;
-
-int perf_reserve_sampling(void)
-{
-	int err;
-
-	err = 0;
-	spin_lock(&perf_hw_owner_lock);
-	if (perf_sampling_owner) {
-		pr_warn("The sampling facility is already reserved by %p\n",
-			perf_sampling_owner);
-		err = -EBUSY;
-	} else
-		perf_sampling_owner = __builtin_return_address(0);
-	spin_unlock(&perf_hw_owner_lock);
-	return err;
-}
-EXPORT_SYMBOL(perf_reserve_sampling);
-
-void perf_release_sampling(void)
-{
-	spin_lock(&perf_hw_owner_lock);
-	WARN_ON(!perf_sampling_owner);
-	perf_sampling_owner = NULL;
-	spin_unlock(&perf_hw_owner_lock);
-}
-EXPORT_SYMBOL(perf_release_sampling);

+ 83 - 31
arch/s390/kernel/processor.c

@@ -13,12 +13,45 @@
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <asm/diag.h>
+#include <asm/facility.h>
 #include <asm/elf.h>
 #include <asm/lowcore.h>
 #include <asm/param.h>
 #include <asm/smp.h>
 
-static DEFINE_PER_CPU(struct cpuid, cpu_id);
+struct cpu_info {
+	unsigned int cpu_mhz_dynamic;
+	unsigned int cpu_mhz_static;
+	struct cpuid cpu_id;
+};
+
+static DEFINE_PER_CPU(struct cpu_info, cpu_info);
+
+static bool machine_has_cpu_mhz;
+
+void __init cpu_detect_mhz_feature(void)
+{
+	if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL)
+		machine_has_cpu_mhz = 1;
+}
+
+static void update_cpu_mhz(void *arg)
+{
+	unsigned long mhz;
+	struct cpu_info *c;
+
+	mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
+	c = this_cpu_ptr(&cpu_info);
+	c->cpu_mhz_dynamic = mhz >> 32;
+	c->cpu_mhz_static = mhz & 0xffffffff;
+}
+
+void s390_update_cpu_mhz(void)
+{
+	s390_adjust_jiffies();
+	if (machine_has_cpu_mhz)
+		on_each_cpu(update_cpu_mhz, NULL, 0);
+}
 
 void notrace cpu_relax(void)
 {
@@ -35,9 +68,11 @@ EXPORT_SYMBOL(cpu_relax);
  */
 void cpu_init(void)
 {
-	struct cpuid *id = this_cpu_ptr(&cpu_id);
+	struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id);
 
 	get_cpu_id(id);
+	if (machine_has_cpu_mhz)
+		update_cpu_mhz(NULL);
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
 	BUG_ON(current->mm);
@@ -53,10 +88,7 @@ int cpu_have_feature(unsigned int num)
 }
 EXPORT_SYMBOL(cpu_have_feature);
 
-/*
- * show_cpuinfo - Get information on one CPU for use by procfs.
- */
-static int show_cpuinfo(struct seq_file *m, void *v)
+static void show_cpu_summary(struct seq_file *m, void *v)
 {
 	static const char *hwcap_str[] = {
 		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
@@ -65,34 +97,55 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	static const char * const int_hwcap_str[] = {
 		"sie"
 	};
-	unsigned long n = (unsigned long) v - 1;
-	int i;
-
-	if (!n) {
-		s390_adjust_jiffies();
-		seq_printf(m, "vendor_id       : IBM/S390\n"
-			   "# processors    : %i\n"
-			   "bogomips per cpu: %lu.%02lu\n",
-			   num_online_cpus(), loops_per_jiffy/(500000/HZ),
-			   (loops_per_jiffy/(5000/HZ))%100);
-		seq_puts(m, "features\t: ");
-		for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
-			if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
-				seq_printf(m, "%s ", hwcap_str[i]);
-		for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++)
-			if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
-				seq_printf(m, "%s ", int_hwcap_str[i]);
-		seq_puts(m, "\n");
-		show_cacheinfo(m);
-	}
-	if (cpu_online(n)) {
-		struct cpuid *id = &per_cpu(cpu_id, n);
-		seq_printf(m, "processor %li: "
+	int i, cpu;
+
+	seq_printf(m, "vendor_id       : IBM/S390\n"
+		   "# processors    : %i\n"
+		   "bogomips per cpu: %lu.%02lu\n",
+		   num_online_cpus(), loops_per_jiffy/(500000/HZ),
+		   (loops_per_jiffy/(5000/HZ))%100);
+	seq_printf(m, "max thread id   : %d\n", smp_cpu_mtid);
+	seq_puts(m, "features\t: ");
+	for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
+		if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
+			seq_printf(m, "%s ", hwcap_str[i]);
+	for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++)
+		if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
+			seq_printf(m, "%s ", int_hwcap_str[i]);
+	seq_puts(m, "\n");
+	show_cacheinfo(m);
+	for_each_online_cpu(cpu) {
+		struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu);
+
+		seq_printf(m, "processor %d: "
 			   "version = %02X,  "
 			   "identification = %06X,  "
 			   "machine = %04X\n",
-			   n, id->version, id->ident, id->machine);
+			   cpu, id->version, id->ident, id->machine);
 	}
+}
+
+static void show_cpu_mhz(struct seq_file *m, unsigned long n)
+{
+	struct cpu_info *c = per_cpu_ptr(&cpu_info, n);
+
+	seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic);
+	seq_printf(m, "cpu MHz static  : %d\n", c->cpu_mhz_static);
+}
+
+/*
+ * show_cpuinfo - Get information on one CPU for use by procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	unsigned long n = (unsigned long) v - 1;
+
+	if (!n)
+		show_cpu_summary(m, v);
+	if (!machine_has_cpu_mhz)
+		return 0;
+	seq_printf(m, "\ncpu number      : %ld\n", n);
+	show_cpu_mhz(m, n);
 	return 0;
 }
 
@@ -126,4 +179,3 @@ const struct seq_operations cpuinfo_op = {
 	.stop	= c_stop,
 	.show	= show_cpuinfo,
 };
-
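
With the format strings above, /proc/cpuinfo gains a "max thread id" line and, on machines that report the CPU-MHz attribute via ECAG, a trailing per-CPU block. Illustrative output (all values invented):

	vendor_id       : IBM/S390
	# processors    : 2
	bogomips per cpu: 20325.00
	max thread id   : 1
	features	: esan3 zarch stfle msa ldisp eimm dfp ...
	processor 0: version = FF,  identification = 123456,  machine = 2964

	cpu number      : 0
	cpu MHz dynamic : 5000
	cpu MHz static  : 5000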

+ 30 - 10
arch/s390/kernel/setup.c

@@ -130,17 +130,14 @@ __setup("condev=", condev_setup);
 
 static void __init set_preferred_console(void)
 {
-	if (MACHINE_IS_KVM) {
-		if (sclp.has_vt220)
-			add_preferred_console("ttyS", 1, NULL);
-		else if (sclp.has_linemode)
-			add_preferred_console("ttyS", 0, NULL);
-		else
-			add_preferred_console("hvc", 0, NULL);
-	} else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
+	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
 		add_preferred_console("ttyS", 0, NULL);
 	else if (CONSOLE_IS_3270)
 		add_preferred_console("tty3270", 0, NULL);
+	else if (CONSOLE_IS_VT220)
+		add_preferred_console("ttyS", 1, NULL);
+	else if (CONSOLE_IS_HVC)
+		add_preferred_console("hvc", 0, NULL);
 }
 
 static int __init conmode_setup(char *str)
@@ -206,6 +203,15 @@ static void __init conmode_default(void)
 			SET_CONSOLE_SCLP;
 #endif
 		}
+	} else if (MACHINE_IS_KVM) {
+		if (sclp.has_vt220 &&
+		    config_enabled(CONFIG_SCLP_VT220_CONSOLE))
+			SET_CONSOLE_VT220;
+		else if (sclp.has_linemode &&
+			 config_enabled(CONFIG_SCLP_CONSOLE))
+			SET_CONSOLE_SCLP;
+		else
+			SET_CONSOLE_HVC;
 	} else {
 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 		SET_CONSOLE_SCLP;
@@ -289,7 +295,7 @@ static int __init parse_vmalloc(char *arg)
 }
 early_param("vmalloc", parse_vmalloc);
 
-void *restart_stack __attribute__((__section__(".data")));
+void *restart_stack __section(.data);
 
 static void __init setup_lowcore(void)
 {
@@ -432,6 +438,20 @@ static void __init setup_resources(void)
 			}
 		}
 	}
+#ifdef CONFIG_CRASH_DUMP
+	/*
+	 * Re-add removed crash kernel memory as reserved memory. This makes
+	 * sure it will be mapped with the identity mapping and struct pages
+	 * will be created, so it can be resized later on.
+	 * However add it later since the crash kernel resource should not be
+	 * part of the System RAM resource.
+	 */
+	if (crashk_res.end) {
+		memblock_add(crashk_res.start, resource_size(&crashk_res));
+		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
+		insert_resource(&iomem_resource, &crashk_res);
+	}
+#endif
 }
 
 static void __init setup_memory_end(void)
@@ -602,7 +622,6 @@ static void __init reserve_crashkernel(void)
 		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
 	crashk_res.start = crash_base;
 	crashk_res.end = crash_base + crash_size - 1;
-	insert_resource(&iomem_resource, &crashk_res);
 	memblock_remove(crash_base, crash_size);
 	pr_info("Reserving %lluMB of memory at %lluMB "
 		"for crashkernel (System RAM: %luMB)\n",
@@ -901,6 +920,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_vmcoreinfo();
 	setup_lowcore();
 	smp_fill_possible_mask();
+	cpu_detect_mhz_feature();
         cpu_init();
 	numa_setup();
 
+ 4 - 14
arch/s390/kernel/smp.c

@@ -242,10 +242,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 {
 	struct lowcore *lc = pcpu->lowcore;
 
-	if (MACHINE_HAS_TLB_LC)
-		cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
 	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
-	atomic_inc(&init_mm.context.attach_count);
 	lc->cpu_nr = cpu;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
 	lc->percpu_offset = __per_cpu_offset[cpu];
@@ -320,17 +318,11 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
  */
 static int pcpu_set_smt(unsigned int mtid)
 {
-	register unsigned long reg1 asm ("1") = (unsigned long) mtid;
 	int cc;
 
 	if (smp_cpu_mtid == mtid)
 		return 0;
-	asm volatile(
-		"	sigp	%1,0,%2	# sigp set multi-threading\n"
-		"	ipm	%0\n"
-		"	srl	%0,28\n"
-		: "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING)
-		: "cc");
+	cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL);
 	if (cc == 0) {
 		smp_cpu_mtid = mtid;
 		smp_cpu_mt_shift = 0;
@@ -876,10 +868,8 @@ void __cpu_die(unsigned int cpu)
 	while (!pcpu_stopped(pcpu))
 		cpu_relax();
 	pcpu_free_lowcore(pcpu);
-	atomic_dec(&init_mm.context.attach_count);
 	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
-	if (MACHINE_HAS_TLB_LC)
-		cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+	cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
 }
 
 void __noreturn cpu_die(void)
@@ -897,7 +887,7 @@ void __init smp_fill_possible_mask(void)
 
 	sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
 	sclp_max = min(smp_max_threads, sclp_max);
-	sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids;
+	sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids;
 	possible = setup_possible_cpus ?: nr_cpu_ids;
 	possible = min(possible, sclp_max);
 	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
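
pcpu_set_smt() now reuses the common __pcpu_sigp() helper instead of open-coding the sigp instruction. For reference, a sketch of that helper as defined in asm/sigp.h (signature and body reproduced from memory, treat as an approximation):

	static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
				      u32 *status)
	{
		register unsigned long reg1 asm ("1") = parm;
		int cc;

		asm volatile(
			"	sigp	%1,%2,0(%3)\n"
			"	ipm	%0\n"
			"	srl	%0,28\n"
			: "=d" (cc), "+d" (reg1)
			: "d" (addr), "a" (order)
			: "cc");
		if (status && cc == 1)
			*status = reg1;
		return cc;
	}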

+ 38 - 25
arch/s390/kernel/sysinfo.c

@@ -16,21 +16,11 @@
 #include <asm/sysinfo.h>
 #include <asm/cpcmd.h>
 #include <asm/topology.h>
-
-/* Sigh, math-emu. Don't ask. */
-#include <asm/sfp-util.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
+#include <asm/fpu/api.h>
 
 int topology_max_mnest;
 
-/*
- * stsi - store system information
- *
- * Returns the current configuration level if function code 0 was specified.
- * Otherwise returns 0 on success or a negative value on error.
- */
-int stsi(void *sysinfo, int fc, int sel1, int sel2)
+static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl)
 {
 	register int r0 asm("0") = (fc << 28) | sel1;
 	register int r1 asm("1") = sel2;
@@ -45,9 +35,24 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2)
 		: "+d" (r0), "+d" (rc)
 		: "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP)
 		: "cc", "memory");
+	*lvl = ((unsigned int) r0) >> 28;
+	return rc;
+}
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+	int lvl, rc;
+
+	rc = __stsi(sysinfo, fc, sel1, sel2, &lvl);
 	if (rc)
 		return rc;
-	return fc ? 0 : ((unsigned int) r0) >> 28;
+	return fc ? 0 : lvl;
 }
 EXPORT_SYMBOL(stsi);
 
@@ -414,10 +419,8 @@ subsys_initcall(create_proc_service_level);
 void s390_adjust_jiffies(void)
 {
 	struct sysinfo_1_2_2 *info;
-	const unsigned int fmil = 0x4b189680;	/* 1e7 as 32-bit float. */
-	FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-	FP_DECL_EX;
-	unsigned int capability;
+	unsigned long capability;
+	struct kernel_fpu fpu;
 
 	info = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!info)
@@ -433,15 +436,25 @@ void s390_adjust_jiffies(void)
 		 * higher cpu capacity. Bogomips are the other way round.
 		 * To get to a halfway suitable number we divide 1e7
 		 * by the cpu capability number. Yes, that means a floating
-		 * point division .. math-emu here we come :-)
+		 * point division ..
 		 */
-		FP_UNPACK_SP(SA, &fmil);
-		if ((info->capability >> 23) == 0)
-			FP_FROM_INT_S(SB, (long) info->capability, 64, long);
-		else
-			FP_UNPACK_SP(SB, &info->capability);
-		FP_DIV_S(SR, SA, SB);
-		FP_TO_INT_S(capability, SR, 32, 0);
+		kernel_fpu_begin(&fpu, KERNEL_FPR);
+		asm volatile(
+			"	sfpc	%3\n"
+			"	l	%0,%1\n"
+			"	tmlh	%0,0xff80\n"
+			"	jnz	0f\n"
+			"	cefbr	%%f2,%0\n"
+			"	j	1f\n"
+			"0:	le	%%f2,%1\n"
+			"1:	cefbr	%%f0,%2\n"
+			"	debr	%%f0,%%f2\n"
+			"	cgebr	%0,5,%%f0\n"
+			: "=&d" (capability)
+			: "Q" (info->capability), "d" (10000000), "d" (0)
+			: "cc"
+			);
+		kernel_fpu_end(&fpu);
 	} else
 		/*
 		 * Really old machine without stsi block for basic
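
The replacement inline assembly computes the same value as the removed math-emu code, now with hardware binary floating point inside a kernel_fpu section: tmlh %0,0xff80 tests the top nine bits of the stsi capability field; if they are zero the field is an unsigned integer and is converted with cefbr, otherwise it is loaded as raw single-precision bits with le. Either way the result is 1e7 divided by the capability, truncated to an integer (cgebr rounding mode 5 rounds toward zero). An equivalent plain-C sketch (illustrative only, hypothetical helper name):

	#include <string.h>

	static unsigned long capability_to_seed(unsigned int raw)
	{
		float f;
		double cap;

		if (raw >> 23) {		/* top 9 bits set: raw float bits */
			memcpy(&f, &raw, sizeof(f));
			cap = f;
		} else {			/* otherwise: plain integer */
			cap = (double) raw;
		}
		return (unsigned long) (10000000.0 / cap);	/* truncating */
	}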

+ 54 - 999
arch/s390/kernel/time.c

@@ -39,13 +39,14 @@
 #include <linux/gfp.h>
 #include <linux/kprobes.h>
 #include <asm/uaccess.h>
+#include <asm/facility.h>
 #include <asm/delay.h>
 #include <asm/div64.h>
 #include <asm/vdso.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
 #include <asm/vtimer.h>
-#include <asm/etr.h>
+#include <asm/stp.h>
 #include <asm/cio.h>
 #include "entry.h"
 
@@ -61,6 +62,32 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators);
 ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
 EXPORT_SYMBOL(s390_epoch_delta_notifier);
 
+unsigned char ptff_function_mask[16];
+unsigned long lpar_offset;
+unsigned long initial_leap_seconds;
+
+/*
+ * Get time offsets with PTFF
+ */
+void __init ptff_init(void)
+{
+	struct ptff_qto qto;
+	struct ptff_qui qui;
+
+	if (!test_facility(28))
+		return;
+	ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF);
+
+	/* get LPAR offset */
+	if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+		lpar_offset = qto.tod_epoch_difference;
+
+	/* get initial leap seconds */
+	if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0)
+		initial_leap_seconds = (unsigned long)
+			((long) qui.old_leap * 4096000000L);
+}
+
 /*
  * Scheduler clock - returns current time in nanosec units.
 */
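
The 4096000000 multiplier in ptff_init() converts seconds into TOD clock units: bit 51 of the TOD clock ticks once per microsecond, so one second corresponds to 10^6 microseconds times 4096 units, i.e. 4,096,000,000 TOD units. A worked example (assuming qui.old_leap reports 26 leap seconds, the UTC count as of mid-2016):

	initial_leap_seconds = 26L * 4096000000L;	/* 106496000000 TOD units */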
@@ -162,30 +189,32 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
 		set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
-static void etr_timing_alert(struct etr_irq_parm *);
 static void stp_timing_alert(struct stp_irq_parm *);
 
 static void timing_alert_interrupt(struct ext_code ext_code,
 				   unsigned int param32, unsigned long param64)
 {
 	inc_irq_stat(IRQEXT_TLA);
-	if (param32 & 0x00c40000)
-		etr_timing_alert((struct etr_irq_parm *) &param32);
 	if (param32 & 0x00038000)
 		stp_timing_alert((struct stp_irq_parm *) &param32);
 }
 
-static void etr_reset(void);
 static void stp_reset(void);
 
 void read_persistent_clock64(struct timespec64 *ts)
 {
-	tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
+	__u64 clock;
+
+	clock = get_tod_clock() - initial_leap_seconds;
+	tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
 }
 
 void read_boot_clock64(struct timespec64 *ts)
 {
-	tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
+	__u64 clock;
+
+	clock = sched_clock_base_cc - initial_leap_seconds;
+	tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
 }
 
 static cycle_t read_tod_clock(struct clocksource *cs)
@@ -269,7 +298,6 @@ void update_vsyscall_tz(void)
 void __init time_init(void)
 {
 	/* Reset time synchronization interfaces. */
-	etr_reset();
 	stp_reset();
 
 	/* request the clock comparator external interrupt */
@@ -337,20 +365,20 @@ static unsigned long clock_sync_flags;
 #define CLOCK_SYNC_STP		3
 
 /*
- * The synchronous get_clock function. It will write the current clock
- * value to the clock pointer and return 0 if the clock is in sync with
- * the external time source. If the clock mode is local it will return
- * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external
- * reference.
+ * The get_clock function for the physical clock. It will get the current
+ * TOD clock, subtract the LPAR offset and write the result to *clock.
+ * The function returns 0 if the clock is in sync with the external time
+ * source. If the clock mode is local it will return -EOPNOTSUPP and
+ * -EAGAIN if the clock is not in sync with the external reference.
 */
-int get_sync_clock(unsigned long long *clock)
+int get_phys_clock(unsigned long long *clock)
 {
 	atomic_t *sw_ptr;
 	unsigned int sw0, sw1;
 
 	sw_ptr = &get_cpu_var(clock_sync_word);
 	sw0 = atomic_read(sw_ptr);
-	*clock = get_tod_clock();
+	*clock = get_tod_clock() - lpar_offset;
 	sw1 = atomic_read(sw_ptr);
 	put_cpu_var(clock_sync_word);
 	if (sw0 == sw1 && (sw0 & 0x80000000U))
@@ -364,7 +392,7 @@ int get_sync_clock(unsigned long long *clock)
 		return -EACCES;
 	return -EAGAIN;
 }
-EXPORT_SYMBOL(get_sync_clock);
+EXPORT_SYMBOL(get_phys_clock);
 
 /*
  * Make get_sync_clock return -EAGAIN.
@@ -416,301 +444,6 @@ static void __init time_init_wq(void)
 	time_sync_wq = create_singlethread_workqueue("timesync");
 }
 
-/*
- * External Time Reference (ETR) code.
- */
-static int etr_port0_online;
-static int etr_port1_online;
-static int etr_steai_available;
-
-static int __init early_parse_etr(char *p)
-{
-	if (strncmp(p, "off", 3) == 0)
-		etr_port0_online = etr_port1_online = 0;
-	else if (strncmp(p, "port0", 5) == 0)
-		etr_port0_online = 1;
-	else if (strncmp(p, "port1", 5) == 0)
-		etr_port1_online = 1;
-	else if (strncmp(p, "on", 2) == 0)
-		etr_port0_online = etr_port1_online = 1;
-	return 0;
-}
-early_param("etr", early_parse_etr);
-
-enum etr_event {
-	ETR_EVENT_PORT0_CHANGE,
-	ETR_EVENT_PORT1_CHANGE,
-	ETR_EVENT_PORT_ALERT,
-	ETR_EVENT_SYNC_CHECK,
-	ETR_EVENT_SWITCH_LOCAL,
-	ETR_EVENT_UPDATE,
-};
-
-/*
- * Valid bit combinations of the eacr register are (x = don't care):
- * e0 e1 dp p0 p1 ea es sl
- *  0  0  x  0	0  0  0  0  initial, disabled state
- *  0  0  x  0	1  1  0  0  port 1 online
- *  0  0  x  1	0  1  0  0  port 0 online
- *  0  0  x  1	1  1  0  0  both ports online
- *  0  1  x  0	1  1  0  0  port 1 online and usable, ETR or PPS mode
- *  0  1  x  0	1  1  0  1  port 1 online, usable and ETR mode
- *  0  1  x  0	1  1  1  0  port 1 online, usable, PPS mode, in-sync
- *  0  1  x  0	1  1  1  1  port 1 online, usable, ETR mode, in-sync
- *  0  1  x  1	1  1  0  0  both ports online, port 1 usable
- *  0  1  x  1	1  1  1  0  both ports online, port 1 usable, PPS mode, in-sync
- *  0  1  x  1	1  1  1  1  both ports online, port 1 usable, ETR mode, in-sync
- *  1  0  x  1	0  1  0  0  port 0 online and usable, ETR or PPS mode
- *  1  0  x  1	0  1  0  1  port 0 online, usable and ETR mode
- *  1  0  x  1	0  1  1  0  port 0 online, usable, PPS mode, in-sync
- *  1  0  x  1	0  1  1  1  port 0 online, usable, ETR mode, in-sync
- *  1  0  x  1	1  1  0  0  both ports online, port 0 usable
- *  1  0  x  1	1  1  1  0  both ports online, port 0 usable, PPS mode, in-sync
- *  1  0  x  1	1  1  1  1  both ports online, port 0 usable, ETR mode, in-sync
- *  1  1  x  1	1  1  1  0  both ports online & usable, ETR, in-sync
- *  1  1  x  1	1  1  1  1  both ports online & usable, ETR, in-sync
- */
-static struct etr_eacr etr_eacr;
-static u64 etr_tolec;			/* time of last eacr update */
-static struct etr_aib etr_port0;
-static int etr_port0_uptodate;
-static struct etr_aib etr_port1;
-static int etr_port1_uptodate;
-static unsigned long etr_events;
-static struct timer_list etr_timer;
-
-static void etr_timeout(unsigned long dummy);
-static void etr_work_fn(struct work_struct *work);
-static DEFINE_MUTEX(etr_work_mutex);
-static DECLARE_WORK(etr_work, etr_work_fn);
-
-/*
- * Reset ETR attachment.
- */
-static void etr_reset(void)
-{
-	etr_eacr =  (struct etr_eacr) {
-		.e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0,
-		.p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0,
-		.es = 0, .sl = 0 };
-	if (etr_setr(&etr_eacr) == 0) {
-		etr_tolec = get_tod_clock();
-		set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
-		if (etr_port0_online && etr_port1_online)
-			set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-	} else if (etr_port0_online || etr_port1_online) {
-		pr_warn("The real or virtual hardware system does not provide an ETR interface\n");
-		etr_port0_online = etr_port1_online = 0;
-	}
-}
-
-static int __init etr_init(void)
-{
-	struct etr_aib aib;
-
-	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
-		return 0;
-	time_init_wq();
-	/* Check if this machine has the steai instruction. */
-	if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
-		etr_steai_available = 1;
-	setup_timer(&etr_timer, etr_timeout, 0UL);
-	if (etr_port0_online) {
-		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-		queue_work(time_sync_wq, &etr_work);
-	}
-	if (etr_port1_online) {
-		set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-		queue_work(time_sync_wq, &etr_work);
-	}
-	return 0;
-}
-
-arch_initcall(etr_init);
-
-/*
- * Two sorts of ETR machine checks. The architecture reads:
- * "When a machine-check niterruption occurs and if a switch-to-local or
- *  ETR-sync-check interrupt request is pending but disabled, this pending
- *  disabled interruption request is indicated and is cleared".
- * Which means that we can get etr_switch_to_local events from the machine
- * check handler although the interruption condition is disabled. Lovely..
- */
-
-/*
- * Switch to local machine check. This is called when the last usable
- * ETR port goes inactive. After switch to local the clock is not in sync.
- */
-int etr_switch_to_local(void)
-{
-	if (!etr_eacr.sl)
-		return 0;
-	disable_sync_clock(NULL);
-	if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
-		etr_eacr.es = etr_eacr.sl = 0;
-		etr_setr(&etr_eacr);
-		return 1;
-	}
-	return 0;
-}
-
-/*
- * ETR sync check machine check. This is called when the ETR OTE and the
- * local clock OTE are farther apart than the ETR sync check tolerance.
- * After a ETR sync check the clock is not in sync. The machine check
- * is broadcasted to all cpus at the same time.
- */
-int etr_sync_check(void)
-{
-	if (!etr_eacr.es)
-		return 0;
-	disable_sync_clock(NULL);
-	if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
-		etr_eacr.es = 0;
-		etr_setr(&etr_eacr);
-		return 1;
-	}
-	return 0;
-}
-
-void etr_queue_work(void)
-{
-	queue_work(time_sync_wq, &etr_work);
-}
-
-/*
- * ETR timing alert. There are two causes:
- * 1) port state change, check the usability of the port
- * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
- *    sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
- *    or ETR-data word 4 (edf4) has changed.
- */
-static void etr_timing_alert(struct etr_irq_parm *intparm)
-{
-	if (intparm->pc0)
-		/* ETR port 0 state change. */
-		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-	if (intparm->pc1)
-		/* ETR port 1 state change. */
-		set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-	if (intparm->eai)
-		/*
-		 * ETR port alert on either port 0, 1 or both.
-		 * Both ports are not up-to-date now.
-		 */
-		set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
-	queue_work(time_sync_wq, &etr_work);
-}
-
-static void etr_timeout(unsigned long dummy)
-{
-	set_bit(ETR_EVENT_UPDATE, &etr_events);
-	queue_work(time_sync_wq, &etr_work);
-}
-
-/*
- * Check if the etr mode is pss.
- */
-static inline int etr_mode_is_pps(struct etr_eacr eacr)
-{
-	return eacr.es && !eacr.sl;
-}
-
-/*
- * Check if the etr mode is etr.
- */
-static inline int etr_mode_is_etr(struct etr_eacr eacr)
-{
-	return eacr.es && eacr.sl;
-}
-
-/*
- * Check if the port can be used for TOD synchronization.
- * For PPS mode the port has to receive OTEs. For ETR mode
- * the port has to receive OTEs, the ETR stepping bit has to
- * be zero and the validity bits for data frame 1, 2, and 3
- * have to be 1.
- */
-static int etr_port_valid(struct etr_aib *aib, int port)
-{
-	unsigned int psc;
-
-	/* Check that this port is receiving OTEs. */
-	if (aib->tsp == 0)
-		return 0;
-
-	psc = port ? aib->esw.psc1 : aib->esw.psc0;
-	if (psc == etr_lpsc_pps_mode)
-		return 1;
-	if (psc == etr_lpsc_operational_step)
-		return !aib->esw.y && aib->slsw.v1 &&
-			aib->slsw.v2 && aib->slsw.v3;
-	return 0;
-}
-
-/*
- * Check if two ports are on the same network.
- */
-static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2)
-{
-	// FIXME: any other fields we have to compare?
-	return aib1->edf1.net_id == aib2->edf1.net_id;
-}
-
-/*
- * Wrapper for etr_stei that converts physical port states
- * to logical port states to be consistent with the output
- * of stetr (see etr_psc vs. etr_lpsc).
- */
-static void etr_steai_cv(struct etr_aib *aib, unsigned int func)
-{
-	BUG_ON(etr_steai(aib, func) != 0);
-	/* Convert port state to logical port state. */
-	if (aib->esw.psc0 == 1)
-		aib->esw.psc0 = 2;
-	else if (aib->esw.psc0 == 0 && aib->esw.p == 0)
-		aib->esw.psc0 = 1;
-	if (aib->esw.psc1 == 1)
-		aib->esw.psc1 = 2;
-	else if (aib->esw.psc1 == 0 && aib->esw.p == 1)
-		aib->esw.psc1 = 1;
-}
-
-/*
- * Check if the aib a2 is still connected to the same attachment as
- * aib a1, the etv values differ by one and a2 is valid.
- */
-static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
-{
-	int state_a1, state_a2;
-
-	/* Paranoia check: e0/e1 should better be the same. */
-	if (a1->esw.eacr.e0 != a2->esw.eacr.e0 ||
-	    a1->esw.eacr.e1 != a2->esw.eacr.e1)
-		return 0;
-
-	/* Still connected to the same etr ? */
-	state_a1 = p ? a1->esw.psc1 : a1->esw.psc0;
-	state_a2 = p ? a2->esw.psc1 : a2->esw.psc0;
-	if (state_a1 == etr_lpsc_operational_step) {
-		if (state_a2 != etr_lpsc_operational_step ||
-		    a1->edf1.net_id != a2->edf1.net_id ||
-		    a1->edf1.etr_id != a2->edf1.etr_id ||
-		    a1->edf1.etr_pn != a2->edf1.etr_pn)
-			return 0;
-	} else if (state_a2 != etr_lpsc_pps_mode)
-		return 0;
-
-	/* The ETV value of a2 needs to be ETV of a1 + 1. */
-	if (a1->edf2.etv + 1 != a2->edf2.etv)
-		return 0;
-
-	if (!etr_port_valid(a2, p))
-		return 0;
-
-	return 1;
-}
-
 struct clock_sync_data {
 	atomic_t cpus;
 	int in_sync;
@@ -747,688 +480,6 @@ static void clock_sync_cpu(struct clock_sync_data *sync)
 	fixup_clock_comparator(sync->fixup_cc);
 }
 
-/*
- * Sync the TOD clock using the port referred to by aibp. This port
- * has to be enabled and the other port has to be disabled. The
- * last eacr update has to be more than 1.6 seconds in the past.
- */
-static int etr_sync_clock(void *data)
-{
-	static int first;
-	unsigned long long clock, old_clock, clock_delta, delay, delta;
-	struct clock_sync_data *etr_sync;
-	struct etr_aib *sync_port, *aib;
-	int port;
-	int rc;
-
-	etr_sync = data;
-
-	if (xchg(&first, 1) == 1) {
-		/* Slave */
-		clock_sync_cpu(etr_sync);
-		return 0;
-	}
-
-	/* Wait until all other cpus entered the sync function. */
-	while (atomic_read(&etr_sync->cpus) != 0)
-		cpu_relax();
-
-	port = etr_sync->etr_port;
-	aib = etr_sync->etr_aib;
-	sync_port = (port == 0) ? &etr_port0 : &etr_port1;
-	enable_sync_clock();
-
-	/* Set clock to next OTE. */
-	__ctl_set_bit(14, 21);
-	__ctl_set_bit(0, 29);
-	clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
-	old_clock = get_tod_clock();
-	if (set_tod_clock(clock) == 0) {
-		__udelay(1);	/* Wait for the clock to start. */
-		__ctl_clear_bit(0, 29);
-		__ctl_clear_bit(14, 21);
-		etr_stetr(aib);
-		/* Adjust Linux timing variables. */
-		delay = (unsigned long long)
-			(aib->edf2.etv - sync_port->edf2.etv) << 32;
-		delta = adjust_time(old_clock, clock, delay);
-		clock_delta = clock - old_clock;
-		atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0,
-					   &clock_delta);
-		etr_sync->fixup_cc = delta;
-		fixup_clock_comparator(delta);
-		/* Verify that the clock is properly set. */
-		if (!etr_aib_follows(sync_port, aib, port)) {
-			/* Didn't work. */
-			disable_sync_clock(NULL);
-			etr_sync->in_sync = -EAGAIN;
-			rc = -EAGAIN;
-		} else {
-			etr_sync->in_sync = 1;
-			rc = 0;
-		}
-	} else {
-		/* Could not set the clock ?!? */
-		__ctl_clear_bit(0, 29);
-		__ctl_clear_bit(14, 21);
-		disable_sync_clock(NULL);
-		etr_sync->in_sync = -EAGAIN;
-		rc = -EAGAIN;
-	}
-	xchg(&first, 0);
-	return rc;
-}
-
-static int etr_sync_clock_stop(struct etr_aib *aib, int port)
-{
-	struct clock_sync_data etr_sync;
-	struct etr_aib *sync_port;
-	int follows;
-	int rc;
-
-	/* Check if the current aib is adjacent to the sync port aib. */
-	sync_port = (port == 0) ? &etr_port0 : &etr_port1;
-	follows = etr_aib_follows(sync_port, aib, port);
-	memcpy(sync_port, aib, sizeof(*aib));
-	if (!follows)
-		return -EAGAIN;
-	memset(&etr_sync, 0, sizeof(etr_sync));
-	etr_sync.etr_aib = aib;
-	etr_sync.etr_port = port;
-	get_online_cpus();
-	atomic_set(&etr_sync.cpus, num_online_cpus() - 1);
-	rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask);
-	put_online_cpus();
-	return rc;
-}
-
-/*
- * Handle the immediate effects of the different events.
- * The port change event is used for online/offline changes.
- */
-static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
-{
-	if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events))
-		eacr.es = 0;
-	if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events))
-		eacr.es = eacr.sl = 0;
-	if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events))
-		etr_port0_uptodate = etr_port1_uptodate = 0;
-
-	if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) {
-		if (eacr.e0)
-			/*
-			 * Port change of an enabled port. We have to
-			 * assume that this can have caused an stepping
-			 * port switch.
-			 */
-			etr_tolec = get_tod_clock();
-		eacr.p0 = etr_port0_online;
-		if (!eacr.p0)
-			eacr.e0 = 0;
-		etr_port0_uptodate = 0;
-	}
-	if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) {
-		if (eacr.e1)
-			/*
-			 * Port change of an enabled port. We have to
-			 * assume that this can have caused an stepping
-			 * port switch.
-			 */
-			etr_tolec = get_tod_clock();
-		eacr.p1 = etr_port1_online;
-		if (!eacr.p1)
-			eacr.e1 = 0;
-		etr_port1_uptodate = 0;
-	}
-	clear_bit(ETR_EVENT_UPDATE, &etr_events);
-	return eacr;
-}
-
-/*
- * Set up a timer that expires after the etr_tolec + 1.6 seconds if
- * one of the ports needs an update.
- */
-static void etr_set_tolec_timeout(unsigned long long now)
-{
-	unsigned long micros;
-
-	if ((!etr_eacr.p0 || etr_port0_uptodate) &&
-	    (!etr_eacr.p1 || etr_port1_uptodate))
-		return;
-	micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0;
-	micros = (micros > 1600000) ? 0 : 1600000 - micros;
-	mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1);
-}
-
-/*
- * Set up a time that expires after 1/2 second.
- */
-static void etr_set_sync_timeout(void)
-{
-	mod_timer(&etr_timer, jiffies + HZ/2);
-}
-
-/*
- * Update the aib information for one or both ports.
- */
-static struct etr_eacr etr_handle_update(struct etr_aib *aib,
-					 struct etr_eacr eacr)
-{
-	/* With both ports disabled the aib information is useless. */
-	if (!eacr.e0 && !eacr.e1)
-		return eacr;
-
-	/* Update port0 or port1 with aib stored in etr_work_fn. */
-	if (aib->esw.q == 0) {
-		/* Information for port 0 stored. */
-		if (eacr.p0 && !etr_port0_uptodate) {
-			etr_port0 = *aib;
-			if (etr_port0_online)
-				etr_port0_uptodate = 1;
-		}
-	} else {
-		/* Information for port 1 stored. */
-		if (eacr.p1 && !etr_port1_uptodate) {
-			etr_port1 = *aib;
-			if (etr_port0_online)
-				etr_port1_uptodate = 1;
-		}
-	}
-
-	/*
-	 * Do not try to get the alternate port aib if the clock
-	 * is not in sync yet.
-	 */
-	if (!eacr.es || !check_sync_clock())
-		return eacr;
-
-	/*
-	 * If steai is available we can get the information about
-	 * the other port immediately. If only stetr is available the
-	 * data-port bit toggle has to be used.
-	 */
-	if (etr_steai_available) {
-		if (eacr.p0 && !etr_port0_uptodate) {
-			etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
-			etr_port0_uptodate = 1;
-		}
-		if (eacr.p1 && !etr_port1_uptodate) {
-			etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1);
-			etr_port1_uptodate = 1;
-		}
-	} else {
-		/*
-		 * One port was updated above, if the other
-		 * port is not uptodate toggle dp bit.
-		 */
-		if ((eacr.p0 && !etr_port0_uptodate) ||
-		    (eacr.p1 && !etr_port1_uptodate))
-			eacr.dp ^= 1;
-		else
-			eacr.dp = 0;
-	}
-	return eacr;
-}
-
-/*
- * Write new etr control register if it differs from the current one.
- * Return 1 if etr_tolec has been updated as well.
- */
-static void etr_update_eacr(struct etr_eacr eacr)
-{
-	int dp_changed;
-
-	if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0)
-		/* No change, return. */
-		return;
-	/*
-	 * The disable of an active port of the change of the data port
-	 * bit can/will cause a change in the data port.
-	 */
-	dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 ||
-		(etr_eacr.dp ^ eacr.dp) != 0;
-	etr_eacr = eacr;
-	etr_setr(&etr_eacr);
-	if (dp_changed)
-		etr_tolec = get_tod_clock();
-}
-
-/*
- * ETR work. In this function you'll find the main logic. In
- * particular this is the only function that calls etr_update_eacr(),
- * it "controls" the etr control register.
- */
-static void etr_work_fn(struct work_struct *work)
-{
-	unsigned long long now;
-	struct etr_eacr eacr;
-	struct etr_aib aib;
-	int sync_port;
-
-	/* prevent multiple execution. */
-	mutex_lock(&etr_work_mutex);
-
-	/* Create working copy of etr_eacr. */
-	eacr = etr_eacr;
-
-	/* Check for the different events and their immediate effects. */
-	eacr = etr_handle_events(eacr);
-
-	/* Check if ETR is supposed to be active. */
-	eacr.ea = eacr.p0 || eacr.p1;
-	if (!eacr.ea) {
-		/* Both ports offline. Reset everything. */
-		eacr.dp = eacr.es = eacr.sl = 0;
-		on_each_cpu(disable_sync_clock, NULL, 1);
-		del_timer_sync(&etr_timer);
-		etr_update_eacr(eacr);
-		goto out_unlock;
-	}
-
-	/* Store aib to get the current ETR status word. */
-	BUG_ON(etr_stetr(&aib) != 0);
-	etr_port0.esw = etr_port1.esw = aib.esw;	/* Copy status word. */
-	now = get_tod_clock();
-
-	/*
-	 * Update the port information if the last stepping port change
-	 * or data port change is older than 1.6 seconds.
-	 */
-	if (now >= etr_tolec + (1600000 << 12))
-		eacr = etr_handle_update(&aib, eacr);
-
-	/*
-	 * Select ports to enable. The preferred synchronization mode is PPS.
-	 * If a port can be enabled depends on a number of things:
-	 * 1) The port needs to be online and uptodate. A port is not
-	 *    disabled just because it is not uptodate, but it is only
-	 *    enabled if it is uptodate.
-	 * 2) The port needs to have the same mode (pps / etr).
-	 * 3) The port needs to be usable -> etr_port_valid() == 1
-	 * 4) To enable the second port the clock needs to be in sync.
-	 * 5) If both ports are useable and are ETR ports, the network id
-	 *    has to be the same.
-	 * The eacr.sl bit is used to indicate etr mode vs. pps mode.
-	 */
-	if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) {
-		eacr.sl = 0;
-		eacr.e0 = 1;
-		if (!etr_mode_is_pps(etr_eacr))
-			eacr.es = 0;
-		if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode)
-			eacr.e1 = 0;
-		// FIXME: uptodate checks ?
-		else if (etr_port0_uptodate && etr_port1_uptodate)
-			eacr.e1 = 1;
-		sync_port = (etr_port0_uptodate &&
-			     etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-	} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
-		eacr.sl = 0;
-		eacr.e0 = 0;
-		eacr.e1 = 1;
-		if (!etr_mode_is_pps(etr_eacr))
-			eacr.es = 0;
-		sync_port = (etr_port1_uptodate &&
-			     etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-	} else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
-		eacr.sl = 1;
-		eacr.e0 = 1;
-		if (!etr_mode_is_etr(etr_eacr))
-			eacr.es = 0;
-		if (!eacr.es || !eacr.p1 ||
-		    aib.esw.psc1 != etr_lpsc_operational_alt)
-			eacr.e1 = 0;
-		else if (etr_port0_uptodate && etr_port1_uptodate &&
-			 etr_compare_network(&etr_port0, &etr_port1))
-			eacr.e1 = 1;
-		sync_port = (etr_port0_uptodate &&
-			     etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-	} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
-		eacr.sl = 1;
-		eacr.e0 = 0;
-		eacr.e1 = 1;
-		if (!etr_mode_is_etr(etr_eacr))
-			eacr.es = 0;
-		sync_port = (etr_port1_uptodate &&
-			     etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-	} else {
-		/* Both ports not usable. */
-		eacr.es = eacr.sl = 0;
-		sync_port = -1;
-	}
-
-	/*
-	 * If the clock is in sync just update the eacr and return.
-	 * If there is no valid sync port wait for a port update.
-	 */
-	if ((eacr.es && check_sync_clock()) || sync_port < 0) {
-		etr_update_eacr(eacr);
-		etr_set_tolec_timeout(now);
-		goto out_unlock;
-	}
-
-	/*
-	 * Prepare control register for clock syncing
-	 * (reset data port bit, set sync check control.
-	 */
-	eacr.dp = 0;
-	eacr.es = 1;
-
-	/*
-	 * Update eacr and try to synchronize the clock. If the update
-	 * of eacr caused a stepping port switch (or if we have to
-	 * assume that a stepping port switch has occurred) or the
-	 * clock syncing failed, reset the sync check control bit
-	 * and set up a timer to try again after 0.5 seconds
-	 */
-	etr_update_eacr(eacr);
-	if (now < etr_tolec + (1600000 << 12) ||
-	    etr_sync_clock_stop(&aib, sync_port) != 0) {
-		/* Sync failed. Try again in 1/2 second. */
-		eacr.es = 0;
-		etr_update_eacr(eacr);
-		etr_set_sync_timeout();
-	} else
-		etr_set_tolec_timeout(now);
-out_unlock:
-	mutex_unlock(&etr_work_mutex);
-}
-
-/*
- * Sysfs interface functions
- */
-static struct bus_type etr_subsys = {
-	.name		= "etr",
-	.dev_name	= "etr",
-};
-
-static struct device etr_port0_dev = {
-	.id	= 0,
-	.bus	= &etr_subsys,
-};
-
-static struct device etr_port1_dev = {
-	.id	= 1,
-	.bus	= &etr_subsys,
-};
-
-/*
- * ETR subsys attributes
- */
-static ssize_t etr_stepping_port_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	return sprintf(buf, "%i\n", etr_port0.esw.p);
-}
-
-static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL);
-
-static ssize_t etr_stepping_mode_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	char *mode_str;
-
-	if (etr_mode_is_pps(etr_eacr))
-		mode_str = "pps";
-	else if (etr_mode_is_etr(etr_eacr))
-		mode_str = "etr";
-	else
-		mode_str = "local";
-	return sprintf(buf, "%s\n", mode_str);
-}
-
-static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL);
-
-/*
- * ETR port attributes
- */
-static inline struct etr_aib *etr_aib_from_dev(struct device *dev)
-{
-	if (dev == &etr_port0_dev)
-		return etr_port0_online ? &etr_port0 : NULL;
-	else
-		return etr_port1_online ? &etr_port1 : NULL;
-}
-
-static ssize_t etr_online_show(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
-{
-	unsigned int online;
-
-	online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online;
-	return sprintf(buf, "%i\n", online);
-}
-
-static ssize_t etr_online_store(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf, size_t count)
-{
-	unsigned int value;
-
-	value = simple_strtoul(buf, NULL, 0);
-	if (value != 0 && value != 1)
-		return -EINVAL;
-	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
-		return -EOPNOTSUPP;
-	mutex_lock(&clock_sync_mutex);
-	if (dev == &etr_port0_dev) {
-		if (etr_port0_online == value)
-			goto out;	/* Nothing to do. */
-		etr_port0_online = value;
-		if (etr_port0_online && etr_port1_online)
-			set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-		else
-			clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-		queue_work(time_sync_wq, &etr_work);
-	} else {
-		if (etr_port1_online == value)
-			goto out;	/* Nothing to do. */
-		etr_port1_online = value;
-		if (etr_port0_online && etr_port1_online)
-			set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-		else
-			clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-		set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-		queue_work(time_sync_wq, &etr_work);
-	}
-out:
-	mutex_unlock(&clock_sync_mutex);
-	return count;
-}
-
-static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store);
-
-static ssize_t etr_stepping_control_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ?
-		       etr_eacr.e0 : etr_eacr.e1);
-}
-
-static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL);
-
-static ssize_t etr_mode_code_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	if (!etr_port0_online && !etr_port1_online)
-		/* Status word is not uptodate if both ports are offline. */
-		return -ENODATA;
-	return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ?
-		       etr_port0.esw.psc0 : etr_port0.esw.psc1);
-}
-
-static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL);
-
-static ssize_t etr_untuned_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v1)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf1.u);
-}
-
-static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL);
-
-static ssize_t etr_network_id_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v1)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf1.net_id);
-}
-
-static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL);
-
-static ssize_t etr_id_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v1)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf1.etr_id);
-}
-
-static DEVICE_ATTR(id, 0400, etr_id_show, NULL);
-
-static ssize_t etr_port_number_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v1)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf1.etr_pn);
-}
-
-static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL);
-
-static ssize_t etr_coupled_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v3)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf3.c);
-}
-
-static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL);
-
-static ssize_t etr_local_time_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v3)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf3.blto);
-}
-
-static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL);
-
-static ssize_t etr_utc_offset_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v3)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf3.buo);
-}
-
-static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL);
-
-static struct device_attribute *etr_port_attributes[] = {
-	&dev_attr_online,
-	&dev_attr_stepping_control,
-	&dev_attr_state_code,
-	&dev_attr_untuned,
-	&dev_attr_network,
-	&dev_attr_id,
-	&dev_attr_port,
-	&dev_attr_coupled,
-	&dev_attr_local_time,
-	&dev_attr_utc_offset,
-	NULL
-};
-
-static int __init etr_register_port(struct device *dev)
-{
-	struct device_attribute **attr;
-	int rc;
-
-	rc = device_register(dev);
-	if (rc)
-		goto out;
-	for (attr = etr_port_attributes; *attr; attr++) {
-		rc = device_create_file(dev, *attr);
-		if (rc)
-			goto out_unreg;
-	}
-	return 0;
-out_unreg:
-	for (; attr >= etr_port_attributes; attr--)
-		device_remove_file(dev, *attr);
-	device_unregister(dev);
-out:
-	return rc;
-}
-
-static void __init etr_unregister_port(struct device *dev)
-{
-	struct device_attribute **attr;
-
-	for (attr = etr_port_attributes; *attr; attr++)
-		device_remove_file(dev, *attr);
-	device_unregister(dev);
-}
-
-static int __init etr_init_sysfs(void)
-{
-	int rc;
-
-	rc = subsys_system_register(&etr_subsys, NULL);
-	if (rc)
-		goto out;
-	rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port);
-	if (rc)
-		goto out_unreg_subsys;
-	rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
-	if (rc)
-		goto out_remove_stepping_port;
-	rc = etr_register_port(&etr_port0_dev);
-	if (rc)
-		goto out_remove_stepping_mode;
-	rc = etr_register_port(&etr_port1_dev);
-	if (rc)
-		goto out_remove_port0;
-	return 0;
-
-out_remove_port0:
-	etr_unregister_port(&etr_port0_dev);
-out_remove_stepping_mode:
-	device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
-out_remove_stepping_port:
-	device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port);
-out_unreg_subsys:
-	bus_unregister(&etr_subsys);
-out:
-	return rc;
-}
-
-device_initcall(etr_init_sysfs);
-
 /*
  * Server Time Protocol (STP) code.
  */
@@ -1455,7 +506,7 @@ static void __init stp_reset(void)
 	int rc;
 
 	stp_page = (void *) get_zeroed_page(GFP_ATOMIC);
-	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
 	if (rc == 0)
 		set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
 	else if (stp_online) {
@@ -1533,6 +584,7 @@ static int stp_sync_clock(void *data)
 	static int first;
 	unsigned long long old_clock, delta, new_clock, clock_delta;
 	struct clock_sync_data *stp_sync;
+	struct ptff_qto qto;
 	int rc;
 
 	stp_sync = data;
@@ -1554,11 +606,14 @@ static int stp_sync_clock(void *data)
 	    stp_info.todoff[2] || stp_info.todoff[3] ||
 	    stp_info.tmd != 2) {
 		old_clock = get_tod_clock();
-		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
+		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta);
 		if (rc == 0) {
-			new_clock = get_tod_clock();
+			new_clock = old_clock + clock_delta;
 			delta = adjust_time(old_clock, new_clock, 0);
-			clock_delta = new_clock - old_clock;
+			if (ptff_query(PTFF_QTO) &&
+			    ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+				/* Update LPAR offset */
+				lpar_offset = qto.tod_epoch_difference;
 			atomic_notifier_call_chain(&s390_epoch_delta_notifier,
 						   0, &clock_delta);
 			fixup_clock_comparator(delta);
@@ -1590,12 +645,12 @@ static void stp_work_fn(struct work_struct *work)
 	mutex_lock(&stp_work_mutex);
 
 	if (!stp_online) {
-		chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+		chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
 		del_timer_sync(&stp_timer);
 		goto out_unlock;
 	}
 
-	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL);
 	if (rc)
 		goto out_unlock;
 

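The stp_sync_clock() hunk above closes a small race: instead of reading the TOD clock a second time after the sync operation (by which point the clock has already been stepped), chsc_sstpc() now hands back the applied offset in clock_delta and the new clock value is derived arithmetically. A minimal user-space model of the idea (sync_clock() is a hypothetical stand-in, not the kernel interface):

    #include <stdio.h>
    #include <stdint.h>

    /* Hypothetical stand-in for the sync operation: it reports the
     * step it applied instead of forcing the caller to re-read the clock. */
    static int sync_clock(uint64_t *clock_delta)
    {
    	*clock_delta = 4096;	/* pretend the clock was stepped */
    	return 0;		/* rc == 0: sync succeeded */
    }

    int main(void)
    {
    	uint64_t old_clock = 1000000, clock_delta, new_clock;

    	if (sync_clock(&clock_delta) == 0) {
    		new_clock = old_clock + clock_delta;	/* no racy second read */
    		printf("old=%llu new=%llu delta=%llu\n",
    		       (unsigned long long) old_clock,
    		       (unsigned long long) new_clock,
    		       (unsigned long long) clock_delta);
    	}
    	return 0;
    }
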
+ 31 - 58
arch/s390/kernel/topology.c

@@ -46,6 +46,7 @@ static DECLARE_WORK(topology_work, topology_work_fn);
  */
 static struct mask_info socket_info;
 static struct mask_info book_info;
+static struct mask_info drawer_info;
 
 DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
 EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
@@ -79,10 +80,10 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
 	return mask;
 }
 
-static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
-					  struct mask_info *book,
-					  struct mask_info *socket,
-					  int one_socket_per_cpu)
+static void add_cpus_to_mask(struct topology_core *tl_core,
+			     struct mask_info *drawer,
+			     struct mask_info *book,
+			     struct mask_info *socket)
 {
 	struct cpu_topology_s390 *topo;
 	unsigned int core;
@@ -97,21 +98,17 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
 			continue;
 		for (i = 0; i <= smp_cpu_mtid; i++) {
 			topo = &per_cpu(cpu_topology, lcpu + i);
+			topo->drawer_id = drawer->id;
 			topo->book_id = book->id;
+			topo->socket_id = socket->id;
 			topo->core_id = rcore;
 			topo->thread_id = lcpu + i;
+			cpumask_set_cpu(lcpu + i, &drawer->mask);
 			cpumask_set_cpu(lcpu + i, &book->mask);
 			cpumask_set_cpu(lcpu + i, &socket->mask);
-			if (one_socket_per_cpu)
-				topo->socket_id = rcore;
-			else
-				topo->socket_id = socket->id;
 			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
 		}
-		if (one_socket_per_cpu)
-			socket = socket->next;
 	}
-	return socket;
 }
 
 static void clear_masks(void)
@@ -128,6 +125,11 @@ static void clear_masks(void)
 		cpumask_clear(&info->mask);
 		info = info->next;
 	}
+	info = &drawer_info;
+	while (info) {
+		cpumask_clear(&info->mask);
+		info = info->next;
+	}
 }
 
 static union topology_entry *next_tle(union topology_entry *tle)
@@ -137,16 +139,22 @@ static union topology_entry *next_tle(union topology_entry *tle)
 	return (union topology_entry *)((struct topology_container *)tle + 1);
 }
 
-static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
+static void tl_to_masks(struct sysinfo_15_1_x *info)
 {
 	struct mask_info *socket = &socket_info;
 	struct mask_info *book = &book_info;
+	struct mask_info *drawer = &drawer_info;
 	union topology_entry *tle, *end;
 
+	clear_masks();
 	tle = info->tle;
 	end = (union topology_entry *)((unsigned long)info + info->length);
 	while (tle < end) {
 		switch (tle->nl) {
+		case 3:
+			drawer = drawer->next;
+			drawer->id = tle->container.id;
+			break;
 		case 2:
 			book = book->next;
 			book->id = tle->container.id;
@@ -156,32 +164,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
 			socket->id = tle->container.id;
 			break;
 		case 0:
-			add_cpus_to_mask(&tle->cpu, book, socket, 0);
-			break;
-		default:
-			clear_masks();
-			return;
-		}
-		tle = next_tle(tle);
-	}
-}
-
-static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
-{
-	struct mask_info *socket = &socket_info;
-	struct mask_info *book = &book_info;
-	union topology_entry *tle, *end;
-
-	tle = info->tle;
-	end = (union topology_entry *)((unsigned long)info + info->length);
-	while (tle < end) {
-		switch (tle->nl) {
-		case 1:
-			book = book->next;
-			book->id = tle->container.id;
-			break;
-		case 0:
-			socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
+			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
 			break;
 		default:
 			clear_masks();
@@ -191,22 +174,6 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
 	}
 }
 
-static void tl_to_masks(struct sysinfo_15_1_x *info)
-{
-	struct cpuid cpu_id;
-
-	get_cpu_id(&cpu_id);
-	clear_masks();
-	switch (cpu_id.machine) {
-	case 0x2097:
-	case 0x2098:
-		__tl_to_masks_z10(info);
-		break;
-	default:
-		__tl_to_masks_generic(info);
-	}
-}
-
 static void topology_update_polarization_simple(void)
 {
 	int cpu;
@@ -257,11 +224,13 @@ static void update_cpu_masks(void)
 		topo->thread_mask = cpu_thread_map(cpu);
 		topo->core_mask = cpu_group_map(&socket_info, cpu);
 		topo->book_mask = cpu_group_map(&book_info, cpu);
+		topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
 		if (!MACHINE_HAS_TOPOLOGY) {
 			topo->thread_id = cpu;
 			topo->core_id = cpu;
 			topo->socket_id = cpu;
 			topo->book_id = cpu;
+			topo->drawer_id = cpu;
 		}
 	}
 	numa_update_cpu_topology();
@@ -269,10 +238,7 @@ static void update_cpu_masks(void)
 
 void store_topology(struct sysinfo_15_1_x *info)
 {
-	if (topology_max_mnest >= 3)
-		stsi(info, 15, 1, 3);
-	else
-		stsi(info, 15, 1, 2);
+	stsi(info, 15, 1, min(topology_max_mnest, 4));
}
 
 int arch_update_cpu_topology(void)
@@ -442,6 +408,11 @@ static const struct cpumask *cpu_book_mask(int cpu)
 	return &per_cpu(cpu_topology, cpu).book_mask;
 }
 
+static const struct cpumask *cpu_drawer_mask(int cpu)
+{
+	return &per_cpu(cpu_topology, cpu).drawer_mask;
+}
+
 static int __init early_parse_topology(char *p)
 {
 	return kstrtobool(p, &topology_enabled);
@@ -452,6 +423,7 @@ static struct sched_domain_topology_level s390_topology[] = {
 	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
 	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
 	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
 	{ NULL, },
 };
@@ -487,6 +459,7 @@ static int __init s390_topology_init(void)
 	printk(KERN_CONT " / %d\n", info->mnest);
 	alloc_masks(info, &socket_info, 1);
 	alloc_masks(info, &book_info, 2);
+	alloc_masks(info, &drawer_info, 3);
 	set_sched_topology(s390_topology);
 	return 0;
 }

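tl_to_masks() above consumes a flat list of topology entries in which container entries (nesting level 3 = drawer, 2 = book, 1 = socket) precede the CPU entries (level 0) they contain, so each CPU inherits the most recently seen id at every level. A toy model of that walk (the entry data is made up for illustration):

    #include <stdio.h>

    struct entry { int nl; int id; };	/* nesting level, container/cpu id */

    int main(void)
    {
    	/* drawer 0 -> book 0 -> socket 0 -> cpu 10; then socket 1 -> cpu 11 */
    	struct entry tle[] = {
    		{ 3, 0 }, { 2, 0 }, { 1, 0 }, { 0, 10 },
    		{ 1, 1 }, { 0, 11 },
    	};
    	int drawer = -1, book = -1, socket = -1;
    	unsigned int i;

    	for (i = 0; i < sizeof(tle) / sizeof(tle[0]); i++) {
    		switch (tle[i].nl) {
    		case 3: drawer = tle[i].id; break;
    		case 2: book = tle[i].id; break;
    		case 1: socket = tle[i].id; break;
    		case 0:
    			printf("cpu %d: drawer %d book %d socket %d\n",
    			       tle[i].id, drawer, book, socket);
    			break;
    		}
    	}
    	return 0;
    }
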
+ 2 - 0
arch/s390/kernel/vdso32/Makefile

@@ -1,5 +1,7 @@
 # List of files in the vdso, has to be asm only for now
 
+KCOV_INSTRUMENT := n
+
 obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
 
 # Build rules

+ 2 - 0
arch/s390/kernel/vdso64/Makefile

@@ -1,5 +1,7 @@
 # List of files in the vdso, has to be asm only for now
 
+KCOV_INSTRUMENT := n
+
 obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
 
 # Build rules

+ 19 - 2
arch/s390/kernel/vmlinux.lds.S

@@ -4,6 +4,16 @@
 
 #include <asm/thread_info.h>
 #include <asm/page.h>
+
+/*
+ * Put .bss..swapper_pg_dir as the first thing in .bss. This will
+ * make sure it has 16k alignment.
+ */
+#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir)
+
+/* Handle ro_after_init data on our own. */
+#define RO_AFTER_INIT_DATA
+
 #include <asm-generic/vmlinux.lds.h>
 
 OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
@@ -49,7 +59,14 @@ SECTIONS
 	_eshared = .;		/* End of shareable data */
 	_sdata = .;		/* Start of data section */
 
-	EXCEPTION_TABLE(16) :data
+	. = ALIGN(PAGE_SIZE);
+	__start_ro_after_init = .;
+	.data..ro_after_init : {
+		 *(.data..ro_after_init)
+	}
+	EXCEPTION_TABLE(16)
+	. = ALIGN(PAGE_SIZE);
+	__end_ro_after_init = .;
 
 	RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
 
@@ -81,7 +98,7 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;		/* freed after init ends here */
 
-	BSS_SECTION(0, 2, 0)
+	BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
 
 	_end = . ;
 

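With the linker script carving out .data..ro_after_init between the page-aligned __start_ro_after_init/__end_ro_after_init markers, data placed there stays writable through early boot and is write-protected once mark_rodata_ro() runs (see the arch/s390/mm/init.c hunk further down). A sketch of how a caller uses the section; feature_enabled and feature_setup are made-up names:

    #include <linux/init.h>
    #include <linux/cache.h>

    /* Hypothetical example: written once during __init, read-only afterwards. */
    static int feature_enabled __ro_after_init;

    static int __init feature_setup(char *s)
    {
    	feature_enabled = 1;	/* still writable: mark_rodata_ro() not yet run */
    	return 1;
    }
    __setup("feature", feature_setup);
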
+ 1 - 1
arch/s390/kvm/kvm-s390.c

@@ -28,7 +28,7 @@
 #include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
-#include <asm/etr.h>
+#include <asm/stp.h>
 #include <asm/pgtable.h>
 #include <asm/gmap.h>
 #include <asm/nmi.h>

+ 26 - 24
arch/s390/lib/string.c

@@ -236,6 +236,26 @@ char * strrchr(const char * s, int c)
 }
 EXPORT_SYMBOL(strrchr);
 
+static inline int clcle(const char *s1, unsigned long l1,
+			const char *s2, unsigned long l2,
+			int *diff)
+{
+	register unsigned long r2 asm("2") = (unsigned long) s1;
+	register unsigned long r3 asm("3") = (unsigned long) l1;
+	register unsigned long r4 asm("4") = (unsigned long) s2;
+	register unsigned long r5 asm("5") = (unsigned long) l2;
+	int cc;
+
+	asm volatile ("0: clcle %1,%3,0\n"
+		      "   jo    0b\n"
+		      "   ipm   %0\n"
+		      "   srl   %0,28"
+		      : "=&d" (cc), "+a" (r2), "+a" (r3),
+			"+a" (r4), "+a" (r5) : : "cc");
+	*diff = *(char *)r2 - *(char *)r4;
+	return cc;
+}
+
 /**
  * strstr - Find the first substring in a %NUL terminated string
  * @s1: The string to be searched
@@ -250,18 +270,9 @@ char * strstr(const char * s1,const char * s2)
 		return (char *) s1;
 	l1 = __strend(s1) - s1;
 	while (l1-- >= l2) {
-		register unsigned long r2 asm("2") = (unsigned long) s1;
-		register unsigned long r3 asm("3") = (unsigned long) l2;
-		register unsigned long r4 asm("4") = (unsigned long) s2;
-		register unsigned long r5 asm("5") = (unsigned long) l2;
-		int cc;
-
-		asm volatile ("0: clcle %1,%3,0\n"
-			      "   jo    0b\n"
-			      "   ipm   %0\n"
-			      "   srl   %0,28"
-			      : "=&d" (cc), "+a" (r2), "+a" (r3),
-			        "+a" (r4), "+a" (r5) : : "cc" );
+		int cc, dummy;
+
+		cc = clcle(s1, l2, s2, l2, &dummy);
 		if (!cc)
 			return (char *) s1;
 		s1++;
@@ -302,20 +313,11 @@ EXPORT_SYMBOL(memchr);
  */
 int memcmp(const void *cs, const void *ct, size_t n)
 {
-	register unsigned long r2 asm("2") = (unsigned long) cs;
-	register unsigned long r3 asm("3") = (unsigned long) n;
-	register unsigned long r4 asm("4") = (unsigned long) ct;
-	register unsigned long r5 asm("5") = (unsigned long) n;
-	int ret;
+	int ret, diff;
 
-	asm volatile ("0: clcle %1,%3,0\n"
-		      "   jo    0b\n"
-		      "   ipm   %0\n"
-		      "   srl   %0,28"
-		      : "=&d" (ret), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5)
-		      : : "cc" );
+	ret = clcle(cs, n, ct, n, &diff);
 	if (ret)
-		ret = *(char *) r2 - *(char *) r4;
+		ret = diff;
 	return ret;
 }
 EXPORT_SYMBOL(memcmp);

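CLCLE sets the condition code (0 = operands equal, 3 = interrupted, hence the jo retry loop) and leaves the operand registers pointing at the first mismatching bytes, which is where *diff comes from. The resulting memcmp() semantics, modeled in portable C:

    #include <stdio.h>
    #include <stddef.h>

    /* Portable model: 0 on equality, otherwise the difference of the
     * first unequal bytes (what the clcle-based memcmp returns via diff). */
    static int model_memcmp(const void *cs, const void *ct, size_t n)
    {
    	const unsigned char *a = cs, *b = ct;
    	size_t i;

    	for (i = 0; i < n; i++)
    		if (a[i] != b[i])
    			return a[i] - b[i];
    	return 0;
    }

    int main(void)
    {
    	printf("%d\n", model_memcmp("abc", "abc", 3));	/* 0 */
    	printf("%d\n", model_memcmp("abc", "abd", 3));	/* negative */
    	return 0;
    }
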
+ 3 - 3
arch/s390/lib/uaccess.c

@@ -49,7 +49,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
 		"   jnm   5b\n"
 		"   jnm   5b\n"
 		"   ex    %4,0(%3)\n"
 		"   ex    %4,0(%3)\n"
 		"   j     8f\n"
 		"   j     8f\n"
-		"7:slgr  %0,%0\n"
+		"7: slgr  %0,%0\n"
 		"8:\n"
 		"8:\n"
 		EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
 		EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
 		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
 		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
@@ -93,7 +93,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
 		"   jnm   6b\n"
 		"   jnm   6b\n"
 		"   ex    %4,0(%3)\n"
 		"   ex    %4,0(%3)\n"
 		"   j     9f\n"
 		"   j     9f\n"
-		"8:slgr  %0,%0\n"
+		"8: slgr  %0,%0\n"
 		"9: sacf  768\n"
 		"9: sacf  768\n"
 		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
 		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
 		EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b)
 		EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b)
@@ -266,7 +266,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
 		"3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
 		"3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
 		"   slgr  %0,%3\n"
 		"   slgr  %0,%3\n"
 		"   j	  5f\n"
 		"   j	  5f\n"
-		"4:slgr  %0,%0\n"
+		"4: slgr  %0,%0\n"
 		"5:\n"
 		"5:\n"
 		EX_TABLE(0b,2b) EX_TABLE(3b,5b)
 		EX_TABLE(0b,2b) EX_TABLE(3b,5b)
 		: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
 		: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)

+ 1 - 1
arch/s390/mm/dump_pagetables.c

@@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
 		pud = pud_offset(pgd, addr);
 		if (!pud_none(*pud))
 			if (pud_large(*pud)) {
-				prot = pud_val(*pud) & _REGION3_ENTRY_RO;
+				prot = pud_val(*pud) & _REGION_ENTRY_PROTECT;
 				note_page(m, st, prot, 2);
 			} else
 				walk_pmd_level(m, st, pud, addr);

+ 1 - 1
arch/s390/mm/fault.c

@@ -624,7 +624,7 @@ void pfault_fini(void)
 	diag_stat_inc(DIAG_STAT_X258);
 	asm volatile(
 		"	diag	%0,0,0x258\n"
-		"0:\n"
+		"0:	nopr	%%r7\n"
 		EX_TABLE(0b,0b)
 		: : "a" (&refbk), "m" (refbk) : "cc");
 }

+ 5 - 2
arch/s390/mm/gmap.c

@@ -85,7 +85,7 @@ EXPORT_SYMBOL_GPL(gmap_alloc);
 static void gmap_flush_tlb(struct gmap *gmap)
 {
 	if (MACHINE_HAS_IDTE)
-		__tlb_flush_asce(gmap->mm, gmap->asce);
+		__tlb_flush_idte(gmap->asce);
 	else
 		__tlb_flush_global();
 }
@@ -124,7 +124,7 @@ void gmap_free(struct gmap *gmap)
 
 	/* Flush tlb. */
 	if (MACHINE_HAS_IDTE)
-		__tlb_flush_asce(gmap->mm, gmap->asce);
+		__tlb_flush_idte(gmap->asce);
 	else
 		__tlb_flush_global();
 
@@ -430,6 +430,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 	VM_BUG_ON(pgd_none(*pgd));
 	pud = pud_offset(pgd, vmaddr);
 	VM_BUG_ON(pud_none(*pud));
+	/* large puds cannot yet be handled */
+	if (pud_large(*pud))
+		return -EFAULT;
 	pmd = pmd_offset(pud, vmaddr);
 	VM_BUG_ON(pmd_none(*pmd));
 	/* large pmds cannot yet be handled */

+ 44 - 1
arch/s390/mm/gup.c

@@ -128,6 +128,44 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 	return 1;
 }
 
+static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	struct page *head, *page;
+	unsigned long mask;
+	int refs;
+
+	mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID;
+	if ((pud_val(pud) & mask) != 0)
+		return 0;
+	VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
+
+	refs = 0;
+	head = pud_page(pud);
+	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+	do {
+		VM_BUG_ON_PAGE(compound_head(page) != head, page);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pud_val(pud) != pud_val(*pudp))) {
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+		return 0;
+	}
+
+	return 1;
+}
+
 static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
@@ -144,7 +182,12 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
 		next = pud_addr_end(addr, end);
 		if (pud_none(pud))
 			return 0;
-		if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr))
+		if (unlikely(pud_large(pud))) {
+			if (!gup_huge_pud(pudp, pud, addr, next, write, pages,
+					  nr))
+				return 0;
+		} else if (!gup_pmd_range(pudp, pud, addr, next, write, pages,
+					  nr))
 			return 0;
 	} while (pudp++, addr = next, addr != end);
 

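gup_huge_pud() above follows the usual lockless fast-GUP protocol: collect the pages, take the references speculatively, then re-read the PUD and back everything out if it changed underneath. The pattern in isolation, with C11 atomics and simplified to a single head reference (speculative_get is a made-up name):

    #include <stdatomic.h>
    #include <stdio.h>

    struct head { atomic_int refs; };

    /* Returns 1 if the references were taken and the entry was still
     * unchanged afterwards; 0 if we raced and had to undo. */
    static int speculative_get(struct head *h, int refs,
    			   unsigned long snapshot, unsigned long *entry)
    {
    	atomic_fetch_add(&h->refs, refs);	/* page_cache_add_speculative */
    	if (snapshot != *entry) {		/* pud_val(pud) != pud_val(*pudp) */
    		atomic_fetch_sub(&h->refs, refs);	/* raced: undo */
    		return 0;
    	}
    	return 1;
    }

    int main(void)
    {
    	struct head h = { 1 };
    	unsigned long entry = 42;

    	printf("%d\n", speculative_get(&h, 8, 42, &entry));	/* 1 */
    	printf("%d\n", speculative_get(&h, 8, 41, &entry));	/* 0 */
    	return 0;
    }
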
+ 90 - 39
arch/s390/mm/hugetlbpage.c

@@ -1,19 +1,22 @@
 /*
  *  IBM System z Huge TLB Page Support for Kernel.
  *
- *    Copyright IBM Corp. 2007
+ *    Copyright IBM Corp. 2007,2016
  *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  */
 
+#define KMSG_COMPONENT "hugetlb"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 
-static inline pmd_t __pte_to_pmd(pte_t pte)
+static inline unsigned long __pte_to_rste(pte_t pte)
 {
-	pmd_t pmd;
+	unsigned long rste;
 
 	/*
-	 * Convert encoding		  pte bits	   pmd bits
+	 * Convert encoding		  pte bits	pmd / pud bits
 	 *				lIR.uswrdy.p	dy..R...I...wr
 	 * empty			010.000000.0 -> 00..0...1...00
 	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
@@ -33,25 +36,31 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
 	 *	    u unused, l large
 	 */
 	if (pte_present(pte)) {
-		pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
-		pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4;
-		pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4;
-		pmd_val(pmd) |=	(pte_val(pte) & _PAGE_INVALID) >> 5;
-		pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
-		pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
-		pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
-		pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
+		rste = pte_val(pte) & PAGE_MASK;
+		rste |= (pte_val(pte) & _PAGE_READ) >> 4;
+		rste |= (pte_val(pte) & _PAGE_WRITE) >> 4;
+		rste |= (pte_val(pte) & _PAGE_INVALID) >> 5;
+		rste |= (pte_val(pte) & _PAGE_PROTECT);
+		rste |= (pte_val(pte) & _PAGE_DIRTY) << 10;
+		rste |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+		rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
 	} else
-		pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
-	return pmd;
+		rste = _SEGMENT_ENTRY_INVALID;
+	return rste;
 }
 
-static inline pte_t __pmd_to_pte(pmd_t pmd)
+static inline pte_t __rste_to_pte(unsigned long rste)
 {
+	int present;
 	pte_t pte;
 
+	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		present = pud_present(__pud(rste));
+	else
+		present = pmd_present(__pmd(rste));
+
 	/*
-	 * Convert encoding		   pmd bits	    pte bits
+	 * Convert encoding		pmd / pud bits	    pte bits
 	 *				dy..R...I...wr	  lIR.uswrdy.p
 	 * empty			00..0...1...00 -> 010.000000.0
 	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
@@ -70,16 +79,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
 	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
 	 *	    u unused, l large
 	 */
-	if (pmd_present(pmd)) {
-		pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
+	if (present) {
+		pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
 		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
-		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4;
-		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
-		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
-		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
-		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
-		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
-		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
+		pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4;
+		pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4;
+		pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5;
+		pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT);
+		pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10;
+		pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10;
+		pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
 	} else
 		pte_val(pte) = _PAGE_INVALID;
 	return pte;
@@ -88,27 +97,33 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte)
 {
-	pmd_t pmd = __pte_to_pmd(pte);
-
-	pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
-	*(pmd_t *) ptep = pmd;
+	unsigned long rste = __pte_to_rste(pte);
+
+	/* Set correct table type for 2G hugepages */
+	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
+	else
+		rste |= _SEGMENT_ENTRY_LARGE;
+	pte_val(*ptep) = rste;
 }
 
 pte_t huge_ptep_get(pte_t *ptep)
 {
-	pmd_t pmd = *(pmd_t *) ptep;
-
-	return __pmd_to_pte(pmd);
+	return __rste_to_pte(pte_val(*ptep));
 }
 
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 			      unsigned long addr, pte_t *ptep)
 {
+	pte_t pte = huge_ptep_get(ptep);
 	pmd_t *pmdp = (pmd_t *) ptep;
-	pmd_t old;
+	pud_t *pudp = (pud_t *) ptep;
 
-	old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
-	return __pmd_to_pte(old);
+	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
+	else
+		pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	return pte;
 }
 
 pte_t *huge_pte_alloc(struct mm_struct *mm,
@@ -120,8 +135,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 
 	pgdp = pgd_offset(mm, addr);
 	pudp = pud_alloc(mm, pgdp, addr);
-	if (pudp)
-		pmdp = pmd_alloc(mm, pudp, addr);
+	if (pudp) {
+		if (sz == PUD_SIZE)
+			return (pte_t *) pudp;
+		else if (sz == PMD_SIZE)
+			pmdp = pmd_alloc(mm, pudp, addr);
+	}
 	return (pte_t *) pmdp;
 }
 
@@ -134,8 +153,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	pgdp = pgd_offset(mm, addr);
 	if (pgd_present(*pgdp)) {
 		pudp = pud_offset(pgdp, addr);
-		if (pud_present(*pudp))
+		if (pud_present(*pudp)) {
+			if (pud_large(*pudp))
+				return (pte_t *) pudp;
 			pmdp = pmd_offset(pudp, addr);
+		}
 	}
 	return (pte_t *) pmdp;
 }
@@ -147,5 +169,34 @@ int pmd_huge(pmd_t pmd)
 
 int pud_huge(pud_t pud)
 {
-	return 0;
+	return pud_large(pud);
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int flags)
+{
+	if (flags & FOLL_GET)
+		return NULL;
+
+	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
+}
+
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long size;
+	char *string = opt;
+
+	size = memparse(opt, &opt);
+	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
+		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
+		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	} else {
+		pr_err("hugepagesz= specifies an unsupported page size %s\n",
+			string);
+		return 0;
+	}
	return 1;
 }
+__setup("hugepagesz=", setup_hugepagesz);

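Since a 2GB hugepage is mapped by a region-third-table entry rather than a segment-table entry, the conversion helpers now produce a generic "rste" value, and only the table-type and large bits differ between the two cases. The relocation of each software bit is plain mask-and-shift; a toy model with made-up bit positions:

    #include <stdio.h>

    #define PTE_READ	0x0010UL	/* made-up software bit position */
    #define PAGE_MASK	(~0xfffUL)

    /* Keep the frame address, move each software flag to its new home
     * (the real code shifts _PAGE_READ by 4, _PAGE_INVALID by 5, ...). */
    static unsigned long pte_to_rste(unsigned long pte)
    {
    	unsigned long rste = pte & PAGE_MASK;

    	rste |= (pte & PTE_READ) >> 4;
    	return rste;
    }

    int main(void)
    {
    	printf("%#lx\n", pte_to_rste(0x100010UL));	/* 0x100001 */
    	return 0;
    }

Per setup_hugepagesz(), PMD-sized (1MB) hugepages require EDAT1 and PUD-sized (2GB) hugepages require EDAT2, selected with the hugepagesz= kernel parameter.
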
+ 6 - 7
arch/s390/mm/init.c

@@ -40,7 +40,7 @@
 #include <asm/ctl_reg.h>
 #include <asm/sclp.h>
 
-pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
 
 unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL(empty_zero_page);
@@ -111,17 +111,16 @@ void __init paging_init(void)
 
 void mark_rodata_ro(void)
 {
-	/* Text and rodata are already protected. Nothing to do here. */
-	pr_info("Write protecting the kernel read-only data: %luk\n",
-		((unsigned long)&_eshared - (unsigned long)&_stext) >> 10);
+	unsigned long size = __end_ro_after_init - __start_ro_after_init;
+
+	set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
+	pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
 }
 
 void __init mem_init(void)
 {
-	if (MACHINE_HAS_TLB_LC)
-		cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
+	cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
 	cpumask_set_cpu(0, mm_cpumask(&init_mm));
-	atomic_set(&init_mm.context.attach_count, 1);
 
 	set_max_mapnr(max_low_pfn);
         high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

+ 9 - 4
arch/s390/mm/page-states.c

@@ -34,20 +34,25 @@ static int __init cmma(char *str)
 }
 __setup("cmma=", cmma);
 
-void __init cmma_init(void)
+static inline int cmma_test_essa(void)
 {
 	register unsigned long tmp asm("0") = 0;
 	register int rc asm("1") = -EOPNOTSUPP;
 
-	if (!cmma_flag)
-		return;
 	asm volatile(
 		"       .insn rrf,0xb9ab0000,%1,%1,0,0\n"
 		"0:     la      %0,0\n"
 		"1:\n"
 		EX_TABLE(0b,1b)
 		: "+&d" (rc), "+&d" (tmp));
-	if (rc)
+	return rc;
+}
+
+void __init cmma_init(void)
+{
+	if (!cmma_flag)
+		return;
+	if (cmma_test_essa())
 		cmma_flag = 0;
 }
 

+ 233 - 34
arch/s390/mm/pageattr.c

@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 #include <asm/page.h>
 
-#if PAGE_DEFAULT_KEY
 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
 {
 	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0"
@@ -22,6 +21,8 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
 {
 	unsigned long boundary, size;
 
+	if (!PAGE_DEFAULT_KEY)
+		return;
 	while (start < end) {
 		if (MACHINE_HAS_EDAT1) {
 			/* set storage keys for a 1MB frame */
@@ -38,56 +39,254 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
 		start += PAGE_SIZE;
 	}
 }
-#endif
 
-static pte_t *walk_page_table(unsigned long addr)
+#ifdef CONFIG_PROC_FS
+atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+
+void arch_report_meminfo(struct seq_file *m)
 {
-	pgd_t *pgdp;
-	pud_t *pudp;
+	seq_printf(m, "DirectMap4k:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
+	seq_printf(m, "DirectMap1M:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
+	seq_printf(m, "DirectMap2G:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
+}
+#endif /* CONFIG_PROC_FS */
+
+static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
+		    unsigned long dtt)
+{
+	unsigned long table, mask;
+
+	mask = 0;
+	if (MACHINE_HAS_EDAT2) {
+		switch (dtt) {
+		case CRDTE_DTT_REGION3:
+			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
+			break;
+		case CRDTE_DTT_SEGMENT:
+			mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+			break;
+		case CRDTE_DTT_PAGE:
+			mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
+			break;
+		}
+		table = (unsigned long)old & mask;
+		crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
+	} else if (MACHINE_HAS_IDTE) {
+		cspg(old, *old, new);
+	} else {
+		csp((unsigned int *)old + 1, *old, new);
+	}
+}
+
+struct cpa {
+	unsigned int set_ro	: 1;
+	unsigned int clear_ro	: 1;
+};
+
+static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
+			  struct cpa cpa)
+{
+	pte_t *ptep, new;
+
+	ptep = pte_offset(pmdp, addr);
+	do {
+		if (pte_none(*ptep))
+			return -EINVAL;
+		if (cpa.set_ro)
+			new = pte_wrprotect(*ptep);
+		else if (cpa.clear_ro)
+			new = pte_mkwrite(pte_mkdirty(*ptep));
+		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
+		ptep++;
+		addr += PAGE_SIZE;
+		cond_resched();
+	} while (addr < end);
+	return 0;
+}
+
+static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
+{
+	unsigned long pte_addr, prot;
+	pte_t *pt_dir, *ptep;
+	pmd_t new;
+	int i, ro;
+
+	pt_dir = vmem_pte_alloc();
+	if (!pt_dir)
+		return -ENOMEM;
+	pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
+	ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
+	prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
+	ptep = pt_dir;
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte_val(*ptep) = pte_addr | prot;
+		pte_addr += PAGE_SIZE;
+		ptep++;
+	}
+	pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY;
+	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
+	update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
+	update_page_count(PG_DIRECT_MAP_1M, -1);
+	return 0;
+}
+
+static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, struct cpa cpa)
+{
+	pmd_t new;
+
+	if (cpa.set_ro)
+		new = pmd_wrprotect(*pmdp);
+	else if (cpa.clear_ro)
+		new = pmd_mkwrite(pmd_mkdirty(*pmdp));
+	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
+}
+
+static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
+			  struct cpa cpa)
+{
+	unsigned long next;
 	pmd_t *pmdp;
-	pte_t *ptep;
+	int rc = 0;
 
-	pgdp = pgd_offset_k(addr);
-	if (pgd_none(*pgdp))
-		return NULL;
-	pudp = pud_offset(pgdp, addr);
-	if (pud_none(*pudp) || pud_large(*pudp))
-		return NULL;
 	pmdp = pmd_offset(pudp, addr);
-	if (pmd_none(*pmdp) || pmd_large(*pmdp))
-		return NULL;
-	ptep = pte_offset_kernel(pmdp, addr);
-	if (pte_none(*ptep))
-		return NULL;
-	return ptep;
+	do {
+		if (pmd_none(*pmdp))
+			return -EINVAL;
+		next = pmd_addr_end(addr, end);
+		if (pmd_large(*pmdp)) {
+			if (addr & ~PMD_MASK || addr + PMD_SIZE > next) {
+				rc = split_pmd_page(pmdp, addr);
+				if (rc)
+					return rc;
+				continue;
+			}
+			modify_pmd_page(pmdp, addr, cpa);
+		} else {
+			rc = walk_pte_level(pmdp, addr, next, cpa);
+			if (rc)
+				return rc;
+		}
+		pmdp++;
+		addr = next;
+		cond_resched();
+	} while (addr < end);
+	return rc;
 }
 
-static void change_page_attr(unsigned long addr, int numpages,
-			     pte_t (*set) (pte_t))
+static int split_pud_page(pud_t *pudp, unsigned long addr)
 {
-	pte_t *ptep;
-	int i;
+	unsigned long pmd_addr, prot;
+	pmd_t *pm_dir, *pmdp;
+	pud_t new;
+	int i, ro;
 
-	for (i = 0; i < numpages; i++) {
-		ptep = walk_page_table(addr);
-		if (WARN_ON_ONCE(!ptep))
-			break;
-		*ptep = set(*ptep);
-		addr += PAGE_SIZE;
+	pm_dir = vmem_pmd_alloc();
+	if (!pm_dir)
+		return -ENOMEM;
+	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
+	ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
+	prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
+	pmdp = pm_dir;
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd_val(*pmdp) = pmd_addr | prot;
+		pmd_addr += PMD_SIZE;
+		pmdp++;
 	}
-	__tlb_flush_kernel();
+	pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY;
+	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
+	update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
+	update_page_count(PG_DIRECT_MAP_2G, -1);
+	return 0;
+}
+
+static void modify_pud_page(pud_t *pudp, unsigned long addr, struct cpa cpa)
+{
+	pud_t new;
+
+	if (cpa.set_ro)
+		new = pud_wrprotect(*pudp);
+	else if (cpa.clear_ro)
+		new = pud_mkwrite(pud_mkdirty(*pudp));
+	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
+}
+
+static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+			  struct cpa cpa)
+{
+	unsigned long next;
+	pud_t *pudp;
+	int rc = 0;
+
+	pudp = pud_offset(pgd, addr);
+	do {
+		if (pud_none(*pudp))
+			return -EINVAL;
+		next = pud_addr_end(addr, end);
+		if (pud_large(*pudp)) {
+			if (addr & ~PUD_MASK || addr + PUD_SIZE > next) {
+				rc = split_pud_page(pudp, addr);
+				if (rc)
+					break;
+				continue;
+			}
+			modify_pud_page(pudp, addr, cpa);
+		} else {
+			rc = walk_pmd_level(pudp, addr, next, cpa);
+		}
+		pudp++;
+		addr = next;
+		cond_resched();
+	} while (addr < end && !rc);
+	return rc;
+}
+
+static DEFINE_MUTEX(cpa_mutex);
+
+static int change_page_attr(unsigned long addr, unsigned long end,
+			    struct cpa cpa)
+{
+	unsigned long next;
+	int rc = -EINVAL;
+	pgd_t *pgdp;
+
+	if (end >= MODULES_END)
+		return -EINVAL;
+	mutex_lock(&cpa_mutex);
+	pgdp = pgd_offset_k(addr);
+	do {
+		if (pgd_none(*pgdp))
+			break;
+		next = pgd_addr_end(addr, end);
+		rc = walk_pud_level(pgdp, addr, next, cpa);
+		if (rc)
+			break;
+		cond_resched();
+	} while (pgdp++, addr = next, addr < end && !rc);
+	mutex_unlock(&cpa_mutex);
+	return rc;
 }
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
-	change_page_attr(addr, numpages, pte_wrprotect);
-	return 0;
+	struct cpa cpa = {
+		.set_ro = 1,
+	};
+
+	addr &= PAGE_MASK;
+	return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa);
 }
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
-	change_page_attr(addr, numpages, pte_mkwrite);
-	return 0;
+	struct cpa cpa = {
+		.clear_ro = 1,
+	};
+
+	addr &= PAGE_MASK;
+	return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa);
 }
 
 /* not possible */
@@ -138,7 +337,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 		nr = min(numpages - i, nr);
 		if (enable) {
 			for (j = 0; j < nr; j++) {
-				pte_val(*pte) = __pa(address);
+				pte_val(*pte) = address | pgprot_val(PAGE_KERNEL);
 				address += PAGE_SIZE;
 				pte++;
 			}

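change_page_attr() only touches whole mappings: a large (1MB/2GB) entry is modified in place when the requested range covers it entirely, and split into the next smaller table first when it does not. The alignment test reduced to its core, using the 1MB segment size as on s390:

    #include <stdio.h>

    #define PMD_SIZE	(1UL << 20)		/* 1MB segment */
    #define PMD_MASK	(~(PMD_SIZE - 1))

    static const char *action(unsigned long addr, unsigned long next)
    {
    	/* same test as walk_pmd_level(): partial coverage forces a split */
    	if (addr & ~PMD_MASK || addr + PMD_SIZE > next)
    		return "split";
    	return "modify in place";
    }

    int main(void)
    {
    	printf("%s\n", action(0x100000, 0x200000));	/* aligned, covered */
    	printf("%s\n", action(0x180000, 0x200000));	/* misaligned */
    	return 0;
    }
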
+ 71 - 22
arch/s390/mm/pgtable.c

@@ -27,40 +27,37 @@
 static inline pte_t ptep_flush_direct(struct mm_struct *mm,
 				      unsigned long addr, pte_t *ptep)
 {
-	int active, count;
 	pte_t old;
 
 	old = *ptep;
 	if (unlikely(pte_val(old) & _PAGE_INVALID))
 		return old;
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	atomic_inc(&mm->context.flush_count);
+	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
 		__ptep_ipte_local(addr, ptep);
 	else
 		__ptep_ipte(addr, ptep);
-	atomic_sub(0x10000, &mm->context.attach_count);
+	atomic_dec(&mm->context.flush_count);
 	return old;
 }
 
 static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
 				    unsigned long addr, pte_t *ptep)
 {
-	int active, count;
 	pte_t old;
 
 	old = *ptep;
 	if (unlikely(pte_val(old) & _PAGE_INVALID))
 		return old;
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if ((count & 0xffff) <= active) {
+	atomic_inc(&mm->context.flush_count);
+	if (cpumask_equal(&mm->context.cpu_attach_mask,
+			  cpumask_of(smp_processor_id()))) {
 		pte_val(*ptep) |= _PAGE_INVALID;
 		mm->context.flush_mm = 1;
 	} else
 		__ptep_ipte(addr, ptep);
-	atomic_sub(0x10000, &mm->context.attach_count);
+	atomic_dec(&mm->context.flush_count);
 	return old;
 }
 
@@ -70,7 +67,6 @@ static inline pgste_t pgste_get_lock(pte_t *ptep)
 #ifdef CONFIG_PGSTE
 	unsigned long old;
 
-	preempt_disable();
 	asm(
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
@@ -93,7 +89,6 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
 		: "=Q" (ptep[PTRS_PER_PTE])
 		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
 		: "cc", "memory");
-	preempt_enable();
 #endif
 }
 
@@ -230,9 +225,11 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
 	pgste_t pgste;
 	pte_t old;
 
+	preempt_disable();
 	pgste = ptep_xchg_start(mm, addr, ptep);
 	old = ptep_flush_direct(mm, addr, ptep);
 	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+	preempt_enable();
 	return old;
 }
 EXPORT_SYMBOL(ptep_xchg_direct);
@@ -243,9 +240,11 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
 	pgste_t pgste;
 	pte_t old;
 
+	preempt_disable();
 	pgste = ptep_xchg_start(mm, addr, ptep);
 	old = ptep_flush_lazy(mm, addr, ptep);
 	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+	preempt_enable();
 	return old;
 }
 EXPORT_SYMBOL(ptep_xchg_lazy);
@@ -256,6 +255,7 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
 	pgste_t pgste;
 	pte_t old;
 
+	preempt_disable();
 	pgste = ptep_xchg_start(mm, addr, ptep);
 	old = ptep_flush_lazy(mm, addr, ptep);
 	if (mm_has_pgste(mm)) {
@@ -279,13 +279,13 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 	} else {
 		*ptep = pte;
 	}
+	preempt_enable();
 }
 EXPORT_SYMBOL(ptep_modify_prot_commit);
 
 static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
 				      unsigned long addr, pmd_t *pmdp)
 {
-	int active, count;
 	pmd_t old;
 
 	old = *pmdp;
@@ -295,36 +295,34 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
 		__pmdp_csp(pmdp);
 		return old;
 	}
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	atomic_inc(&mm->context.flush_count);
+	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
 		__pmdp_idte_local(addr, pmdp);
 	else
 		__pmdp_idte(addr, pmdp);
-	atomic_sub(0x10000, &mm->context.attach_count);
+	atomic_dec(&mm->context.flush_count);
 	return old;
 }
 
 static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
 				    unsigned long addr, pmd_t *pmdp)
 {
-	int active, count;
 	pmd_t old;
 
 	old = *pmdp;
 	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
 		return old;
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if ((count & 0xffff) <= active) {
+	atomic_inc(&mm->context.flush_count);
+	if (cpumask_equal(&mm->context.cpu_attach_mask,
+			  cpumask_of(smp_processor_id()))) {
 		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
 		mm->context.flush_mm = 1;
 	} else if (MACHINE_HAS_IDTE)
 		__pmdp_idte(addr, pmdp);
 	else
 		__pmdp_csp(pmdp);
-	atomic_sub(0x10000, &mm->context.attach_count);
+	atomic_dec(&mm->context.flush_count);
 	return old;
 }
 
@@ -333,8 +331,10 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
 {
 	pmd_t old;
 
+	preempt_disable();
 	old = pmdp_flush_direct(mm, addr, pmdp);
 	*pmdp = new;
+	preempt_enable();
 	return old;
 }
 EXPORT_SYMBOL(pmdp_xchg_direct);
@@ -344,12 +344,53 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
 {
 	pmd_t old;
 
+	preempt_disable();
 	old = pmdp_flush_lazy(mm, addr, pmdp);
 	*pmdp = new;
+	preempt_enable();
 	return old;
 }
 EXPORT_SYMBOL(pmdp_xchg_lazy);
 
+static inline pud_t pudp_flush_direct(struct mm_struct *mm,
+				      unsigned long addr, pud_t *pudp)
+{
+	pud_t old;
+
+	old = *pudp;
+	if (pud_val(old) & _REGION_ENTRY_INVALID)
+		return old;
+	if (!MACHINE_HAS_IDTE) {
+		/*
+		 * Invalid bit position is the same for pmd and pud, so we can
+		 * re-use _pmd_csp() here
+		 */
+		__pmdp_csp((pmd_t *) pudp);
+		return old;
+	}
+	atomic_inc(&mm->context.flush_count);
+	if (MACHINE_HAS_TLB_LC &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		__pudp_idte_local(addr, pudp);
+	else
+		__pudp_idte(addr, pudp);
+	atomic_dec(&mm->context.flush_count);
+	return old;
+}
+
+pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
+		       pud_t *pudp, pud_t new)
+{
+	pud_t old;
+
+	preempt_disable();
+	old = pudp_flush_direct(mm, addr, pudp);
+	*pudp = new;
+	preempt_enable();
+	return old;
+}
+EXPORT_SYMBOL(pudp_xchg_direct);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 				pgtable_t pgtable)
@@ -398,20 +439,24 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	pgste_t pgste;
 
 	/* the mm_has_pgste() check is done in set_pte_at() */
+	preempt_disable();
 	pgste = pgste_get_lock(ptep);
 	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
 	pgste_set_key(ptep, pgste, entry, mm);
 	pgste = pgste_set_pte(ptep, pgste, entry);
 	pgste_set_unlock(ptep, pgste);
+	preempt_enable();
 }
 
 void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pgste_t pgste;
 
+	preempt_disable();
 	pgste = pgste_get_lock(ptep);
 	pgste_val(pgste) |= PGSTE_IN_BIT;
 	pgste_set_unlock(ptep, pgste);
+	preempt_enable();
 }
 
 static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
@@ -434,6 +479,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
 	pte_t pte;
 
 	/* Zap unused and logically-zero pages */
+	preempt_disable();
 	pgste = pgste_get_lock(ptep);
 	pgstev = pgste_val(pgste);
 	pte = *ptep;
@@ -446,6 +492,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
 	if (reset)
 		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
 	pgste_set_unlock(ptep, pgste);
+	preempt_enable();
 }
 
 void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
@@ -454,6 +501,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 	pgste_t pgste;
 
 	/* Clear storage key */
+	preempt_disable();
 	pgste = pgste_get_lock(ptep);
 	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
 			      PGSTE_GR_BIT | PGSTE_GC_BIT);
@@ -461,6 +509,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
 		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
 	pgste_set_unlock(ptep, pgste);
+	preempt_enable();
 }
 
 /*

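Taken together, the pgtable.c hunks replace the old attach_count encoding with a dedicated flush_count plus a cpu_attach_mask comparison, and bracket each flush-then-store sequence with preempt_disable()/preempt_enable() so the smp_processor_id() test cannot be invalidated by a migration in between. An annotated sketch of the flush decision (illustrative only, not the literal kernel code; pmdp_flush_sketch is a made-up name):

/* While flush_count is raised, attach/detach paths know a flush is in
 * flight.  If only the local CPU has the mm attached, the flush can be
 * deferred by just invalidating the entry; otherwise flush immediately.
 */
static inline void pmdp_flush_sketch(struct mm_struct *mm, unsigned long addr,
				     pmd_t *pmdp)
{
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;	/* defer flush */
		mm->context.flush_mm = 1;
	} else if (MACHINE_HAS_IDTE) {
		__pmdp_idte(addr, pmdp);	/* targeted TLB invalidation */
	} else {
		__pmdp_csp(pmdp);	/* legacy compare-and-swap-and-purge */
	}
	atomic_dec(&mm->context.flush_count);
}

The pmdp_xchg_direct()/pmdp_xchg_lazy() hunks show the callers holding preempt_disable() across the flush and the store of the new entry.
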
+ 33 - 40
arch/s390/mm/vmem.c

@@ -11,6 +11,7 @@
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
 #include <linux/memblock.h>
+#include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
@@ -29,9 +30,11 @@ static LIST_HEAD(mem_segs);
 
 static void __ref *vmem_alloc_pages(unsigned int order)
 {
+	unsigned long size = PAGE_SIZE << order;
+
 	if (slab_is_available())
 		return (void *)__get_free_pages(GFP_KERNEL, order);
-	return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
+	return alloc_bootmem_align(size, size);
 }
 
 static inline pud_t *vmem_pud_alloc(void)
@@ -45,7 +48,7 @@ static inline pud_t *vmem_pud_alloc(void)
 	return pud;
 }
 
-static inline pmd_t *vmem_pmd_alloc(void)
+pmd_t *vmem_pmd_alloc(void)
 {
 	pmd_t *pmd = NULL;
 
@@ -56,7 +59,7 @@ static inline pmd_t *vmem_pmd_alloc(void)
 	return pmd;
 }
 
-static pte_t __ref *vmem_pte_alloc(void)
+pte_t __ref *vmem_pte_alloc(void)
 {
 	pte_t *pte;
 
@@ -75,8 +78,9 @@ static pte_t __ref *vmem_pte_alloc(void)
 /*
  * Add a physical memory range to the 1:1 mapping.
  */
-static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
+static int vmem_add_mem(unsigned long start, unsigned long size)
 {
+	unsigned long pages4k, pages1m, pages2g;
 	unsigned long end = start + size;
 	unsigned long address = start;
 	pgd_t *pg_dir;
@@ -85,6 +89,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 	pte_t *pt_dir;
 	int ret = -ENOMEM;
 
+	pages4k = pages1m = pages2g = 0;
 	while (address < end) {
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
@@ -97,10 +102,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
 		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
 		     !debug_pagealloc_enabled()) {
-			pud_val(*pu_dir) = __pa(address) |
-				_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
-				(ro ? _REGION_ENTRY_PROTECT : 0);
+			pud_val(*pu_dir) = address | pgprot_val(REGION3_KERNEL);
 			address += PUD_SIZE;
+			pages2g++;
 			continue;
 		}
 		if (pud_none(*pu_dir)) {
@@ -113,11 +117,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
 		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
 		    !debug_pagealloc_enabled()) {
-			pmd_val(*pm_dir) = __pa(address) |
-				_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
-				_SEGMENT_ENTRY_YOUNG |
-				(ro ? _SEGMENT_ENTRY_PROTECT : 0);
+			pmd_val(*pm_dir) = address | pgprot_val(SEGMENT_KERNEL);
 			address += PMD_SIZE;
+			pages1m++;
 			continue;
 		}
 		if (pmd_none(*pm_dir)) {
@@ -128,12 +130,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 		}
 
 		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte_val(*pt_dir) = __pa(address) |
-			pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
+		pte_val(*pt_dir) = address |  pgprot_val(PAGE_KERNEL);
 		address += PAGE_SIZE;
+		pages4k++;
 	}
 	ret = 0;
 out:
+	update_page_count(PG_DIRECT_MAP_4K, pages4k);
+	update_page_count(PG_DIRECT_MAP_1M, pages1m);
+	update_page_count(PG_DIRECT_MAP_2G, pages2g);
 	return ret;
 }
 
@@ -143,15 +148,15 @@ out:
  */
 static void vmem_remove_range(unsigned long start, unsigned long size)
 {
+	unsigned long pages4k, pages1m, pages2g;
 	unsigned long end = start + size;
 	unsigned long address = start;
 	pgd_t *pg_dir;
 	pud_t *pu_dir;
 	pmd_t *pm_dir;
 	pte_t *pt_dir;
-	pte_t  pte;
 
-	pte_val(pte) = _PAGE_INVALID;
+	pages4k = pages1m = pages2g = 0;
 	while (address < end) {
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
@@ -166,6 +171,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 		if (pud_large(*pu_dir)) {
 			pud_clear(pu_dir);
 			address += PUD_SIZE;
+			pages2g++;
 			continue;
 		}
 		pm_dir = pmd_offset(pu_dir, address);
@@ -176,13 +182,18 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 		if (pmd_large(*pm_dir)) {
 			pmd_clear(pm_dir);
 			address += PMD_SIZE;
+			pages1m++;
 			continue;
 		}
 		pt_dir = pte_offset_kernel(pm_dir, address);
-		*pt_dir = pte;
+		pte_clear(&init_mm, address, pt_dir);
 		address += PAGE_SIZE;
+		pages4k++;
 	}
 	flush_tlb_kernel_range(start, end);
+	update_page_count(PG_DIRECT_MAP_4K, -pages4k);
+	update_page_count(PG_DIRECT_MAP_1M, -pages1m);
+	update_page_count(PG_DIRECT_MAP_2G, -pages2g);
 }
 
 /*
@@ -341,7 +352,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
 	if (ret)
 		goto out_free;
 
-	ret = vmem_add_mem(start, size, 0);
+	ret = vmem_add_mem(start, size);
 	if (ret)
 		goto out_remove;
 	goto out;
@@ -362,31 +373,13 @@ out:
  */
 void __init vmem_map_init(void)
 {
-	unsigned long ro_start, ro_end;
+	unsigned long size = _eshared - _stext;
 	struct memblock_region *reg;
-	phys_addr_t start, end;
 
-	ro_start = PFN_ALIGN((unsigned long)&_stext);
-	ro_end = (unsigned long)&_eshared & PAGE_MASK;
-	for_each_memblock(memory, reg) {
-		start = reg->base;
-		end = reg->base + reg->size;
-		if (start >= ro_end || end <= ro_start)
-			vmem_add_mem(start, end - start, 0);
-		else if (start >= ro_start && end <= ro_end)
-			vmem_add_mem(start, end - start, 1);
-		else if (start >= ro_start) {
-			vmem_add_mem(start, ro_end - start, 1);
-			vmem_add_mem(ro_end, end - ro_end, 0);
-		} else if (end < ro_end) {
-			vmem_add_mem(start, ro_start - start, 0);
-			vmem_add_mem(ro_start, end - ro_start, 1);
-		} else {
-			vmem_add_mem(start, ro_start - start, 0);
-			vmem_add_mem(ro_start, ro_end - ro_start, 1);
-			vmem_add_mem(ro_end, end - ro_end, 0);
-		}
-	}
+	for_each_memblock(memory, reg)
+		vmem_add_mem(reg->base, reg->size);
+	set_memory_ro((unsigned long)_stext, size >> PAGE_SHIFT);
+	pr_info("Write protected kernel read-only data: %luk\n", size >> 10);
 }
 
 /*

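The rewritten vmem_add_mem() applies the same test at the region (2GB, EDAT2) and segment (1MB, EDAT1) levels before installing a large mapping: the address must be aligned to the block size and the whole block must fit into the range. Distilled from the hunks above (a sketch; can_map_block() is a made-up name):

/* Precondition for mapping [addr, addr + block_size) with one large
 * entry in vmem_add_mem(): addr is non-zero, block-aligned, and the
 * block does not run past the end of the range being mapped.
 */
static inline int can_map_block(unsigned long addr, unsigned long end,
				unsigned long block_size)
{
	return addr && !(addr & (block_size - 1)) &&
	       addr + block_size <= end;
}

In the diff this is written as !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) for the 2GB case, since ~PUD_MASK equals PUD_SIZE - 1. Note also that vmem_map_init() no longer maps a read-only range itself: it maps everything writable and then write-protects _stext.._eshared with set_memory_ro(), which is what the __ro_after_init support builds on.
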
+ 20 - 5
arch/s390/numa/mode_emu.c

@@ -34,7 +34,8 @@
 #define DIST_CORE	1
 #define DIST_MC		2
 #define DIST_BOOK	3
-#define DIST_MAX	4
+#define DIST_DRAWER	4
+#define DIST_MAX	5
 
 /* Node distance reported to common code */
 #define EMU_NODE_DIST	10
@@ -43,7 +44,7 @@
 #define NODE_ID_FREE	-1
 
 /* Different levels of toptree */
-enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
+enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};
 
 /* The two toptree IDs */
 enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
@@ -113,6 +114,14 @@ static int cores_free(struct toptree *tree)
  * Return node of core
  */
 static struct toptree *core_node(struct toptree *core)
+{
+	return core->parent->parent->parent->parent;
+}
+
+/*
+ * Return drawer of core
+ */
+static struct toptree *core_drawer(struct toptree *core)
 {
 	return core->parent->parent->parent;
 }
@@ -138,6 +147,8 @@ static struct toptree *core_mc(struct toptree *core)
  */
 static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
 {
+	if (core_drawer(core1)->id != core_drawer(core2)->id)
+		return DIST_DRAWER;
 	if (core_book(core1)->id != core_book(core2)->id)
 		return DIST_BOOK;
 	if (core_mc(core1)->id != core_mc(core2)->id)
@@ -262,6 +273,8 @@ static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
 	struct toptree *core;
 
 	/* Always try to move perfectly fitting structures first */
+	move_level_to_numa(numa, phys, DRAWER, true);
+	move_level_to_numa(numa, phys, DRAWER, false);
 	move_level_to_numa(numa, phys, BOOK, true);
 	move_level_to_numa(numa, phys, BOOK, false);
 	move_level_to_numa(numa, phys, MC, true);
@@ -335,7 +348,7 @@ static struct toptree *toptree_to_numa(struct toptree *phys)
  */
 static struct toptree *toptree_from_topology(void)
 {
-	struct toptree *phys, *node, *book, *mc, *core;
+	struct toptree *phys, *node, *drawer, *book, *mc, *core;
 	struct cpu_topology_s390 *top;
 	int cpu;
 
@@ -344,10 +357,11 @@ static struct toptree *toptree_from_topology(void)
 	for_each_online_cpu(cpu) {
 		top = &per_cpu(cpu_topology, cpu);
 		node = toptree_get_child(phys, 0);
-		book = toptree_get_child(node, top->book_id);
+		drawer = toptree_get_child(node, top->drawer_id);
+		book = toptree_get_child(drawer, top->book_id);
 		mc = toptree_get_child(book, top->socket_id);
 		core = toptree_get_child(mc, top->core_id);
-		if (!book || !mc || !core)
+		if (!drawer || !book || !mc || !core)
 			panic("NUMA emulation could not allocate memory");
 		cpumask_set_cpu(cpu, &core->mask);
 		toptree_update_mask(mc);
@@ -368,6 +382,7 @@ static void topology_add_core(struct toptree *core)
 		cpumask_copy(&top->thread_mask, &core->mask);
 		cpumask_copy(&top->core_mask, &core_mc(core)->mask);
 		cpumask_copy(&top->book_mask, &core_book(core)->mask);
+		cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask);
 		cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
 		top->node_id = core_node(core)->id;
 	}

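With DRAWER inserted between BOOK and NODE in toptree_level, each helper that walks from a core to an ancestor follows one more parent link; core_node() now chases four parents where it used to chase three. The pattern condenses to (a sketch with made-up names; the kernel open-codes each walk as core_mc()/core_book()/core_drawer()/core_node()):

/* One parent link per topology level above the core:
 * mc = 1, book = 2, drawer = 3, node = 4 after this series.
 */
struct toptree_sketch {
	struct toptree_sketch *parent;
	int id;
};

static struct toptree_sketch *ancestor(struct toptree_sketch *core, int level)
{
	while (level-- > 0)
		core = core->parent;
	return core;
}

dist_core_to_core() tests the levels outermost first, so two cores in different drawers yield DIST_DRAWER no matter how their book or socket ids compare.
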
+ 0 - 1
arch/s390/oprofile/Makefile

@@ -7,4 +7,3 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		timer_int.o )
 
 oprofile-y :=	$(DRIVER_OBJS) init.o
-oprofile-y +=	hwsampler.o

+ 0 - 1178
arch/s390/oprofile/hwsampler.c

@@ -1,1178 +0,0 @@
-/*
- * Copyright IBM Corp. 2010
- * Author: Heinz Graalfs <graalfs@de.ibm.com>
- */
-
-#include <linux/kernel_stat.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/semaphore.h>
-#include <linux/oom.h>
-#include <linux/oprofile.h>
-
-#include <asm/facility.h>
-#include <asm/cpu_mf.h>
-#include <asm/irq.h>
-
-#include "hwsampler.h"
-#include "op_counter.h"
-
-#define MAX_NUM_SDB 511
-#define MIN_NUM_SDB 1
-
-DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
-
-struct hws_execute_parms {
-	void *buffer;
-	signed int rc;
-};
-
-DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
-EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer);
-
-static DEFINE_MUTEX(hws_sem);
-static DEFINE_MUTEX(hws_sem_oom);
-
-static unsigned char hws_flush_all;
-static unsigned int hws_oom;
-static unsigned int hws_alert;
-static struct workqueue_struct *hws_wq;
-
-static unsigned int hws_state;
-enum {
-	HWS_INIT = 1,
-	HWS_DEALLOCATED,
-	HWS_STOPPED,
-	HWS_STARTED,
-	HWS_STOPPING };
-
-/* set to 1 if called by kernel during memory allocation */
-static unsigned char oom_killer_was_active;
-/* size of SDBT and SDB as of allocate API */
-static unsigned long num_sdbt = 100;
-static unsigned long num_sdb = 511;
-/* sampling interval (machine cycles) */
-static unsigned long interval;
-
-static unsigned long min_sampler_rate;
-static unsigned long max_sampler_rate;
-
-static void execute_qsi(void *parms)
-{
-	struct hws_execute_parms *ep = parms;
-
-	ep->rc = qsi(ep->buffer);
-}
-
-static void execute_ssctl(void *parms)
-{
-	struct hws_execute_parms *ep = parms;
-
-	ep->rc = lsctl(ep->buffer);
-}
-
-static int smp_ctl_ssctl_stop(int cpu)
-{
-	int rc;
-	struct hws_execute_parms ep;
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-	cb->ssctl.es = 0;
-	cb->ssctl.cs = 0;
-
-	ep.buffer = &cb->ssctl;
-	smp_call_function_single(cpu, execute_ssctl, &ep, 1);
-	rc = ep.rc;
-	if (rc) {
-		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
-		dump_stack();
-	}
-
-	ep.buffer = &cb->qsi;
-	smp_call_function_single(cpu, execute_qsi, &ep, 1);
-
-	if (cb->qsi.es || cb->qsi.cs) {
-		printk(KERN_EMERG "CPUMF sampling did not stop properly.\n");
-		dump_stack();
-	}
-
-	return rc;
-}
-
-static int smp_ctl_ssctl_deactivate(int cpu)
-{
-	int rc;
-	struct hws_execute_parms ep;
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-	cb->ssctl.es = 1;
-	cb->ssctl.cs = 0;
-
-	ep.buffer = &cb->ssctl;
-	smp_call_function_single(cpu, execute_ssctl, &ep, 1);
-	rc = ep.rc;
-	if (rc)
-		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
-
-	ep.buffer = &cb->qsi;
-	smp_call_function_single(cpu, execute_qsi, &ep, 1);
-
-	if (cb->qsi.cs)
-		printk(KERN_EMERG "CPUMF sampling was not set inactive.\n");
-
-	return rc;
-}
-
-static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval)
-{
-	int rc;
-	struct hws_execute_parms ep;
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-	cb->ssctl.h = 1;
-	cb->ssctl.tear = cb->first_sdbt;
-	cb->ssctl.dear = *(unsigned long *) cb->first_sdbt;
-	cb->ssctl.interval = interval;
-	cb->ssctl.es = 1;
-	cb->ssctl.cs = 1;
-
-	ep.buffer = &cb->ssctl;
-	smp_call_function_single(cpu, execute_ssctl, &ep, 1);
-	rc = ep.rc;
-	if (rc)
-		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
-
-	ep.buffer = &cb->qsi;
-	smp_call_function_single(cpu, execute_qsi, &ep, 1);
-	if (ep.rc)
-		printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu);
-
-	return rc;
-}
-
-static int smp_ctl_qsi(int cpu)
-{
-	struct hws_execute_parms ep;
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-	ep.buffer = &cb->qsi;
-	smp_call_function_single(cpu, execute_qsi, &ep, 1);
-
-	return ep.rc;
-}
-
-static void hws_ext_handler(struct ext_code ext_code,
-			    unsigned int param32, unsigned long param64)
-{
-	struct hws_cpu_buffer *cb = this_cpu_ptr(&sampler_cpu_buffer);
-
-	if (!(param32 & CPU_MF_INT_SF_MASK))
-		return;
-
-	if (!hws_alert)
-		return;
-
-	inc_irq_stat(IRQEXT_CMS);
-	atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32);
-
-	if (hws_wq)
-		queue_work(hws_wq, &cb->worker);
-}
-
-static void worker(struct work_struct *work);
-
-static void add_samples_to_oprofile(unsigned cpu, unsigned long *,
-				unsigned long *dear);
-
-static void init_all_cpu_buffers(void)
-{
-	int cpu;
-	struct hws_cpu_buffer *cb;
-
-	for_each_online_cpu(cpu) {
-		cb = &per_cpu(sampler_cpu_buffer, cpu);
-		memset(cb, 0, sizeof(struct hws_cpu_buffer));
-	}
-}
-
-static void prepare_cpu_buffers(void)
-{
-	struct hws_cpu_buffer *cb;
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		cb = &per_cpu(sampler_cpu_buffer, cpu);
-		atomic_set(&cb->ext_params, 0);
-		cb->worker_entry = 0;
-		cb->sample_overflow = 0;
-		cb->req_alert = 0;
-		cb->incorrect_sdbt_entry = 0;
-		cb->invalid_entry_address = 0;
-		cb->loss_of_sample_data = 0;
-		cb->sample_auth_change_alert = 0;
-		cb->finish = 0;
-		cb->oom = 0;
-		cb->stop_mode = 0;
-	}
-}
-
-/*
- * allocate_sdbt() - allocate sampler memory
- * @cpu: the cpu for which sampler memory is allocated
- *
- * A 4K page is allocated for each requested SDBT.
- * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs.
- * Set ALERT_REQ mask in each SDBs trailer.
- * Returns zero if successful, <0 otherwise.
- */
-static int allocate_sdbt(int cpu)
-{
-	int j, k, rc;
-	unsigned long *sdbt;
-	unsigned long  sdb;
-	unsigned long *tail;
-	unsigned long *trailer;
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-	if (cb->first_sdbt)
-		return -EINVAL;
-
-	sdbt = NULL;
-	tail = sdbt;
-
-	for (j = 0; j < num_sdbt; j++) {
-		sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
-
-		mutex_lock(&hws_sem_oom);
-		/* OOM killer might have been activated */
-		barrier();
-		if (oom_killer_was_active || !sdbt) {
-			if (sdbt)
-				free_page((unsigned long)sdbt);
-
-			goto allocate_sdbt_error;
-		}
-		if (cb->first_sdbt == 0)
-			cb->first_sdbt = (unsigned long)sdbt;
-
-		/* link current page to tail of chain */
-		if (tail)
-			*tail = (unsigned long)(void *)sdbt + 1;
-
-		mutex_unlock(&hws_sem_oom);
-
-		for (k = 0; k < num_sdb; k++) {
-			/* get and set SDB page */
-			sdb = get_zeroed_page(GFP_KERNEL);
-
-			mutex_lock(&hws_sem_oom);
-			/* OOM killer might have been activated */
-			barrier();
-			if (oom_killer_was_active || !sdb) {
-				if (sdb)
-					free_page(sdb);
-
-				goto allocate_sdbt_error;
-			}
-			*sdbt = sdb;
-			trailer = trailer_entry_ptr(*sdbt);
-			*trailer = SDB_TE_ALERT_REQ_MASK;
-			sdbt++;
-			mutex_unlock(&hws_sem_oom);
-		}
-		tail = sdbt;
-	}
-	mutex_lock(&hws_sem_oom);
-	if (oom_killer_was_active)
-		goto allocate_sdbt_error;
-
-	rc = 0;
-	if (tail)
-		*tail = (unsigned long)
-			((void *)cb->first_sdbt) + 1;
-
-allocate_sdbt_exit:
-	mutex_unlock(&hws_sem_oom);
-	return rc;
-
-allocate_sdbt_error:
-	rc = -ENOMEM;
-	goto allocate_sdbt_exit;
-}
-
-/*
- * deallocate_sdbt() - deallocate all sampler memory
- *
- * For each online CPU all SDBT trees are deallocated.
- * Returns the number of freed pages.
- */
-static int deallocate_sdbt(void)
-{
-	int cpu;
-	int counter;
-
-	counter = 0;
-
-	for_each_online_cpu(cpu) {
-		unsigned long start;
-		unsigned long sdbt;
-		unsigned long *curr;
-		struct hws_cpu_buffer *cb;
-
-		cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-		if (!cb->first_sdbt)
-			continue;
-
-		sdbt = cb->first_sdbt;
-		curr = (unsigned long *) sdbt;
-		start = sdbt;
-
-		/* we'll free the SDBT after all SDBs are processed... */
-		while (1) {
-			if (!*curr || !sdbt)
-				break;
-
-			/* watch for link entry reset if found */
-			if (is_link_entry(curr)) {
-				curr = get_next_sdbt(curr);
-				if (sdbt)
-					free_page(sdbt);
-
-				/* we are done if we reach the start */
-				if ((unsigned long) curr == start)
-					break;
-				else
-					sdbt = (unsigned long) curr;
-			} else {
-				/* process SDB pointer */
-				if (*curr) {
-					free_page(*curr);
-					curr++;
-				}
-			}
-			counter++;
-		}
-		cb->first_sdbt = 0;
-	}
-	return counter;
-}
-
-static int start_sampling(int cpu)
-{
-	int rc;
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-	rc = smp_ctl_ssctl_enable_activate(cpu, interval);
-	if (rc) {
-		printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu);
-		goto start_exit;
-	}
-
-	rc = -EINVAL;
-	if (!cb->qsi.es) {
-		printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu);
-		goto start_exit;
-	}
-
-	if (!cb->qsi.cs) {
-		printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu);
-		goto start_exit;
-	}
-
-	printk(KERN_INFO
-		"hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n",
-		cpu, interval);
-
-	rc = 0;
-
-start_exit:
-	return rc;
-}
-
-static int stop_sampling(int cpu)
-{
-	unsigned long v;
-	int rc;
-	struct hws_cpu_buffer *cb;
-
-	rc = smp_ctl_qsi(cpu);
-	WARN_ON(rc);
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-	if (!rc && !cb->qsi.es)
-		printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu);
-
-	rc = smp_ctl_ssctl_stop(cpu);
-	if (rc) {
-		printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n",
-				cpu, rc);
-		goto stop_exit;
-	}
-
-	printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu);
-
-stop_exit:
-	v = cb->req_alert;
-	if (v)
-		printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert,"
-				" count=%lu.\n", cpu, v);
-
-	v = cb->loss_of_sample_data;
-	if (v)
-		printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data,"
-				" count=%lu.\n", cpu, v);
-
-	v = cb->invalid_entry_address;
-	if (v)
-		printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address,"
-				" count=%lu.\n", cpu, v);
-
-	v = cb->incorrect_sdbt_entry;
-	if (v)
-		printk(KERN_ERR
-				"hwsampler: CPU %d CPUMF Incorrect SDBT address,"
-				" count=%lu.\n", cpu, v);
-
-	v = cb->sample_auth_change_alert;
-	if (v)
-		printk(KERN_ERR
-				"hwsampler: CPU %d CPUMF Sample authorization change,"
-				" count=%lu.\n", cpu, v);
-
-	return rc;
-}
-
-static int check_hardware_prerequisites(void)
-{
-	if (!test_facility(68))
-		return -EOPNOTSUPP;
-	return 0;
-}
-/*
- * hws_oom_callback() - the OOM callback function
- *
- * In case the callback is invoked during memory allocation for the
- *  hw sampler, all obtained memory is deallocated and a flag is set
- *  so main sampler memory allocation can exit with a failure code.
- * In case the callback is invoked during sampling the hw sampler
- *  is deactivated for all CPUs.
- */
-static int hws_oom_callback(struct notifier_block *nfb,
-	unsigned long dummy, void *parm)
-{
-	unsigned long *freed;
-	int cpu;
-	struct hws_cpu_buffer *cb;
-
-	freed = parm;
-
-	mutex_lock(&hws_sem_oom);
-
-	if (hws_state == HWS_DEALLOCATED) {
-		/* during memory allocation */
-		if (oom_killer_was_active == 0) {
-			oom_killer_was_active = 1;
-			*freed += deallocate_sdbt();
-		}
-	} else {
-		int i;
-		cpu = get_cpu();
-		cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-		if (!cb->oom) {
-			for_each_online_cpu(i) {
-				smp_ctl_ssctl_deactivate(i);
-				cb->oom = 1;
-			}
-			cb->finish = 1;
-
-			printk(KERN_INFO
-				"hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n",
-				cpu);
-		}
-	}
-
-	mutex_unlock(&hws_sem_oom);
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block hws_oom_notifier = {
-	.notifier_call = hws_oom_callback
-};
-
-static int hws_cpu_callback(struct notifier_block *nfb,
-	unsigned long action, void *hcpu)
-{
-	/* We do not have sampler space available for all possible CPUs.
-	   All CPUs should be online when hw sampling is activated. */
-	return (hws_state <= HWS_DEALLOCATED) ? NOTIFY_OK : NOTIFY_BAD;
-}
-
-static struct notifier_block hws_cpu_notifier = {
-	.notifier_call = hws_cpu_callback
-};
-
-/**
- * hwsampler_deactivate() - set hardware sampling temporarily inactive
- * @cpu:  specifies the CPU to be set inactive.
- *
- * Returns 0 on success, !0 on failure.
- */
-int hwsampler_deactivate(unsigned int cpu)
-{
-	/*
-	 * Deactivate hw sampling temporarily and flush the buffer
-	 * by pushing all the pending samples to oprofile buffer.
-	 *
-	 * This function can be called under one of the following conditions:
-	 *     Memory unmap, task is exiting.
-	 */
-	int rc;
-	struct hws_cpu_buffer *cb;
-
-	rc = 0;
-	mutex_lock(&hws_sem);
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-	if (hws_state == HWS_STARTED) {
-		rc = smp_ctl_qsi(cpu);
-		WARN_ON(rc);
-		if (cb->qsi.cs) {
-			rc = smp_ctl_ssctl_deactivate(cpu);
-			if (rc) {
-				printk(KERN_INFO
-				"hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu);
-				cb->finish = 1;
-				hws_state = HWS_STOPPING;
-			} else  {
-				hws_flush_all = 1;
-				/* Add work to queue to read pending samples.*/
-				queue_work_on(cpu, hws_wq, &cb->worker);
-			}
-		}
-	}
-	mutex_unlock(&hws_sem);
-
-	if (hws_wq)
-		flush_workqueue(hws_wq);
-
-	return rc;
-}
-
-/**
- * hwsampler_activate() - activate/resume hardware sampling which was deactivated
- * @cpu:  specifies the CPU to be set active.
- *
- * Returns 0 on success, !0 on failure.
- */
-int hwsampler_activate(unsigned int cpu)
-{
-	/*
-	 * Re-activate hw sampling. This should be called in pair with
-	 * hwsampler_deactivate().
-	 */
-	int rc;
-	struct hws_cpu_buffer *cb;
-
-	rc = 0;
-	mutex_lock(&hws_sem);
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-	if (hws_state == HWS_STARTED) {
-		rc = smp_ctl_qsi(cpu);
-		WARN_ON(rc);
-		if (!cb->qsi.cs) {
-			hws_flush_all = 0;
-			rc = smp_ctl_ssctl_enable_activate(cpu, interval);
-			if (rc) {
-				printk(KERN_ERR
-				"CPU %d, CPUMF activate sampling failed.\n",
-					 cpu);
-			}
-		}
-	}
-
-	mutex_unlock(&hws_sem);
-
-	return rc;
-}
-
-static int check_qsi_on_setup(void)
-{
-	int rc;
-	unsigned int cpu;
-	struct hws_cpu_buffer *cb;
-
-	for_each_online_cpu(cpu) {
-		cb = &per_cpu(sampler_cpu_buffer, cpu);
-		rc = smp_ctl_qsi(cpu);
-		WARN_ON(rc);
-		if (rc)
-			return -EOPNOTSUPP;
-
-		if (!cb->qsi.as) {
-			printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n");
-			return -EINVAL;
-		}
-
-		if (cb->qsi.es) {
-			printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n");
-			rc = smp_ctl_ssctl_stop(cpu);
-			if (rc)
-				return -EINVAL;
-
-			printk(KERN_INFO
-				"CPU %d, CPUMF Sampling stopped now.\n", cpu);
-		}
-	}
-	return 0;
-}
-
-static int check_qsi_on_start(void)
-{
-	unsigned int cpu;
-	int rc;
-	struct hws_cpu_buffer *cb;
-
-	for_each_online_cpu(cpu) {
-		cb = &per_cpu(sampler_cpu_buffer, cpu);
-		rc = smp_ctl_qsi(cpu);
-		WARN_ON(rc);
-
-		if (!cb->qsi.as)
-			return -EINVAL;
-
-		if (cb->qsi.es)
-			return -EINVAL;
-
-		if (cb->qsi.cs)
-			return -EINVAL;
-	}
-	return 0;
-}
-
-static void worker_on_start(unsigned int cpu)
-{
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-	cb->worker_entry = cb->first_sdbt;
-}
-
-static int worker_check_error(unsigned int cpu, int ext_params)
-{
-	int rc;
-	unsigned long *sdbt;
-	struct hws_cpu_buffer *cb;
-
-	rc = 0;
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-	sdbt = (unsigned long *) cb->worker_entry;
-
-	if (!sdbt || !*sdbt)
-		return -EINVAL;
-
-	if (ext_params & CPU_MF_INT_SF_PRA)
-		cb->req_alert++;
-
-	if (ext_params & CPU_MF_INT_SF_LSDA)
-		cb->loss_of_sample_data++;
-
-	if (ext_params & CPU_MF_INT_SF_IAE) {
-		cb->invalid_entry_address++;
-		rc = -EINVAL;
-	}
-
-	if (ext_params & CPU_MF_INT_SF_ISE) {
-		cb->incorrect_sdbt_entry++;
-		rc = -EINVAL;
-	}
-
-	if (ext_params & CPU_MF_INT_SF_SACA) {
-		cb->sample_auth_change_alert++;
-		rc = -EINVAL;
-	}
-
-	return rc;
-}
-
-static void worker_on_finish(unsigned int cpu)
-{
-	int rc, i;
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-	if (cb->finish) {
-		rc = smp_ctl_qsi(cpu);
-		WARN_ON(rc);
-		if (cb->qsi.es) {
-			printk(KERN_INFO
-				"hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n",
-				cpu);
-			rc = smp_ctl_ssctl_stop(cpu);
-			if (rc)
-				printk(KERN_INFO
-					"hwsampler: CPU %d, CPUMF Deactivation failed.\n",
-					cpu);
-
-			for_each_online_cpu(i) {
-				if (i == cpu)
-					continue;
-				if (!cb->finish) {
-					cb->finish = 1;
-					queue_work_on(i, hws_wq,
-						&cb->worker);
-				}
-			}
-		}
-	}
-}
-
-static void worker_on_interrupt(unsigned int cpu)
-{
-	unsigned long *sdbt;
-	unsigned char done;
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-	sdbt = (unsigned long *) cb->worker_entry;
-
-	done = 0;
-	/* do not proceed if stop was entered,
-	 * forget the buffers not yet processed */
-	while (!done && !cb->stop_mode) {
-		unsigned long *trailer;
-		struct hws_trailer_entry *te;
-		unsigned long *dear = 0;
-
-		trailer = trailer_entry_ptr(*sdbt);
-		/* leave loop if no more work to do */
-		if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) {
-			done = 1;
-			if (!hws_flush_all)
-				continue;
-		}
-
-		te = (struct hws_trailer_entry *)trailer;
-		cb->sample_overflow += te->overflow;
-
-		add_samples_to_oprofile(cpu, sdbt, dear);
-
-		/* reset trailer */
-		xchg((unsigned char *) te, 0x40);
-
-		/* advance to next sdb slot in current sdbt */
-		sdbt++;
-		/* in case link bit is set use address w/o link bit */
-		if (is_link_entry(sdbt))
-			sdbt = get_next_sdbt(sdbt);
-
-		cb->worker_entry = (unsigned long)sdbt;
-	}
-}
-
-static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
-		unsigned long *dear)
-{
-	struct hws_basic_entry *sample_data_ptr;
-	unsigned long *trailer;
-
-	trailer = trailer_entry_ptr(*sdbt);
-	if (dear) {
-		if (dear > trailer)
-			return;
-		trailer = dear;
-	}
-
-	sample_data_ptr = (struct hws_basic_entry *)(*sdbt);
-
-	while ((unsigned long *)sample_data_ptr < trailer) {
-		struct pt_regs *regs = NULL;
-		struct task_struct *tsk = NULL;
-
-		/*
-		 * Check sampling mode, 1 indicates basic (=customer) sampling
-		 * mode.
-		 */
-		if (sample_data_ptr->def != 1) {
-			/* sample slot is not yet written */
-			break;
-		} else {
-			/* make sure we don't use it twice,
-			 * the next time the sampler will set it again */
-			sample_data_ptr->def = 0;
-		}
-
-		/* Get pt_regs. */
-		if (sample_data_ptr->P == 1) {
-			/* userspace sample */
-			unsigned int pid = sample_data_ptr->prim_asn;
-			if (!counter_config.user)
-				goto skip_sample;
-			rcu_read_lock();
-			tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
-			if (tsk)
-				regs = task_pt_regs(tsk);
-			rcu_read_unlock();
-		} else {
-			/* kernelspace sample */
-			if (!counter_config.kernel)
-				goto skip_sample;
-			regs = task_pt_regs(current);
-		}
-
-		mutex_lock(&hws_sem);
-		oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
-				!sample_data_ptr->P, tsk);
-		mutex_unlock(&hws_sem);
-	skip_sample:
-		sample_data_ptr++;
-	}
-}
-
-static void worker(struct work_struct *work)
-{
-	unsigned int cpu;
-	int ext_params;
-	struct hws_cpu_buffer *cb;
-
-	cb = container_of(work, struct hws_cpu_buffer, worker);
-	cpu = smp_processor_id();
-	ext_params = atomic_xchg(&cb->ext_params, 0);
-
-	if (!cb->worker_entry)
-		worker_on_start(cpu);
-
-	if (worker_check_error(cpu, ext_params))
-		return;
-
-	if (!cb->finish)
-		worker_on_interrupt(cpu);
-
-	if (cb->finish)
-		worker_on_finish(cpu);
-}
-
-/**
- * hwsampler_allocate() - allocate memory for the hardware sampler
- * @sdbt:  number of SDBTs per online CPU (must be > 0)
- * @sdb:   number of SDBs per SDBT (minimum 1, maximum 511)
- *
- * Returns 0 on success, !0 on failure.
- */
-int hwsampler_allocate(unsigned long sdbt, unsigned long sdb)
-{
-	int cpu, rc;
-	mutex_lock(&hws_sem);
-
-	rc = -EINVAL;
-	if (hws_state != HWS_DEALLOCATED)
-		goto allocate_exit;
-
-	if (sdbt < 1)
-		goto allocate_exit;
-
-	if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB)
-		goto allocate_exit;
-
-	num_sdbt = sdbt;
-	num_sdb = sdb;
-
-	oom_killer_was_active = 0;
-	register_oom_notifier(&hws_oom_notifier);
-
-	for_each_online_cpu(cpu) {
-		if (allocate_sdbt(cpu)) {
-			unregister_oom_notifier(&hws_oom_notifier);
-			goto allocate_error;
-		}
-	}
-	unregister_oom_notifier(&hws_oom_notifier);
-	if (oom_killer_was_active)
-		goto allocate_error;
-
-	hws_state = HWS_STOPPED;
-	rc = 0;
-
-allocate_exit:
-	mutex_unlock(&hws_sem);
-	return rc;
-
-allocate_error:
-	rc = -ENOMEM;
-	printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n");
-	goto allocate_exit;
-}
-
-/**
- * hwsampler_deallocate() - deallocate hardware sampler memory
- *
- * Returns 0 on success, !0 on failure.
- */
-int hwsampler_deallocate(void)
-{
-	int rc;
-
-	mutex_lock(&hws_sem);
-
-	rc = -EINVAL;
-	if (hws_state != HWS_STOPPED)
-		goto deallocate_exit;
-
-	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-	hws_alert = 0;
-	deallocate_sdbt();
-
-	hws_state = HWS_DEALLOCATED;
-	rc = 0;
-
-deallocate_exit:
-	mutex_unlock(&hws_sem);
-
-	return rc;
-}
-
-unsigned long hwsampler_query_min_interval(void)
-{
-	return min_sampler_rate;
-}
-
-unsigned long hwsampler_query_max_interval(void)
-{
-	return max_sampler_rate;
-}
-
-unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
-{
-	struct hws_cpu_buffer *cb;
-
-	cb = &per_cpu(sampler_cpu_buffer, cpu);
-
-	return cb->sample_overflow;
-}
-
-int hwsampler_setup(void)
-{
-	int rc;
-	int cpu;
-	struct hws_cpu_buffer *cb;
-
-	mutex_lock(&hws_sem);
-
-	rc = -EINVAL;
-	if (hws_state)
-		goto setup_exit;
-
-	hws_state = HWS_INIT;
-
-	init_all_cpu_buffers();
-
-	rc = check_hardware_prerequisites();
-	if (rc)
-		goto setup_exit;
-
-	rc = check_qsi_on_setup();
-	if (rc)
-		goto setup_exit;
-
-	rc = -EINVAL;
-	hws_wq = create_workqueue("hwsampler");
-	if (!hws_wq)
-		goto setup_exit;
-
-	register_cpu_notifier(&hws_cpu_notifier);
-
-	for_each_online_cpu(cpu) {
-		cb = &per_cpu(sampler_cpu_buffer, cpu);
-		INIT_WORK(&cb->worker, worker);
-		rc = smp_ctl_qsi(cpu);
-		WARN_ON(rc);
-		if (min_sampler_rate != cb->qsi.min_sampl_rate) {
-			if (min_sampler_rate) {
-				printk(KERN_WARNING
-					"hwsampler: different min sampler rate values.\n");
-				if (min_sampler_rate < cb->qsi.min_sampl_rate)
-					min_sampler_rate =
-						cb->qsi.min_sampl_rate;
-			} else
-				min_sampler_rate = cb->qsi.min_sampl_rate;
-		}
-		if (max_sampler_rate != cb->qsi.max_sampl_rate) {
-			if (max_sampler_rate) {
-				printk(KERN_WARNING
-					"hwsampler: different max sampler rate values.\n");
-				if (max_sampler_rate > cb->qsi.max_sampl_rate)
-					max_sampler_rate =
-						cb->qsi.max_sampl_rate;
-			} else
-				max_sampler_rate = cb->qsi.max_sampl_rate;
-		}
-	}
-	register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
-
-	hws_state = HWS_DEALLOCATED;
-	rc = 0;
-
-setup_exit:
-	mutex_unlock(&hws_sem);
-	return rc;
-}
-
-int hwsampler_shutdown(void)
-{
-	int rc;
-
-	mutex_lock(&hws_sem);
-
-	rc = -EINVAL;
-	if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) {
-		mutex_unlock(&hws_sem);
-
-		if (hws_wq)
-			flush_workqueue(hws_wq);
-
-		mutex_lock(&hws_sem);
-
-		if (hws_state == HWS_STOPPED) {
-			irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-			hws_alert = 0;
-			deallocate_sdbt();
-		}
-		if (hws_wq) {
-			destroy_workqueue(hws_wq);
-			hws_wq = NULL;
-		}
-
-		unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
-		hws_state = HWS_INIT;
-		rc = 0;
-	}
-	mutex_unlock(&hws_sem);
-
-	unregister_cpu_notifier(&hws_cpu_notifier);
-
-	return rc;
-}
-
-/**
- * hwsampler_start_all() - start hardware sampling on all online CPUs
- * @rate:  specifies the used interval when samples are taken
- *
- * Returns 0 on success, !0 on failure.
- */
-int hwsampler_start_all(unsigned long rate)
-{
-	int rc, cpu;
-
-	mutex_lock(&hws_sem);
-
-	hws_oom = 0;
-
-	rc = -EINVAL;
-	if (hws_state != HWS_STOPPED)
-		goto start_all_exit;
-
-	interval = rate;
-
-	/* fail if rate is not valid */
-	if (interval < min_sampler_rate || interval > max_sampler_rate)
-		goto start_all_exit;
-
-	rc = check_qsi_on_start();
-	if (rc)
-		goto start_all_exit;
-
-	prepare_cpu_buffers();
-
-	for_each_online_cpu(cpu) {
-		rc = start_sampling(cpu);
-		if (rc)
-			break;
-	}
-	if (rc) {
-		for_each_online_cpu(cpu) {
-			stop_sampling(cpu);
-		}
-		goto start_all_exit;
-	}
-	hws_state = HWS_STARTED;
-	rc = 0;
-
-start_all_exit:
-	mutex_unlock(&hws_sem);
-
-	if (rc)
-		return rc;
-
-	register_oom_notifier(&hws_oom_notifier);
-	hws_oom = 1;
-	hws_flush_all = 0;
-	/* now let them in, 1407 CPUMF external interrupts */
-	hws_alert = 1;
-	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
-	return 0;
-}
-
-/**
- * hwsampler_stop_all() - stop hardware sampling on all online CPUs
- *
- * Returns 0 on success, !0 on failure.
- */
-int hwsampler_stop_all(void)
-{
-	int tmp_rc, rc, cpu;
-	struct hws_cpu_buffer *cb;
-
-	mutex_lock(&hws_sem);
-
-	rc = 0;
-	if (hws_state == HWS_INIT) {
-		mutex_unlock(&hws_sem);
-		return 0;
-	}
-	hws_state = HWS_STOPPING;
-	mutex_unlock(&hws_sem);
-
-	for_each_online_cpu(cpu) {
-		cb = &per_cpu(sampler_cpu_buffer, cpu);
-		cb->stop_mode = 1;
-		tmp_rc = stop_sampling(cpu);
-		if (tmp_rc)
-			rc = tmp_rc;
-	}
-
-	if (hws_wq)
-		flush_workqueue(hws_wq);
-
-	mutex_lock(&hws_sem);
-	if (hws_oom) {
-		unregister_oom_notifier(&hws_oom_notifier);
-		hws_oom = 0;
-	}
-	hws_state = HWS_STOPPED;
-	mutex_unlock(&hws_sem);
-
-	return rc;
-}

+ 0 - 63
arch/s390/oprofile/hwsampler.h

@@ -1,63 +0,0 @@
-/*
- * CPUMF HW sampler functions and internal structures
- *
- *    Copyright IBM Corp. 2010
- *    Author(s): Heinz Graalfs <graalfs@de.ibm.com>
- */
-
-#ifndef HWSAMPLER_H_
-#define HWSAMPLER_H_
-
-#include <linux/workqueue.h>
-#include <asm/cpu_mf.h>
-
-struct hws_ssctl_request_block     /* SET SAMPLING CONTROLS req block   */
-{ /* bytes 0 - 7  Bit(s) */
-	unsigned int s:1;           /* 0: maximum buffer indicator       */
-	unsigned int h:1;           /* 1: part. level reserved for VM use*/
-	unsigned long b2_53:52;     /* 2-53: zeros                       */
-	unsigned int es:1;          /* 54: sampling enable control       */
-	unsigned int b55_61:7;      /* 55-61: - zeros                    */
-	unsigned int cs:1;          /* 62: sampling activation control   */
-	unsigned int b63:1;         /* 63: zero                          */
-	unsigned long interval;     /* 8-15: sampling interval           */
-	unsigned long tear;         /* 16-23: TEAR contents              */
-	unsigned long dear;         /* 24-31: DEAR contents              */
-	/* 32-63:                                                        */
-	unsigned long rsvrd1;       /* reserved                          */
-	unsigned long rsvrd2;       /* reserved                          */
-	unsigned long rsvrd3;       /* reserved                          */
-	unsigned long rsvrd4;       /* reserved                          */
-};
-
-struct hws_cpu_buffer {
-	unsigned long first_sdbt;       /* @ of 1st SDB-Table for this CP*/
-	unsigned long worker_entry;
-	unsigned long sample_overflow;  /* taken from SDB ...            */
-	struct hws_qsi_info_block qsi;
-	struct hws_ssctl_request_block ssctl;
-	struct work_struct worker;
-	atomic_t ext_params;
-	unsigned long req_alert;
-	unsigned long loss_of_sample_data;
-	unsigned long invalid_entry_address;
-	unsigned long incorrect_sdbt_entry;
-	unsigned long sample_auth_change_alert;
-	unsigned int finish:1;
-	unsigned int oom:1;
-	unsigned int stop_mode:1;
-};
-
-int hwsampler_setup(void);
-int hwsampler_shutdown(void);
-int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
-int hwsampler_deallocate(void);
-unsigned long hwsampler_query_min_interval(void);
-unsigned long hwsampler_query_max_interval(void);
-int hwsampler_start_all(unsigned long interval);
-int hwsampler_stop_all(void);
-int hwsampler_deactivate(unsigned int cpu);
-int hwsampler_activate(unsigned int cpu);
-unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu);
-
-#endif /*HWSAMPLER_H_*/

+ 0 - 489
arch/s390/oprofile/init.c

@@ -10,488 +10,8 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/perf_event.h>
 #include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/module.h>
 #include <asm/processor.h>
-#include <asm/perf_event.h>
-
-#include "../../../drivers/oprofile/oprof.h"
-
-#include "hwsampler.h"
-#include "op_counter.h"
-
-#define DEFAULT_INTERVAL	4127518
-
-#define DEFAULT_SDBT_BLOCKS	1
-#define DEFAULT_SDB_BLOCKS	511
-
-static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
-static unsigned long oprofile_min_interval;
-static unsigned long oprofile_max_interval;
-
-static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
-static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
-
-static int hwsampler_enabled;
-static int hwsampler_running;	/* start_mutex must be held to change */
-static int hwsampler_available;
-
-static struct oprofile_operations timer_ops;
-
-struct op_counter_config counter_config;
-
-enum __force_cpu_type {
-	reserved = 0,		/* do not force */
-	timer,
-};
-static int force_cpu_type;
-
-static int set_cpu_type(const char *str, struct kernel_param *kp)
-{
-	if (!strcmp(str, "timer")) {
-		force_cpu_type = timer;
-		printk(KERN_INFO "oprofile: forcing timer to be returned "
-		                 "as cpu type\n");
-	} else {
-		force_cpu_type = 0;
-	}
-
-	return 0;
-}
-module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
-MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
-		           "(report cpu_type \"timer\"");
-
-static int __oprofile_hwsampler_start(void)
-{
-	int retval;
-
-	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
-	if (retval)
-		return retval;
-
-	retval = hwsampler_start_all(oprofile_hw_interval);
-	if (retval)
-		hwsampler_deallocate();
-
-	return retval;
-}
-
-static int oprofile_hwsampler_start(void)
-{
-	int retval;
-
-	hwsampler_running = hwsampler_enabled;
-
-	if (!hwsampler_running)
-		return timer_ops.start();
-
-	retval = perf_reserve_sampling();
-	if (retval)
-		return retval;
-
-	retval = __oprofile_hwsampler_start();
-	if (retval)
-		perf_release_sampling();
-
-	return retval;
-}
-
-static void oprofile_hwsampler_stop(void)
-{
-	if (!hwsampler_running) {
-		timer_ops.stop();
-		return;
-	}
-
-	hwsampler_stop_all();
-	hwsampler_deallocate();
-	perf_release_sampling();
-	return;
-}
-
-/*
- * File ops used for:
- * /dev/oprofile/0/enabled
- * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
- */
-
-static ssize_t hwsampler_read(struct file *file, char __user *buf,
-		size_t count, loff_t *offset)
-{
-	return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
-}
-
-static ssize_t hwsampler_write(struct file *file, char const __user *buf,
-		size_t count, loff_t *offset)
-{
-	unsigned long val;
-	int retval;
-
-	if (*offset)
-		return -EINVAL;
-
-	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval <= 0)
-		return retval;
-
-	if (val != 0 && val != 1)
-		return -EINVAL;
-
-	if (oprofile_started)
-		/*
-		 * save to do without locking as we set
-		 * hwsampler_running in start() when start_mutex is
-		 * held
-		 */
-		return -EBUSY;
-
-	hwsampler_enabled = val;
-
-	return count;
-}
-
-static const struct file_operations hwsampler_fops = {
-	.read		= hwsampler_read,
-	.write		= hwsampler_write,
-};
-
-/*
- * File ops used for:
- * /dev/oprofile/0/count
- * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
- *
- * Make sure that the value is within the hardware range.
- */
-
-static ssize_t hw_interval_read(struct file *file, char __user *buf,
-				size_t count, loff_t *offset)
-{
-	return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
-					count, offset);
-}
-
-static ssize_t hw_interval_write(struct file *file, char const __user *buf,
-				 size_t count, loff_t *offset)
-{
-	unsigned long val;
-	int retval;
-
-	if (*offset)
-		return -EINVAL;
-	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval <= 0)
-		return retval;
-	if (val < oprofile_min_interval)
-		oprofile_hw_interval = oprofile_min_interval;
-	else if (val > oprofile_max_interval)
-		oprofile_hw_interval = oprofile_max_interval;
-	else
-		oprofile_hw_interval = val;
-
-	return count;
-}
-
-static const struct file_operations hw_interval_fops = {
-	.read		= hw_interval_read,
-	.write		= hw_interval_write,
-};
-
-/*
- * File ops used for:
- * /dev/oprofile/0/event
- * Only a single event with number 0 is supported with this counter.
- *
- * /dev/oprofile/0/unit_mask
- * This is a dummy file needed by the user space tools.
- * No value other than 0 is accepted or returned.
- */
-
-static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
-				    size_t count, loff_t *offset)
-{
-	return oprofilefs_ulong_to_user(0, buf, count, offset);
-}
-
-static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
-				     size_t count, loff_t *offset)
-{
-	unsigned long val;
-	int retval;
-
-	if (*offset)
-		return -EINVAL;
-
-	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval <= 0)
-		return retval;
-	if (val != 0)
-		return -EINVAL;
-	return count;
-}
-
-static const struct file_operations zero_fops = {
-	.read		= hwsampler_zero_read,
-	.write		= hwsampler_zero_write,
-};
-
-/* /dev/oprofile/0/kernel file ops.  */
-
-static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
-				     size_t count, loff_t *offset)
-{
-	return oprofilefs_ulong_to_user(counter_config.kernel,
-					buf, count, offset);
-}
-
-static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
-				      size_t count, loff_t *offset)
-{
-	unsigned long val;
-	int retval;
-
-	if (*offset)
-		return -EINVAL;
-
-	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval <= 0)
-		return retval;
-
-	if (val != 0 && val != 1)
-		return -EINVAL;
-
-	counter_config.kernel = val;
-
-	return count;
-}
-
-static const struct file_operations kernel_fops = {
-	.read		= hwsampler_kernel_read,
-	.write		= hwsampler_kernel_write,
-};
-
-/* /dev/oprofile/0/user file ops. */
-
-static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
-				   size_t count, loff_t *offset)
-{
-	return oprofilefs_ulong_to_user(counter_config.user,
-					buf, count, offset);
-}
-
-static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
-				    size_t count, loff_t *offset)
-{
-	unsigned long val;
-	int retval;
-
-	if (*offset)
-		return -EINVAL;
-
-	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval <= 0)
-		return retval;
-
-	if (val != 0 && val != 1)
-		return -EINVAL;
-
-	counter_config.user = val;
-
-	return count;
-}
-
-static const struct file_operations user_fops = {
-	.read		= hwsampler_user_read,
-	.write		= hwsampler_user_write,
-};
-
-
-/*
- * File ops used for: /dev/oprofile/timer/enabled
- * The value always has to be the inverted value of hwsampler_enabled. So
- * no separate variable is created. That way we do not need locking.
- */
-
-static ssize_t timer_enabled_read(struct file *file, char __user *buf,
-				  size_t count, loff_t *offset)
-{
-	return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
-}
-
-static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
-				   size_t count, loff_t *offset)
-{
-	unsigned long val;
-	int retval;
-
-	if (*offset)
-		return -EINVAL;
-
-	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval <= 0)
-		return retval;
-
-	if (val != 0 && val != 1)
-		return -EINVAL;
-
-	/* Timer cannot be disabled without having hardware sampling.  */
-	if (val == 0 && !hwsampler_available)
-		return -EINVAL;
-
-	if (oprofile_started)
-		/*
-		 * save to do without locking as we set
-		 * hwsampler_running in start() when start_mutex is
-		 * held
-		 */
-		return -EBUSY;
-
-	hwsampler_enabled = !val;
-
-	return count;
-}
-
-static const struct file_operations timer_enabled_fops = {
-	.read		= timer_enabled_read,
-	.write		= timer_enabled_write,
-};
-
-
-static int oprofile_create_hwsampling_files(struct dentry *root)
-{
-	struct dentry *dir;
-
-	dir = oprofilefs_mkdir(root, "timer");
-	if (!dir)
-		return -EINVAL;
-
-	oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
-
-	if (!hwsampler_available)
-		return 0;
-
-	/* reinitialize default values */
-	hwsampler_enabled = 1;
-	counter_config.kernel = 1;
-	counter_config.user = 1;
-
-	if (!force_cpu_type) {
-		/*
-		 * Create the counter file system.  A single virtual
-		 * counter is created which can be used to
-		 * enable/disable hardware sampling dynamically from
-		 * user space.  The user space will configure a single
-		 * counter with a single event.  The value of 'event'
-		 * and 'unit_mask' are not evaluated by the kernel code
-		 * and can only be set to 0.
-		 */
-
-		dir = oprofilefs_mkdir(root, "0");
-		if (!dir)
-			return -EINVAL;
-
-		oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
-		oprofilefs_create_file(dir, "event", &zero_fops);
-		oprofilefs_create_file(dir, "count", &hw_interval_fops);
-		oprofilefs_create_file(dir, "unit_mask", &zero_fops);
-		oprofilefs_create_file(dir, "kernel", &kernel_fops);
-		oprofilefs_create_file(dir, "user", &user_fops);
-		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
-					&oprofile_sdbt_blocks);
-
-	} else {
-		/*
-		 * Hardware sampling can be used but the cpu_type is
-		 * forced to timer in order to deal with legacy user
-		 * space tools.  The /dev/oprofile/hwsampling fs is
-		 * provided in that case.
-		 */
-		dir = oprofilefs_mkdir(root, "hwsampling");
-		if (!dir)
-			return -EINVAL;
-
-		oprofilefs_create_file(dir, "hwsampler",
-				       &hwsampler_fops);
-		oprofilefs_create_file(dir, "hw_interval",
-				       &hw_interval_fops);
-		oprofilefs_create_ro_ulong(dir, "hw_min_interval",
-					   &oprofile_min_interval);
-		oprofilefs_create_ro_ulong(dir, "hw_max_interval",
-					   &oprofile_max_interval);
-		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
-					&oprofile_sdbt_blocks);
-	}
-	return 0;
-}
-
-static int oprofile_hwsampler_init(struct oprofile_operations *ops)
-{
-	/*
-	 * Initialize the timer mode infrastructure as well in order
-	 * to be able to switch back dynamically.  oprofile_timer_init
-	 * is not supposed to fail.
-	 */
-	if (oprofile_timer_init(ops))
-		BUG();
-
-	memcpy(&timer_ops, ops, sizeof(timer_ops));
-	ops->create_files = oprofile_create_hwsampling_files;
-
-	/*
-	 * If the user space tools do not support newer cpu types,
-	 * the force_cpu_type module parameter
-	 * can be used to always return \"timer\" as cpu type.
-	 */
-	if (force_cpu_type != timer) {
-		struct cpuid id;
-
-		get_cpu_id (&id);
-
-		switch (id.machine) {
-		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
-		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
-		case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
-		case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break;
-		default: return -ENODEV;
-		}
-	}
-
-	if (hwsampler_setup())
-		return -ENODEV;
-
-	/*
-	 * Query the range for the sampling interval from the
-	 * hardware.
-	 */
-	oprofile_min_interval = hwsampler_query_min_interval();
-	if (oprofile_min_interval == 0)
-		return -ENODEV;
-	oprofile_max_interval = hwsampler_query_max_interval();
-	if (oprofile_max_interval == 0)
-		return -ENODEV;
-
-	/* The initial value should be sane */
-	if (oprofile_hw_interval < oprofile_min_interval)
-		oprofile_hw_interval = oprofile_min_interval;
-	if (oprofile_hw_interval > oprofile_max_interval)
-		oprofile_hw_interval = oprofile_max_interval;
-
-	printk(KERN_INFO "oprofile: System z hardware sampling "
-	       "facility found.\n");
-
-	ops->start = oprofile_hwsampler_start;
-	ops->stop = oprofile_hwsampler_stop;
-
-	return 0;
-}
-
-static void oprofile_hwsampler_exit(void)
-{
-	hwsampler_shutdown();
-}
 
 static int __s390_backtrace(void *data, unsigned long address)
 {
@@ -514,18 +34,9 @@ static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 	ops->backtrace = s390_backtrace;
-
-	/*
-	 * -ENODEV is not reported to the caller.  The module itself
-         * will use the timer mode sampling as fallback and this is
-         * always available.
-	 */
-	hwsampler_available = oprofile_hwsampler_init(ops) == 0;
-
 	return 0;
 	return 0;
 }
 
 void oprofile_arch_exit(void)
 {
-	oprofile_hwsampler_exit();
 }
+ 0 - 21
arch/s390/oprofile/op_counter.h

@@ -1,21 +0,0 @@
-/*
- *   Copyright IBM Corp. 2011
- *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
- *
- * @remark Copyright 2011 OProfile authors
- */
-
-#ifndef OP_COUNTER_H
-#define OP_COUNTER_H
-
-struct op_counter_config {
-	/* `enabled' maps to the hwsampler_file variable.  */
-	/* `count' maps to the oprofile_hw_interval variable.  */
-	/* `event' and `unit_mask' are unused. */
-	unsigned long kernel;
-	unsigned long user;
-};
-
-extern struct op_counter_config counter_config;
-
-#endif /* OP_COUNTER_H */

+ 3 - 1
arch/s390/pci/pci_dma.c

@@ -226,7 +226,8 @@ static unsigned long __dma_alloc_iommu(struct device *dev,
 	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
 			      PAGE_SIZE) >> PAGE_SHIFT;
 	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
-				start, size, 0, boundary_size, 0);
+				start, size, zdev->start_dma >> PAGE_SHIFT,
+				boundary_size, 0);
 }
 
 static unsigned long dma_alloc_iommu(struct device *dev, int size)
@@ -469,6 +470,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 	 * Also set zdev->end_dma to the actual end address of the usable
 	 * range, instead of the theoretical maximum as reported by hardware.
 	 */
+	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
 	zdev->iommu_size = min3((u64) high_memory,
 				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
 				zdev->end_dma - zdev->start_dma + 1);

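The second hunk matters because the first one turns zdev->start_dma into a page-granular offset into the IOMMU bitmap, which only works out if start_dma is rounded up to a page boundary first. A minimal sketch of that rounding, assuming 4 KiB pages and a local re-definition mirroring the kernel's PAGE_ALIGN() macro:

#include <assert.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	assert(PAGE_ALIGN(0x2000UL) == 0x2000UL);	/* already aligned */
	assert(PAGE_ALIGN(0x2001UL) == 0x3000UL);	/* rounded up */
	/* an aligned start_dma yields a whole-page offset for the bitmap */
	assert((PAGE_ALIGN(0x2001UL) >> PAGE_SHIFT) == 3);
	return 0;
}
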
+ 1 - 2
arch/s390/pci/pci_event.c

@@ -145,8 +145,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 	default:
 		break;
 	}
-	if (pdev)
-		pci_dev_put(pdev);
+	pci_dev_put(pdev);
 }
 
 void zpci_event_availability(void *data)

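The check was unnecessary because pci_dev_put(), like kfree(), treats a NULL argument as a no-op, so callers can drop the reference unconditionally. A small self-contained sketch of that contract, using a hypothetical obj_put() helper:

#include <stdio.h>

struct obj { int refcount; };

/* hypothetical helper mirroring pci_dev_put()'s contract:
 * NULL is silently ignored, so callers need no NULL check */
static void obj_put(struct obj *o)
{
	if (!o)
		return;
	if (--o->refcount == 0)
		printf("released\n");
}

int main(void)
{
	struct obj o = { .refcount = 1 };

	obj_put(NULL);	/* safe no-op */
	obj_put(&o);	/* drops the last reference */
	return 0;
}
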
+ 11 - 1
arch/s390/pci/pci_insn.c

@@ -99,7 +99,7 @@ void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
 }
 
 /* PCI Load */
-static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
+static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
 {
 	register u64 __req asm("2") = req;
 	register u64 __offset asm("3") = offset;
@@ -116,6 +116,16 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
 		:  "d" (__offset)
 		: "cc");
 	*status = __req >> 24 & 0xff;
+	*data = __data;
+	return cc;
+}
+
+static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
+{
+	u64 __data;
+	int cc;
+
+	cc = ____pcilg(&__data, req, offset, status);
 	if (!cc)
 		*data = __data;
 

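This is the same basic-block cleanup the merge description mentions as a kcov prerequisite: the inline assembly moves into a straight-line inner helper (____pcilg) and the conditional stays in the wrapper, so compiler instrumentation never lands between the asm statement and its outputs. A portable sketch of the pattern, with hypothetical helper names and an empty asm standing in for the privileged instruction:

/* hypothetical names; the empty asm stands in for the instruction
 * that sets a condition code */
static inline int inner_op(unsigned long *data)
{
	unsigned long val = 0;
	int cc = 0;

	asm volatile("" : "+r" (val), "+r" (cc));	/* straight-line only */
	*data = val;
	return cc;
}

static inline int outer_op(unsigned long *data)
{
	unsigned long val;
	int cc = inner_op(&val);

	if (!cc)	/* the branch lives outside the asm-bearing block */
		*data = val;
	return cc;
}

int main(void)
{
	unsigned long v;

	return outer_op(&v);
}
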
+ 13 - 0
drivers/base/topology.c

@@ -77,6 +77,14 @@ static DEVICE_ATTR_RO(book_siblings);
 static DEVICE_ATTR_RO(book_siblings_list);
 #endif
 
+#ifdef CONFIG_SCHED_DRAWER
+define_id_show_func(drawer_id);
+static DEVICE_ATTR_RO(drawer_id);
+define_siblings_show_func(drawer_siblings, drawer_cpumask);
+static DEVICE_ATTR_RO(drawer_siblings);
+static DEVICE_ATTR_RO(drawer_siblings_list);
+#endif
+
 static struct attribute *default_attrs[] = {
 	&dev_attr_physical_package_id.attr,
 	&dev_attr_core_id.attr,
@@ -88,6 +96,11 @@ static struct attribute *default_attrs[] = {
 	&dev_attr_book_id.attr,
 	&dev_attr_book_siblings.attr,
 	&dev_attr_book_siblings_list.attr,
+#endif
+#ifdef CONFIG_SCHED_DRAWER
+	&dev_attr_drawer_id.attr,
+	&dev_attr_drawer_siblings.attr,
+	&dev_attr_drawer_siblings_list.attr,
 #endif
 	NULL
 };

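With CONFIG_SCHED_DRAWER each CPU gains drawer_id, drawer_siblings and drawer_siblings_list next to the existing book attributes. A quick userspace sketch that reads the new attribute (the file only exists on kernels built with this option):

#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/topology/drawer_id", "r");

	if (!f)
		return 1;	/* kernel without CONFIG_SCHED_DRAWER */
	if (fgets(buf, sizeof(buf), f))
		printf("cpu0 drawer_id: %s", buf);
	fclose(f);
	return 0;
}
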
+ 13 - 0
drivers/crypto/Kconfig

@@ -159,6 +159,19 @@ config CRYPTO_GHASH_S390
 
 	  It is available as of z196.
 
+config CRYPTO_CRC32_S390
+	tristate "CRC-32 algorithms"
+	depends on S390
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  Select this option if you want to use hardware accelerated
+	  implementations of CRC algorithms.  With this option, you
+	  can optimize the computation of CRC-32 (IEEE 802.3 Ethernet)
+	  and CRC-32C (Castagnoli).
+
+	  It is available with IBM z13 or later.
+
 config CRYPTO_DEV_MV_CESA
 	tristate "Marvell's Cryptographic Engine"
 	depends on PLAT_ORION

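Consumers do not call the s390 module directly; they keep requesting the generic "crc32"/"crc32c" algorithm names and the crypto API selects the accelerated implementation by priority. A kernel-side sketch of the usual shash calling sequence, assuming a generic caller not tied to this driver:

#include <crypto/hash.h>
#include <linux/slab.h>

static int crc32_digest(const u8 *buf, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int ret;

	tfm = crypto_alloc_shash("crc32", 0, 0);	/* or "crc32c" */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;

	ret = crypto_shash_digest(desc, buf, len, out);	/* 4-byte result */
	kfree(desc);
	crypto_free_shash(tfm);
	return ret;
}
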
+ 2 - 2
drivers/s390/block/dasd_eckd.c

@@ -228,7 +228,7 @@ check_XRC (struct ccw1         *de_ccw,
 	data->ga_extended |= 0x08; /* switch on 'Time Stamp Valid'   */
 	data->ga_extended |= 0x02; /* switch on 'Extended Parameter' */
 
-	rc = get_sync_clock(&data->ep_sys_time);
+	rc = get_phys_clock(&data->ep_sys_time);
 	/* Ignore return code if sync clock is switched off. */
 	if (rc == -EOPNOTSUPP || rc == -EACCES)
 		rc = 0;
@@ -339,7 +339,7 @@ static int check_XRC_on_prefix(struct PFX_eckd_data *pfxdata,
 	pfxdata->define_extent.ga_extended |= 0x02; /* 'Extended Parameter' */
 	pfxdata->validity.time_stamp = 1;	    /* 'Time Stamp Valid'   */
 
-	rc = get_sync_clock(&pfxdata->define_extent.ep_sys_time);
+	rc = get_phys_clock(&pfxdata->define_extent.ep_sys_time);
 	/* Ignore return code if sync clock is switched off. */
 	if (rc == -EOPNOTSUPP || rc == -EACCES)
 		rc = 0;

+ 3 - 12
drivers/s390/char/keyboard.c

@@ -438,18 +438,9 @@ do_kdgkb_ioctl(struct kbd_data *kbd, struct kbsentry __user *u_kbs,
 			return -EFAULT;
 		if (len > sizeof(u_kbs->kb_string))
 			return -EINVAL;
-		p = kmalloc(len, GFP_KERNEL);
-		if (!p)
-			return -ENOMEM;
-		if (copy_from_user(p, u_kbs->kb_string, len)) {
-			kfree(p);
-			return -EFAULT;
-		}
-		/*
-		 * Make sure the string is terminated by 0. User could have
-		 * modified it between us running strnlen_user() and copying it.
-		 */
-		p[len - 1] = 0;
+		p = memdup_user_nul(u_kbs->kb_string, len);
+		if (IS_ERR(p))
+			return PTR_ERR(p);
 		kfree(kbd->func_table[kb_func]);
 		kbd->func_table[kb_func] = p;
 		break;

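memdup_user_nul() folds the removed allocate/copy/terminate sequence into one call: roughly, it allocates len + 1 bytes, copies len bytes from userspace and always writes the terminator, so a racing writer cannot leave the string unterminated. Schematically (see mm/util.c for the real thing):

/* schematic of memdup_user_nul() */
void *memdup_user_nul(const void __user *src, size_t len)
{
	char *p;

	p = kmalloc(len + 1, GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);
	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}
	p[len] = '\0';	/* terminator guaranteed */
	return p;
}
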
+ 2 - 1
drivers/s390/char/sclp_con.c

@@ -319,7 +319,8 @@ sclp_console_init(void)
 	int i;
 	int rc;
 
-	if (!CONSOLE_IS_SCLP)
+	/* SCLP consoles are handled together */
+	if (!(CONSOLE_IS_SCLP || CONSOLE_IS_VT220))
 		return 0;
 	rc = sclp_rw_init();
 	if (rc)

+ 1 - 1
drivers/s390/char/sclp_config.c

@@ -47,7 +47,7 @@ static void sclp_cpu_capability_notify(struct work_struct *work)
 	int cpu;
 	struct device *dev;
 
-	s390_adjust_jiffies();
+	s390_update_cpu_mhz();
 	pr_info("CPU capability may have changed\n");
 	get_online_cpus();
 	for_each_online_cpu(cpu) {

+ 1 - 1
drivers/s390/char/zcore.c

@@ -185,7 +185,7 @@ static ssize_t zcore_reipl_write(struct file *filp, const char __user *buf,
 {
 	if (ipl_block) {
 		diag308(DIAG308_SET, ipl_block);
-		diag308(DIAG308_IPL, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 	}
 	return count;
 }

+ 9 - 13
drivers/s390/cio/chp.c

@@ -47,8 +47,6 @@ static DEFINE_MUTEX(info_lock);
 /* Time after which channel-path status may be outdated. */
 static unsigned long chp_info_expires;
 
-/* Workqueue to perform pending configure tasks. */
-static struct workqueue_struct *chp_wq;
 static struct work_struct cfg_work;
 
 /* Wait queue for configure completion events. */
@@ -428,11 +426,14 @@ int chp_update_desc(struct channel_path *chp)
 	if (rc)
 		return rc;
 
-	rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
-	if (rc)
-		return rc;
+	/*
+	 * Fetching the following data is optional. Not all machines or
+	 * hypervisors implement the required chsc commands.
+	 */
+	chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
+	chsc_get_channel_measurement_chars(chp);
 
-	return chsc_get_channel_measurement_chars(chp);
+	return 0;
 }
 
 /**
@@ -714,7 +715,7 @@ static void cfg_func(struct work_struct *work)
 		wake_up_interruptible(&cfg_wait_queue);
 		return;
 	}
-	queue_work(chp_wq, &cfg_work);
+	schedule_work(&cfg_work);
 }
 
 /**
@@ -732,7 +733,7 @@ void chp_cfg_schedule(struct chp_id chpid, int configure)
 	cfg_set_task(chpid, configure ? cfg_configure : cfg_deconfigure);
 	cfg_busy = 1;
 	mutex_unlock(&cfg_lock);
-	queue_work(chp_wq, &cfg_work);
+	schedule_work(&cfg_work);
 }
 
 /**
@@ -766,11 +767,6 @@ static int __init chp_init(void)
 	ret = crw_register_handler(CRW_RSC_CPATH, chp_process_crw);
 	if (ret)
 		return ret;
-	chp_wq = create_singlethread_workqueue("cio_chp");
-	if (!chp_wq) {
-		crw_unregister_handler(CRW_RSC_CPATH);
-		return -ENOMEM;
-	}
 	INIT_WORK(&cfg_work, cfg_func);
 	init_waitqueue_head(&cfg_wait_queue);
 	if (info_update())

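A dedicated single-threaded workqueue buys nothing here: cfg_work is a single work item, and one work_struct is never executed reentrantly, so schedule_work() on the shared system workqueue gives the same ordering with less setup and teardown. A sketch of the resulting pattern, with a hypothetical work item:

#include <linux/workqueue.h>

static void cfg_fn(struct work_struct *work)
{
	/* process pending configure tasks */
}
static DECLARE_WORK(demo_cfg_work, cfg_fn);

static void demo_kick(void)
{
	schedule_work(&demo_cfg_work);	/* queues on the shared system_wq */
}
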
+ 1 - 1
drivers/s390/cio/chp.h

@@ -4,7 +4,7 @@
  */
 
 #ifndef S390_CHP_H
-#define S390_CHP_H S390_CHP_H
+#define S390_CHP_H
 
 #include <linux/types.h>
 #include <linux/device.h>

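The guard macro needs no expansion text; defining it is enough for the #ifndef test. The fix restores the conventional shape:

#ifndef EXAMPLE_H
#define EXAMPLE_H		/* no replacement text needed */

/* declarations ... */

#endif /* EXAMPLE_H */
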
+ 14 - 11
drivers/s390/cio/chsc.c

@@ -907,7 +907,8 @@ int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt,
 	struct chsc_scpd *scpd_area;
 	int ccode, ret;
 
-	if ((rfmt == 1) && !css_general_characteristics.fcs)
+	if ((rfmt == 1 || rfmt == 0) && c == 1 &&
+	    !css_general_characteristics.fcs)
 		return -EINVAL;
 	if ((rfmt == 2) && !css_general_characteristics.cib)
 		return -EINVAL;
@@ -939,7 +940,6 @@ EXPORT_SYMBOL_GPL(chsc_determine_channel_path_desc);
 int chsc_determine_base_channel_path_desc(struct chp_id chpid,
 					  struct channel_path_desc *desc)
 {
-	struct chsc_response_struct *chsc_resp;
 	struct chsc_scpd *scpd_area;
 	unsigned long flags;
 	int ret;
@@ -949,8 +949,8 @@ int chsc_determine_base_channel_path_desc(struct chp_id chpid,
 	ret = chsc_determine_channel_path_desc(chpid, 0, 0, 0, 0, scpd_area);
 	if (ret)
 		goto out;
-	chsc_resp = (void *)&scpd_area->response;
-	memcpy(desc, &chsc_resp->data, sizeof(*desc));
+
+	memcpy(desc, scpd_area->data, sizeof(*desc));
 out:
 	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
@@ -959,18 +959,17 @@ out:
 int chsc_determine_fmt1_channel_path_desc(struct chp_id chpid,
 					  struct channel_path_desc_fmt1 *desc)
 {
-	struct chsc_response_struct *chsc_resp;
 	struct chsc_scpd *scpd_area;
 	unsigned long flags;
 	int ret;
 
 	spin_lock_irqsave(&chsc_page_lock, flags);
 	scpd_area = chsc_page;
-	ret = chsc_determine_channel_path_desc(chpid, 0, 0, 1, 0, scpd_area);
+	ret = chsc_determine_channel_path_desc(chpid, 0, 1, 1, 0, scpd_area);
 	if (ret)
 		goto out;
-	chsc_resp = (void *)&scpd_area->response;
-	memcpy(desc, &chsc_resp->data, sizeof(*desc));
+
+	memcpy(desc, scpd_area->data, sizeof(*desc));
 out:
 	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
@@ -1020,7 +1019,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
 	chp->cmg = -1;
 
 	if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
-		return 0;
+		return -EINVAL;
 
 	spin_lock_irq(&chsc_page_lock);
 	memset(chsc_page, 0, PAGE_SIZE);
@@ -1176,7 +1175,7 @@ exit:
 EXPORT_SYMBOL_GPL(css_general_characteristics);
 EXPORT_SYMBOL_GPL(css_chsc_characteristics);
 
-int chsc_sstpc(void *page, unsigned int op, u16 ctrl)
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta)
 {
 	struct {
 		struct chsc_header request;
@@ -1186,7 +1185,9 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl)
 		unsigned int ctrl : 16;
 		unsigned int rsvd2[5];
 		struct chsc_header response;
-		unsigned int rsvd3[7];
+		unsigned int rsvd3[3];
+		u64 clock_delta;
+		unsigned int rsvd4[2];
 	} __attribute__ ((packed)) *rr;
 	int rc;
 
@@ -1200,6 +1201,8 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl)
 	if (rc)
 		return -EIO;
 	rc = (rr->response.code == 0x0001) ? 0 : -EIO;
+	if (clock_delta)
+		*clock_delta = rr->clock_delta;
 	return rc;
 }
 

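The re-described sstpc response area keeps its overall size: rsvd3[7] is 28 bytes, and rsvd3[3] + clock_delta + rsvd4[2] is 12 + 8 + 8 = 28 bytes in a packed struct, so only the naming of the existing bytes changes. A compile-time sketch of that equivalence, with the kernel types approximated by stdint:

#include <stdint.h>

struct old_tail { uint32_t rsvd3[7]; } __attribute__((packed));
struct new_tail {
	uint32_t rsvd3[3];
	uint64_t clock_delta;
	uint32_t rsvd4[2];
} __attribute__((packed));

_Static_assert(sizeof(struct old_tail) == sizeof(struct new_tail),
	       "clock_delta must not change the response layout");

int main(void)
{
	return 0;
}
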
Some files were not shown because too many files changed in this diff