7 years ago · 85eae57bbb
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4391,6 +4391,22 @@ all such vmexits.
 
				 
			
 
				 Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
			
 
				 
			
 
				+7.14 KVM_CAP_S390_HPAGE_1M
			
 
				+
			
 
				+Architectures: s390
			
 
				+Parameters: none
			
 
				+Returns: 0 on success, -EINVAL if hpage module parameter was not set
			
 
				+	 or cmma is enabled, -EBUSY if a VCPU has already been created
			
 
				+
			
 
				+With this capability, KVM support for memory backing with 1 MB huge pages
			
 
				+through hugetlbfs can be enabled for a VM. After the capability is
			
 
				+enabled, cmma can't be enabled anymore and pfmfi and the storage key
			
 
				+interpretation are disabled. If cmma has already been enabled or the
			
 
				+hpage module parameter is not set to 1, -EINVAL is returned.
			
 
				+
			
 
				+While it is generally possible to create a huge page backed VM without
			
 
				+this capability, the VM will not be able to run.
			
 
				+
			
 
				 8. Other capabilities.
			
 
				 ----------------------
			
 
				 
			
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -9,6 +9,14 @@
 
				 #ifndef _ASM_S390_GMAP_H
			
 
				 #define _ASM_S390_GMAP_H
			
 
				 
			
 
				+/* Generic bits for GMAP notification on DAT table entry changes. */
			
 
				+#define GMAP_NOTIFY_SHADOW	0x2
			
 
				+#define GMAP_NOTIFY_MPROT	0x1
			
 
				+
			
 
				+/* Status bits only for huge segment entries */
			
 
				+#define _SEGMENT_ENTRY_GMAP_IN		0x8000	/* invalidation notify bit */
			
 
				+#define _SEGMENT_ENTRY_GMAP_UC		0x4000	/* dirty (migration) */
			
 
				+
			
 
				 /**
			
 
				  * struct gmap_struct - guest address space
			
 
				  * @list: list head for the mm->context gmap list
			
@@ -132,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
 
				 int gmap_mprotect_notify(struct gmap *, unsigned long start,
			
 
				 			 unsigned long len, int prot);
			
 
				 
			
 
				+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
			
 
				+			     unsigned long gaddr, unsigned long vmaddr);
			
 
				 #endif /* _ASM_S390_GMAP_H */
			
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -37,7 +37,10 @@ static inline int prepare_hugepage_range(struct file *file,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-#define arch_clear_hugepage_flags(page)		do { } while (0)
			
 
				+static inline void arch_clear_hugepage_flags(struct page *page)
			
 
				+{
			
 
				+	clear_bit(PG_arch_1, &page->flags);
			
 
				+}
			
 
				 
			
 
				 static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
			
 
				 				  pte_t *ptep, unsigned long sz)
			
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -269,6 +269,7 @@ struct kvm_s390_sie_block {
 
				 	__u8	reserved1c0[8];		/* 0x01c0 */
			
 
				 #define ECD_HOSTREGMGMT	0x20000000
			
 
				 #define ECD_MEF		0x08000000
			
 
				+#define ECD_ETOKENF	0x02000000
			
 
				 	__u32	ecd;			/* 0x01c8 */
			
 
				 	__u8	reserved1cc[18];	/* 0x01cc */
			
 
				 	__u64	pp;			/* 0x01de */
			
@@ -655,6 +656,7 @@ struct kvm_vcpu_arch {
 
				 	seqcount_t cputm_seqcount;
			
 
				 	__u64 cputm_start;
			
 
				 	bool gs_enabled;
			
 
				+	bool skey_enabled;
			
 
				 };
			
 
				 
			
 
				 struct kvm_vm_stat {
			
@@ -793,12 +795,6 @@ struct kvm_s390_vsie {
 
				 	struct page *pages[KVM_MAX_VCPUS];
			
 
				 };
			
 
				 
			
 
				-struct kvm_s390_migration_state {
			
 
				-	unsigned long bitmap_size;	/* in bits (number of guest pages) */
			
 
				-	atomic64_t dirty_pages;		/* number of dirty pages */
			
 
				-	unsigned long *pgste_bitmap;
			
 
				-};
			
 
				-
			
 
				 struct kvm_arch{
			
 
				 	void *sca;
			
 
				 	int use_esca;
			
@@ -828,7 +824,8 @@ struct kvm_arch{
 
				 	struct kvm_s390_vsie vsie;
			
 
				 	u8 epdx;
			
 
				 	u64 epoch;
			
 
				-	struct kvm_s390_migration_state *migration_state;
			
 
				+	int migration_mode;
			
 
				+	atomic64_t cmma_dirty_pages;
			
 
				 	/* subset of available cpu features enabled by user space */
			
 
				 	DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
			
 
				 	struct kvm_s390_gisa *gisa;
			
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -24,6 +24,8 @@ typedef struct {
 
				 	unsigned int uses_skeys:1;
			
 
				 	/* The mmu context uses CMM. */
			
 
				 	unsigned int uses_cmm:1;
			
 
				+	/* The gmaps associated with this context are allowed to use huge pages. */
			
 
				+	unsigned int allow_gmap_hpage_1m:1;
			
 
				 } mm_context_t;
			
 
				 
			
 
				 #define INIT_MM_CONTEXT(name)						   \
			
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk,
 
				 	mm->context.has_pgste = 0;
			
 
				 	mm->context.uses_skeys = 0;
			
 
				 	mm->context.uses_cmm = 0;
			
 
				+	mm->context.allow_gmap_hpage_1m = 0;
			
 
				 #endif
			
 
				 	switch (mm->context.asce_limit) {
			
 
				 	case _REGION2_SIZE:
			
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -268,8 +268,10 @@ static inline int is_module_addr(void *addr)
 
				 #define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
			
 
				 
			
 
				 /* Bits in the segment table entry */
			
 
				-#define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL
			
 
				-#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
			
 
				+#define _SEGMENT_ENTRY_BITS			0xfffffffffffffe33UL
			
 
				+#define _SEGMENT_ENTRY_BITS_LARGE		0xfffffffffff0ff33UL
			
 
				+#define _SEGMENT_ENTRY_HARDWARE_BITS		0xfffffffffffffe30UL
			
 
				+#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE	0xfffffffffff00730UL
			
 
				 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
			
 
				 #define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* page table origin		    */
			
 
				 #define _SEGMENT_ENTRY_PROTECT	0x200	/* segment protection bit	    */
			
@@ -1101,7 +1103,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
 
				 		    pte_t *sptep, pte_t *tptep, pte_t pte);
			
 
				 void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
			
 
				 
			
 
				-bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
			
 
				+bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address,
			
 
				+			    pte_t *ptep);
			
 
				 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			
 
				 			  unsigned char key, bool nq);
			
 
				 int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			
@@ -1116,6 +1119,10 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
 
				 int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
			
 
				 int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
			
 
				 			unsigned long *oldpte, unsigned long *oldpgste);
			
 
				+void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr);
			
 
				+void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);
			
 
				+void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
			
 
				+void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
			
 
				 
			
 
				 /*
			
 
				  * Certain architectures need to do special things when PTEs
			
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -4,7 +4,7 @@
 
				 /*
			
 
				  * KVM s390 specific structures and definitions
			
 
				  *
			
 
				- * Copyright IBM Corp. 2008
			
 
				+ * Copyright IBM Corp. 2008, 2018
			
 
				  *
			
 
				  *    Author(s): Carsten Otte <cotte@de.ibm.com>
			
 
				  *               Christian Borntraeger <borntraeger@de.ibm.com>
			
@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
 
				 #define KVM_SYNC_FPRS   (1UL << 8)
			
 
				 #define KVM_SYNC_GSCB   (1UL << 9)
			
 
				 #define KVM_SYNC_BPBC   (1UL << 10)
			
 
				+#define KVM_SYNC_ETOKEN (1UL << 11)
			
 
				 /* length and alignment of the sdnx as a power of two */
			
 
				 #define SDNXC 8
			
 
				 #define SDNXL (1UL << SDNXC)
			
@@ -258,6 +259,8 @@ struct kvm_sync_regs {
 
				 		struct {
			
 
				 			__u64 reserved1[2];
			
 
				 			__u64 gscb[4];
			
 
				+			__u64 etoken;
			
 
				+			__u64 etoken_extension;
			
 
				 		};
			
 
				 	};
			
 
				 };
			
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -172,6 +172,10 @@ static int nested;
 
				 module_param(nested, int, S_IRUGO);
			
 
				 MODULE_PARM_DESC(nested, "Nested virtualization support");
			
 
				 
			
 
				+/* allow 1m huge page guest backing, if !nested */
			
 
				+static int hpage;
			
 
				+module_param(hpage, int, 0444);
			
 
				+MODULE_PARM_DESC(hpage, "1m huge page backing support");
			
 
				 
			
 
				 /*
			
 
				  * For now we handle at most 16 double words as this is what the s390 base
			
@@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 
				 	case KVM_CAP_S390_AIS_MIGRATION:
			
 
				 		r = 1;
			
 
				 		break;
			
 
				+	case KVM_CAP_S390_HPAGE_1M:
			
 
				+		r = 0;
			
 
				+		if (hpage)
			
 
				+			r = 1;
			
 
				+		break;
			
 
				 	case KVM_CAP_S390_MEM_OP:
			
 
				 		r = MEM_OP_MAX_SIZE;
			
 
				 		break;
			
@@ -511,19 +520,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 
				 }
			
 
				 
			
 
				 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
			
 
				-					struct kvm_memory_slot *memslot)
			
 
				+				    struct kvm_memory_slot *memslot)
			
 
				 {
			
 
				+	int i;
			
 
				 	gfn_t cur_gfn, last_gfn;
			
 
				-	unsigned long address;
			
 
				+	unsigned long gaddr, vmaddr;
			
 
				 	struct gmap *gmap = kvm->arch.gmap;
			
 
				+	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
			
 
				 
			
 
				-	/* Loop over all guest pages */
			
 
				+	/* Loop over all guest segments */
			
 
				+	cur_gfn = memslot->base_gfn;
			
 
				 	last_gfn = memslot->base_gfn + memslot->npages;
			
 
				-	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
			
 
				-		address = gfn_to_hva_memslot(memslot, cur_gfn);
			
 
				+	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
			
 
				+		gaddr = gfn_to_gpa(cur_gfn);
			
 
				+		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
			
 
				+		if (kvm_is_error_hva(vmaddr))
			
 
				+			continue;
			
 
				+
			
 
				+		bitmap_zero(bitmap, _PAGE_ENTRIES);
			
 
				+		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
			
 
				+		for (i = 0; i < _PAGE_ENTRIES; i++) {
			
 
				+			if (test_bit(i, bitmap))
			
 
				+				mark_page_dirty(kvm, cur_gfn + i);
			
 
				+		}
			
 
				 
			
 
				-		if (test_and_clear_guest_dirty(gmap->mm, address))
			
 
				-			mark_page_dirty(kvm, cur_gfn);
			
 
				 		if (fatal_signal_pending(current))
			
 
				 			return;
			
 
				 		cond_resched();
			
@@ -667,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 
				 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			
 
				 			 r ? "(not available)" : "(success)");
			
 
				 		break;
			
 
				+	case KVM_CAP_S390_HPAGE_1M:
			
 
				+		mutex_lock(&kvm->lock);
			
 
				+		if (kvm->created_vcpus)
			
 
				+			r = -EBUSY;
			
 
				+		else if (!hpage || kvm->arch.use_cmma)
			
 
				+			r = -EINVAL;
			
 
				+		else {
			
 
				+			r = 0;
			
 
				+			kvm->mm->context.allow_gmap_hpage_1m = 1;
			
 
				+			/*
			
 
				+			 * We might have to create fake 4k page
			
 
				+			 * tables. So that the hardware never uses stale
			
 
				+			 * PGSTEs, we emulate skey and PFMF instructions.
			
 
				+			 */
			
 
				+			kvm->arch.use_skf = 0;
			
 
				+			kvm->arch.use_pfmfi = 0;
			
 
				+		}
			
 
				+		mutex_unlock(&kvm->lock);
			
 
				+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			
 
				+			 r ? "(not available)" : "(success)");
			
 
				+		break;
			
 
				 	case KVM_CAP_S390_USER_STSI:
			
 
				 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
			
 
				 		kvm->arch.user_stsi = 1;
			
@@ -714,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 
				 		if (!sclp.has_cmma)
			
 
				 			break;
			
 
				 
			
 
				-		ret = -EBUSY;
			
 
				 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
			
 
				 		mutex_lock(&kvm->lock);
			
 
				-		if (!kvm->created_vcpus) {
			
 
				+		if (kvm->created_vcpus)
			
 
				+			ret = -EBUSY;
			
 
				+		else if (kvm->mm->context.allow_gmap_hpage_1m)
			
 
				+			ret = -EINVAL;
			
 
				+		else {
			
 
				 			kvm->arch.use_cmma = 1;
			
 
				 			/* Not compatible with cmma. */
			
 
				 			kvm->arch.use_pfmfi = 0;
			
@@ -862,54 +906,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 
				  */
			
 
				 static int kvm_s390_vm_start_migration(struct kvm *kvm)
			
 
				 {
			
 
				-	struct kvm_s390_migration_state *mgs;
			
 
				 	struct kvm_memory_slot *ms;
			
 
				-	/* should be the only one */
			
 
				 	struct kvm_memslots *slots;
			
 
				-	unsigned long ram_pages;
			
 
				+	unsigned long ram_pages = 0;
			
 
				 	int slotnr;
			
 
				 
			
 
				 	/* migration mode already enabled */
			
 
				-	if (kvm->arch.migration_state)
			
 
				+	if (kvm->arch.migration_mode)
			
 
				 		return 0;
			
 
				-
			
 
				 	slots = kvm_memslots(kvm);
			
 
				 	if (!slots || !slots->used_slots)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
			
 
				-	if (!mgs)
			
 
				-		return -ENOMEM;
			
 
				-	kvm->arch.migration_state = mgs;
			
 
				-
			
 
				-	if (kvm->arch.use_cmma) {
			
 
				+	if (!kvm->arch.use_cmma) {
			
 
				+		kvm->arch.migration_mode = 1;
			
 
				+		return 0;
			
 
				+	}
			
 
				+	/* mark all the pages in active slots as dirty */
			
 
				+	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			
 
				+		ms = slots->memslots + slotnr;
			
 
				 		/*
			
 
				-		 * Get the first slot. They are reverse sorted by base_gfn, so
			
 
				-		 * the first slot is also the one at the end of the address
			
 
				-		 * space. We have verified above that at least one slot is
			
 
				-		 * present.
			
 
				+		 * The second half of the bitmap is only used on x86,
			
 
				+		 * and would be wasted otherwise, so we put it to good
			
 
				+		 * use here to keep track of the state of the storage
			
 
				+		 * attributes.
			
 
				 		 */
			
 
				-		ms = slots->memslots;
			
 
				-		/* round up so we only use full longs */
			
 
				-		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
			
 
				-		/* allocate enough bytes to store all the bits */
			
 
				-		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
			
 
				-		if (!mgs->pgste_bitmap) {
			
 
				-			kfree(mgs);
			
 
				-			kvm->arch.migration_state = NULL;
			
 
				-			return -ENOMEM;
			
 
				-		}
			
 
				-
			
 
				-		mgs->bitmap_size = ram_pages;
			
 
				-		atomic64_set(&mgs->dirty_pages, ram_pages);
			
 
				-		/* mark all the pages in active slots as dirty */
			
 
				-		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			
 
				-			ms = slots->memslots + slotnr;
			
 
				-			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
			
 
				-		}
			
 
				-
			
 
				-		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
			
 
				+		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
			
 
				+		ram_pages += ms->npages;
			
 
				 	}
			
 
				+	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
			
 
				+	kvm->arch.migration_mode = 1;
			
 
				+	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -919,21 +946,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
 
				  */
			
 
				 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
			
 
				 {
			
 
				-	struct kvm_s390_migration_state *mgs;
			
 
				-
			
 
				 	/* migration mode already disabled */
			
 
				-	if (!kvm->arch.migration_state)
			
 
				+	if (!kvm->arch.migration_mode)
			
 
				 		return 0;
			
 
				-	mgs = kvm->arch.migration_state;
			
 
				-	kvm->arch.migration_state = NULL;
			
 
				-
			
 
				-	if (kvm->arch.use_cmma) {
			
 
				+	kvm->arch.migration_mode = 0;
			
 
				+	if (kvm->arch.use_cmma)
			
 
				 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
			
 
				-		/* We have to wait for the essa emulation to finish */
			
 
				-		synchronize_srcu(&kvm->srcu);
			
 
				-		vfree(mgs->pgste_bitmap);
			
 
				-	}
			
 
				-	kfree(mgs);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -961,7 +979,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
 
				 static int kvm_s390_vm_get_migration(struct kvm *kvm,
			
 
				 				     struct kvm_device_attr *attr)
			
 
				 {
			
 
				-	u64 mig = (kvm->arch.migration_state != NULL);
			
 
				+	u64 mig = kvm->arch.migration_mode;
			
 
				 
			
 
				 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
			
 
				 		return -ENXIO;
			
@@ -1540,6 +1558,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 
				 	uint8_t *keys;
			
 
				 	uint64_t hva;
			
 
				 	int srcu_idx, i, r = 0;
			
 
				+	bool unlocked;
			
 
				 
			
 
				 	if (args->flags != 0)
			
 
				 		return -EINVAL;
			
@@ -1564,9 +1583,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 
				 	if (r)
			
 
				 		goto out;
			
 
				 
			
 
				+	i = 0;
			
 
				 	down_read(&current->mm->mmap_sem);
			
 
				 	srcu_idx = srcu_read_lock(&kvm->srcu);
			
 
				-	for (i = 0; i < args->count; i++) {
			
 
				+        while (i < args->count) {
			
 
				+		unlocked = false;
			
 
				 		hva = gfn_to_hva(kvm, args->start_gfn + i);
			
 
				 		if (kvm_is_error_hva(hva)) {
			
 
				 			r = -EFAULT;
			
@@ -1580,8 +1601,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 
				 		}
			
 
				 
			
 
				 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
			
 
				-		if (r)
			
 
				-			break;
			
 
				+		if (r) {
			
 
				+			r = fixup_user_fault(current, current->mm, hva,
			
 
				+					     FAULT_FLAG_WRITE, &unlocked);
			
 
				+			if (r)
			
 
				+				break;
			
 
				+		}
			
 
				+		if (!r)
			
 
				+			i++;
			
 
				 	}
			
 
				 	srcu_read_unlock(&kvm->srcu, srcu_idx);
			
 
				 	up_read(&current->mm->mmap_sem);
			
@@ -1599,6 +1626,134 @@ out:
 
				 /* for consistency */
			
 
				 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
			
 
				 
			
 
				+/*
			
 
				+ * Similar to gfn_to_memslot, but returns the index of a memslot also when the
			
 
				+ * address falls in a hole. In that case the index of one of the memslots
			
 
				+ * bordering the hole is returned.
			
 
				+ */
			
 
				+static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
			
 
				+{
			
 
				+	int start = 0, end = slots->used_slots;
			
 
				+	int slot = atomic_read(&slots->lru_slot);
			
 
				+	struct kvm_memory_slot *memslots = slots->memslots;
			
 
				+
			
 
				+	if (gfn >= memslots[slot].base_gfn &&
			
 
				+	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
			
 
				+		return slot;
			
 
				+
			
 
				+	while (start < end) {
			
 
				+		slot = start + (end - start) / 2;
			
 
				+
			
 
				+		if (gfn >= memslots[slot].base_gfn)
			
 
				+			end = slot;
			
 
				+		else
			
 
				+			start = slot + 1;
			
 
				+	}
			
 
				+
			
 
				+	if (gfn >= memslots[start].base_gfn &&
			
 
				+	    gfn < memslots[start].base_gfn + memslots[start].npages) {
			
 
				+		atomic_set(&slots->lru_slot, start);
			
 
				+	}
			
 
				+
			
 
				+	return start;
			
 
				+}
			
 
				+
			
 
				+static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			
 
				+			      u8 *res, unsigned long bufsize)
			
 
				+{
			
 
				+	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
			
 
				+
			
 
				+	args->count = 0;
			
 
				+	while (args->count < bufsize) {
			
 
				+		hva = gfn_to_hva(kvm, cur_gfn);
			
 
				+		/*
			
 
				+		 * We return an error if the first value was invalid, but we
			
 
				+		 * return successfully if at least one value was copied.
			
 
				+		 */
			
 
				+		if (kvm_is_error_hva(hva))
			
 
				+			return args->count ? 0 : -EFAULT;
			
 
				+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			
 
				+			pgstev = 0;
			
 
				+		res[args->count++] = (pgstev >> 24) & 0x43;
			
 
				+		cur_gfn++;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
			
 
				+					      unsigned long cur_gfn)
			
 
				+{
			
 
				+	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
			
 
				+	struct kvm_memory_slot *ms = slots->memslots + slotidx;
			
 
				+	unsigned long ofs = cur_gfn - ms->base_gfn;
			
 
				+
			
 
				+	if (ms->base_gfn + ms->npages <= cur_gfn) {
			
 
				+		slotidx--;
			
 
				+		/* If we are above the highest slot, wrap around */
			
 
				+		if (slotidx < 0)
			
 
				+			slotidx = slots->used_slots - 1;
			
 
				+
			
 
				+		ms = slots->memslots + slotidx;
			
 
				+		ofs = 0;
			
 
				+	}
			
 
				+	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
			
 
				+	while ((slotidx > 0) && (ofs >= ms->npages)) {
			
 
				+		slotidx--;
			
 
				+		ms = slots->memslots + slotidx;
			
 
				+		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
			
 
				+	}
			
 
				+	return ms->base_gfn + ofs;
			
 
				+}
			
 
				+
			
 
				+static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			
 
				+			     u8 *res, unsigned long bufsize)
			
 
				+{
			
 
				+	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
			
 
				+	struct kvm_memslots *slots = kvm_memslots(kvm);
			
 
				+	struct kvm_memory_slot *ms;
			
 
				+
			
 
				+	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
			
 
				+	ms = gfn_to_memslot(kvm, cur_gfn);
			
 
				+	args->count = 0;
			
 
				+	args->start_gfn = cur_gfn;
			
 
				+	if (!ms)
			
 
				+		return 0;
			
 
				+	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
			
 
				+	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
			
 
				+
			
 
				+	while (args->count < bufsize) {
			
 
				+		hva = gfn_to_hva(kvm, cur_gfn);
			
 
				+		if (kvm_is_error_hva(hva))
			
 
				+			return 0;
			
 
				+		/* Decrement only if we actually flipped the bit to 0 */
			
 
				+		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			
 
				+			atomic64_dec(&kvm->arch.cmma_dirty_pages);
			
 
				+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			
 
				+			pgstev = 0;
			
 
				+		/* Save the value */
			
 
				+		res[args->count++] = (pgstev >> 24) & 0x43;
			
 
				+		/* If the next bit is too far away, stop. */
			
 
				+		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			
 
				+			return 0;
			
 
				+		/* If we reached the previous "next", find the next one */
			
 
				+		if (cur_gfn == next_gfn)
			
 
				+			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
			
 
				+		/* Reached the end of memory or of the buffer, stop */
			
 
				+		if ((next_gfn >= mem_end) ||
			
 
				+		    (next_gfn - args->start_gfn >= bufsize))
			
 
				+			return 0;
			
 
				+		cur_gfn++;
			
 
				+		/* Reached the end of the current memslot, take the next one. */
			
 
				+		if (cur_gfn - ms->base_gfn >= ms->npages) {
			
 
				+			ms = gfn_to_memslot(kvm, cur_gfn);
			
 
				+			if (!ms)
			
 
				+				return 0;
			
 
				+		}
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * This function searches for the next page with dirty CMMA attributes, and
			
 
				  * saves the attributes in the buffer up to either the end of the buffer or
			
@@ -1610,22 +1765,18 @@ out:
 
				 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
			
 
				 				  struct kvm_s390_cmma_log *args)
			
 
				 {
			
 
				-	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
			
 
				-	unsigned long bufsize, hva, pgstev, i, next, cur;
			
 
				-	int srcu_idx, peek, r = 0, rr;
			
 
				-	u8 *res;
			
 
				-
			
 
				-	cur = args->start_gfn;
			
 
				-	i = next = pgstev = 0;
			
 
				+	unsigned long bufsize;
			
 
				+	int srcu_idx, peek, ret;
			
 
				+	u8 *values;
			
 
				 
			
 
				-	if (unlikely(!kvm->arch.use_cmma))
			
 
				+	if (!kvm->arch.use_cmma)
			
 
				 		return -ENXIO;
			
 
				 	/* Invalid/unsupported flags were specified */
			
 
				 	if (args->flags & ~KVM_S390_CMMA_PEEK)
			
 
				 		return -EINVAL;
			
 
				 	/* Migration mode query, and we are not doing a migration */
			
 
				 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
			
 
				-	if (!peek && !s)
			
 
				+	if (!peek && !kvm->arch.migration_mode)
			
 
				 		return -EINVAL;
			
 
				 	/* CMMA is disabled or was not used, or the buffer has length zero */
			
 
				 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
			
@@ -1633,74 +1784,35 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
 
				 		memset(args, 0, sizeof(*args));
			
 
				 		return 0;
			
 
				 	}
			
 
				-
			
 
				-	if (!peek) {
			
 
				-		/* We are not peeking, and there are no dirty pages */
			
 
				-		if (!atomic64_read(&s->dirty_pages)) {
			
 
				-			memset(args, 0, sizeof(*args));
			
 
				-			return 0;
			
 
				-		}
			
 
				-		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
			
 
				-				    args->start_gfn);
			
 
				-		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			
 
				-			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
			
 
				-		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			
 
				-			memset(args, 0, sizeof(*args));
			
 
				-			return 0;
			
 
				-		}
			
 
				-		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
			
 
				+	/* We are not peeking, and there are no dirty pages */
			
 
				+	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
			
 
				+		memset(args, 0, sizeof(*args));
			
 
				+		return 0;
			
 
				 	}
			
 
				 
			
 
				-	res = vmalloc(bufsize);
			
 
				-	if (!res)
			
 
				+	values = vmalloc(bufsize);
			
 
				+	if (!values)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				-	args->start_gfn = cur;
			
 
				-
			
 
				 	down_read(&kvm->mm->mmap_sem);
			
 
				 	srcu_idx = srcu_read_lock(&kvm->srcu);
			
 
				-	while (i < bufsize) {
			
 
				-		hva = gfn_to_hva(kvm, cur);
			
 
				-		if (kvm_is_error_hva(hva)) {
			
 
				-			r = -EFAULT;
			
 
				-			break;
			
 
				-		}
			
 
				-		/* decrement only if we actually flipped the bit to 0 */
			
 
				-		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			
 
				-			atomic64_dec(&s->dirty_pages);
			
 
				-		r = get_pgste(kvm->mm, hva, &pgstev);
			
 
				-		if (r < 0)
			
 
				-			pgstev = 0;
			
 
				-		/* save the value */
			
 
				-		res[i++] = (pgstev >> 24) & 0x43;
			
 
				-		/*
			
 
				-		 * if the next bit is too far away, stop.
			
 
				-		 * if we reached the previous "next", find the next one
			
 
				-		 */
			
 
				-		if (!peek) {
			
 
				-			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
			
 
				-				break;
			
 
				-			if (cur == next)
			
 
				-				next = find_next_bit(s->pgste_bitmap,
			
 
				-						     s->bitmap_size, cur + 1);
			
 
				-		/* reached the end of the bitmap or of the buffer, stop */
			
 
				-			if ((next >= s->bitmap_size) ||
			
 
				-			    (next >= args->start_gfn + bufsize))
			
 
				-				break;
			
 
				-		}
			
 
				-		cur++;
			
 
				-	}
			
 
				+	if (peek)
			
 
				+		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
			
 
				+	else
			
 
				+		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
			
 
				 	srcu_read_unlock(&kvm->srcu, srcu_idx);
			
 
				 	up_read(&kvm->mm->mmap_sem);
			
 
				-	args->count = i;
			
 
				-	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
			
 
				 
			
 
				-	rr = copy_to_user((void __user *)args->values, res, args->count);
			
 
				-	if (rr)
			
 
				-		r = -EFAULT;
			
 
				+	if (kvm->arch.migration_mode)
			
 
				+		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
			
 
				+	else
			
 
				+		args->remaining = 0;
			
 
				 
			
 
				-	vfree(res);
			
 
				-	return r;
			
 
				+	if (copy_to_user((void __user *)args->values, values, args->count))
			
 
				+		ret = -EFAULT;
			
 
				+
			
 
				+	vfree(values);
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -2139,10 +2251,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 
				 	kvm_s390_destroy_adapters(kvm);
			
 
				 	kvm_s390_clear_float_irqs(kvm);
			
 
				 	kvm_s390_vsie_destroy(kvm);
			
 
				-	if (kvm->arch.migration_state) {
			
 
				-		vfree(kvm->arch.migration_state->pgste_bitmap);
			
 
				-		kfree(kvm->arch.migration_state);
			
 
				-	}
			
 
				 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
			
 
				 }
			
 
				 
			
@@ -2300,6 +2408,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
				 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
			
 
				 	if (test_kvm_facility(vcpu->kvm, 133))
			
 
				 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
			
 
				+	if (test_kvm_facility(vcpu->kvm, 156))
			
 
				+		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
			
 
				 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
			
 
				 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
			
 
				 	 */
			
@@ -2549,7 +2659,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
				 	}
			
 
				 	if (test_kvm_facility(vcpu->kvm, 139))
			
 
				 		vcpu->arch.sie_block->ecd |= ECD_MEF;
			
 
				-
			
 
				+	if (test_kvm_facility(vcpu->kvm, 156))
			
 
				+		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
			
 
				 	if (vcpu->arch.sie_block->gd) {
			
 
				 		vcpu->arch.sie_block->eca |= ECA_AIV;
			
 
				 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			
@@ -3467,6 +3578,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
				 		}
			
 
				 		preempt_enable();
			
 
				 	}
			
 
				+	/* SIE loads the etoken directly from the SDNX, i.e. from kvm_run */
			
 
				 
			
 
				 	kvm_run->kvm_dirty_regs = 0;
			
 
				 }
			
@@ -3506,7 +3618,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
				 			__ctl_clear_bit(2, 4);
			
 
				 		vcpu->arch.host_gscb = NULL;
			
 
				 	}
			
 
				-
			
 
				+	/* SIE saves the etoken directly into the SDNX, i.e. into kvm_run */
			
 
				 }
			
 
				 
			
 
				 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
			
@@ -4082,6 +4194,11 @@ static int __init kvm_s390_init(void)
 
				 		return -ENODEV;
			
 
				 	}
			
 
				 
			
 
				+	if (nested && hpage) {
			
 
				+		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				 	for (i = 0; i < 16; i++)
			
 
				 		kvm_s390_fac_base[i] |=
			
 
				 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
			
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -205,13 +205,10 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 
				 int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				 	int rc;
			
 
				-	struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
			
 
				 
			
 
				 	trace_kvm_s390_skey_related_inst(vcpu);
			
 
				 	/* Already enabled? */
			
 
				-	if (vcpu->kvm->arch.use_skf &&
			
 
				-	    !(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
			
 
				-	    !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
			
 
				+	if (vcpu->arch.skey_enabled)
			
 
				 		return 0;
			
 
				 
			
 
				 	rc = s390_enable_skey();
			
@@ -222,9 +219,10 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
 
				 	if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
			
 
				 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
			
 
				 	if (!vcpu->kvm->arch.use_skf)
			
 
				-		sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
			
 
				+		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
			
 
				 	else
			
 
				-		sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
			
 
				+		vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
			
 
				+	vcpu->arch.skey_enabled = true;
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -246,9 +244,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
 
				 
			
 
				 static int handle_iske(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				-	unsigned long addr;
			
 
				+	unsigned long gaddr, vmaddr;
			
 
				 	unsigned char key;
			
 
				 	int reg1, reg2;
			
 
				+	bool unlocked;
			
 
				 	int rc;
			
 
				 
			
 
				 	vcpu->stat.instruction_iske++;
			
@@ -262,18 +261,28 @@ static int handle_iske(struct kvm_vcpu *vcpu)
 
				 
			
 
				 	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
			
 
				 
			
 
				-	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
			
 
				-	addr = kvm_s390_logical_to_effective(vcpu, addr);
			
 
				-	addr = kvm_s390_real_to_abs(vcpu, addr);
			
 
				-	addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
			
 
				-	if (kvm_is_error_hva(addr))
			
 
				+	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
			
 
				+	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
			
 
				+	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
			
 
				+	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
			
 
				+	if (kvm_is_error_hva(vmaddr))
			
 
				 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
			
 
				-
			
 
				+retry:
			
 
				+	unlocked = false;
			
 
				 	down_read(&current->mm->mmap_sem);
			
 
				-	rc = get_guest_storage_key(current->mm, addr, &key);
			
 
				-	up_read(&current->mm->mmap_sem);
			
 
				+	rc = get_guest_storage_key(current->mm, vmaddr, &key);
			
 
				+
			
 
				+	if (rc) {
			
 
				+		rc = fixup_user_fault(current, current->mm, vmaddr,
			
 
				+				      FAULT_FLAG_WRITE, &unlocked);
			
 
				+		if (!rc) {
			
 
				+			up_read(&current->mm->mmap_sem);
			
 
				+			goto retry;
			
 
				+		}
			
 
				+	}
			
 
				 	if (rc)
			
 
				 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
			
 
				+	up_read(&current->mm->mmap_sem);
			
 
				 	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
			
 
				 	vcpu->run->s.regs.gprs[reg1] |= key;
			
 
				 	return 0;
			
@@ -281,8 +290,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
 
				 
			
 
				 static int handle_rrbe(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				-	unsigned long addr;
			
 
				+	unsigned long vmaddr, gaddr;
			
 
				 	int reg1, reg2;
			
 
				+	bool unlocked;
			
 
				 	int rc;
			
 
				 
			
 
				 	vcpu->stat.instruction_rrbe++;
			
@@ -296,19 +306,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
 
				 
			
 
				 	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
			
 
				 
			
 
				-	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
			
 
				-	addr = kvm_s390_logical_to_effective(vcpu, addr);
			
 
				-	addr = kvm_s390_real_to_abs(vcpu, addr);
			
 
				-	addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
			
 
				-	if (kvm_is_error_hva(addr))
			
 
				+	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
			
 
				+	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
			
 
				+	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
			
 
				+	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
			
 
				+	if (kvm_is_error_hva(vmaddr))
			
 
				 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
			
 
				-
			
 
				+retry:
			
 
				+	unlocked = false;
			
 
				 	down_read(&current->mm->mmap_sem);
			
 
				-	rc = reset_guest_reference_bit(current->mm, addr);
			
 
				-	up_read(&current->mm->mmap_sem);
			
 
				+	rc = reset_guest_reference_bit(current->mm, vmaddr);
			
 
				+	if (rc < 0) {
			
 
				+		rc = fixup_user_fault(current, current->mm, vmaddr,
			
 
				+				      FAULT_FLAG_WRITE, &unlocked);
			
 
				+		if (!rc) {
			
 
				+			up_read(&current->mm->mmap_sem);
			
 
				+			goto retry;
			
 
				+		}
			
 
				+	}
			
 
				 	if (rc < 0)
			
 
				 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
			
 
				-
			
 
				+	up_read(&current->mm->mmap_sem);
			
 
				 	kvm_s390_set_psw_cc(vcpu, rc);
			
 
				 	return 0;
			
 
				 }
			
@@ -323,6 +341,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 
				 	unsigned long start, end;
			
 
				 	unsigned char key, oldkey;
			
 
				 	int reg1, reg2;
			
 
				+	bool unlocked;
			
 
				 	int rc;
			
 
				 
			
 
				 	vcpu->stat.instruction_sske++;
			
@@ -355,19 +374,28 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 
				 	}
			
 
				 
			
 
				 	while (start != end) {
			
 
				-		unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
			
 
				+		unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
			
 
				+		unlocked = false;
			
 
				 
			
 
				-		if (kvm_is_error_hva(addr))
			
 
				+		if (kvm_is_error_hva(vmaddr))
			
 
				 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
			
 
				 
			
 
				 		down_read(&current->mm->mmap_sem);
			
 
				-		rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey,
			
 
				+		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
			
 
				 						m3 & SSKE_NQ, m3 & SSKE_MR,
			
 
				 						m3 & SSKE_MC);
			
 
				-		up_read(&current->mm->mmap_sem);
			
 
				-		if (rc < 0)
			
 
				+
			
 
				+		if (rc < 0) {
			
 
				+			rc = fixup_user_fault(current, current->mm, vmaddr,
			
 
				+					      FAULT_FLAG_WRITE, &unlocked);
			
 
				+			rc = !rc ? -EAGAIN : rc;
			
 
				+		}
			
 
				+		/* Unlock before evaluating rc so no return path leaks mmap_sem. */
			
 
				+		up_read(&current->mm->mmap_sem);
			
 
				+		if (rc == -EFAULT)
			
 
				 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
			
 
				+		/* The fault was fixed up: retry the same page. */
			
 
				+		if (rc == -EAGAIN)
			
 
				+			continue;
			
 
				+		/* Any other error is propagated instead of being retried forever. */
			
 
				+		if (rc < 0)
			
 
				+			return rc;
			
 
				 		start += PAGE_SIZE;
			
 
				 	}
			
 
				 
			
 
				 	if (m3 & (SSKE_MC | SSKE_MR)) {
			
@@ -948,15 +976,16 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 
				 	}
			
 
				 
			
 
				 	while (start != end) {
			
 
				-		unsigned long useraddr;
			
 
				+		unsigned long vmaddr;
			
 
				+		bool unlocked = false;
			
 
				 
			
 
				 		/* Translate guest address to host address */
			
 
				-		useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
			
 
				-		if (kvm_is_error_hva(useraddr))
			
 
				+		vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
			
 
				+		if (kvm_is_error_hva(vmaddr))
			
 
				 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
			
 
				 
			
 
				 		if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
			
 
				-			if (clear_user((void __user *)useraddr, PAGE_SIZE))
			
 
				+			if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
			
 
				 				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
			
 
				 		}
			
 
				 
			
@@ -966,14 +995,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 
				 			if (rc)
			
 
				 				return rc;
			
 
				 			down_read(&current->mm->mmap_sem);
			
 
				-			rc = cond_set_guest_storage_key(current->mm, useraddr,
			
 
				+			rc = cond_set_guest_storage_key(current->mm, vmaddr,
			
 
				 							key, NULL, nq, mr, mc);
			
 
				-			up_read(&current->mm->mmap_sem);
			
 
				-			if (rc < 0)
			
 
				+			if (rc < 0) {
			
 
				+				rc = fixup_user_fault(current, current->mm, vmaddr,
			
 
				+						      FAULT_FLAG_WRITE, &unlocked);
			
 
				+				rc = !rc ? -EAGAIN : rc;
			
 
				+			}
			
 
				+			/* Unlock before evaluating rc so no return path leaks mmap_sem. */
			
 
				+			up_read(&current->mm->mmap_sem);
			
 
				+			if (rc == -EFAULT)
			
 
				 				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
			
 
				+			/* The fault was fixed up: retry the same page. */
			
 
				+			if (rc == -EAGAIN)
			
 
				+				continue;
			
 
				+			/* Any other error is propagated instead of being retried forever. */
			
 
				+			if (rc < 0)
			
 
				+				return rc;
			
 
				 		}
			
 
				 
			
 
				+		/* Advance for every request, including those without PFMF_SK. */
			
 
				 		start += PAGE_SIZE;
			
 
				 	}
			
 
				 	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
			
 
				 		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
			
@@ -987,9 +1022,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
			
 
				+/*
			
 
				+ * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
			
 
				+ */
			
 
				+static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
			
 
				 {
			
 
				-	struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
			
 
				 	int r1, r2, nappended, entries;
			
 
				 	unsigned long gfn, hva, res, pgstev, ptev;
			
 
				 	unsigned long *cbrlo;
			
@@ -1039,10 +1076,12 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
 
				 		cbrlo[entries] = gfn << PAGE_SHIFT;
			
 
				 	}
			
 
				 
			
 
				-	if (orc && gfn < ms->bitmap_size) {
			
 
				-		/* increment only if we are really flipping the bit to 1 */
			
 
				-		if (!test_and_set_bit(gfn, ms->pgste_bitmap))
			
 
				-			atomic64_inc(&ms->dirty_pages);
			
 
				+	if (orc) {
			
 
				+		struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
			
 
				+
			
 
				+		/* Increment only if we are really flipping the bit */
			
 
				+		if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			
 
				+			atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
			
 
				 	}
			
 
				 
			
 
				 	return nappended;
			
@@ -1071,7 +1110,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 
				 						: ESSA_SET_STABLE_IF_RESIDENT))
			
 
				 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
			
 
				 
			
 
				-	if (likely(!vcpu->kvm->arch.migration_state)) {
			
 
				+	if (!vcpu->kvm->arch.migration_mode) {
			
 
				 		/*
			
 
				 		 * CMMA is enabled in the KVM settings, but is disabled in
			
 
				 		 * the SIE block and in the mm_context, and we are not doing
			
@@ -1099,10 +1138,16 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 
				 		/* Retry the ESSA instruction */
			
 
				 		kvm_s390_retry_instr(vcpu);
			
 
				 	} else {
			
 
				-		/* Account for the possible extra cbrl entry */
			
 
				-		i = do_essa(vcpu, orc);
			
 
				+		int srcu_idx;
			
 
				+
			
 
				+		down_read(&vcpu->kvm->mm->mmap_sem);
			
 
				+		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
			
 
				+		i = __do_essa(vcpu, orc);
			
 
				+		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
			
 
				+		up_read(&vcpu->kvm->mm->mmap_sem);
			
 
				 		if (i < 0)
			
 
				 			return i;
			
 
				+		/* Account for the possible extra cbrl entry */
			
 
				 		entries += i;
			
 
				 	}
			
 
				 	vcpu->arch.sie_block->cbrlo &= PAGE_MASK;	/* reset nceo */
			
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -2,7 +2,7 @@
 
				 /*
			
 
				  * kvm nested virtualization support for s390x
			
 
				  *
			
 
				- * Copyright IBM Corp. 2016
			
 
				+ * Copyright IBM Corp. 2016, 2018
			
 
				  *
			
 
				  *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
			
 
				  */
			
@@ -378,6 +378,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
				 	if (test_kvm_facility(vcpu->kvm, 139))
			
 
				 		scb_s->ecd |= scb_o->ecd & ECD_MEF;
			
 
				 
			
 
				+	/* etoken */
			
 
				+	if (test_kvm_facility(vcpu->kvm, 156))
			
 
				+		scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
			
 
				+
			
 
				 	prepare_ibc(vcpu, vsie_page);
			
 
				 	rc = shadow_crycb(vcpu, vsie_page);
			
 
				 out:
			
@@ -627,7 +631,8 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
				 		vsie_page->riccbd_gpa = gpa;
			
 
				 		scb_s->riccbd = hpa;
			
 
				 	}
			
 
				-	if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
			
 
				+	if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
			
 
				+	    (scb_s->ecd & ECD_ETOKENF)) {
			
 
				 		unsigned long sdnxc;
			
 
				 
			
 
				 		gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
			
@@ -818,6 +823,8 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
				  *          - < 0 if an error occurred
			
 
				  */
			
 
				 static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
			
 
				+	__releases(vcpu->kvm->srcu)
			
 
				+	__acquires(vcpu->kvm->srcu)
			
 
				 {
			
 
				 	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
			
 
				 	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
			
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2,8 +2,10 @@
 
				 /*
			
 
				  *  KVM guest address space mapping code
			
 
				  *
			
 
				- *    Copyright IBM Corp. 2007, 2016
			
 
				+ *    Copyright IBM Corp. 2007, 2016, 2018
			
 
				  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
			
 
				+ *		 David Hildenbrand <david@redhat.com>
			
 
				+ *		 Janosch Frank <frankja@linux.vnet.ibm.com>
			
 
				  */
			
 
				 
			
 
				 #include <linux/kernel.h>
			
@@ -521,6 +523,9 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table,
 
				 	rcu_read_unlock();
			
 
				 }
			
 
				 
			
 
				+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
			
 
				+			   unsigned long gaddr);
			
 
				+
			
 
				 /**
			
 
				  * gmap_link - set up shadow page tables to connect a host to a guest address
			
 
				  * @gmap: pointer to guest mapping meta data structure
			
@@ -541,6 +546,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 
				 	p4d_t *p4d;
			
 
				 	pud_t *pud;
			
 
				 	pmd_t *pmd;
			
 
				+	u64 unprot;
			
 
				 	int rc;
			
 
				 
			
 
				 	BUG_ON(gmap_is_shadow(gmap));
			
@@ -584,8 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 
				 		return -EFAULT;
			
 
				 	pmd = pmd_offset(pud, vmaddr);
			
 
				 	VM_BUG_ON(pmd_none(*pmd));
			
 
				-	/* large pmds cannot yet be handled */
			
 
				-	if (pmd_large(*pmd))
			
 
				+	/* Are we allowed to use huge pages? */
			
 
				+	if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
			
 
				 		return -EFAULT;
			
 
				 	/* Link gmap segment table entry location to page table. */
			
 
				 	rc = radix_tree_preload(GFP_KERNEL);
			
@@ -596,10 +602,22 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 
				 	if (*table == _SEGMENT_ENTRY_EMPTY) {
			
 
				 		rc = radix_tree_insert(&gmap->host_to_guest,
			
 
				 				       vmaddr >> PMD_SHIFT, table);
			
 
				-		if (!rc)
			
 
				-			*table = pmd_val(*pmd);
			
 
				-	} else
			
 
				-		rc = 0;
			
 
				+		if (!rc) {
			
 
				+			if (pmd_large(*pmd)) {
			
 
				+				*table = (pmd_val(*pmd) &
			
 
				+					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
			
 
				+					| _SEGMENT_ENTRY_GMAP_UC;
			
 
				+			} else
			
 
				+				*table = pmd_val(*pmd) &
			
 
				+					_SEGMENT_ENTRY_HARDWARE_BITS;
			
 
				+		}
			
 
				+	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
			
 
				+		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
			
 
				+		unprot = (u64)*table;
			
 
				+		unprot &= ~_SEGMENT_ENTRY_PROTECT;
			
 
				+		unprot |= _SEGMENT_ENTRY_GMAP_UC;
			
 
				+		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
			
 
				+	}
			
 
				 	spin_unlock(&gmap->guest_table_lock);
			
 
				 	spin_unlock(ptl);
			
 
				 	radix_tree_preload_end();
			
@@ -690,6 +708,12 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
 
				 		vmaddr |= gaddr & ~PMD_MASK;
			
 
				 		/* Find vma in the parent mm */
			
 
				 		vma = find_vma(gmap->mm, vmaddr);
			
 
				+		/*
			
 
				+		 * find_vma() returns NULL when no mapping ends above vmaddr;
			
 
				+		 * there is nothing to zap then and zap_page_range() must
			
 
				+		 * not be handed a NULL vma.
			
 
				+		 */
			
 
				+		if (!vma)
			
 
				+			continue;
			
 
				+		/*
			
 
				+		 * We do not discard pages that are backed by
			
 
				+		 * hugetlbfs, so we don't have to refault them.
			
 
				+		 */
			
 
				+		if (is_vm_hugetlb_page(vma))
			
 
				+			continue;
			
 
				 		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
			
 
				 		zap_page_range(vma, vmaddr, size);
			
 
				 	}
			
@@ -864,7 +888,128 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
 
				  */
			
 
				 static void gmap_pte_op_end(spinlock_t *ptl)
			
 
				 {
			
 
				-	spin_unlock(ptl);
			
 
				+	if (ptl)
			
 
				+		spin_unlock(ptl);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
			
 
				+ *		      and return the pmd pointer
			
 
				+ * @gmap: pointer to guest mapping meta data structure
			
 
				+ * @gaddr: virtual address in the guest address space
			
 
				+ *
			
 
				+ * Returns a pointer to the pmd for a guest address, or NULL
			
 
				+ */
			
 
				+static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
			
 
				+{
			
 
				+	pmd_t *pmdp;
			
 
				+
			
 
				+	BUG_ON(gmap_is_shadow(gmap));
			
 
				+	spin_lock(&gmap->guest_table_lock);
			
 
				+	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
			
 
				+
			
 
				+	if (!pmdp || pmd_none(*pmdp)) {
			
 
				+		spin_unlock(&gmap->guest_table_lock);
			
 
				+		return NULL;
			
 
				+	}
			
 
				+
			
 
				+	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
			
 
				+	if (!pmd_large(*pmdp))
			
 
				+		spin_unlock(&gmap->guest_table_lock);
			
 
				+	return pmdp;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * gmap_pmd_op_end - release the guest_table_lock if needed
			
 
				+ * @gmap: pointer to the guest mapping meta data structure
			
 
				+ * @pmdp: pointer to the pmd
			
 
				+ */
			
 
				+static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
			
 
				+{
			
 
				+	if (pmd_large(*pmdp))
			
 
				+		spin_unlock(&gmap->guest_table_lock);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
			
 
				+ * @pmdp: pointer to the pmd to be protected
			
 
				+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
			
 
				+ * @bits: notification bits to set
			
 
				+ *
			
 
				+ * Returns:
			
 
				+ * 0 if successfully protected
			
 
				+ * -EAGAIN if a fixup is needed
			
 
				+ * -EINVAL if unsupported notifier bits have been specified
			
 
				+ *
			
 
				+ * Expected to be called with sg->mm->mmap_sem in read and
			
 
				+ * guest_table_lock held.
			
 
				+ */
			
 
				+static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
			
 
				+			    pmd_t *pmdp, int prot, unsigned long bits)
			
 
				+{
			
 
				+	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
			
 
				+	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
			
 
				+	pmd_t new = *pmdp;
			
 
				+
			
 
				+	/* Fixup needed */
			
 
				+	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
			
 
				+		return -EAGAIN;
			
 
				+
			
 
				+	if (prot == PROT_NONE && !pmd_i) {
			
 
				+		pmd_val(new) |= _SEGMENT_ENTRY_INVALID;
			
 
				+		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
			
 
				+	}
			
 
				+
			
 
				+	if (prot == PROT_READ && !pmd_p) {
			
 
				+		pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID;
			
 
				+		pmd_val(new) |= _SEGMENT_ENTRY_PROTECT;
			
 
				+		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
			
 
				+	}
			
 
				+
			
 
				+	if (bits & GMAP_NOTIFY_MPROT)
			
 
				+		pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;
			
 
				+
			
 
				+	/* Shadow GMAP protection needs split PMDs */
			
 
				+	if (bits & GMAP_NOTIFY_SHADOW)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * gmap_protect_pte - remove access rights to memory and set pgste bits
			
 
				+ * @gmap: pointer to guest mapping meta data structure
			
 
				+ * @gaddr: virtual address in the guest address space
			
 
				+ * @pmdp: pointer to the pmd associated with the pte
			
 
				+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
			
 
				+ * @bits: notification bits to set
			
 
				+ *
			
 
				+ * Returns 0 if successfully protected, -ENOMEM if out of memory and
			
 
				+ * -EAGAIN if a fixup is needed.
			
 
				+ *
			
 
				+ * Expected to be called with sg->mm->mmap_sem in read
			
 
				+ */
			
 
				+static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
			
 
				+			    pmd_t *pmdp, int prot, unsigned long bits)
			
 
				+{
			
 
				+	int rc;
			
 
				+	pte_t *ptep;
			
 
				+	spinlock_t *ptl = NULL;
			
 
				+	unsigned long pbits = 0;
			
 
				+
			
 
				+	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
			
 
				+		return -EAGAIN;
			
 
				+
			
 
				+	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
			
 
				+	if (!ptep)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
			
 
				+	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
			
 
				+	/* Protect and unlock. */
			
 
				+	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
			
 
				+	gmap_pte_op_end(ptl);
			
 
				+	return rc;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -883,30 +1028,45 @@ static void gmap_pte_op_end(spinlock_t *ptl)
 
				 static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
			
 
				 			      unsigned long len, int prot, unsigned long bits)
			
 
				 {
			
 
				-	unsigned long vmaddr;
			
 
				-	spinlock_t *ptl;
			
 
				-	pte_t *ptep;
			
 
				+	unsigned long vmaddr, dist;
			
 
				+	pmd_t *pmdp;
			
 
				 	int rc;
			
 
				 
			
 
				 	BUG_ON(gmap_is_shadow(gmap));
			
 
				 	while (len) {
			
 
				 		rc = -EAGAIN;
			
 
				-		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
			
 
				-		if (ptep) {
			
 
				-			rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits);
			
 
				-			gmap_pte_op_end(ptl);
			
 
				+		pmdp = gmap_pmd_op_walk(gmap, gaddr);
			
 
				+		if (pmdp) {
			
 
				+			if (!pmd_large(*pmdp)) {
			
 
				+				rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
			
 
				+						      bits);
			
 
				+				if (!rc) {
			
 
				+					len -= PAGE_SIZE;
			
 
				+					gaddr += PAGE_SIZE;
			
 
				+				}
			
 
				+			} else {
			
 
				+				rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
			
 
				+						      bits);
			
 
				+				if (!rc) {
			
 
				+					dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
			
 
				+					len = len < dist ? 0 : len - dist;
			
 
				+					gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
			
 
				+				}
			
 
				+			}
			
 
				+			gmap_pmd_op_end(gmap, pmdp);
			
 
				 		}
			
 
				 		if (rc) {
			
 
				+			if (rc == -EINVAL)
			
 
				+				return rc;
			
 
				+
			
 
				+			/* -EAGAIN, fixup of userspace mm and gmap */
			
 
				 			vmaddr = __gmap_translate(gmap, gaddr);
			
 
				 			if (IS_ERR_VALUE(vmaddr))
			
 
				 				return vmaddr;
			
 
				 			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
			
 
				 			if (rc)
			
 
				 				return rc;
			
 
				-			continue;
			
 
				 		}
			
 
				-		gaddr += PAGE_SIZE;
			
 
				-		len -= PAGE_SIZE;
			
 
				 	}
			
 
				 	return 0;
			
 
				 }
			
@@ -935,7 +1095,7 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
 
				 	if (!MACHINE_HAS_ESOP && prot == PROT_READ)
			
 
				 		return -EINVAL;
			
 
				 	down_read(&gmap->mm->mmap_sem);
			
 
				-	rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT);
			
 
				+	rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
			
 
				 	up_read(&gmap->mm->mmap_sem);
			
 
				 	return rc;
			
 
				 }
			
@@ -1474,6 +1634,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
 
				 	unsigned long limit;
			
 
				 	int rc;
			
 
				 
			
 
				+	BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
			
 
				 	BUG_ON(gmap_is_shadow(parent));
			
 
				 	spin_lock(&parent->shadow_lock);
			
 
				 	sg = gmap_find_shadow(parent, asce, edat_level);
			
@@ -1526,7 +1687,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
 
				 	down_read(&parent->mm->mmap_sem);
			
 
				 	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
			
 
				 				((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
			
 
				-				PROT_READ, PGSTE_VSIE_BIT);
			
 
				+				PROT_READ, GMAP_NOTIFY_SHADOW);
			
 
				 	up_read(&parent->mm->mmap_sem);
			
 
				 	spin_lock(&parent->shadow_lock);
			
 
				 	new->initialized = true;
			
@@ -2092,6 +2253,225 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(ptep_notify);
			
 
				 
			
 
				+static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
			
 
				+			     unsigned long gaddr)
			
 
				+{
			
 
				+	pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN;
			
 
				+	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * gmap_pmdp_xchg - exchange a gmap pmd with another
			
 
				+ * @gmap: pointer to the guest address space structure
			
 
				+ * @pmdp: pointer to the pmd entry
			
 
				+ * @new: replacement entry
			
 
				+ * @gaddr: the affected guest address
			
 
				+ *
			
 
				+ * This function is assumed to be called with the guest_table_lock
			
 
				+ * held.
			
 
				+ */
			
 
				+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
			
 
				+			   unsigned long gaddr)
			
 
				+{
			
 
				+	gaddr &= HPAGE_MASK;
			
 
				+	pmdp_notify_gmap(gmap, pmdp, gaddr);
			
 
				+	pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN;
			
 
				+	if (MACHINE_HAS_TLB_GUEST)
			
 
				+		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
			
 
				+			    IDTE_GLOBAL);
			
 
				+	else if (MACHINE_HAS_IDTE)
			
 
				+		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
			
 
				+	else
			
 
				+		__pmdp_csp(pmdp);
			
 
				+	*pmdp = new;
			
 
				+}
			
 
				+
			
 
				+static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
			
 
				+			    int purge)
			
 
				+{
			
 
				+	pmd_t *pmdp;
			
 
				+	struct gmap *gmap;
			
 
				+	unsigned long gaddr;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
			
 
				+		spin_lock(&gmap->guest_table_lock);
			
 
				+		pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
			
 
				+						  vmaddr >> PMD_SHIFT);
			
 
				+		if (pmdp) {
			
 
				+			gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
			
 
				+			pmdp_notify_gmap(gmap, pmdp, gaddr);
			
 
				+			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
			
 
				+						   _SEGMENT_ENTRY_GMAP_UC));
			
 
				+			if (purge)
			
 
				+				__pmdp_csp(pmdp);
			
 
				+			pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
			
 
				+		}
			
 
				+		spin_unlock(&gmap->guest_table_lock);
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
			
 
				+ *                        flushing
			
 
				+ * @mm: pointer to the process mm_struct
			
 
				+ * @vmaddr: virtual address in the process address space
			
 
				+ */
			
 
				+void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
			
 
				+{
			
 
				+	gmap_pmdp_clear(mm, vmaddr, 0);
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
			
 
				+
			
 
				+/**
			
 
				+ * gmap_pmdp_csp - csp all affected guest pmd entries
			
 
				+ * @mm: pointer to the process mm_struct
			
 
				+ * @vmaddr: virtual address in the process address space
			
 
				+ */
			
 
				+void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
			
 
				+{
			
 
				+	gmap_pmdp_clear(mm, vmaddr, 1);
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
			
 
				+
			
 
				+/**
			
 
				+ * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
			
 
				+ * @mm: pointer to the process mm_struct
			
 
				+ * @vmaddr: virtual address in the process address space
			
 
				+ */
			
 
				+void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
			
 
				+{
			
 
				+	unsigned long *entry, gaddr;
			
 
				+	struct gmap *gmap;
			
 
				+	pmd_t *pmdp;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
			
 
				+		spin_lock(&gmap->guest_table_lock);
			
 
				+		entry = radix_tree_delete(&gmap->host_to_guest,
			
 
				+					  vmaddr >> PMD_SHIFT);
			
 
				+		if (entry) {
			
 
				+			pmdp = (pmd_t *)entry;
			
 
				+			gaddr = __gmap_segment_gaddr(entry);
			
 
				+			pmdp_notify_gmap(gmap, pmdp, gaddr);
			
 
				+			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
			
 
				+					   _SEGMENT_ENTRY_GMAP_UC));
			
 
				+			if (MACHINE_HAS_TLB_GUEST)
			
 
				+				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
			
 
				+					    gmap->asce, IDTE_LOCAL);
			
 
				+			else if (MACHINE_HAS_IDTE)
			
 
				+				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
			
 
				+			*entry = _SEGMENT_ENTRY_EMPTY;
			
 
				+		}
			
 
				+		spin_unlock(&gmap->guest_table_lock);
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
			
 
				+
			
 
				+/**
			
 
				+ * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
			
 
				+ * @mm: pointer to the process mm_struct
			
 
				+ * @vmaddr: virtual address in the process address space
			
 
				+ */
			
 
				+void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
			
 
				+{
			
 
				+	unsigned long *entry, gaddr;
			
 
				+	struct gmap *gmap;
			
 
				+	pmd_t *pmdp;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
			
 
				+		spin_lock(&gmap->guest_table_lock);
			
 
				+		entry = radix_tree_delete(&gmap->host_to_guest,
			
 
				+					  vmaddr >> PMD_SHIFT);
			
 
				+		if (entry) {
			
 
				+			pmdp = (pmd_t *)entry;
			
 
				+			gaddr = __gmap_segment_gaddr(entry);
			
 
				+			pmdp_notify_gmap(gmap, pmdp, gaddr);
			
 
				+			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
			
 
				+					   _SEGMENT_ENTRY_GMAP_UC));
			
 
				+			if (MACHINE_HAS_TLB_GUEST)
			
 
				+				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
			
 
				+					    gmap->asce, IDTE_GLOBAL);
			
 
				+			else if (MACHINE_HAS_IDTE)
			
 
				+				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
			
 
				+			else
			
 
				+				__pmdp_csp(pmdp);
			
 
				+			*entry = _SEGMENT_ENTRY_EMPTY;
			
 
				+		}
			
 
				+		spin_unlock(&gmap->guest_table_lock);
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
			
 
				+
			
 
				+/**
			
 
				+ * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
			
 
				+ * @gmap: pointer to guest address space
			
 
				+ * @pmdp: pointer to the pmd to be tested
			
 
				+ * @gaddr: virtual address in the guest address space
			
 
				+ *
			
 
				+ * This function is assumed to be called with the guest_table_lock
			
 
				+ * held.
			
 
				+ */
			
 
				+bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
			
 
				+				   unsigned long gaddr)
			
 
				+{
			
 
				+	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
			
 
				+		return false;
			
 
				+
			
 
				+	/* Already protected memory, which did not change is clean */
			
 
				+	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
			
 
				+	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
			
 
				+		return false;
			
 
				+
			
 
				+	/* Clear UC indication and reset protection */
			
 
				+	pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC;
			
 
				+	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
			
 
				+ * @gmap: pointer to guest address space
			
 
				+ * @bitmap: dirty bitmap for this pmd
			
 
				+ * @gaddr: virtual address in the guest address space
			
 
				+ * @vmaddr: virtual address in the host address space
			
 
				+ *
			
 
				+ * This function is assumed to be called with the guest_table_lock
			
 
				+ * held.
			
 
				+ */
			
 
				+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
			
 
				+			     unsigned long gaddr, unsigned long vmaddr)
			
 
				+{
			
 
				+	int i;
			
 
				+	pmd_t *pmdp;
			
 
				+	pte_t *ptep;
			
 
				+	spinlock_t *ptl;
			
 
				+
			
 
				+	pmdp = gmap_pmd_op_walk(gmap, gaddr);
			
 
				+	if (!pmdp)
			
 
				+		return;
			
 
				+
			
 
				+	if (pmd_large(*pmdp)) {
			
 
				+		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
			
 
				+			bitmap_fill(bitmap, _PAGE_ENTRIES);
			
 
				+	} else {
			
 
				+		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
			
 
				+			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
			
 
				+			if (!ptep)
			
 
				+				continue;
			
 
				+			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
			
 
				+				set_bit(i, bitmap);
			
 
				+			spin_unlock(ptl);
			
 
				+		}
			
 
				+	}
			
 
				+	gmap_pmd_op_end(gmap, pmdp);
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
			
 
				+
			
 
				 static inline void thp_split_mm(struct mm_struct *mm)
			
 
				 {
			
 
				 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
			
@@ -2168,17 +2548,45 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
 
				  * Enable storage key handling from now on and initialize the storage
			
 
				  * keys with the default key.
			
 
				  */
			
 
				-static int __s390_enable_skey(pte_t *pte, unsigned long addr,
			
 
				-			      unsigned long next, struct mm_walk *walk)
			
 
				+static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
			
 
				+				  unsigned long next, struct mm_walk *walk)
			
 
				 {
			
 
				 	/* Clear storage key */
			
 
				 	ptep_zap_key(walk->mm, addr, pte);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
			
 
				+				      unsigned long hmask, unsigned long next,
			
 
				+				      struct mm_walk *walk)
			
 
				+{
			
 
				+	pmd_t *pmd = (pmd_t *)pte;	/* the huge entry is handled as a segment entry */
			
 
				+	unsigned long start, end;
			
 
				+	struct page *page = pmd_page(*pmd);
			
 
				+
			
 
				+	/*
			
 
				+	 * The write check makes sure we do not set a key on shared
			
 
				+	 * memory. This is needed as the walker does not differentiate
			
 
				+	 * between actual guest memory and the process executable or
			
 
				+	 * shared libraries.
			
 
				+	 */
			
 
				+	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
			
 
				+	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
			
 
				+		return 0;	/* invalid or not writable: skip without touching keys */
			
 
				+
			
 
				+	start = pmd_val(*pmd) & HPAGE_MASK;	/* huge page origin taken from the entry */
			
 
				+	end = start + HPAGE_SIZE - 1;	/* last byte of the huge page */
			
 
				+	__storage_key_init_range(start, end);
			
 
				+	set_bit(PG_arch_1, &page->flags);	/* PG_arch_1: keys of this page have been initialized */
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 int s390_enable_skey(void)
			
 
				 {
			
 
				-	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
			
 
				+	struct mm_walk walk = {
			
 
				+		.hugetlb_entry = __s390_enable_skey_hugetlb,
			
 
				+		.pte_entry = __s390_enable_skey_pte,
			
 
				+	};
			
 
				 	struct mm_struct *mm = current->mm;
			
 
				 	struct vm_area_struct *vma;
			
 
				 	int rc = 0;
			
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -123,6 +123,29 @@ static inline pte_t __rste_to_pte(unsigned long rste)
 
				 	return pte;
			
 
				 }
			
 
				 
			
 
				+static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
			
 
				+{
			
 
				+	struct page *page;
			
 
				+	unsigned long size, paddr;
			
 
				+
			
 
				+	if (!mm_uses_skeys(mm) ||
			
 
				+	    rste & _SEGMENT_ENTRY_INVALID)
			
 
				+		return;	/* mm does not use storage keys, or the entry is invalid */
			
 
				+
			
 
				+	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {	/* region-third entry: pud-sized page */
			
 
				+		page = pud_page(__pud(rste));
			
 
				+		size = PUD_SIZE;
			
 
				+		paddr = rste & PUD_MASK;
			
 
				+	} else {	/* otherwise a segment entry: pmd-sized page */
			
 
				+		page = pmd_page(__pmd(rste));
			
 
				+		size = PMD_SIZE;
			
 
				+		paddr = rste & PMD_MASK;
			
 
				+	}
			
 
				+
			
 
				+	if (!test_and_set_bit(PG_arch_1, &page->flags))	/* bit was clear: initialize the keys only once */
			
 
				+		__storage_key_init_range(paddr, paddr + size - 1);
			
 
				+}
			
 
				+
			
 
				 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
			
 
				 		     pte_t *ptep, pte_t pte)
			
 
				 {
			
@@ -137,6 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 
				 		rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
			
 
				 	else
			
 
				 		rste |= _SEGMENT_ENTRY_LARGE;
			
 
				+	clear_huge_pte_skeys(mm, rste);
			
 
				 	pte_val(*ptep) = rste;
			
 
				 }
			
 
				 
			
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -14,7 +14,7 @@
 
				 
			
 
				 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
			
 
				 {
			
 
				-	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0"
			
 
				+	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
			
 
				 		     : [addr] "+a" (addr) : [skey] "d" (skey));
			
 
				 	return addr;
			
 
				 }
			
@@ -23,8 +23,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
 
				 {
			
 
				 	unsigned long boundary, size;
			
 
				 
			
 
				-	if (!PAGE_DEFAULT_KEY)
			
 
				-		return;
			
 
				 	while (start < end) {
			
 
				 		if (MACHINE_HAS_EDAT1) {
			
 
				 			/* set storage keys for a 1MB frame */
			
@@ -37,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
 
				 				continue;
			
 
				 			}
			
 
				 		}
			
 
				-		page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
			
 
				+		page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
			
 
				 		start += PAGE_SIZE;
			
 
				 	}
			
 
				 }
			
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -347,18 +347,27 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
 
				 			    mm->context.asce, IDTE_LOCAL);
			
 
				 	else
			
 
				 		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
			
 
				+	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			
 
				+		gmap_pmdp_idte_local(mm, addr);
			
 
				 }
			
 
				 
			
 
				 static inline void pmdp_idte_global(struct mm_struct *mm,
			
 
				 				    unsigned long addr, pmd_t *pmdp)
			
 
				 {
			
 
				-	if (MACHINE_HAS_TLB_GUEST)
			
 
				+	if (MACHINE_HAS_TLB_GUEST) {
			
 
				 		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			
 
				 			    mm->context.asce, IDTE_GLOBAL);
			
 
				-	else if (MACHINE_HAS_IDTE)
			
 
				+		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			
 
				+			gmap_pmdp_idte_global(mm, addr);
			
 
				+	} else if (MACHINE_HAS_IDTE) {
			
 
				 		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
			
 
				-	else
			
 
				+		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			
 
				+			gmap_pmdp_idte_global(mm, addr);
			
 
				+	} else {
			
 
				 		__pmdp_csp(pmdp);
			
 
				+		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			
 
				+			gmap_pmdp_csp(mm, addr);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
			
@@ -392,6 +401,8 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
 
				 			  cpumask_of(smp_processor_id()))) {
			
 
				 		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
			
 
				 		mm->context.flush_mm = 1;
			
 
				+		if (mm_has_pgste(mm))
			
 
				+			gmap_pmdp_invalidate(mm, addr);
			
 
				 	} else {
			
 
				 		pmdp_idte_global(mm, addr, pmdp);
			
 
				 	}
			
@@ -399,6 +410,24 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
 
				 	return old;
			
 
				 }
			
 
				 
			
 
				+static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
			
 
				+{
			
 
				+	pgd_t *pgd;
			
 
				+	p4d_t *p4d;
			
 
				+	pud_t *pud;
			
 
				+	pmd_t *pmd;
			
 
				+
			
 
				+	pgd = pgd_offset(mm, addr);	/* walk down from the top level for addr */
			
 
				+	p4d = p4d_alloc(mm, pgd, addr);
			
 
				+	if (!p4d)
			
 
				+		return NULL;	/* no p4d level available */
			
 
				+	pud = pud_alloc(mm, p4d, addr);
			
 
				+	if (!pud)
			
 
				+		return NULL;	/* no pud level available */
			
 
				+	pmd = pmd_alloc(mm, pud, addr);
			
 
				+	return pmd;	/* may be NULL as well; callers check for it */
			
 
				+}
			
 
				+
			
 
				 pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
			
 
				 		       pmd_t *pmdp, pmd_t new)
			
 
				 {
			
@@ -693,40 +722,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
				 /*
			
 
				  * Test and reset if a guest page is dirty
			
 
				  */
			
 
				-bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
			
 
				+bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
			
 
				+		       pte_t *ptep)
			
 
				 {
			
 
				-	spinlock_t *ptl;
			
 
				-	pgd_t *pgd;
			
 
				-	p4d_t *p4d;
			
 
				-	pud_t *pud;
			
 
				-	pmd_t *pmd;
			
 
				 	pgste_t pgste;
			
 
				-	pte_t *ptep;
			
 
				 	pte_t pte;
			
 
				 	bool dirty;
			
 
				 	int nodat;
			
 
				 
			
 
				-	pgd = pgd_offset(mm, addr);
			
 
				-	p4d = p4d_alloc(mm, pgd, addr);
			
 
				-	if (!p4d)
			
 
				-		return false;
			
 
				-	pud = pud_alloc(mm, p4d, addr);
			
 
				-	if (!pud)
			
 
				-		return false;
			
 
				-	pmd = pmd_alloc(mm, pud, addr);
			
 
				-	if (!pmd)
			
 
				-		return false;
			
 
				-	/* We can't run guests backed by huge pages, but userspace can
			
 
				-	 * still set them up and then try to migrate them without any
			
 
				-	 * migration support.
			
 
				-	 */
			
 
				-	if (pmd_large(*pmd))
			
 
				-		return true;
			
 
				-
			
 
				-	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
			
 
				-	if (unlikely(!ptep))
			
 
				-		return false;
			
 
				-
			
 
				 	pgste = pgste_get_lock(ptep);
			
 
				 	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
			
 
				 	pgste_val(pgste) &= ~PGSTE_UC_BIT;
			
@@ -742,21 +745,43 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 
				 		*ptep = pte;
			
 
				 	}
			
 
				 	pgste_set_unlock(ptep, pgste);
			
 
				-
			
 
				-	spin_unlock(ptl);
			
 
				 	return dirty;
			
 
				 }
			
 
				-EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
			
 
				+EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
			
 
				 
			
 
				 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			
 
				 			  unsigned char key, bool nq)
			
 
				 {
			
 
				-	unsigned long keyul;
			
 
				+	unsigned long keyul, paddr;
			
 
				 	spinlock_t *ptl;
			
 
				 	pgste_t old, new;
			
 
				+	pmd_t *pmdp;
			
 
				 	pte_t *ptep;
			
 
				 
			
 
				-	ptep = get_locked_pte(mm, addr, &ptl);
			
 
				+	pmdp = pmd_alloc_map(mm, addr);
			
 
				+	if (unlikely(!pmdp))
			
 
				+		return -EFAULT;
			
 
				+
			
 
				+	ptl = pmd_lock(mm, pmdp);
			
 
				+	if (!pmd_present(*pmdp)) {
			
 
				+		spin_unlock(ptl);
			
 
				+		return -EFAULT;
			
 
				+	}
			
 
				+
			
 
				+	if (pmd_large(*pmdp)) {
			
 
				+		paddr = pmd_val(*pmdp) & HPAGE_MASK;
			
 
				+		paddr |= addr & ~HPAGE_MASK;
			
 
				+		/*
			
 
				+		 * Huge pmds are always mapped, so their keys need to
			
 
				+		 * be set with a quiescing operation.
			
 
				+		 */
			
 
				+		page_set_storage_key(paddr, key, 1);
			
 
				+		spin_unlock(ptl);
			
 
				+		return 0;
			
 
				+	}
			
 
				+	spin_unlock(ptl);
			
 
				+
			
 
				+	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
			
 
				 	if (unlikely(!ptep))
			
 
				 		return -EFAULT;
			
 
				 
			
@@ -767,14 +792,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 
				 	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
			
 
				 	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
			
 
				 	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			
 
				-		unsigned long address, bits, skey;
			
 
				+		unsigned long bits, skey;
			
 
				 
			
 
				-		address = pte_val(*ptep) & PAGE_MASK;
			
 
				-		skey = (unsigned long) page_get_storage_key(address);
			
 
				+		paddr = pte_val(*ptep) & PAGE_MASK;
			
 
				+		skey = (unsigned long) page_get_storage_key(paddr);
			
 
				 		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
			
 
				 		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
			
 
				 		/* Set storage key ACC and FP */
			
 
				-		page_set_storage_key(address, skey, !nq);
			
 
				+		page_set_storage_key(paddr, skey, !nq);
			
 
				 		/* Merge host changed & referenced into pgste  */
			
 
				 		pgste_val(new) |= bits << 52;
			
 
				 	}
			
@@ -830,11 +855,32 @@ EXPORT_SYMBOL(cond_set_guest_storage_key);
 
				 int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
			
 
				 {
			
 
				 	spinlock_t *ptl;
			
 
				+	unsigned long paddr;
			
 
				 	pgste_t old, new;
			
 
				+	pmd_t *pmdp;
			
 
				 	pte_t *ptep;
			
 
				 	int cc = 0;
			
 
				 
			
 
				-	ptep = get_locked_pte(mm, addr, &ptl);
			
 
				+	pmdp = pmd_alloc_map(mm, addr);
			
 
				+	if (unlikely(!pmdp))
			
 
				+		return -EFAULT;
			
 
				+
			
 
				+	ptl = pmd_lock(mm, pmdp);
			
 
				+	if (!pmd_present(*pmdp)) {
			
 
				+		spin_unlock(ptl);
			
 
				+		return -EFAULT;
			
 
				+	}
			
 
				+
			
 
				+	if (pmd_large(*pmdp)) {
			
 
				+		paddr = pmd_val(*pmdp) & HPAGE_MASK;
			
 
				+		paddr |= addr & ~HPAGE_MASK;
			
 
				+		cc = page_reset_referenced(paddr);
			
 
				+		spin_unlock(ptl);
			
 
				+		return cc;
			
 
				+	}
			
 
				+	spin_unlock(ptl);
			
 
				+
			
 
				+	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
			
 
				 	if (unlikely(!ptep))
			
 
				 		return -EFAULT;
			
 
				 
			
@@ -843,7 +889,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
 
				 	pgste_val(new) &= ~PGSTE_GR_BIT;
			
 
				 
			
 
				 	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			
 
				-		cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
			
 
				+		paddr = pte_val(*ptep) & PAGE_MASK;
			
 
				+		cc = page_reset_referenced(paddr);
			
 
				 		/* Merge real referenced bit into host-set */
			
 
				 		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
			
 
				 	}
			
@@ -862,18 +909,42 @@ EXPORT_SYMBOL(reset_guest_reference_bit);
 
				 int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			
 
				 			  unsigned char *key)
			
 
				 {
			
 
				+	unsigned long paddr;
			
 
				 	spinlock_t *ptl;
			
 
				 	pgste_t pgste;
			
 
				+	pmd_t *pmdp;
			
 
				 	pte_t *ptep;
			
 
				 
			
 
				-	ptep = get_locked_pte(mm, addr, &ptl);
			
 
				+	pmdp = pmd_alloc_map(mm, addr);
			
 
				+	if (unlikely(!pmdp))
			
 
				+		return -EFAULT;
			
 
				+
			
 
				+	ptl = pmd_lock(mm, pmdp);
			
 
				+	if (!pmd_present(*pmdp)) {
			
 
				+		/* Not yet mapped memory has a zero key */
			
 
				+		spin_unlock(ptl);
			
 
				+		*key = 0;
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	if (pmd_large(*pmdp)) {
			
 
				+		paddr = pmd_val(*pmdp) & HPAGE_MASK;
			
 
				+		paddr |= addr & ~HPAGE_MASK;
			
 
				+		*key = page_get_storage_key(paddr);
			
 
				+		spin_unlock(ptl);
			
 
				+		return 0;
			
 
				+	}
			
 
				+	spin_unlock(ptl);
			
 
				+
			
 
				+	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
			
 
				 	if (unlikely(!ptep))
			
 
				 		return -EFAULT;
			
 
				 
			
 
				 	pgste = pgste_get_lock(ptep);
			
 
				 	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
			
 
				+	paddr = pte_val(*ptep) & PAGE_MASK;
			
 
				 	if (!(pte_val(*ptep) & _PAGE_INVALID))
			
 
				-		*key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
			
 
				+		*key = page_get_storage_key(paddr);
			
 
				 	/* Reflect guest's logical view, not physical */
			
 
				 	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
			
 
				 	pgste_set_unlock(ptep, pgste);
			
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -4,7 +4,7 @@
 
				  * numbering scheme from the Princples of Operations: most significant bit
			
 
				  * has bit number 0.
			
 
				  *
			
 
				- *    Copyright IBM Corp. 2015
			
 
				+ *    Copyright IBM Corp. 2015, 2018
			
 
				  *
			
 
				  */
			
 
				 
			
@@ -106,6 +106,7 @@ static struct facility_def facility_defs[] = {
 
				 
			
 
				 		.name = "FACILITIES_KVM_CPUMODEL",
			
 
				 		.bits = (int[]){
			
 
				+			156, /* etoken facility */
			
 
				 			-1  /* END */
			
 
				 		}
			
 
				 	},
			
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -309,6 +309,13 @@ static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memsl
 
				 	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
			
 
				 }
			
 
				 
			
 
				+static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot)
			
 
				+{
			
 
				+	unsigned long len = kvm_dirty_bitmap_bytes(memslot);	/* size of one dirty bitmap in bytes */
			
 
				+
			
 
				+	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);	/* bytes -> elements: step past the first bitmap */
			
 
				+}
			
 
				+
			
 
				 struct kvm_s390_adapter_int {
			
 
				 	u64 ind_addr;
			
 
				 	u64 summary_addr;
			
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt {
 
				 #define KVM_CAP_GET_MSR_FEATURES 153
			
 
				 #define KVM_CAP_HYPERV_EVENTFD 154
			
 
				 #define KVM_CAP_HYPERV_TLBFLUSH 155
			
 
				+#define KVM_CAP_S390_HPAGE_1M 156
			
 
				 
			
 
				 #ifdef KVM_CAP_IRQ_ROUTING
			
 
				 
			
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1169,7 +1169,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
 
				 
			
 
				 	n = kvm_dirty_bitmap_bytes(memslot);
			
 
				 
			
 
				-	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
			
 
				+	dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
			
 
				 	memset(dirty_bitmap_buffer, 0, n);
			
 
				 
			
 
				 	spin_lock(&kvm->mmu_lock);