소스 검색

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull first batch of s390 updates from Martin Schwidefsky:
 "The most interesting change is that Martin converted s390 to generic
  hardirqs.  Which means that all current architectures have been
  converted and that CONFIG_GENERIC_HARDIRQS can be removed.  Martin
  prepared a patch for that already (see genirq branch), but the best
  time to merge that is probably at the end of the merge window / begin
  of -rc1.

  Another patch converts s390 to software referenced bits instead of
  relying on the reference bit in the storage key.  Therefore s390
  doesn't use storage keys anymore, except for kvm.

  Besides that we have improvements, cleanups and fixes in PCI, DASD and
  all over the place."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (32 commits)
  s390/pci: use virtual memory for iommu bitmap
  s390/cio: fix unlocked access of global bitmap
  s390/pci: update function handle after resume from hibernate
  s390/pci: try harder to modify a function
  s390/pci: split lpf
  s390/hibernate: add early resume function
  s390/pci: add recover sysfs knob
  s390/pci: use claim_resource
  s390/pci/hotplug: convert to be builtin only
  s390/mm: implement software referenced bits
  s390/dasd: fix statistics for recovered requests
  s390/tx: allow program interruption filtering in user space
  s390/pgtable: fix mprotect for single-threaded KVM guests
  s390/time: return with irqs disabled from psw_idle
  s390/kprobes: add support for compare and branch instructions
  s390/switch_to: fix save_access_regs() / restore_access_regs()
  s390/bitops: fix inline assembly constraints
  s390/dasd: enable raw_track_access reads without direct I/O
  s390/mm: introduce ptep_flush_lazy helper
  s390/time: clock comparator revalidation
  ...
Linus Torvalds 12 년 전
부모
커밋
1ccfd5eaf8
62개의 변경된 파일1468개의 추가작업 그리고 1402개의 파일을 삭제
  1. 11 0
      arch/s390/Kconfig
  2. 67 0
      arch/s390/include/asm/airq.h
  3. 6 6
      arch/s390/include/asm/bitops.h
  4. 1 0
      arch/s390/include/asm/cio.h
  5. 5 0
      arch/s390/include/asm/hardirq.h
  6. 33 102
      arch/s390/include/asm/hugetlb.h
  7. 3 14
      arch/s390/include/asm/hw_irq.h
  8. 20 15
      arch/s390/include/asm/irq.h
  9. 1 2
      arch/s390/include/asm/mmu_context.h
  10. 0 19
      arch/s390/include/asm/page.h
  11. 21 33
      arch/s390/include/asm/pci.h
  12. 6 6
      arch/s390/include/asm/pci_insn.h
  13. 5 5
      arch/s390/include/asm/pci_io.h
  14. 341 296
      arch/s390/include/asm/pgtable.h
  15. 6 0
      arch/s390/include/asm/serial.h
  16. 7 2
      arch/s390/include/asm/switch_to.h
  17. 2 1
      arch/s390/include/asm/tlb.h
  18. 3 3
      arch/s390/include/asm/tlbflush.h
  19. 11 5
      arch/s390/kernel/entry.S
  20. 9 2
      arch/s390/kernel/entry64.S
  21. 58 102
      arch/s390/kernel/irq.c
  22. 19 2
      arch/s390/kernel/kprobes.c
  23. 1 4
      arch/s390/kernel/nmi.c
  24. 1 0
      arch/s390/kernel/process.c
  25. 4 4
      arch/s390/kernel/ptrace.c
  26. 11 0
      arch/s390/kernel/suspend.c
  27. 2 5
      arch/s390/kernel/swsusp_asm64.S
  28. 0 1
      arch/s390/kernel/time.c
  29. 3 3
      arch/s390/kernel/vdso.c
  30. 0 2
      arch/s390/lib/delay.c
  31. 8 8
      arch/s390/lib/uaccess_pt.c
  32. 9 9
      arch/s390/mm/dump_pagetables.c
  33. 3 3
      arch/s390/mm/gup.c
  34. 115 9
      arch/s390/mm/hugetlbpage.c
  35. 1 1
      arch/s390/mm/pageattr.c
  36. 40 43
      arch/s390/mm/pgtable.c
  37. 9 6
      arch/s390/mm/vmem.c
  38. 1 1
      arch/s390/pci/Makefile
  39. 219 356
      arch/s390/pci/pci.c
  40. 99 47
      arch/s390/pci/pci_clp.c
  41. 5 11
      arch/s390/pci/pci_dma.c
  42. 1 1
      arch/s390/pci/pci_event.c
  43. 9 9
      arch/s390/pci/pci_insn.c
  44. 0 142
      arch/s390/pci/pci_msi.c
  45. 27 0
      arch/s390/pci/pci_sysfs.c
  46. 1 1
      drivers/pci/hotplug/Kconfig
  47. 2 61
      drivers/pci/hotplug/s390_pci_hpc.c
  48. 4 4
      drivers/s390/block/dasd_devmap.c
  49. 43 11
      drivers/s390/block/dasd_eckd.c
  50. 12 2
      drivers/s390/block/dasd_erp.c
  51. 1 1
      drivers/s390/char/sclp_config.c
  52. 172 2
      drivers/s390/cio/airq.c
  53. 1 1
      drivers/s390/cio/ccwgroup.c
  54. 23 23
      drivers/s390/cio/cio.c
  55. 0 3
      drivers/s390/cio/cio.h
  56. 1 1
      drivers/s390/cio/cmf.c
  57. 3 1
      drivers/s390/cio/css.c
  58. 0 2
      drivers/s390/cio/css.h
  59. 1 1
      drivers/s390/cio/device.c
  60. 1 1
      drivers/s390/net/qeth_l3_sys.c
  61. 0 4
      include/asm-generic/pgtable.h
  62. 0 3
      mm/rmap.c

+ 11 - 0
arch/s390/Kconfig

@@ -116,6 +116,7 @@ config S390
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	select HAVE_GENERIC_HARDIRQS
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZ4
 	select HAVE_KERNEL_LZ4
@@ -445,6 +446,16 @@ config PCI_NR_FUNCTIONS
 	  This allows you to specify the maximum number of PCI functions which
 	  This allows you to specify the maximum number of PCI functions which
 	  this kernel will support.
 	  this kernel will support.
 
 
+config PCI_NR_MSI
+	int "Maximum number of MSI interrupts (64-32768)"
+	range 64 32768
+	default "256"
+	help
+	  This defines the number of virtual interrupts the kernel will
+	  provide for MSI interrupts. If you configure your system to have
+	  too few drivers will fail to allocate MSI interrupts for all
+	  PCI devices.
+
 source "drivers/pci/Kconfig"
 source "drivers/pci/Kconfig"
 source "drivers/pci/pcie/Kconfig"
 source "drivers/pci/pcie/Kconfig"
 source "drivers/pci/hotplug/Kconfig"
 source "drivers/pci/hotplug/Kconfig"

+ 67 - 0
arch/s390/include/asm/airq.h

@@ -9,6 +9,8 @@
 #ifndef _ASM_S390_AIRQ_H
 #ifndef _ASM_S390_AIRQ_H
 #define _ASM_S390_AIRQ_H
 #define _ASM_S390_AIRQ_H
 
 
+#include <linux/bit_spinlock.h>
+
 struct airq_struct {
 struct airq_struct {
 	struct hlist_node list;		/* Handler queueing. */
 	struct hlist_node list;		/* Handler queueing. */
 	void (*handler)(struct airq_struct *);	/* Thin-interrupt handler */
 	void (*handler)(struct airq_struct *);	/* Thin-interrupt handler */
@@ -23,4 +25,69 @@ struct airq_struct {
 int register_adapter_interrupt(struct airq_struct *airq);
 int register_adapter_interrupt(struct airq_struct *airq);
 void unregister_adapter_interrupt(struct airq_struct *airq);
 void unregister_adapter_interrupt(struct airq_struct *airq);
 
 
+/* Adapter interrupt bit vector */
+struct airq_iv {
+	unsigned long *vector;	/* Adapter interrupt bit vector */
+	unsigned long *avail;	/* Allocation bit mask for the bit vector */
+	unsigned long *bitlock;	/* Lock bit mask for the bit vector */
+	unsigned long *ptr;	/* Pointer associated with each bit */
+	unsigned int *data;	/* 32 bit value associated with each bit */
+	unsigned long bits;	/* Number of bits in the vector */
+	unsigned long end;	/* Number of highest allocated bit + 1 */
+	spinlock_t lock;	/* Lock to protect alloc & free */
+};
+
+#define AIRQ_IV_ALLOC	1	/* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK	2	/* Allocate the lock bit mask */
+#define AIRQ_IV_PTR	4	/* Allocate the ptr array */
+#define AIRQ_IV_DATA	8	/* Allocate the data array */
+
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+void airq_iv_release(struct airq_iv *iv);
+unsigned long airq_iv_alloc_bit(struct airq_iv *iv);
+void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit);
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+			   unsigned long end);
+
+static inline unsigned long airq_iv_end(struct airq_iv *iv)
+{
+	return iv->end;
+}
+
+static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit)
+{
+	const unsigned long be_to_le = BITS_PER_LONG - 1;
+	bit_spin_lock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit)
+{
+	const unsigned long be_to_le = BITS_PER_LONG - 1;
+	bit_spin_unlock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit,
+				    unsigned int data)
+{
+	iv->data[bit] = data;
+}
+
+static inline unsigned int airq_iv_get_data(struct airq_iv *iv,
+					    unsigned long bit)
+{
+	return iv->data[bit];
+}
+
+static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit,
+				   unsigned long ptr)
+{
+	iv->ptr[bit] = ptr;
+}
+
+static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv,
+					    unsigned long bit)
+{
+	return iv->ptr[bit];
+}
+
 #endif /* _ASM_S390_AIRQ_H */
 #endif /* _ASM_S390_AIRQ_H */

+ 6 - 6
arch/s390/include/asm/bitops.h

@@ -216,7 +216,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
 	addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 	addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 	asm volatile(
 	asm volatile(
 		"	oc	%O0(1,%R0),%1"
 		"	oc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
+		: "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc");
 }
 }
 
 
 static inline void 
 static inline void 
@@ -244,7 +244,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr)
 	addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 	addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 	asm volatile(
 	asm volatile(
 		"	nc	%O0(1,%R0),%1"
 		"	nc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" );
+		: "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc");
 }
 }
 
 
 static inline void 
 static inline void 
@@ -271,7 +271,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
 	addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 	addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 	asm volatile(
 	asm volatile(
 		"	xc	%O0(1,%R0),%1"
 		"	xc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
+		: "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc");
 }
 }
 
 
 static inline void 
 static inline void 
@@ -301,7 +301,7 @@ test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr)
 	ch = *(unsigned char *) addr;
 	ch = *(unsigned char *) addr;
 	asm volatile(
 	asm volatile(
 		"	oc	%O0(1,%R0),%1"
 		"	oc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr)	: "Q" (_oi_bitmap[nr & 7])
+		: "+Q" (*(char *) addr)	: "Q" (_oi_bitmap[nr & 7])
 		: "cc", "memory");
 		: "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 	return (ch >> (nr & 7)) & 1;
 }
 }
@@ -320,7 +320,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr)
 	ch = *(unsigned char *) addr;
 	ch = *(unsigned char *) addr;
 	asm volatile(
 	asm volatile(
 		"	nc	%O0(1,%R0),%1"
 		"	nc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr)	: "Q" (_ni_bitmap[nr & 7])
+		: "+Q" (*(char *) addr)	: "Q" (_ni_bitmap[nr & 7])
 		: "cc", "memory");
 		: "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 	return (ch >> (nr & 7)) & 1;
 }
 }
@@ -339,7 +339,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr)
 	ch = *(unsigned char *) addr;
 	ch = *(unsigned char *) addr;
 	asm volatile(
 	asm volatile(
 		"	xc	%O0(1,%R0),%1"
 		"	xc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr)	: "Q" (_oi_bitmap[nr & 7])
+		: "+Q" (*(char *) addr)	: "Q" (_oi_bitmap[nr & 7])
 		: "cc", "memory");
 		: "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 	return (ch >> (nr & 7)) & 1;
 }
 }

+ 1 - 0
arch/s390/include/asm/cio.h

@@ -296,6 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
 	return 0;
 	return 0;
 }
 }
 
 
+void channel_subsystem_reinit(void);
 extern void css_schedule_reprobe(void);
 extern void css_schedule_reprobe(void);
 
 
 extern void reipl_ccw_dev(struct ccw_dev_id *id);
 extern void reipl_ccw_dev(struct ccw_dev_id *id);

+ 5 - 0
arch/s390/include/asm/hardirq.h

@@ -20,4 +20,9 @@
 
 
 #define HARDIRQ_BITS	8
 #define HARDIRQ_BITS	8
 
 
+static inline void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
 #endif /* __ASM_HARDIRQ_H */
 #endif /* __ASM_HARDIRQ_H */

+ 33 - 102
arch/s390/include/asm/hugetlb.h

@@ -17,6 +17,9 @@
 
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte);
 		     pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep);
 
 
 /*
 /*
  * If the arch doesn't supply something else, assume that hugepage
  * If the arch doesn't supply something else, assume that hugepage
@@ -38,147 +41,75 @@ static inline int prepare_hugepage_range(struct file *file,
 int arch_prepare_hugepage(struct page *page);
 int arch_prepare_hugepage(struct page *page);
 void arch_release_hugepage(struct page *page);
 void arch_release_hugepage(struct page *page);
 
 
-static inline pte_t huge_pte_wrprotect(pte_t pte)
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+				  pte_t *ptep)
 {
 {
-	pte_val(pte) |= _PAGE_RO;
-	return pte;
+	pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
 }
 }
 
 
-static inline int huge_pte_none(pte_t pte)
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long address, pte_t *ptep)
 {
 {
-	return (pte_val(pte) & _SEGMENT_ENTRY_INV) &&
-		!(pte_val(pte) & _SEGMENT_ENTRY_RO);
+	huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
 }
 }
 
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
 {
 {
-	pte_t pte = *ptep;
-	unsigned long mask;
-
-	if (!MACHINE_HAS_HPAGE) {
-		ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN);
-		if (ptep) {
-			mask = pte_val(pte) &
-				(_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
-			pte = pte_mkhuge(*ptep);
-			pte_val(pte) |= mask;
-		}
+	int changed = !pte_same(huge_ptep_get(ptep), pte);
+	if (changed) {
+		huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+		set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
 	}
 	}
-	return pte;
+	return changed;
 }
 }
 
 
-static inline void __pmd_csp(pmd_t *pmdp)
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
 {
 {
-	register unsigned long reg2 asm("2") = pmd_val(*pmdp);
-	register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
-					       _SEGMENT_ENTRY_INV;
-	register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
-
-	asm volatile(
-		"	csp %1,%3"
-		: "=m" (*pmdp)
-		: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+	pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
 }
 }
 
 
-static inline void huge_ptep_invalidate(struct mm_struct *mm,
-					unsigned long address, pte_t *ptep)
-{
-	pmd_t *pmdp = (pmd_t *) ptep;
-
-	if (MACHINE_HAS_IDTE)
-		__pmd_idte(address, pmdp);
-	else
-		__pmd_csp(pmdp);
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = huge_ptep_get(ptep);
-
-	huge_ptep_invalidate(mm, addr, ptep);
-	return pte;
-}
-
-#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
-({									    \
-	int __changed = !pte_same(huge_ptep_get(__ptep), __entry);	    \
-	if (__changed) {						    \
-		huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep);	    \
-		set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry);   \
-	}								    \
-	__changed;							    \
-})
-
-#define huge_ptep_set_wrprotect(__mm, __addr, __ptep)			\
-({									\
-	pte_t __pte = huge_ptep_get(__ptep);				\
-	if (huge_pte_write(__pte)) {					\
-		huge_ptep_invalidate(__mm, __addr, __ptep);		\
-		set_huge_pte_at(__mm, __addr, __ptep,			\
-				huge_pte_wrprotect(__pte));		\
-	}								\
-})
-
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-					 unsigned long address, pte_t *ptep)
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
 {
 {
-	huge_ptep_invalidate(vma->vm_mm, address, ptep);
+	return mk_pte(page, pgprot);
 }
 }
 
 
-static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+static inline int huge_pte_none(pte_t pte)
 {
 {
-	pte_t pte;
-	pmd_t pmd;
-
-	pmd = mk_pmd_phys(page_to_phys(page), pgprot);
-	pte_val(pte) = pmd_val(pmd);
-	return pte;
+	return pte_none(pte);
 }
 }
 
 
 static inline int huge_pte_write(pte_t pte)
 static inline int huge_pte_write(pte_t pte)
 {
 {
-	pmd_t pmd;
-
-	pmd_val(pmd) = pte_val(pte);
-	return pmd_write(pmd);
+	return pte_write(pte);
 }
 }
 
 
 static inline int huge_pte_dirty(pte_t pte)
 static inline int huge_pte_dirty(pte_t pte)
 {
 {
-	/* No dirty bit in the segment table entry. */
-	return 0;
+	return pte_dirty(pte);
 }
 }
 
 
 static inline pte_t huge_pte_mkwrite(pte_t pte)
 static inline pte_t huge_pte_mkwrite(pte_t pte)
 {
 {
-	pmd_t pmd;
-
-	pmd_val(pmd) = pte_val(pte);
-	pte_val(pte) = pmd_val(pmd_mkwrite(pmd));
-	return pte;
+	return pte_mkwrite(pte);
 }
 }
 
 
 static inline pte_t huge_pte_mkdirty(pte_t pte)
 static inline pte_t huge_pte_mkdirty(pte_t pte)
 {
 {
-	/* No dirty bit in the segment table entry. */
-	return pte;
+	return pte_mkdirty(pte);
 }
 }
 
 
-static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+static inline pte_t huge_pte_wrprotect(pte_t pte)
 {
 {
-	pmd_t pmd;
-
-	pmd_val(pmd) = pte_val(pte);
-	pte_val(pte) = pmd_val(pmd_modify(pmd, newprot));
-	return pte;
+	return pte_wrprotect(pte);
 }
 }
 
 
-static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
-				  pte_t *ptep)
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
 {
 {
-	pmd_clear((pmd_t *) ptep);
+	return pte_modify(pte, newprot);
 }
 }
 
 
 #endif /* _ASM_S390_HUGETLB_H */
 #endif /* _ASM_S390_HUGETLB_H */

+ 3 - 14
arch/s390/include/asm/hw_irq.h

@@ -4,19 +4,8 @@
 #include <linux/msi.h>
 #include <linux/msi.h>
 #include <linux/pci.h>
 #include <linux/pci.h>
 
 
-static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
-{
-	return __irq_get_msi_desc(irq);
-}
-
-/* Must be called with msi map lock held */
-static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi)
-{
-	if (!msi)
-		return -EINVAL;
-
-	msi->irq = irq;
-	return 0;
-}
+void __init init_airq_interrupts(void);
+void __init init_cio_interrupts(void);
+void __init init_ext_interrupts(void);
 
 
 #endif
 #endif

+ 20 - 15
arch/s390/include/asm/irq.h

@@ -1,17 +1,28 @@
 #ifndef _ASM_IRQ_H
 #ifndef _ASM_IRQ_H
 #define _ASM_IRQ_H
 #define _ASM_IRQ_H
 
 
+#define EXT_INTERRUPT	1
+#define IO_INTERRUPT	2
+#define THIN_INTERRUPT	3
+
+#define NR_IRQS_BASE	4
+
+#ifdef CONFIG_PCI_NR_MSI
+# define NR_IRQS	(NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
+#else
+# define NR_IRQS	NR_IRQS_BASE
+#endif
+
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ		0
+
+#ifndef __ASSEMBLY__
+
 #include <linux/hardirq.h>
 #include <linux/hardirq.h>
 #include <linux/percpu.h>
 #include <linux/percpu.h>
 #include <linux/cache.h>
 #include <linux/cache.h>
 #include <linux/types.h>
 #include <linux/types.h>
 
 
-enum interruption_main_class {
-	EXTERNAL_INTERRUPT,
-	IO_INTERRUPT,
-	NR_IRQS
-};
-
 enum interruption_class {
 enum interruption_class {
 	IRQEXT_CLK,
 	IRQEXT_CLK,
 	IRQEXT_EXC,
 	IRQEXT_EXC,
@@ -72,14 +83,8 @@ void service_subclass_irq_unregister(void);
 void measurement_alert_subclass_register(void);
 void measurement_alert_subclass_register(void);
 void measurement_alert_subclass_unregister(void);
 void measurement_alert_subclass_unregister(void);
 
 
-#ifdef CONFIG_LOCKDEP
-#  define disable_irq_nosync_lockdep(irq)	disable_irq_nosync(irq)
-#  define disable_irq_nosync_lockdep_irqsave(irq, flags) \
-						disable_irq_nosync(irq)
-#  define disable_irq_lockdep(irq)		disable_irq(irq)
-#  define enable_irq_lockdep(irq)		enable_irq(irq)
-#  define enable_irq_lockdep_irqrestore(irq, flags) \
-						enable_irq(irq)
-#endif
+#define irq_canonicalize(irq)  (irq)
+
+#endif /* __ASSEMBLY__ */
 
 
 #endif /* _ASM_IRQ_H */
 #endif /* _ASM_IRQ_H */

+ 1 - 2
arch/s390/include/asm/mmu_context.h

@@ -77,8 +77,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	WARN_ON(atomic_read(&prev->context.attach_count) < 0);
 	WARN_ON(atomic_read(&prev->context.attach_count) < 0);
 	atomic_inc(&next->context.attach_count);
 	atomic_inc(&next->context.attach_count);
 	/* Check for TLBs not flushed yet */
 	/* Check for TLBs not flushed yet */
-	if (next->context.flush_mm)
-		__tlb_flush_mm(next);
+	__tlb_flush_mm_lazy(next);
 }
 }
 
 
 #define enter_lazy_tlb(mm,tsk)	do { } while (0)
 #define enter_lazy_tlb(mm,tsk)	do { } while (0)

+ 0 - 19
arch/s390/include/asm/page.h

@@ -32,16 +32,6 @@
 
 
 void storage_key_init_range(unsigned long start, unsigned long end);
 void storage_key_init_range(unsigned long start, unsigned long end);
 
 
-static inline unsigned long pfmf(unsigned long function, unsigned long address)
-{
-	asm volatile(
-		"	.insn	rre,0xb9af0000,%[function],%[address]"
-		: [address] "+a" (address)
-		: [function] "d" (function)
-		: "memory");
-	return address;
-}
-
 static inline void clear_page(void *page)
 static inline void clear_page(void *page)
 {
 {
 	register unsigned long reg1 asm ("1") = 0;
 	register unsigned long reg1 asm ("1") = 0;
@@ -150,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr)
 #define _PAGE_FP_BIT		0x08	/* HW fetch protection bit	*/
 #define _PAGE_FP_BIT		0x08	/* HW fetch protection bit	*/
 #define _PAGE_ACC_BITS		0xf0	/* HW access control bits	*/
 #define _PAGE_ACC_BITS		0xf0	/* HW access control bits	*/
 
 
-/*
- * Test and clear referenced bit in storage key.
- */
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
-static inline int page_test_and_clear_young(unsigned long pfn)
-{
-	return page_reset_referenced(pfn << PAGE_SHIFT);
-}
-
 struct page;
 struct page;
 void arch_free_page(struct page *page, int order);
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);

+ 21 - 33
arch/s390/include/asm/pci.h

@@ -6,6 +6,7 @@
 /* must be set before including pci_clp.h */
 /* must be set before including pci_clp.h */
 #define PCI_BAR_COUNT	6
 #define PCI_BAR_COUNT	6
 
 
+#include <linux/pci.h>
 #include <asm-generic/pci.h>
 #include <asm-generic/pci.h>
 #include <asm-generic/pci-dma-compat.h>
 #include <asm-generic/pci-dma-compat.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_clp.h>
@@ -53,14 +54,9 @@ struct zpci_fmb {
 	atomic64_t unmapped_pages;
 	atomic64_t unmapped_pages;
 } __packed __aligned(16);
 } __packed __aligned(16);
 
 
-struct msi_map {
-	unsigned long irq;
-	struct msi_desc *msi;
-	struct hlist_node msi_chain;
-};
-
-#define ZPCI_NR_MSI_VECS	64
-#define ZPCI_MSI_MASK		(ZPCI_NR_MSI_VECS - 1)
+#define ZPCI_MSI_VEC_BITS	11
+#define ZPCI_MSI_VEC_MAX	(1 << ZPCI_MSI_VEC_BITS)
+#define ZPCI_MSI_VEC_MASK	(ZPCI_MSI_VEC_MAX - 1)
 
 
 enum zpci_state {
 enum zpci_state {
 	ZPCI_FN_STATE_RESERVED,
 	ZPCI_FN_STATE_RESERVED,
@@ -91,8 +87,7 @@ struct zpci_dev {
 
 
 	/* IRQ stuff */
 	/* IRQ stuff */
 	u64		msi_addr;	/* MSI address */
 	u64		msi_addr;	/* MSI address */
-	struct zdev_irq_map *irq_map;
-	struct msi_map *msi_map[ZPCI_NR_MSI_VECS];
+	struct airq_iv *aibv;		/* adapter interrupt bit vector */
 	unsigned int	aisb;		/* number of the summary bit */
 	unsigned int	aisb;		/* number of the summary bit */
 
 
 	/* DMA stuff */
 	/* DMA stuff */
@@ -122,11 +117,6 @@ struct zpci_dev {
 	struct dentry	*debugfs_perf;
 	struct dentry	*debugfs_perf;
 };
 };
 
 
-struct pci_hp_callback_ops {
-	int (*create_slot)	(struct zpci_dev *zdev);
-	void (*remove_slot)	(struct zpci_dev *zdev);
-};
-
 static inline bool zdev_enabled(struct zpci_dev *zdev)
 static inline bool zdev_enabled(struct zpci_dev *zdev)
 {
 {
 	return (zdev->fh & (1UL << 31)) ? true : false;
 	return (zdev->fh & (1UL << 31)) ? true : false;
@@ -146,32 +136,38 @@ int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 
 
 /* CLP */
 /* CLP */
-int clp_find_pci_devices(void);
+int clp_scan_pci_devices(void);
+int clp_rescan_pci_devices(void);
+int clp_rescan_pci_devices_simple(void);
 int clp_add_pci_device(u32, u32, int);
 int clp_add_pci_device(u32, u32, int);
 int clp_enable_fh(struct zpci_dev *, u8);
 int clp_enable_fh(struct zpci_dev *, u8);
 int clp_disable_fh(struct zpci_dev *);
 int clp_disable_fh(struct zpci_dev *);
 
 
-/* MSI */
-struct msi_desc *__irq_get_msi_desc(unsigned int);
-int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32);
-int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int);
-void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *);
-int zpci_msihash_init(void);
-void zpci_msihash_exit(void);
-
 #ifdef CONFIG_PCI
 #ifdef CONFIG_PCI
 /* Error handling and recovery */
 /* Error handling and recovery */
 void zpci_event_error(void *);
 void zpci_event_error(void *);
 void zpci_event_availability(void *);
 void zpci_event_availability(void *);
+void zpci_rescan(void);
 #else /* CONFIG_PCI */
 #else /* CONFIG_PCI */
 static inline void zpci_event_error(void *e) {}
 static inline void zpci_event_error(void *e) {}
 static inline void zpci_event_availability(void *e) {}
 static inline void zpci_event_availability(void *e) {}
+static inline void zpci_rescan(void) {}
 #endif /* CONFIG_PCI */
 #endif /* CONFIG_PCI */
 
 
+#ifdef CONFIG_HOTPLUG_PCI_S390
+int zpci_init_slot(struct zpci_dev *);
+void zpci_exit_slot(struct zpci_dev *);
+#else /* CONFIG_HOTPLUG_PCI_S390 */
+static inline int zpci_init_slot(struct zpci_dev *zdev)
+{
+	return 0;
+}
+static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
+#endif /* CONFIG_HOTPLUG_PCI_S390 */
+
 /* Helpers */
 /* Helpers */
 struct zpci_dev *get_zdev(struct pci_dev *);
 struct zpci_dev *get_zdev(struct pci_dev *);
 struct zpci_dev *get_zdev_by_fid(u32);
 struct zpci_dev *get_zdev_by_fid(u32);
-bool zpci_fid_present(u32);
 
 
 /* sysfs */
 /* sysfs */
 int zpci_sysfs_add_device(struct device *);
 int zpci_sysfs_add_device(struct device *);
@@ -181,14 +177,6 @@ void zpci_sysfs_remove_device(struct device *);
 int zpci_dma_init(void);
 int zpci_dma_init(void);
 void zpci_dma_exit(void);
 void zpci_dma_exit(void);
 
 
-/* Hotplug */
-extern struct mutex zpci_list_lock;
-extern struct list_head zpci_list;
-extern unsigned int s390_pci_probe;
-
-void zpci_register_hp_ops(struct pci_hp_callback_ops *);
-void zpci_deregister_hp_ops(void);
-
 /* FMB */
 /* FMB */
 int zpci_fmb_enable_device(struct zpci_dev *);
 int zpci_fmb_enable_device(struct zpci_dev *);
 int zpci_fmb_disable_device(struct zpci_dev *);
 int zpci_fmb_disable_device(struct zpci_dev *);

+ 6 - 6
arch/s390/include/asm/pci_insn.h

@@ -79,11 +79,11 @@ struct zpci_fib {
 } __packed;
 } __packed;
 
 
 
 
-int s390pci_mod_fc(u64 req, struct zpci_fib *fib);
-int s390pci_refresh_trans(u64 fn, u64 addr, u64 range);
-int s390pci_load(u64 *data, u64 req, u64 offset);
-int s390pci_store(u64 data, u64 req, u64 offset);
-int s390pci_store_block(const u64 *data, u64 req, u64 offset);
-void set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+int zpci_mod_fc(u64 req, struct zpci_fib *fib);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
 
 
 #endif
 #endif

+ 5 - 5
arch/s390/include/asm/pci_io.h

@@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr)	\
 	u64 data;								\
 	u64 data;								\
 	int rc;									\
 	int rc;									\
 										\
 										\
-	rc = s390pci_load(&data, req, ZPCI_OFFSET(addr));			\
+	rc = zpci_load(&data, req, ZPCI_OFFSET(addr));				\
 	if (rc)									\
 	if (rc)									\
 		data = -1ULL;							\
 		data = -1ULL;							\
 	return (RETTYPE) data;							\
 	return (RETTYPE) data;							\
@@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val,				\
 	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH);		\
 	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH);		\
 	u64 data = (VALTYPE) val;						\
 	u64 data = (VALTYPE) val;						\
 										\
 										\
-	s390pci_store(data, req, ZPCI_OFFSET(addr));				\
+	zpci_store(data, req, ZPCI_OFFSET(addr));				\
 }
 }
 
 
 zpci_read(8, u64)
 zpci_read(8, u64)
@@ -83,7 +83,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len
 		val = 0;		/* let FW report error */
 		val = 0;		/* let FW report error */
 		break;
 		break;
 	}
 	}
-	return s390pci_store(val, req, offset);
+	return zpci_store(val, req, offset);
 }
 }
 
 
 static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
 static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
@@ -91,7 +91,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
 	u64 data;
 	u64 data;
 	int cc;
 	int cc;
 
 
-	cc = s390pci_load(&data, req, offset);
+	cc = zpci_load(&data, req, offset);
 	if (cc)
 	if (cc)
 		goto out;
 		goto out;
 
 
@@ -115,7 +115,7 @@ out:
 
 
 static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
 static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
 {
 {
-	return s390pci_store_block(data, req, offset);
+	return zpci_store_block(data, req, offset);
 }
 }
 
 
 static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
 static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)

+ 341 - 296
arch/s390/include/asm/pgtable.h

@@ -217,63 +217,57 @@ extern unsigned long MODULES_END;
 
 
 /* Hardware bits in the page table entry */
 /* Hardware bits in the page table entry */
 #define _PAGE_CO	0x100		/* HW Change-bit override */
 #define _PAGE_CO	0x100		/* HW Change-bit override */
-#define _PAGE_RO	0x200		/* HW read-only bit  */
+#define _PAGE_PROTECT	0x200		/* HW read-only bit  */
 #define _PAGE_INVALID	0x400		/* HW invalid bit    */
 #define _PAGE_INVALID	0x400		/* HW invalid bit    */
+#define _PAGE_LARGE	0x800		/* Bit to mark a large pte */
 
 
 /* Software bits in the page table entry */
 /* Software bits in the page table entry */
-#define _PAGE_SWT	0x001		/* SW pte type bit t */
-#define _PAGE_SWX	0x002		/* SW pte type bit x */
-#define _PAGE_SWC	0x004		/* SW pte changed bit */
-#define _PAGE_SWR	0x008		/* SW pte referenced bit */
-#define _PAGE_SWW	0x010		/* SW pte write bit */
-#define _PAGE_SPECIAL	0x020		/* SW associated with special page */
+#define _PAGE_PRESENT	0x001		/* SW pte present bit */
+#define _PAGE_TYPE	0x002		/* SW pte type bit */
+#define _PAGE_YOUNG	0x004		/* SW pte young bit */
+#define _PAGE_DIRTY	0x008		/* SW pte dirty bit */
+#define _PAGE_READ	0x010		/* SW pte read bit */
+#define _PAGE_WRITE	0x020		/* SW pte write bit */
+#define _PAGE_SPECIAL	0x040		/* SW associated with special page */
 #define __HAVE_ARCH_PTE_SPECIAL
 #define __HAVE_ARCH_PTE_SPECIAL
 
 
 /* Set of bits not changed in pte_modify */
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK		(PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \
 #define _PAGE_CHG_MASK		(PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \
-				 _PAGE_SWC | _PAGE_SWR)
-
-/* Six different types of pages. */
-#define _PAGE_TYPE_EMPTY	0x400
-#define _PAGE_TYPE_NONE		0x401
-#define _PAGE_TYPE_SWAP		0x403
-#define _PAGE_TYPE_FILE		0x601	/* bit 0x002 is used for offset !! */
-#define _PAGE_TYPE_RO		0x200
-#define _PAGE_TYPE_RW		0x000
+				 _PAGE_DIRTY | _PAGE_YOUNG)
 
 
 /*
 /*
- * Only four types for huge pages, using the invalid bit and protection bit
- * of a segment table entry.
- */
-#define _HPAGE_TYPE_EMPTY	0x020	/* _SEGMENT_ENTRY_INV */
-#define _HPAGE_TYPE_NONE	0x220
-#define _HPAGE_TYPE_RO		0x200	/* _SEGMENT_ENTRY_RO  */
-#define _HPAGE_TYPE_RW		0x000
-
-/*
- * PTE type bits are rather complicated. handle_pte_fault uses pte_present,
- * pte_none and pte_file to find out the pte type WITHOUT holding the page
- * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to
- * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs
- * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards.
- * This change is done while holding the lock, but the intermediate step
- * of a previously valid pte with the hw invalid bit set can be observed by
- * handle_pte_fault. That makes it necessary that all valid pte types with
- * the hw invalid bit set must be distinguishable from the four pte types
- * empty, none, swap and file.
+ * handle_pte_fault uses pte_present, pte_none and pte_file to find out the
+ * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit
+ * is used to distinguish present from not-present ptes. It is changed only
+ * with the page table lock held.
+ *
+ * The following table gives the different possible bit combinations for
+ * the pte hardware and software bits in the last 12 bits of a pte:
  *
  *
- *			irxt  ipte  irxt
- * _PAGE_TYPE_EMPTY	1000   ->   1000
- * _PAGE_TYPE_NONE	1001   ->   1001
- * _PAGE_TYPE_SWAP	1011   ->   1011
- * _PAGE_TYPE_FILE	11?1   ->   11?1
- * _PAGE_TYPE_RO	0100   ->   1100
- * _PAGE_TYPE_RW	0000   ->   1000
+ *				842100000000
+ *				000084210000
+ *				000000008421
+ *				.IR...wrdytp
+ * empty			.10...000000
+ * swap				.10...xxxx10
+ * file				.11...xxxxx0
+ * prot-none, clean, old	.11...000001
+ * prot-none, clean, young	.11...000101
+ * prot-none, dirty, old	.10...001001
+ * prot-none, dirty, young	.10...001101
+ * read-only, clean, old	.11...010001
+ * read-only, clean, young	.01...010101
+ * read-only, dirty, old	.11...011001
+ * read-only, dirty, young	.01...011101
+ * read-write, clean, old	.11...110001
+ * read-write, clean, young	.01...110101
+ * read-write, dirty, old	.10...111001
+ * read-write, dirty, young	.00...111101
  *
  *
- * pte_none is true for bits combinations 1000, 1010, 1100, 1110
- * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001
- * pte_file is true for bits combinations 1101, 1111
- * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
+ * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
+ * pte_none    is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
+ * pte_file    is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600
+ * pte_swap    is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
  */
  */
 
 
 #ifndef CONFIG_64BIT
 #ifndef CONFIG_64BIT
@@ -286,14 +280,25 @@ extern unsigned long MODULES_END;
 #define _ASCE_TABLE_LENGTH	0x7f	/* 128 x 64 entries = 8k	    */
 #define _ASCE_TABLE_LENGTH	0x7f	/* 128 x 64 entries = 8k	    */
 
 
 /* Bits in the segment table entry */
 /* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS	0x7fffffffUL	/* Valid segment table bits */
 #define _SEGMENT_ENTRY_ORIGIN	0x7fffffc0UL	/* page table origin	    */
 #define _SEGMENT_ENTRY_ORIGIN	0x7fffffc0UL	/* page table origin	    */
-#define _SEGMENT_ENTRY_RO	0x200	/* page protection bit		    */
-#define _SEGMENT_ENTRY_INV	0x20	/* invalid segment table entry	    */
+#define _SEGMENT_ENTRY_PROTECT	0x200	/* page protection bit		    */
+#define _SEGMENT_ENTRY_INVALID	0x20	/* invalid segment table entry	    */
 #define _SEGMENT_ENTRY_COMMON	0x10	/* common segment bit		    */
 #define _SEGMENT_ENTRY_COMMON	0x10	/* common segment bit		    */
 #define _SEGMENT_ENTRY_PTL	0x0f	/* page table length		    */
 #define _SEGMENT_ENTRY_PTL	0x0f	/* page table length		    */
+#define _SEGMENT_ENTRY_NONE	_SEGMENT_ENTRY_PROTECT
 
 
 #define _SEGMENT_ENTRY		(_SEGMENT_ENTRY_PTL)
 #define _SEGMENT_ENTRY		(_SEGMENT_ENTRY_PTL)
-#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INV)
+#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INVALID)
+
+/*
+ * Segment table entry encoding (I = invalid, R = read-only bit):
+ *		..R...I.....
+ * prot-none	..1...1.....
+ * read-only	..1...0.....
+ * read-write	..0...0.....
+ * empty	..0...1.....
+ */
 
 
 /* Page status table bits for virtualization */
 /* Page status table bits for virtualization */
 #define PGSTE_ACC_BITS	0xf0000000UL
 #define PGSTE_ACC_BITS	0xf0000000UL
@@ -303,9 +308,7 @@ extern unsigned long MODULES_END;
 #define PGSTE_HC_BIT	0x00200000UL
 #define PGSTE_HC_BIT	0x00200000UL
 #define PGSTE_GR_BIT	0x00040000UL
 #define PGSTE_GR_BIT	0x00040000UL
 #define PGSTE_GC_BIT	0x00020000UL
 #define PGSTE_GC_BIT	0x00020000UL
-#define PGSTE_UR_BIT	0x00008000UL
-#define PGSTE_UC_BIT	0x00004000UL	/* user dirty (migration) */
-#define PGSTE_IN_BIT	0x00002000UL	/* IPTE notify bit */
+#define PGSTE_IN_BIT	0x00008000UL	/* IPTE notify bit */
 
 
 #else /* CONFIG_64BIT */
 #else /* CONFIG_64BIT */
 
 
@@ -324,8 +327,8 @@ extern unsigned long MODULES_END;
 
 
 /* Bits in the region table entry */
 /* Bits in the region table entry */
 #define _REGION_ENTRY_ORIGIN	~0xfffUL/* region/segment table origin	    */
 #define _REGION_ENTRY_ORIGIN	~0xfffUL/* region/segment table origin	    */
-#define _REGION_ENTRY_RO	0x200	/* region protection bit	    */
-#define _REGION_ENTRY_INV	0x20	/* invalid region table entry	    */
+#define _REGION_ENTRY_PROTECT	0x200	/* region protection bit	    */
+#define _REGION_ENTRY_INVALID	0x20	/* invalid region table entry	    */
 #define _REGION_ENTRY_TYPE_MASK	0x0c	/* region/segment table type mask   */
 #define _REGION_ENTRY_TYPE_MASK	0x0c	/* region/segment table type mask   */
 #define _REGION_ENTRY_TYPE_R1	0x0c	/* region first table type	    */
 #define _REGION_ENTRY_TYPE_R1	0x0c	/* region first table type	    */
 #define _REGION_ENTRY_TYPE_R2	0x08	/* region second table type	    */
 #define _REGION_ENTRY_TYPE_R2	0x08	/* region second table type	    */
@@ -333,29 +336,47 @@ extern unsigned long MODULES_END;
 #define _REGION_ENTRY_LENGTH	0x03	/* region third length		    */
 #define _REGION_ENTRY_LENGTH	0x03	/* region third length		    */
 
 
 #define _REGION1_ENTRY		(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
 #define _REGION1_ENTRY		(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
-#define _REGION1_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV)
+#define _REGION1_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
 #define _REGION2_ENTRY		(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
 #define _REGION2_ENTRY		(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
-#define _REGION2_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV)
+#define _REGION2_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
 #define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
 #define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
-#define _REGION3_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV)
+#define _REGION3_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
 
 
 #define _REGION3_ENTRY_LARGE	0x400	/* RTTE-format control, large page  */
 #define _REGION3_ENTRY_LARGE	0x400	/* RTTE-format control, large page  */
 #define _REGION3_ENTRY_RO	0x200	/* page protection bit		    */
 #define _REGION3_ENTRY_RO	0x200	/* page protection bit		    */
 #define _REGION3_ENTRY_CO	0x100	/* change-recording override	    */
 #define _REGION3_ENTRY_CO	0x100	/* change-recording override	    */
 
 
 /* Bits in the segment table entry */
 /* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
 #define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* segment table origin		    */
 #define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* segment table origin		    */
-#define _SEGMENT_ENTRY_RO	0x200	/* page protection bit		    */
-#define _SEGMENT_ENTRY_INV	0x20	/* invalid segment table entry	    */
+#define _SEGMENT_ENTRY_PROTECT	0x200	/* page protection bit		    */
+#define _SEGMENT_ENTRY_INVALID	0x20	/* invalid segment table entry	    */
 
 
 #define _SEGMENT_ENTRY		(0)
 #define _SEGMENT_ENTRY		(0)
-#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INV)
+#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INVALID)
 
 
 #define _SEGMENT_ENTRY_LARGE	0x400	/* STE-format control, large page   */
 #define _SEGMENT_ENTRY_LARGE	0x400	/* STE-format control, large page   */
 #define _SEGMENT_ENTRY_CO	0x100	/* change-recording override   */
 #define _SEGMENT_ENTRY_CO	0x100	/* change-recording override   */
+#define _SEGMENT_ENTRY_SPLIT	0x001	/* THP splitting bit */
+#define _SEGMENT_ENTRY_YOUNG	0x002	/* SW segment young bit */
+#define _SEGMENT_ENTRY_NONE	_SEGMENT_ENTRY_YOUNG
+
+/*
+ * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
+ *			..R...I...y.
+ * prot-none, old	..0...1...1.
+ * prot-none, young	..1...1...1.
+ * read-only, old	..1...1...0.
+ * read-only, young	..1...0...1.
+ * read-write, old	..0...1...0.
+ * read-write, young	..0...0...1.
+ * The segment table origin is used to distinguish empty (origin==0) from
+ * read-write, old segment table entries (origin!=0)
+ */
+
 #define _SEGMENT_ENTRY_SPLIT_BIT 0	/* THP splitting bit number */
 #define _SEGMENT_ENTRY_SPLIT_BIT 0	/* THP splitting bit number */
-#define _SEGMENT_ENTRY_SPLIT	(1UL << _SEGMENT_ENTRY_SPLIT_BIT)
 
 
 /* Set of bits not changed in pmd_modify */
 /* Set of bits not changed in pmd_modify */
 #define _SEGMENT_CHG_MASK	(_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
 #define _SEGMENT_CHG_MASK	(_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
@@ -369,9 +390,7 @@ extern unsigned long MODULES_END;
 #define PGSTE_HC_BIT	0x0020000000000000UL
 #define PGSTE_HC_BIT	0x0020000000000000UL
 #define PGSTE_GR_BIT	0x0004000000000000UL
 #define PGSTE_GR_BIT	0x0004000000000000UL
 #define PGSTE_GC_BIT	0x0002000000000000UL
 #define PGSTE_GC_BIT	0x0002000000000000UL
-#define PGSTE_UR_BIT	0x0000800000000000UL
-#define PGSTE_UC_BIT	0x0000400000000000UL	/* user dirty (migration) */
-#define PGSTE_IN_BIT	0x0000200000000000UL	/* IPTE notify bit */
+#define PGSTE_IN_BIT	0x0000800000000000UL	/* IPTE notify bit */
 
 
 #endif /* CONFIG_64BIT */
 #endif /* CONFIG_64BIT */
 
 
@@ -386,14 +405,18 @@ extern unsigned long MODULES_END;
 /*
 /*
  * Page protection definitions.
  * Page protection definitions.
  */
  */
-#define PAGE_NONE	__pgprot(_PAGE_TYPE_NONE)
-#define PAGE_RO		__pgprot(_PAGE_TYPE_RO)
-#define PAGE_RW		__pgprot(_PAGE_TYPE_RO | _PAGE_SWW)
-#define PAGE_RWC	__pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC)
-
-#define PAGE_KERNEL	PAGE_RWC
-#define PAGE_SHARED	PAGE_KERNEL
-#define PAGE_COPY	PAGE_RO
+#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_INVALID)
+#define PAGE_READ	__pgprot(_PAGE_PRESENT | _PAGE_READ | \
+				 _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_WRITE	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_INVALID | _PAGE_PROTECT)
+
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+				 _PAGE_PROTECT)
 
 
 /*
 /*
  * On s390 the page table entry has an invalid bit and a read-only bit.
  * On s390 the page table entry has an invalid bit and a read-only bit.
@@ -402,35 +425,31 @@ extern unsigned long MODULES_END;
  */
  */
          /*xwr*/
          /*xwr*/
 #define __P000	PAGE_NONE
 #define __P000	PAGE_NONE
-#define __P001	PAGE_RO
-#define __P010	PAGE_RO
-#define __P011	PAGE_RO
-#define __P100	PAGE_RO
-#define __P101	PAGE_RO
-#define __P110	PAGE_RO
-#define __P111	PAGE_RO
+#define __P001	PAGE_READ
+#define __P010	PAGE_READ
+#define __P011	PAGE_READ
+#define __P100	PAGE_READ
+#define __P101	PAGE_READ
+#define __P110	PAGE_READ
+#define __P111	PAGE_READ
 
 
 #define __S000	PAGE_NONE
 #define __S000	PAGE_NONE
-#define __S001	PAGE_RO
-#define __S010	PAGE_RW
-#define __S011	PAGE_RW
-#define __S100	PAGE_RO
-#define __S101	PAGE_RO
-#define __S110	PAGE_RW
-#define __S111	PAGE_RW
+#define __S001	PAGE_READ
+#define __S010	PAGE_WRITE
+#define __S011	PAGE_WRITE
+#define __S100	PAGE_READ
+#define __S101	PAGE_READ
+#define __S110	PAGE_WRITE
+#define __S111	PAGE_WRITE
 
 
 /*
 /*
  * Segment entry (large page) protection definitions.
  * Segment entry (large page) protection definitions.
  */
  */
-#define SEGMENT_NONE	__pgprot(_HPAGE_TYPE_NONE)
-#define SEGMENT_RO	__pgprot(_HPAGE_TYPE_RO)
-#define SEGMENT_RW	__pgprot(_HPAGE_TYPE_RW)
-
-static inline int mm_exclusive(struct mm_struct *mm)
-{
-	return likely(mm == current->active_mm &&
-		      atomic_read(&mm->context.attach_count) <= 1);
-}
+#define SEGMENT_NONE	__pgprot(_SEGMENT_ENTRY_INVALID | \
+				 _SEGMENT_ENTRY_NONE)
+#define SEGMENT_READ	__pgprot(_SEGMENT_ENTRY_INVALID | \
+				 _SEGMENT_ENTRY_PROTECT)
+#define SEGMENT_WRITE	__pgprot(_SEGMENT_ENTRY_INVALID)
 
 
 static inline int mm_has_pgste(struct mm_struct *mm)
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
 {
@@ -467,7 +486,7 @@ static inline int pgd_none(pgd_t pgd)
 {
 {
 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
 		return 0;
 		return 0;
-	return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL;
+	return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
 }
 }
 
 
 static inline int pgd_bad(pgd_t pgd)
 static inline int pgd_bad(pgd_t pgd)
@@ -478,7 +497,7 @@ static inline int pgd_bad(pgd_t pgd)
 	 * invalid for either table entry.
 	 * invalid for either table entry.
 	 */
 	 */
 	unsigned long mask =
 	unsigned long mask =
-		~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+		~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
 		~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
 		~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
 	return (pgd_val(pgd) & mask) != 0;
 	return (pgd_val(pgd) & mask) != 0;
 }
 }
@@ -494,7 +513,7 @@ static inline int pud_none(pud_t pud)
 {
 {
 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
 		return 0;
 		return 0;
-	return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL;
+	return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL;
 }
 }
 
 
 static inline int pud_large(pud_t pud)
 static inline int pud_large(pud_t pud)
@@ -512,7 +531,7 @@ static inline int pud_bad(pud_t pud)
 	 * invalid for either table entry.
 	 * invalid for either table entry.
 	 */
 	 */
 	unsigned long mask =
 	unsigned long mask =
-		~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+		~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
 		~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
 		~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
 	return (pud_val(pud) & mask) != 0;
 	return (pud_val(pud) & mask) != 0;
 }
 }
@@ -521,30 +540,36 @@ static inline int pud_bad(pud_t pud)
 
 
 static inline int pmd_present(pmd_t pmd)
 static inline int pmd_present(pmd_t pmd)
 {
 {
-	unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO;
-	return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE ||
-	       !(pmd_val(pmd) & _SEGMENT_ENTRY_INV);
+	return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID;
 }
 }
 
 
 static inline int pmd_none(pmd_t pmd)
 static inline int pmd_none(pmd_t pmd)
 {
 {
-	return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) &&
-	       !(pmd_val(pmd) & _SEGMENT_ENTRY_RO);
+	return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID;
 }
 }
 
 
 static inline int pmd_large(pmd_t pmd)
 static inline int pmd_large(pmd_t pmd)
 {
 {
 #ifdef CONFIG_64BIT
 #ifdef CONFIG_64BIT
-	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
 #else
 #else
 	return 0;
 	return 0;
 #endif
 #endif
 }
 }
 
 
+static inline int pmd_prot_none(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
+		(pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
+}
+
 static inline int pmd_bad(pmd_t pmd)
 static inline int pmd_bad(pmd_t pmd)
 {
 {
-	unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV;
-	return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
+#ifdef CONFIG_64BIT
+	if (pmd_large(pmd))
+		return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
+#endif
+	return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
 }
 }
 
 
 #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
 #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
@@ -563,31 +588,40 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
 #define __HAVE_ARCH_PMD_WRITE
 #define __HAVE_ARCH_PMD_WRITE
 static inline int pmd_write(pmd_t pmd)
 static inline int pmd_write(pmd_t pmd)
 {
 {
-	return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0;
+	if (pmd_prot_none(pmd))
+		return 0;
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
 }
 }
 
 
 static inline int pmd_young(pmd_t pmd)
 static inline int pmd_young(pmd_t pmd)
 {
 {
-	return 0;
+	int young = 0;
+#ifdef CONFIG_64BIT
+	if (pmd_prot_none(pmd))
+		young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
+	else
+		young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+#endif
+	return young;
 }
 }
 
 
-static inline int pte_none(pte_t pte)
+static inline int pte_present(pte_t pte)
 {
 {
-	return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
+	/* Bit pattern: (pte & 0x001) == 0x001 */
+	return (pte_val(pte) & _PAGE_PRESENT) != 0;
 }
 }
 
 
-static inline int pte_present(pte_t pte)
+static inline int pte_none(pte_t pte)
 {
 {
-	unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX;
-	return (pte_val(pte) & mask) == _PAGE_TYPE_NONE ||
-		(!(pte_val(pte) & _PAGE_INVALID) &&
-		 !(pte_val(pte) & _PAGE_SWT));
+	/* Bit pattern: pte == 0x400 */
+	return pte_val(pte) == _PAGE_INVALID;
 }
 }
 
 
 static inline int pte_file(pte_t pte)
 static inline int pte_file(pte_t pte)
 {
 {
-	unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT;
-	return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;
+	/* Bit pattern: (pte & 0x601) == 0x600 */
+	return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT))
+		== (_PAGE_INVALID | _PAGE_PROTECT);
 }
 }
 
 
 static inline int pte_special(pte_t pte)
 static inline int pte_special(pte_t pte)
@@ -634,6 +668,15 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
 #endif
 #endif
 }
 }
 
 
+static inline pgste_t pgste_get(pte_t *ptep)
+{
+	unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+	return __pgste(pgste);
+}
+
 static inline void pgste_set(pte_t *ptep, pgste_t pgste)
 static inline void pgste_set(pte_t *ptep, pgste_t pgste)
 {
 {
 #ifdef CONFIG_PGSTE
 #ifdef CONFIG_PGSTE
@@ -644,33 +687,28 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
 static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 {
 {
 #ifdef CONFIG_PGSTE
 #ifdef CONFIG_PGSTE
-	unsigned long address, bits;
-	unsigned char skey;
+	unsigned long address, bits, skey;
 
 
 	if (pte_val(*ptep) & _PAGE_INVALID)
 	if (pte_val(*ptep) & _PAGE_INVALID)
 		return pgste;
 		return pgste;
 	address = pte_val(*ptep) & PAGE_MASK;
 	address = pte_val(*ptep) & PAGE_MASK;
-	skey = page_get_storage_key(address);
+	skey = (unsigned long) page_get_storage_key(address);
 	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
 	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
-	/* Clear page changed & referenced bit in the storage key */
-	if (bits & _PAGE_CHANGED)
+	if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
+		/* Transfer dirty + referenced bit to host bits in pgste */
+		pgste_val(pgste) |= bits << 52;
 		page_set_storage_key(address, skey ^ bits, 0);
 		page_set_storage_key(address, skey ^ bits, 0);
-	else if (bits)
+	} else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
+		   (bits & _PAGE_REFERENCED)) {
+		/* Transfer referenced bit to host bit in pgste */
+		pgste_val(pgste) |= PGSTE_HR_BIT;
 		page_reset_referenced(address);
 		page_reset_referenced(address);
+	}
 	/* Transfer page changed & referenced bit to guest bits in pgste */
 	/* Transfer page changed & referenced bit to guest bits in pgste */
 	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
 	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
-	/* Get host changed & referenced bits from pgste */
-	bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52;
-	/* Transfer page changed & referenced bit to kvm user bits */
-	pgste_val(pgste) |= bits << 45;		/* PGSTE_UR_BIT & PGSTE_UC_BIT */
-	/* Clear relevant host bits in pgste. */
-	pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT);
-	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
 	/* Copy page access key and fetch protection bit to pgste */
 	/* Copy page access key and fetch protection bit to pgste */
-	pgste_val(pgste) |=
-		(unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
-	/* Transfer referenced bit to pte */
-	pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1;
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
 #endif
 #endif
 	return pgste;
 	return pgste;
 
 
@@ -679,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
 static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
 {
 {
 #ifdef CONFIG_PGSTE
 #ifdef CONFIG_PGSTE
-	int young;
-
 	if (pte_val(*ptep) & _PAGE_INVALID)
 	if (pte_val(*ptep) & _PAGE_INVALID)
 		return pgste;
 		return pgste;
 	/* Get referenced bit from storage key */
 	/* Get referenced bit from storage key */
-	young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
-	if (young)
-		pgste_val(pgste) |= PGSTE_GR_BIT;
-	/* Get host referenced bit from pgste */
-	if (pgste_val(pgste) & PGSTE_HR_BIT) {
-		pgste_val(pgste) &= ~PGSTE_HR_BIT;
-		young = 1;
-	}
-	/* Transfer referenced bit to kvm user bits and pte */
-	if (young) {
-		pgste_val(pgste) |= PGSTE_UR_BIT;
-		pte_val(*ptep) |= _PAGE_SWR;
-	}
+	if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
+		pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
 #endif
 #endif
 	return pgste;
 	return pgste;
 }
 }
@@ -723,13 +748,13 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
 
 
 static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
 static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
 {
 {
-	if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) {
+	if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_WRITE)) {
 		/*
 		/*
 		 * Without enhanced suppression-on-protection force
 		 * Without enhanced suppression-on-protection force
 		 * the dirty bit on for all writable ptes.
 		 * the dirty bit on for all writable ptes.
 		 */
 		 */
-		pte_val(entry) |= _PAGE_SWC;
-		pte_val(entry) &= ~_PAGE_RO;
+		pte_val(entry) |= _PAGE_DIRTY;
+		pte_val(entry) &= ~_PAGE_PROTECT;
 	}
 	}
 	*ptep = entry;
 	*ptep = entry;
 }
 }
@@ -841,21 +866,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
  */
  */
 static inline int pte_write(pte_t pte)
 static inline int pte_write(pte_t pte)
 {
 {
-	return (pte_val(pte) & _PAGE_SWW) != 0;
+	return (pte_val(pte) & _PAGE_WRITE) != 0;
 }
 }
 
 
 static inline int pte_dirty(pte_t pte)
 static inline int pte_dirty(pte_t pte)
 {
 {
-	return (pte_val(pte) & _PAGE_SWC) != 0;
+	return (pte_val(pte) & _PAGE_DIRTY) != 0;
 }
 }
 
 
 static inline int pte_young(pte_t pte)
 static inline int pte_young(pte_t pte)
 {
 {
-#ifdef CONFIG_PGSTE
-	if (pte_val(pte) & _PAGE_SWR)
-		return 1;
-#endif
-	return 0;
+	return (pte_val(pte) & _PAGE_YOUNG) != 0;
 }
 }
 
 
 /*
 /*
@@ -880,12 +901,12 @@ static inline void pud_clear(pud_t *pud)
 
 
 static inline void pmd_clear(pmd_t *pmdp)
 static inline void pmd_clear(pmd_t *pmdp)
 {
 {
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+	pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID;
 }
 }
 
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 {
-	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	pte_val(*ptep) = _PAGE_INVALID;
 }
 }
 
 
 /*
 /*
@@ -896,55 +917,63 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 {
 	pte_val(pte) &= _PAGE_CHG_MASK;
 	pte_val(pte) &= _PAGE_CHG_MASK;
 	pte_val(pte) |= pgprot_val(newprot);
 	pte_val(pte) |= pgprot_val(newprot);
-	if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW))
-		pte_val(pte) &= ~_PAGE_RO;
+	/*
+	 * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
+	 * invalid bit set, clear it again for readable, young pages
+	 */
+	if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
+		pte_val(pte) &= ~_PAGE_INVALID;
+	/*
+	 * newprot for PAGE_READ and PAGE_WRITE has the page protection
+	 * bit set, clear it again for writable, dirty pages
+	 */
+	if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
+		pte_val(pte) &= ~_PAGE_PROTECT;
 	return pte;
 	return pte;
 }
 }
 
 
 static inline pte_t pte_wrprotect(pte_t pte)
 static inline pte_t pte_wrprotect(pte_t pte)
 {
 {
-	pte_val(pte) &= ~_PAGE_SWW;
-	/* Do not clobber _PAGE_TYPE_NONE pages!  */
-	if (!(pte_val(pte) & _PAGE_INVALID))
-		pte_val(pte) |= _PAGE_RO;
+	pte_val(pte) &= ~_PAGE_WRITE;
+	pte_val(pte) |= _PAGE_PROTECT;
 	return pte;
 	return pte;
 }
 }
 
 
 static inline pte_t pte_mkwrite(pte_t pte)
 static inline pte_t pte_mkwrite(pte_t pte)
 {
 {
-	pte_val(pte) |= _PAGE_SWW;
-	if (pte_val(pte) & _PAGE_SWC)
-		pte_val(pte) &= ~_PAGE_RO;
+	pte_val(pte) |= _PAGE_WRITE;
+	if (pte_val(pte) & _PAGE_DIRTY)
+		pte_val(pte) &= ~_PAGE_PROTECT;
 	return pte;
 	return pte;
 }
 }
 
 
 static inline pte_t pte_mkclean(pte_t pte)
 static inline pte_t pte_mkclean(pte_t pte)
 {
 {
-	pte_val(pte) &= ~_PAGE_SWC;
-	/* Do not clobber _PAGE_TYPE_NONE pages!  */
-	if (!(pte_val(pte) & _PAGE_INVALID))
-		pte_val(pte) |= _PAGE_RO;
+	pte_val(pte) &= ~_PAGE_DIRTY;
+	pte_val(pte) |= _PAGE_PROTECT;
 	return pte;
 	return pte;
 }
 }
 
 
 static inline pte_t pte_mkdirty(pte_t pte)
 static inline pte_t pte_mkdirty(pte_t pte)
 {
 {
-	pte_val(pte) |= _PAGE_SWC;
-	if (pte_val(pte) & _PAGE_SWW)
-		pte_val(pte) &= ~_PAGE_RO;
+	pte_val(pte) |= _PAGE_DIRTY;
+	if (pte_val(pte) & _PAGE_WRITE)
+		pte_val(pte) &= ~_PAGE_PROTECT;
 	return pte;
 	return pte;
 }
 }
 
 
 static inline pte_t pte_mkold(pte_t pte)
 static inline pte_t pte_mkold(pte_t pte)
 {
 {
-#ifdef CONFIG_PGSTE
-	pte_val(pte) &= ~_PAGE_SWR;
-#endif
+	pte_val(pte) &= ~_PAGE_YOUNG;
+	pte_val(pte) |= _PAGE_INVALID;
 	return pte;
 	return pte;
 }
 }
 
 
 static inline pte_t pte_mkyoung(pte_t pte)
 static inline pte_t pte_mkyoung(pte_t pte)
 {
 {
+	pte_val(pte) |= _PAGE_YOUNG;
+	if (pte_val(pte) & _PAGE_READ)
+		pte_val(pte) &= ~_PAGE_INVALID;
 	return pte;
 	return pte;
 }
 }
 
 
@@ -957,7 +986,7 @@ static inline pte_t pte_mkspecial(pte_t pte)
 #ifdef CONFIG_HUGETLB_PAGE
 #ifdef CONFIG_HUGETLB_PAGE
 static inline pte_t pte_mkhuge(pte_t pte)
 static inline pte_t pte_mkhuge(pte_t pte)
 {
 {
-	pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO);
+	pte_val(pte) |= _PAGE_LARGE;
 	return pte;
 	return pte;
 }
 }
 #endif
 #endif
@@ -974,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
 	if (mm_has_pgste(mm)) {
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_update_all(ptep, pgste);
 		pgste = pgste_update_all(ptep, pgste);
-		dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
-		pgste_val(pgste) &= ~PGSTE_UC_BIT;
+		dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
+		pgste_val(pgste) &= ~PGSTE_HC_BIT;
 		pgste_set_unlock(ptep, pgste);
 		pgste_set_unlock(ptep, pgste);
 		return dirty;
 		return dirty;
 	}
 	}
@@ -994,59 +1023,75 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
 	if (mm_has_pgste(mm)) {
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_update_young(ptep, pgste);
 		pgste = pgste_update_young(ptep, pgste);
-		young = !!(pgste_val(pgste) & PGSTE_UR_BIT);
-		pgste_val(pgste) &= ~PGSTE_UR_BIT;
+		young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
+		pgste_val(pgste) &= ~PGSTE_HR_BIT;
 		pgste_set_unlock(ptep, pgste);
 		pgste_set_unlock(ptep, pgste);
 	}
 	}
 	return young;
 	return young;
 }
 }
 
 
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
+{
+	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+#ifndef CONFIG_64BIT
+		/* pto must point to the start of the segment table */
+		pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
+#else
+		/* ipte in zarch mode can do the math */
+		pte_t *pto = ptep;
+#endif
+		asm volatile(
+			"	ipte	%2,%3"
+			: "=m" (*ptep) : "m" (*ptep),
+			  "a" (pto), "a" (address));
+	}
+}
+
+static inline void ptep_flush_lazy(struct mm_struct *mm,
+				   unsigned long address, pte_t *ptep)
+{
+	int active = (mm == current->active_mm) ? 1 : 0;
+
+	if (atomic_read(&mm->context.attach_count) > active)
+		__ptep_ipte(address, ptep);
+	else
+		mm->context.flush_mm = 1;
+}
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long addr, pte_t *ptep)
 					    unsigned long addr, pte_t *ptep)
 {
 {
 	pgste_t pgste;
 	pgste_t pgste;
 	pte_t pte;
 	pte_t pte;
+	int young;
 
 
 	if (mm_has_pgste(vma->vm_mm)) {
 	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_get_lock(ptep);
-		pgste = pgste_update_young(ptep, pgste);
-		pte = *ptep;
-		*ptep = pte_mkold(pte);
-		pgste_set_unlock(ptep, pgste);
-		return pte_young(pte);
+		pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
 	}
 	}
-	return 0;
+
+	pte = *ptep;
+	__ptep_ipte(addr, ptep);
+	young = pte_young(pte);
+	pte = pte_mkold(pte);
+
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste_set_pte(ptep, pte);
+		pgste_set_unlock(ptep, pgste);
+	} else
+		*ptep = pte;
+
+	return young;
 }
 }
 
 
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 					 unsigned long address, pte_t *ptep)
 					 unsigned long address, pte_t *ptep)
 {
 {
-	/* No need to flush TLB
-	 * On s390 reference bits are in storage key and never in TLB
-	 * With virtualization we handle the reference bit, without we
-	 * we can simply return */
 	return ptep_test_and_clear_young(vma, address, ptep);
 	return ptep_test_and_clear_young(vma, address, ptep);
 }
 }
 
 
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
-{
-	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
-#ifndef CONFIG_64BIT
-		/* pto must point to the start of the segment table */
-		pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
-#else
-		/* ipte in zarch mode can do the math */
-		pte_t *pto = ptep;
-#endif
-		asm volatile(
-			"	ipte	%2,%3"
-			: "=m" (*ptep) : "m" (*ptep),
-			  "a" (pto), "a" (address));
-	}
-}
-
 /*
 /*
  * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
  * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
  * both clear the TLB for the unmapped pte. The reason is that
  * both clear the TLB for the unmapped pte. The reason is that
@@ -1067,16 +1112,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	pgste_t pgste;
 	pgste_t pgste;
 	pte_t pte;
 	pte_t pte;
 
 
-	mm->context.flush_mm = 1;
 	if (mm_has_pgste(mm)) {
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 	}
 	}
 
 
 	pte = *ptep;
 	pte = *ptep;
-	if (!mm_exclusive(mm))
-		__ptep_ipte(address, ptep);
-	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	ptep_flush_lazy(mm, address, ptep);
+	pte_val(*ptep) = _PAGE_INVALID;
 
 
 	if (mm_has_pgste(mm)) {
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_update_all(&pte, pgste);
 		pgste = pgste_update_all(&pte, pgste);
@@ -1093,15 +1136,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
 	pgste_t pgste;
 	pgste_t pgste;
 	pte_t pte;
 	pte_t pte;
 
 
-	mm->context.flush_mm = 1;
 	if (mm_has_pgste(mm)) {
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_get_lock(ptep);
 		pgste_ipte_notify(mm, address, ptep, pgste);
 		pgste_ipte_notify(mm, address, ptep, pgste);
 	}
 	}
 
 
 	pte = *ptep;
 	pte = *ptep;
-	if (!mm_exclusive(mm))
-		__ptep_ipte(address, ptep);
+	ptep_flush_lazy(mm, address, ptep);
+	pte_val(*ptep) |= _PAGE_INVALID;
 
 
 	if (mm_has_pgste(mm)) {
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_update_all(&pte, pgste);
 		pgste = pgste_update_all(&pte, pgste);
@@ -1117,7 +1159,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 	pgste_t pgste;
 	pgste_t pgste;
 
 
 	if (mm_has_pgste(mm)) {
 	if (mm_has_pgste(mm)) {
-		pgste = *(pgste_t *)(ptep + PTRS_PER_PTE);
+		pgste = pgste_get(ptep);
 		pgste_set_key(ptep, pgste, pte);
 		pgste_set_key(ptep, pgste, pte);
 		pgste_set_pte(ptep, pte);
 		pgste_set_pte(ptep, pte);
 		pgste_set_unlock(ptep, pgste);
 		pgste_set_unlock(ptep, pgste);
@@ -1139,7 +1181,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 
 
 	pte = *ptep;
 	pte = *ptep;
 	__ptep_ipte(address, ptep);
 	__ptep_ipte(address, ptep);
-	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	pte_val(*ptep) = _PAGE_INVALID;
 
 
 	if (mm_has_pgste(vma->vm_mm)) {
 	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_update_all(&pte, pgste);
 		pgste = pgste_update_all(&pte, pgste);
@@ -1163,18 +1205,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 	pgste_t pgste;
 	pgste_t pgste;
 	pte_t pte;
 	pte_t pte;
 
 
-	if (mm_has_pgste(mm)) {
+	if (!full && mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_get_lock(ptep);
-		if (!full)
-			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 	}
 	}
 
 
 	pte = *ptep;
 	pte = *ptep;
 	if (!full)
 	if (!full)
-		__ptep_ipte(address, ptep);
-	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+		ptep_flush_lazy(mm, address, ptep);
+	pte_val(*ptep) = _PAGE_INVALID;
 
 
-	if (mm_has_pgste(mm)) {
+	if (!full && mm_has_pgste(mm)) {
 		pgste = pgste_update_all(&pte, pgste);
 		pgste = pgste_update_all(&pte, pgste);
 		pgste_set_unlock(ptep, pgste);
 		pgste_set_unlock(ptep, pgste);
 	}
 	}
@@ -1189,14 +1230,12 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
 	pte_t pte = *ptep;
 	pte_t pte = *ptep;
 
 
 	if (pte_write(pte)) {
 	if (pte_write(pte)) {
-		mm->context.flush_mm = 1;
 		if (mm_has_pgste(mm)) {
 		if (mm_has_pgste(mm)) {
 			pgste = pgste_get_lock(ptep);
 			pgste = pgste_get_lock(ptep);
 			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 		}
 		}
 
 
-		if (!mm_exclusive(mm))
-			__ptep_ipte(address, ptep);
+		ptep_flush_lazy(mm, address, ptep);
 		pte = pte_wrprotect(pte);
 		pte = pte_wrprotect(pte);
 
 
 		if (mm_has_pgste(mm)) {
 		if (mm_has_pgste(mm)) {
@@ -1240,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 {
 {
 	pte_t __pte;
 	pte_t __pte;
 	pte_val(__pte) = physpage + pgprot_val(pgprot);
 	pte_val(__pte) = physpage + pgprot_val(pgprot);
-	return __pte;
+	return pte_mkyoung(__pte);
 }
 }
 
 
 static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
@@ -1248,10 +1287,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 	unsigned long physpage = page_to_phys(page);
 	unsigned long physpage = page_to_phys(page);
 	pte_t __pte = mk_pte_phys(physpage, pgprot);
 	pte_t __pte = mk_pte_phys(physpage, pgprot);
 
 
-	if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) {
-		pte_val(__pte) |= _PAGE_SWC;
-		pte_val(__pte) &= ~_PAGE_RO;
-	}
+	if (pte_write(__pte) && PageDirty(page))
+		__pte = pte_mkdirty(__pte);
 	return __pte;
 	return __pte;
 }
 }
 
 
@@ -1313,7 +1350,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
 	unsigned long sto = (unsigned long) pmdp -
 	unsigned long sto = (unsigned long) pmdp -
 			    pmd_index(address) * sizeof(pmd_t);
 			    pmd_index(address) * sizeof(pmd_t);
 
 
-	if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
+	if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
 		asm volatile(
 		asm volatile(
 			"	.insn	rrf,0xb98e0000,%2,%3,0,0"
 			"	.insn	rrf,0xb98e0000,%2,%3,0,0"
 			: "=m" (*pmdp)
 			: "=m" (*pmdp)
@@ -1324,24 +1361,68 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
 	}
 	}
 }
 }
 
 
+static inline void __pmd_csp(pmd_t *pmdp)
+{
+	register unsigned long reg2 asm("2") = pmd_val(*pmdp);
+	register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
+					       _SEGMENT_ENTRY_INVALID;
+	register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
+
+	asm volatile(
+		"	csp %1,%3"
+		: "=m" (*pmdp)
+		: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+}
+
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
 static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
 static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
 {
 {
 	/*
 	/*
-	 * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx)
+	 * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
 	 * Convert to segment table entry format.
 	 * Convert to segment table entry format.
 	 */
 	 */
 	if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
 	if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
 		return pgprot_val(SEGMENT_NONE);
 		return pgprot_val(SEGMENT_NONE);
-	if (pgprot_val(pgprot) == pgprot_val(PAGE_RO))
-		return pgprot_val(SEGMENT_RO);
-	return pgprot_val(SEGMENT_RW);
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
+		return pgprot_val(SEGMENT_READ);
+	return pgprot_val(SEGMENT_WRITE);
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+	if (pmd_prot_none(pmd)) {
+		pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+	} else {
+		pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+	}
+#endif
+	return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+	if (pmd_prot_none(pmd)) {
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+	} else {
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+		pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+	}
+#endif
+	return pmd;
 }
 }
 
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
 {
+	int young;
+
+	young = pmd_young(pmd);
 	pmd_val(pmd) &= _SEGMENT_CHG_MASK;
 	pmd_val(pmd) &= _SEGMENT_CHG_MASK;
 	pmd_val(pmd) |= massage_pgprot_pmd(newprot);
 	pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+	if (young)
+		pmd = pmd_mkyoung(pmd);
 	return pmd;
 	return pmd;
 }
 }
 
 
@@ -1349,14 +1430,14 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
 {
 {
 	pmd_t __pmd;
 	pmd_t __pmd;
 	pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
 	pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
-	return __pmd;
+	return pmd_mkyoung(__pmd);
 }
 }
 
 
 static inline pmd_t pmd_mkwrite(pmd_t pmd)
 static inline pmd_t pmd_mkwrite(pmd_t pmd)
 {
 {
-	/* Do not clobber _HPAGE_TYPE_NONE pages! */
-	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV))
-		pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
+	/* Do not clobber PROT_NONE segments! */
+	if (!pmd_prot_none(pmd))
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
 	return pmd;
 	return pmd;
 }
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
@@ -1378,7 +1459,7 @@ static inline int pmd_trans_splitting(pmd_t pmd)
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t entry)
 			      pmd_t *pmdp, pmd_t entry)
 {
 {
-	if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1)
+	if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1)
 		pmd_val(entry) |= _SEGMENT_ENTRY_CO;
 		pmd_val(entry) |= _SEGMENT_ENTRY_CO;
 	*pmdp = entry;
 	*pmdp = entry;
 }
 }
@@ -1391,7 +1472,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 
 
 static inline pmd_t pmd_wrprotect(pmd_t pmd)
 static inline pmd_t pmd_wrprotect(pmd_t pmd)
 {
 {
-	pmd_val(pmd) |= _SEGMENT_ENTRY_RO;
+	/* Do not clobber PROT_NONE segments! */
+	if (!pmd_prot_none(pmd))
+		pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
 	return pmd;
 	return pmd;
 }
 }
 
 
@@ -1401,50 +1484,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
 	return pmd;
 	return pmd;
 }
 }
 
 
-static inline pmd_t pmd_mkold(pmd_t pmd)
-{
-	/* No referenced bit in the segment table entry. */
-	return pmd;
-}
-
-static inline pmd_t pmd_mkyoung(pmd_t pmd)
-{
-	/* No referenced bit in the segment table entry. */
-	return pmd;
-}
-
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long address, pmd_t *pmdp)
 					    unsigned long address, pmd_t *pmdp)
 {
 {
-	unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
-	long tmp, rc;
-	int counter;
+	pmd_t pmd;
 
 
-	rc = 0;
-	if (MACHINE_HAS_RRBM) {
-		counter = PTRS_PER_PTE >> 6;
-		asm volatile(
-			"0:	.insn	rre,0xb9ae0000,%0,%3\n"	/* rrbm */
-			"	ogr	%1,%0\n"
-			"	la	%3,0(%4,%3)\n"
-			"	brct	%2,0b\n"
-			: "=&d" (tmp), "+&d" (rc), "+d" (counter),
-			  "+a" (pmd_addr)
-			: "a" (64 * 4096UL) : "cc");
-		rc = !!rc;
-	} else {
-		counter = PTRS_PER_PTE;
-		asm volatile(
-			"0:	rrbe	0,%2\n"
-			"	la	%2,0(%3,%2)\n"
-			"	brc	12,1f\n"
-			"	lhi	%0,1\n"
-			"1:	brct	%1,0b\n"
-			: "+d" (rc), "+d" (counter), "+a" (pmd_addr)
-			: "a" (4096UL) : "cc");
-	}
-	return rc;
+	pmd = *pmdp;
+	__pmd_idte(address, pmdp);
+	*pmdp = pmd_mkold(pmd);
+	return pmd_young(pmd);
 }
 }
 
 
 #define __HAVE_ARCH_PMDP_GET_AND_CLEAR
 #define __HAVE_ARCH_PMDP_GET_AND_CLEAR
@@ -1510,10 +1559,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
  * exception will occur instead of a page translation exception. The
  * exception will occur instead of a page translation exception. The
  * specifiation exception has the bad habit not to store necessary
  * specifiation exception has the bad habit not to store necessary
  * information in the lowcore.
  * information in the lowcore.
- * Bit 21 and bit 22 are the page invalid bit and the page protection
- * bit. We set both to indicate a swapped page.
- * Bit 30 and 31 are used to distinguish the different page types. For
- * a swapped page these bits need to be zero.
+ * Bits 21, 22, 30 and 31 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
  * This leaves the bits 1-19 and bits 24-29 to store type and offset.
  * This leaves the bits 1-19 and bits 24-29 to store type and offset.
  * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
  * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
  * plus 24 for the offset.
  * plus 24 for the offset.
@@ -1527,10 +1574,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
  * exception will occur instead of a page translation exception. The
  * exception will occur instead of a page translation exception. The
  * specifiation exception has the bad habit not to store necessary
  * specifiation exception has the bad habit not to store necessary
  * information in the lowcore.
  * information in the lowcore.
- * Bit 53 and bit 54 are the page invalid bit and the page protection
- * bit. We set both to indicate a swapped page.
- * Bit 62 and 63 are used to distinguish the different page types. For
- * a swapped page these bits need to be zero.
+ * Bits 53, 54, 62 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
  * This leaves the bits 0-51 and bits 56-61 to store type and offset.
  * This leaves the bits 0-51 and bits 56-61 to store type and offset.
  * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
  * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
  * plus 56 for the offset.
  * plus 56 for the offset.
@@ -1547,7 +1592,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 {
 {
 	pte_t pte;
 	pte_t pte;
 	offset &= __SWP_OFFSET_MASK;
 	offset &= __SWP_OFFSET_MASK;
-	pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) |
+	pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
 		((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
 		((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
 	return pte;
 	return pte;
 }
 }
@@ -1570,7 +1615,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 
 
 #define pgoff_to_pte(__off) \
 #define pgoff_to_pte(__off) \
 	((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
 	((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
-		   | _PAGE_TYPE_FILE })
+		   | _PAGE_INVALID | _PAGE_PROTECT })
 
 
 #endif /* !__ASSEMBLY__ */
 #endif /* !__ASSEMBLY__ */
 
 

+ 6 - 0
arch/s390/include/asm/serial.h

@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_SERIAL_H
+#define _ASM_S390_SERIAL_H
+
+#define BASE_BAUD 0
+
+#endif /* _ASM_S390_SERIAL_H */

+ 7 - 2
arch/s390/include/asm/switch_to.h

@@ -8,6 +8,7 @@
 #define __ASM_SWITCH_TO_H
 #define __ASM_SWITCH_TO_H
 
 
 #include <linux/thread_info.h>
 #include <linux/thread_info.h>
+#include <asm/ptrace.h>
 
 
 extern struct task_struct *__switch_to(void *, void *);
 extern struct task_struct *__switch_to(void *, void *);
 extern void update_cr_regs(struct task_struct *task);
 extern void update_cr_regs(struct task_struct *task);
@@ -68,12 +69,16 @@ static inline void restore_fp_regs(s390_fp_regs *fpregs)
 
 
 static inline void save_access_regs(unsigned int *acrs)
 static inline void save_access_regs(unsigned int *acrs)
 {
 {
-	asm volatile("stam 0,15,%0" : "=Q" (*acrs));
+	typedef struct { int _[NUM_ACRS]; } acrstype;
+
+	asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
 }
 }
 
 
 static inline void restore_access_regs(unsigned int *acrs)
 static inline void restore_access_regs(unsigned int *acrs)
 {
 {
-	asm volatile("lam 0,15,%0" : : "Q" (*acrs));
+	typedef struct { int _[NUM_ACRS]; } acrstype;
+
+	asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
 }
 }
 
 
 #define switch_to(prev,next,last) do {					\
 #define switch_to(prev,next,last) do {					\

+ 2 - 1
arch/s390/include/asm/tlb.h

@@ -63,13 +63,14 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb,
 
 
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
 {
+	__tlb_flush_mm_lazy(tlb->mm);
 	tlb_table_flush(tlb);
 	tlb_table_flush(tlb);
 }
 }
 
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
 				  unsigned long start, unsigned long end)
 				  unsigned long start, unsigned long end)
 {
 {
-	tlb_table_flush(tlb);
+	tlb_flush_mmu(tlb);
 }
 }
 
 
 /*
 /*

+ 3 - 3
arch/s390/include/asm/tlbflush.h

@@ -86,7 +86,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
 		__tlb_flush_full(mm);
 		__tlb_flush_full(mm);
 }
 }
 
 
-static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
+static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
 {
 {
 	if (mm->context.flush_mm) {
 	if (mm->context.flush_mm) {
 		__tlb_flush_mm(mm);
 		__tlb_flush_mm(mm);
@@ -118,13 +118,13 @@ static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
 
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
 {
-	__tlb_flush_mm_cond(mm);
+	__tlb_flush_mm_lazy(mm);
 }
 }
 
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 				   unsigned long start, unsigned long end)
 {
 {
-	__tlb_flush_mm_cond(vma->vm_mm);
+	__tlb_flush_mm_lazy(vma->vm_mm);
 }
 }
 
 
 static inline void flush_tlb_kernel_range(unsigned long start,
 static inline void flush_tlb_kernel_range(unsigned long start,

+ 11 - 5
arch/s390/kernel/entry.S

@@ -18,6 +18,7 @@
 #include <asm/unistd.h>
 #include <asm/unistd.h>
 #include <asm/page.h>
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/sigp.h>
+#include <asm/irq.h>
 
 
 __PT_R0      =	__PT_GPRS
 __PT_R0      =	__PT_GPRS
 __PT_R1      =	__PT_GPRS + 4
 __PT_R1      =	__PT_GPRS + 4
@@ -435,6 +436,11 @@ io_skip:
 io_loop:
 io_loop:
 	l	%r1,BASED(.Ldo_IRQ)
 	l	%r1,BASED(.Ldo_IRQ)
 	lr	%r2,%r11		# pass pointer to pt_regs
 	lr	%r2,%r11		# pass pointer to pt_regs
+	lhi	%r3,IO_INTERRUPT
+	tm	__PT_INT_CODE+8(%r11),0x80	# adapter interrupt ?
+	jz	io_call
+	lhi	%r3,THIN_INTERRUPT
+io_call:
 	basr	%r14,%r1		# call do_IRQ
 	basr	%r14,%r1		# call do_IRQ
 	tm	__LC_MACHINE_FLAGS+2,0x10	# MACHINE_FLAG_LPAR
 	tm	__LC_MACHINE_FLAGS+2,0x10	# MACHINE_FLAG_LPAR
 	jz	io_return
 	jz	io_return
@@ -584,9 +590,10 @@ ext_skip:
 	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
 	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
 	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
 	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
+	l	%r1,BASED(.Ldo_IRQ)
 	lr	%r2,%r11		# pass pointer to pt_regs
 	lr	%r2,%r11		# pass pointer to pt_regs
-	l	%r1,BASED(.Ldo_extint)
-	basr	%r14,%r1		# call do_extint
+	lhi	%r3,EXT_INTERRUPT
+	basr	%r14,%r1		# call do_IRQ
 	j	io_return
 	j	io_return
 
 
 /*
 /*
@@ -879,13 +886,13 @@ cleanup_idle:
 	stm	%r9,%r10,__LC_SYSTEM_TIMER
 	stm	%r9,%r10,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
 	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
 	# prepare return psw
 	# prepare return psw
-	n	%r8,BASED(cleanup_idle_wait)	# clear wait state bit
+	n	%r8,BASED(cleanup_idle_wait)	# clear irq & wait state bits
 	l	%r9,24(%r11)			# return from psw_idle
 	l	%r9,24(%r11)			# return from psw_idle
 	br	%r14
 	br	%r14
 cleanup_idle_insn:
 cleanup_idle_insn:
 	.long	psw_idle_lpsw + 0x80000000
 	.long	psw_idle_lpsw + 0x80000000
 cleanup_idle_wait:
 cleanup_idle_wait:
-	.long	0xfffdffff
+	.long	0xfcfdffff
 
 
 /*
 /*
  * Integer constants
  * Integer constants
@@ -902,7 +909,6 @@ cleanup_idle_wait:
 .Ldo_machine_check:	.long	s390_do_machine_check
 .Ldo_machine_check:	.long	s390_do_machine_check
 .Lhandle_mcck:		.long	s390_handle_mcck
 .Lhandle_mcck:		.long	s390_handle_mcck
 .Ldo_IRQ:		.long	do_IRQ
 .Ldo_IRQ:		.long	do_IRQ
-.Ldo_extint:		.long	do_extint
 .Ldo_signal:		.long	do_signal
 .Ldo_signal:		.long	do_signal
 .Ldo_notify_resume:	.long	do_notify_resume
 .Ldo_notify_resume:	.long	do_notify_resume
 .Ldo_per_trap:		.long	do_per_trap
 .Ldo_per_trap:		.long	do_per_trap

+ 9 - 2
arch/s390/kernel/entry64.S

@@ -19,6 +19,7 @@
 #include <asm/unistd.h>
 #include <asm/unistd.h>
 #include <asm/page.h>
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/sigp.h>
+#include <asm/irq.h>
 
 
 __PT_R0      =	__PT_GPRS
 __PT_R0      =	__PT_GPRS
 __PT_R1      =	__PT_GPRS + 8
 __PT_R1      =	__PT_GPRS + 8
@@ -468,6 +469,11 @@ io_skip:
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 io_loop:
 io_loop:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	lgr	%r2,%r11		# pass pointer to pt_regs
+	lghi	%r3,IO_INTERRUPT
+	tm	__PT_INT_CODE+8(%r11),0x80	# adapter interrupt ?
+	jz	io_call
+	lghi	%r3,THIN_INTERRUPT
+io_call:
 	brasl	%r14,do_IRQ
 	brasl	%r14,do_IRQ
 	tm	__LC_MACHINE_FLAGS+6,0x10	# MACHINE_FLAG_LPAR
 	tm	__LC_MACHINE_FLAGS+6,0x10	# MACHINE_FLAG_LPAR
 	jz	io_return
 	jz	io_return
@@ -623,7 +629,8 @@ ext_skip:
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	lgr	%r2,%r11		# pass pointer to pt_regs
-	brasl	%r14,do_extint
+	lghi	%r3,EXT_INTERRUPT
+	brasl	%r14,do_IRQ
 	j	io_return
 	j	io_return
 
 
 /*
 /*
@@ -922,7 +929,7 @@ cleanup_idle:
 	stg	%r9,__LC_SYSTEM_TIMER
 	stg	%r9,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
 	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
 	# prepare return psw
 	# prepare return psw
-	nihh	%r8,0xfffd		# clear wait state bit
+	nihh	%r8,0xfcfd		# clear irq & wait state bits
 	lg	%r9,48(%r11)		# return from psw_idle
 	lg	%r9,48(%r11)		# return from psw_idle
 	br	%r14
 	br	%r14
 cleanup_idle_insn:
 cleanup_idle_insn:

+ 58 - 102
arch/s390/kernel/irq.c

@@ -22,6 +22,7 @@
 #include <asm/cputime.h>
 #include <asm/cputime.h>
 #include <asm/lowcore.h>
 #include <asm/lowcore.h>
 #include <asm/irq.h>
 #include <asm/irq.h>
+#include <asm/hw_irq.h>
 #include "entry.h"
 #include "entry.h"
 
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -42,9 +43,10 @@ struct irq_class {
  * Since the external and I/O interrupt fields are already sums we would end
  * Since the external and I/O interrupt fields are already sums we would end
  * up with having a sum which accounts each interrupt twice.
  * up with having a sum which accounts each interrupt twice.
  */
  */
-static const struct irq_class irqclass_main_desc[NR_IRQS] = {
-	[EXTERNAL_INTERRUPT] = {.name = "EXT"},
-	[IO_INTERRUPT]	     = {.name = "I/O"}
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+	[EXT_INTERRUPT]  = {.name = "EXT"},
+	[IO_INTERRUPT]	 = {.name = "I/O"},
+	[THIN_INTERRUPT] = {.name = "AIO"},
 };
 };
 
 
 /*
 /*
@@ -86,6 +88,28 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
 	[CPU_RST]    = {.name = "RST", .desc = "[CPU] CPU Restart"},
 	[CPU_RST]    = {.name = "RST", .desc = "[CPU] CPU Restart"},
 };
 };
 
 
+void __init init_IRQ(void)
+{
+	irq_reserve_irqs(0, THIN_INTERRUPT);
+	init_cio_interrupts();
+	init_airq_interrupts();
+	init_ext_interrupts();
+}
+
+void do_IRQ(struct pt_regs *regs, int irq)
+{
+	struct pt_regs *old_regs;
+
+	old_regs = set_irq_regs(regs);
+	irq_enter();
+	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+		/* Serve timer interrupts first. */
+		clock_comparator_work();
+	generic_handle_irq(irq);
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
 /*
 /*
  * show_interrupts is needed by /proc/interrupts.
  * show_interrupts is needed by /proc/interrupts.
  */
  */
@@ -100,27 +124,36 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(cpu)
 		for_each_online_cpu(cpu)
 			seq_printf(p, "CPU%d       ", cpu);
 			seq_printf(p, "CPU%d       ", cpu);
 		seq_putc(p, '\n');
 		seq_putc(p, '\n');
+		goto out;
 	}
 	}
 	if (irq < NR_IRQS) {
 	if (irq < NR_IRQS) {
+		if (irq >= NR_IRQS_BASE)
+			goto out;
 		seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
 		seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
 		for_each_online_cpu(cpu)
 		for_each_online_cpu(cpu)
-			seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]);
+			seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
 		seq_putc(p, '\n');
 		seq_putc(p, '\n');
-		goto skip_arch_irqs;
+		goto out;
 	}
 	}
 	for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
 	for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
 		seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
 		seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
 		for_each_online_cpu(cpu)
 		for_each_online_cpu(cpu)
-			seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]);
+			seq_printf(p, "%10u ",
+				   per_cpu(irq_stat, cpu).irqs[irq]);
 		if (irqclass_sub_desc[irq].desc)
 		if (irqclass_sub_desc[irq].desc)
 			seq_printf(p, "  %s", irqclass_sub_desc[irq].desc);
 			seq_printf(p, "  %s", irqclass_sub_desc[irq].desc);
 		seq_putc(p, '\n');
 		seq_putc(p, '\n');
 	}
 	}
-skip_arch_irqs:
+out:
 	put_online_cpus();
 	put_online_cpus();
 	return 0;
 	return 0;
 }
 }
 
 
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+	return 0;
+}
+
 /*
 /*
  * Switch to the asynchronous interrupt stack for softirq execution.
  * Switch to the asynchronous interrupt stack for softirq execution.
  */
  */
@@ -159,14 +192,6 @@ asmlinkage void do_softirq(void)
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 }
 }
 
 
-#ifdef CONFIG_PROC_FS
-void init_irq_proc(void)
-{
-	if (proc_mkdir("irq", NULL))
-		create_prof_cpu_mask();
-}
-#endif
-
 /*
 /*
  * ext_int_hash[index] is the list head for all external interrupts that hash
  * ext_int_hash[index] is the list head for all external interrupts that hash
  * to this index.
  * to this index.
@@ -183,14 +208,6 @@ struct ext_int_info {
 /* ext_int_hash_lock protects the handler lists for external interrupts */
 /* ext_int_hash_lock protects the handler lists for external interrupts */
 DEFINE_SPINLOCK(ext_int_hash_lock);
 DEFINE_SPINLOCK(ext_int_hash_lock);
 
 
-static void __init init_external_interrupts(void)
-{
-	int idx;
-
-	for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
-		INIT_LIST_HEAD(&ext_int_hash[idx]);
-}
-
 static inline int ext_hash(u16 code)
 static inline int ext_hash(u16 code)
 {
 {
 	return (code + (code >> 9)) & 0xff;
 	return (code + (code >> 9)) & 0xff;
@@ -234,20 +251,13 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
 }
 }
 EXPORT_SYMBOL(unregister_external_interrupt);
 EXPORT_SYMBOL(unregister_external_interrupt);
 
 
-void __irq_entry do_extint(struct pt_regs *regs)
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
 {
 {
+	struct pt_regs *regs = get_irq_regs();
 	struct ext_code ext_code;
 	struct ext_code ext_code;
-	struct pt_regs *old_regs;
 	struct ext_int_info *p;
 	struct ext_int_info *p;
 	int index;
 	int index;
 
 
-	old_regs = set_irq_regs(regs);
-	irq_enter();
-	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) {
-		/* Serve timer interrupts first. */
-		clock_comparator_work();
-	}
-	kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL);
 	ext_code = *(struct ext_code *) &regs->int_code;
 	ext_code = *(struct ext_code *) &regs->int_code;
 	if (ext_code.code != 0x1004)
 	if (ext_code.code != 0x1004)
 		__get_cpu_var(s390_idle).nohz_delay = 1;
 		__get_cpu_var(s390_idle).nohz_delay = 1;
@@ -259,13 +269,25 @@ void __irq_entry do_extint(struct pt_regs *regs)
 			p->handler(ext_code, regs->int_parm,
 			p->handler(ext_code, regs->int_parm,
 				   regs->int_parm_long);
 				   regs->int_parm_long);
 	rcu_read_unlock();
 	rcu_read_unlock();
-	irq_exit();
-	set_irq_regs(old_regs);
+
+	return IRQ_HANDLED;
 }
 }
 
 
-void __init init_IRQ(void)
+static struct irqaction external_interrupt = {
+	.name	 = "EXT",
+	.handler = do_ext_interrupt,
+};
+
+void __init init_ext_interrupts(void)
 {
 {
-	init_external_interrupts();
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
+		INIT_LIST_HEAD(&ext_int_hash[idx]);
+
+	irq_set_chip_and_handler(EXT_INTERRUPT,
+				 &dummy_irq_chip, handle_percpu_irq);
+	setup_irq(EXT_INTERRUPT, &external_interrupt);
 }
 }
 
 
 static DEFINE_SPINLOCK(sc_irq_lock);
 static DEFINE_SPINLOCK(sc_irq_lock);
@@ -313,69 +335,3 @@ void measurement_alert_subclass_unregister(void)
 	spin_unlock(&ma_subclass_lock);
 	spin_unlock(&ma_subclass_lock);
 }
 }
 EXPORT_SYMBOL(measurement_alert_subclass_unregister);
 EXPORT_SYMBOL(measurement_alert_subclass_unregister);
-
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
-	/*
-	 * Not needed, the handler is protected by a lock and IRQs that occur
-	 * after the handler is deleted are just NOPs.
-	 */
-}
-EXPORT_SYMBOL_GPL(synchronize_irq);
-#endif
-
-#ifndef CONFIG_PCI
-
-/* Only PCI devices have dynamically-defined IRQ handlers */
-
-int request_irq(unsigned int irq, irq_handler_t handler,
-		unsigned long irqflags, const char *devname, void *dev_id)
-{
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(request_irq);
-
-void free_irq(unsigned int irq, void *dev_id)
-{
-	WARN_ON(1);
-}
-EXPORT_SYMBOL_GPL(free_irq);
-
-void enable_irq(unsigned int irq)
-{
-	WARN_ON(1);
-}
-EXPORT_SYMBOL_GPL(enable_irq);
-
-void disable_irq(unsigned int irq)
-{
-	WARN_ON(1);
-}
-EXPORT_SYMBOL_GPL(disable_irq);
-
-#endif /* !CONFIG_PCI */
-
-void disable_irq_nosync(unsigned int irq)
-{
-	disable_irq(irq);
-}
-EXPORT_SYMBOL_GPL(disable_irq_nosync);
-
-unsigned long probe_irq_on(void)
-{
-	return 0;
-}
-EXPORT_SYMBOL_GPL(probe_irq_on);
-
-int probe_irq_off(unsigned long val)
-{
-	return 0;
-}
-EXPORT_SYMBOL_GPL(probe_irq_off);
-
-unsigned int probe_irq_mask(unsigned long val)
-{
-	return val;
-}
-EXPORT_SYMBOL_GPL(probe_irq_mask);

+ 19 - 2
arch/s390/kernel/kprobes.c

@@ -105,14 +105,31 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
 		fixup |= FIXUP_RETURN_REGISTER;
 		fixup |= FIXUP_RETURN_REGISTER;
 		break;
 		break;
 	case 0xeb:
 	case 0xeb:
-		if ((insn[2] & 0xff) == 0x44 ||	/* bxhg  */
-		    (insn[2] & 0xff) == 0x45)	/* bxleg */
+		switch (insn[2] & 0xff) {
+		case 0x44: /* bxhg  */
+		case 0x45: /* bxleg */
 			fixup = FIXUP_BRANCH_NOT_TAKEN;
 			fixup = FIXUP_BRANCH_NOT_TAKEN;
+			break;
+		}
 		break;
 		break;
 	case 0xe3:	/* bctg	*/
 	case 0xe3:	/* bctg	*/
 		if ((insn[2] & 0xff) == 0x46)
 		if ((insn[2] & 0xff) == 0x46)
 			fixup = FIXUP_BRANCH_NOT_TAKEN;
 			fixup = FIXUP_BRANCH_NOT_TAKEN;
 		break;
 		break;
+	case 0xec:
+		switch (insn[2] & 0xff) {
+		case 0xe5: /* clgrb */
+		case 0xe6: /* cgrb  */
+		case 0xf6: /* crb   */
+		case 0xf7: /* clrb  */
+		case 0xfc: /* cgib  */
+		case 0xfd: /* cglib */
+		case 0xfe: /* cib   */
+		case 0xff: /* clib  */
+			fixup = FIXUP_BRANCH_NOT_TAKEN;
+			break;
+		}
+		break;
 	}
 	}
 	return fixup;
 	return fixup;
 }
 }

+ 1 - 4
arch/s390/kernel/nmi.c

@@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 			: "0", "cc");
 			: "0", "cc");
 #endif
 #endif
 	/* Revalidate clock comparator register */
 	/* Revalidate clock comparator register */
-	if (S390_lowcore.clock_comparator == -1)
-		set_clock_comparator(S390_lowcore.mcck_clock);
-	else
-		set_clock_comparator(S390_lowcore.clock_comparator);
+	set_clock_comparator(S390_lowcore.clock_comparator);
 	/* Check if old PSW is valid */
 	/* Check if old PSW is valid */
 	if (!mci->wp)
 	if (!mci->wp)
 		/*
 		/*

+ 1 - 0
arch/s390/kernel/process.c

@@ -71,6 +71,7 @@ void arch_cpu_idle(void)
 	}
 	}
 	/* Halt the cpu and keep track of cpu time accounting. */
 	/* Halt the cpu and keep track of cpu time accounting. */
 	vtime_stop_cpu();
 	vtime_stop_cpu();
+	local_irq_enable();
 }
 }
 
 
 void arch_cpu_idle_exit(void)
 void arch_cpu_idle_exit(void)

+ 4 - 4
arch/s390/kernel/ptrace.c

@@ -60,11 +60,11 @@ void update_cr_regs(struct task_struct *task)
 
 
 		__ctl_store(cr, 0, 2);
 		__ctl_store(cr, 0, 2);
 		cr_new[1] = cr[1];
 		cr_new[1] = cr[1];
-		/* Set or clear transaction execution TXC/PIFO bits 8 and 9. */
+		/* Set or clear transaction execution TXC bit 8. */
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
-			cr_new[0] = cr[0] & ~(3UL << 54);
+			cr_new[0] = cr[0] & ~(1UL << 55);
 		else
 		else
-			cr_new[0] = cr[0] | (3UL << 54);
+			cr_new[0] = cr[0] | (1UL << 55);
 		/* Set or clear transaction execution TDC bits 62 and 63. */
 		/* Set or clear transaction execution TDC bits 62 and 63. */
 		cr_new[2] = cr[2] & ~3UL;
 		cr_new[2] = cr[2] & ~3UL;
 		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
 		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
@@ -1299,7 +1299,7 @@ int regs_query_register_offset(const char *name)
 
 
 	if (!name || *name != 'r')
 	if (!name || *name != 'r')
 		return -EINVAL;
 		return -EINVAL;
-	if (strict_strtoul(name + 1, 10, &offset))
+	if (kstrtoul(name + 1, 10, &offset))
 		return -EINVAL;
 		return -EINVAL;
 	if (offset >= NUM_GPRS)
 	if (offset >= NUM_GPRS)
 		return -EINVAL;
 		return -EINVAL;

+ 11 - 0
arch/s390/kernel/suspend.c

@@ -10,6 +10,9 @@
 #include <linux/suspend.h>
 #include <linux/suspend.h>
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <asm/ctl_reg.h>
 #include <asm/ctl_reg.h>
+#include <asm/ipl.h>
+#include <asm/cio.h>
+#include <asm/pci.h>
 
 
 /*
 /*
  * References to section boundaries
  * References to section boundaries
@@ -211,3 +214,11 @@ void restore_processor_state(void)
 	__ctl_set_bit(0,28);
 	__ctl_set_bit(0,28);
 	local_mcck_enable();
 	local_mcck_enable();
 }
 }
+
+/* Called at the end of swsusp_arch_resume */
+void s390_early_resume(void)
+{
+	lgr_info_log();
+	channel_subsystem_reinit();
+	zpci_rescan();
+}

+ 2 - 5
arch/s390/kernel/swsusp_asm64.S

@@ -281,11 +281,8 @@ restore_registers:
 	lghi	%r2,0
 	lghi	%r2,0
 	brasl	%r14,arch_set_page_states
 	brasl	%r14,arch_set_page_states
 
 
-	/* Log potential guest relocation */
-	brasl	%r14,lgr_info_log
-
-	/* Reinitialize the channel subsystem */
-	brasl	%r14,channel_subsystem_reinit
+	/* Call arch specific early resume code */
+	brasl	%r14,s390_early_resume
 
 
 	/* Return 0 */
 	/* Return 0 */
 	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
 	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)

+ 0 - 1
arch/s390/kernel/time.c

@@ -92,7 +92,6 @@ void clock_comparator_work(void)
 	struct clock_event_device *cd;
 	struct clock_event_device *cd;
 
 
 	S390_lowcore.clock_comparator = -1ULL;
 	S390_lowcore.clock_comparator = -1ULL;
-	set_clock_comparator(S390_lowcore.clock_comparator);
 	cd = &__get_cpu_var(comparators);
 	cd = &__get_cpu_var(comparators);
 	cd->event_handler(cd);
 	cd->event_handler(cd);
 }
 }

+ 3 - 3
arch/s390/kernel/vdso.c

@@ -63,7 +63,7 @@ static int __init vdso_setup(char *s)
 	else if (strncmp(s, "off", 4) == 0)
 	else if (strncmp(s, "off", 4) == 0)
 		vdso_enabled = 0;
 		vdso_enabled = 0;
 	else {
 	else {
-		rc = strict_strtoul(s, 0, &val);
+		rc = kstrtoul(s, 0, &val);
 		vdso_enabled = rc ? 0 : !!val;
 		vdso_enabled = rc ? 0 : !!val;
 	}
 	}
 	return !rc;
 	return !rc;
@@ -113,11 +113,11 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
 
 
 	clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
 	clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
 		    PAGE_SIZE << SEGMENT_ORDER);
 		    PAGE_SIZE << SEGMENT_ORDER);
-	clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
+	clear_table((unsigned long *) page_table, _PAGE_INVALID,
 		    256*sizeof(unsigned long));
 		    256*sizeof(unsigned long));
 
 
 	*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
 	*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
-	*(unsigned long *) page_table = _PAGE_RO + page_frame;
+	*(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
 
 
 	psal = (u32 *) (page_table + 256*sizeof(unsigned long));
 	psal = (u32 *) (page_table + 256*sizeof(unsigned long));
 	aste = psal + 32;
 	aste = psal + 32;

+ 0 - 2
arch/s390/lib/delay.c

@@ -44,7 +44,6 @@ static void __udelay_disabled(unsigned long long usecs)
 	do {
 	do {
 		set_clock_comparator(end);
 		set_clock_comparator(end);
 		vtime_stop_cpu();
 		vtime_stop_cpu();
-		local_irq_disable();
 	} while (get_tod_clock() < end);
 	} while (get_tod_clock() < end);
 	lockdep_on();
 	lockdep_on();
 	__ctl_load(cr0, 0, 0);
 	__ctl_load(cr0, 0, 0);
@@ -64,7 +63,6 @@ static void __udelay_enabled(unsigned long long usecs)
 			set_clock_comparator(end);
 			set_clock_comparator(end);
 		}
 		}
 		vtime_stop_cpu();
 		vtime_stop_cpu();
-		local_irq_disable();
 		if (clock_saved)
 		if (clock_saved)
 			local_tick_enable(clock_saved);
 			local_tick_enable(clock_saved);
 	} while (get_tod_clock() < end);
 	} while (get_tod_clock() < end);

+ 8 - 8
arch/s390/lib/uaccess_pt.c

@@ -86,28 +86,28 @@ static unsigned long follow_table(struct mm_struct *mm,
 	switch (mm->context.asce_bits & _ASCE_TYPE_MASK) {
 	switch (mm->context.asce_bits & _ASCE_TYPE_MASK) {
 	case _ASCE_TYPE_REGION1:
 	case _ASCE_TYPE_REGION1:
 		table = table + ((address >> 53) & 0x7ff);
 		table = table + ((address >> 53) & 0x7ff);
-		if (unlikely(*table & _REGION_ENTRY_INV))
+		if (unlikely(*table & _REGION_ENTRY_INVALID))
 			return -0x39UL;
 			return -0x39UL;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		/* fallthrough */
 		/* fallthrough */
 	case _ASCE_TYPE_REGION2:
 	case _ASCE_TYPE_REGION2:
 		table = table + ((address >> 42) & 0x7ff);
 		table = table + ((address >> 42) & 0x7ff);
-		if (unlikely(*table & _REGION_ENTRY_INV))
+		if (unlikely(*table & _REGION_ENTRY_INVALID))
 			return -0x3aUL;
 			return -0x3aUL;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		/* fallthrough */
 		/* fallthrough */
 	case _ASCE_TYPE_REGION3:
 	case _ASCE_TYPE_REGION3:
 		table = table + ((address >> 31) & 0x7ff);
 		table = table + ((address >> 31) & 0x7ff);
-		if (unlikely(*table & _REGION_ENTRY_INV))
+		if (unlikely(*table & _REGION_ENTRY_INVALID))
 			return -0x3bUL;
 			return -0x3bUL;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		/* fallthrough */
 		/* fallthrough */
 	case _ASCE_TYPE_SEGMENT:
 	case _ASCE_TYPE_SEGMENT:
 		table = table + ((address >> 20) & 0x7ff);
 		table = table + ((address >> 20) & 0x7ff);
-		if (unlikely(*table & _SEGMENT_ENTRY_INV))
+		if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
 			return -0x10UL;
 			return -0x10UL;
 		if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) {
 		if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) {
-			if (write && (*table & _SEGMENT_ENTRY_RO))
+			if (write && (*table & _SEGMENT_ENTRY_PROTECT))
 				return -0x04UL;
 				return -0x04UL;
 			return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) +
 			return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) +
 				(address & ~_SEGMENT_ENTRY_ORIGIN_LARGE);
 				(address & ~_SEGMENT_ENTRY_ORIGIN_LARGE);
@@ -117,7 +117,7 @@ static unsigned long follow_table(struct mm_struct *mm,
 	table = table + ((address >> 12) & 0xff);
 	table = table + ((address >> 12) & 0xff);
 	if (unlikely(*table & _PAGE_INVALID))
 	if (unlikely(*table & _PAGE_INVALID))
 		return -0x11UL;
 		return -0x11UL;
-	if (write && (*table & _PAGE_RO))
+	if (write && (*table & _PAGE_PROTECT))
 		return -0x04UL;
 		return -0x04UL;
 	return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
 	return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
 }
 }
@@ -130,13 +130,13 @@ static unsigned long follow_table(struct mm_struct *mm,
 	unsigned long *table = (unsigned long *)__pa(mm->pgd);
 	unsigned long *table = (unsigned long *)__pa(mm->pgd);
 
 
 	table = table + ((address >> 20) & 0x7ff);
 	table = table + ((address >> 20) & 0x7ff);
-	if (unlikely(*table & _SEGMENT_ENTRY_INV))
+	if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
 		return -0x10UL;
 		return -0x10UL;
 	table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
 	table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
 	table = table + ((address >> 12) & 0xff);
 	table = table + ((address >> 12) & 0xff);
 	if (unlikely(*table & _PAGE_INVALID))
 	if (unlikely(*table & _PAGE_INVALID))
 		return -0x11UL;
 		return -0x11UL;
-	if (write && (*table & _PAGE_RO))
+	if (write && (*table & _PAGE_PROTECT))
 		return -0x04UL;
 		return -0x04UL;
 	return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
 	return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
 }
 }

+ 9 - 9
arch/s390/mm/dump_pagetables.c

@@ -53,7 +53,7 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
 		seq_printf(m, "I\n");
 		seq_printf(m, "I\n");
 		return;
 		return;
 	}
 	}
-	seq_printf(m, "%s", pr & _PAGE_RO ? "RO " : "RW ");
+	seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
 	seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : "   ");
 	seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : "   ");
 	seq_putc(m, '\n');
 	seq_putc(m, '\n');
 }
 }
@@ -105,12 +105,12 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 }
 }
 
 
 /*
 /*
- * The actual page table walker functions. In order to keep the implementation
- * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
- * flags to note_page() if a region, segment or page table entry is invalid or
- * read-only.
- * After all it's just a hint that the current level being walked contains an
- * invalid or read-only entry.
+ * The actual page table walker functions. In order to keep the
+ * implementation of print_prot() short, we only check and pass
+ * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
+ * segment or page table entry is invalid or read-only.
+ * After all it's just a hint that the current level being walked
+ * contains an invalid or read-only entry.
  */
  */
 static void walk_pte_level(struct seq_file *m, struct pg_state *st,
 static void walk_pte_level(struct seq_file *m, struct pg_state *st,
 			   pmd_t *pmd, unsigned long addr)
 			   pmd_t *pmd, unsigned long addr)
@@ -122,14 +122,14 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st,
 	for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
 	for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
 		st->current_address = addr;
 		st->current_address = addr;
 		pte = pte_offset_kernel(pmd, addr);
 		pte = pte_offset_kernel(pmd, addr);
-		prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
+		prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
 		note_page(m, st, prot, 4);
 		note_page(m, st, prot, 4);
 		addr += PAGE_SIZE;
 		addr += PAGE_SIZE;
 	}
 	}
 }
 }
 
 
 #ifdef CONFIG_64BIT
 #ifdef CONFIG_64BIT
-#define _PMD_PROT_MASK (_SEGMENT_ENTRY_RO | _SEGMENT_ENTRY_CO)
+#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO)
 #else
 #else
 #define _PMD_PROT_MASK 0
 #define _PMD_PROT_MASK 0
 #endif
 #endif

+ 3 - 3
arch/s390/mm/gup.c

@@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	pte_t *ptep, pte;
 	pte_t *ptep, pte;
 	struct page *page;
 	struct page *page;
 
 
-	mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
+	mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
 
 
 	ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
 	ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
 	do {
 	do {
@@ -55,8 +55,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	struct page *head, *page, *tail;
 	struct page *head, *page, *tail;
 	int refs;
 	int refs;
 
 
-	result = write ? 0 : _SEGMENT_ENTRY_RO;
-	mask = result | _SEGMENT_ENTRY_INV;
+	result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
+	mask = result | _SEGMENT_ENTRY_INVALID;
 	if ((pmd_val(pmd) & mask) != result)
 	if ((pmd_val(pmd) & mask) != result)
 		return 0;
 		return 0;
 	VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
 	VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));

+ 115 - 9
arch/s390/mm/hugetlbpage.c

@@ -8,21 +8,127 @@
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/hugetlb.h>
 
 
+static inline pmd_t __pte_to_pmd(pte_t pte)
+{
+	int none, young, prot;
+	pmd_t pmd;
+
+	/*
+	 * Convert encoding		  pte bits	  pmd bits
+	 *				.IR...wrdytp	..R...I...y.
+	 * empty			.10...000000 -> ..0...1...0.
+	 * prot-none, clean, old	.11...000001 -> ..0...1...1.
+	 * prot-none, clean, young	.11...000101 -> ..1...1...1.
+	 * prot-none, dirty, old	.10...001001 -> ..0...1...1.
+	 * prot-none, dirty, young	.10...001101 -> ..1...1...1.
+	 * read-only, clean, old	.11...010001 -> ..1...1...0.
+	 * read-only, clean, young	.01...010101 -> ..1...0...1.
+	 * read-only, dirty, old	.11...011001 -> ..1...1...0.
+	 * read-only, dirty, young	.01...011101 -> ..1...0...1.
+	 * read-write, clean, old	.11...110001 -> ..0...1...0.
+	 * read-write, clean, young	.01...110101 -> ..0...0...1.
+	 * read-write, dirty, old	.10...111001 -> ..0...1...0.
+	 * read-write, dirty, young	.00...111101 -> ..0...0...1.
+	 * Huge ptes are dirty by definition, a clean pte is made dirty
+	 * by the conversion.
+	 */
+	if (pte_present(pte)) {
+		pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
+		if (pte_val(pte) & _PAGE_INVALID)
+			pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+		none = (pte_val(pte) & _PAGE_PRESENT) &&
+			!(pte_val(pte) & _PAGE_READ) &&
+			!(pte_val(pte) & _PAGE_WRITE);
+		prot = (pte_val(pte) & _PAGE_PROTECT) &&
+			!(pte_val(pte) & _PAGE_WRITE);
+		young = pte_val(pte) & _PAGE_YOUNG;
+		if (none || young)
+			pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+		if (prot || (none && young))
+			pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+	} else
+		pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
+	return pmd;
+}
+
+static inline pte_t __pmd_to_pte(pmd_t pmd)
+{
+	pte_t pte;
+
+	/*
+	 * Convert encoding	  pmd bits	  pte bits
+	 *			..R...I...y.	.IR...wrdytp
+	 * empty		..0...1...0. -> .10...000000
+	 * prot-none, old	..0...1...1. -> .10...001001
+	 * prot-none, young	..1...1...1. -> .10...001101
+	 * read-only, old	..1...1...0. -> .11...011001
+	 * read-only, young	..1...0...1. -> .01...011101
+	 * read-write, old	..0...1...0. -> .10...111001
+	 * read-write, young	..0...0...1. -> .00...111101
+	 * Huge ptes are dirty by definition
+	 */
+	if (pmd_present(pmd)) {
+		pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY |
+			(pmd_val(pmd) & PAGE_MASK);
+		if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
+			pte_val(pte) |= _PAGE_INVALID;
+		if (pmd_prot_none(pmd)) {
+			if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+				pte_val(pte) |= _PAGE_YOUNG;
+		} else {
+			pte_val(pte) |= _PAGE_READ;
+			if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+				pte_val(pte) |= _PAGE_PROTECT;
+			else
+				pte_val(pte) |= _PAGE_WRITE;
+			if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
+				pte_val(pte) |= _PAGE_YOUNG;
+		}
+	} else
+		pte_val(pte) = _PAGE_INVALID;
+	return pte;
+}
 
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-				   pte_t *pteptr, pte_t pteval)
+		     pte_t *ptep, pte_t pte)
 {
 {
-	pmd_t *pmdp = (pmd_t *) pteptr;
-	unsigned long mask;
+	pmd_t pmd;
 
 
+	pmd = __pte_to_pmd(pte);
 	if (!MACHINE_HAS_HPAGE) {
 	if (!MACHINE_HAS_HPAGE) {
-		pteptr = (pte_t *) pte_page(pteval)[1].index;
-		mask = pte_val(pteval) &
-				(_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
-		pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+		pmd_val(pmd) |= pte_page(pte)[1].index;
+	} else
+		pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO;
+	*(pmd_t *) ptep = pmd;
+}
+
+pte_t huge_ptep_get(pte_t *ptep)
+{
+	unsigned long origin;
+	pmd_t pmd;
+
+	pmd = *(pmd_t *) ptep;
+	if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
+		origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+		pmd_val(pmd) |= *(unsigned long *) origin;
 	}
 	}
+	return __pmd_to_pte(pmd);
+}
 
 
-	pmd_val(*pmdp) = pte_val(pteval);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep)
+{
+	pmd_t *pmdp = (pmd_t *) ptep;
+	pte_t pte = huge_ptep_get(ptep);
+
+	if (MACHINE_HAS_IDTE)
+		__pmd_idte(addr, pmdp);
+	else
+		__pmd_csp(pmdp);
+	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+	return pte;
 }
 }
 
 
 int arch_prepare_hugepage(struct page *page)
 int arch_prepare_hugepage(struct page *page)
@@ -58,7 +164,7 @@ void arch_release_hugepage(struct page *page)
 	ptep = (pte_t *) page[1].index;
 	ptep = (pte_t *) page[1].index;
 	if (!ptep)
 	if (!ptep)
 		return;
 		return;
-	clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY,
+	clear_table((unsigned long *) ptep, _PAGE_INVALID,
 		    PTRS_PER_PTE * sizeof(pte_t));
 		    PTRS_PER_PTE * sizeof(pte_t));
 	page_table_free(&init_mm, (unsigned long *) ptep);
 	page_table_free(&init_mm, (unsigned long *) ptep);
 	page[1].index = 0;
 	page[1].index = 0;

+ 1 - 1
arch/s390/mm/pageattr.c

@@ -118,7 +118,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 		pte = pte_offset_kernel(pmd, address);
 		pte = pte_offset_kernel(pmd, address);
 		if (!enable) {
 		if (!enable) {
 			__ptep_ipte(address, pte);
 			__ptep_ipte(address, pte);
-			pte_val(*pte) = _PAGE_TYPE_EMPTY;
+			pte_val(*pte) = _PAGE_INVALID;
 			continue;
 			continue;
 		}
 		}
 		pte_val(*pte) = __pa(address);
 		pte_val(*pte) = __pa(address);

+ 40 - 43
arch/s390/mm/pgtable.c

@@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
 	struct gmap_rmap *rmap;
 	struct gmap_rmap *rmap;
 	struct page *page;
 	struct page *page;
 
 
-	if (*table & _SEGMENT_ENTRY_INV)
+	if (*table & _SEGMENT_ENTRY_INVALID)
 		return 0;
 		return 0;
 	page = pfn_to_page(*table >> PAGE_SHIFT);
 	page = pfn_to_page(*table >> PAGE_SHIFT);
 	mp = (struct gmap_pgtable *) page->index;
 	mp = (struct gmap_pgtable *) page->index;
@@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
 		kfree(rmap);
 		kfree(rmap);
 		break;
 		break;
 	}
 	}
-	*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+	*table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
 	return 1;
 	return 1;
 }
 }
 
 
@@ -258,7 +258,7 @@ static int gmap_alloc_table(struct gmap *gmap,
 		return -ENOMEM;
 		return -ENOMEM;
 	new = (unsigned long *) page_to_phys(page);
 	new = (unsigned long *) page_to_phys(page);
 	crst_table_init(new, init);
 	crst_table_init(new, init);
-	if (*table & _REGION_ENTRY_INV) {
+	if (*table & _REGION_ENTRY_INVALID) {
 		list_add(&page->lru, &gmap->crst_list);
 		list_add(&page->lru, &gmap->crst_list);
 		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
 		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
 			(*table & _REGION_ENTRY_TYPE_MASK);
 			(*table & _REGION_ENTRY_TYPE_MASK);
@@ -292,22 +292,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
 	for (off = 0; off < len; off += PMD_SIZE) {
 	for (off = 0; off < len; off += PMD_SIZE) {
 		/* Walk the guest addr space page table */
 		/* Walk the guest addr space page table */
 		table = gmap->table + (((to + off) >> 53) & 0x7ff);
 		table = gmap->table + (((to + off) >> 53) & 0x7ff);
-		if (*table & _REGION_ENTRY_INV)
+		if (*table & _REGION_ENTRY_INVALID)
 			goto out;
 			goto out;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = table + (((to + off) >> 42) & 0x7ff);
 		table = table + (((to + off) >> 42) & 0x7ff);
-		if (*table & _REGION_ENTRY_INV)
+		if (*table & _REGION_ENTRY_INVALID)
 			goto out;
 			goto out;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = table + (((to + off) >> 31) & 0x7ff);
 		table = table + (((to + off) >> 31) & 0x7ff);
-		if (*table & _REGION_ENTRY_INV)
+		if (*table & _REGION_ENTRY_INVALID)
 			goto out;
 			goto out;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = table + (((to + off) >> 20) & 0x7ff);
 		table = table + (((to + off) >> 20) & 0x7ff);
 
 
 		/* Clear segment table entry in guest address space. */
 		/* Clear segment table entry in guest address space. */
 		flush |= gmap_unlink_segment(gmap, table);
 		flush |= gmap_unlink_segment(gmap, table);
-		*table = _SEGMENT_ENTRY_INV;
+		*table = _SEGMENT_ENTRY_INVALID;
 	}
 	}
 out:
 out:
 	spin_unlock(&gmap->mm->page_table_lock);
 	spin_unlock(&gmap->mm->page_table_lock);
@@ -345,17 +345,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 	for (off = 0; off < len; off += PMD_SIZE) {
 	for (off = 0; off < len; off += PMD_SIZE) {
 		/* Walk the gmap address space page table */
 		/* Walk the gmap address space page table */
 		table = gmap->table + (((to + off) >> 53) & 0x7ff);
 		table = gmap->table + (((to + off) >> 53) & 0x7ff);
-		if ((*table & _REGION_ENTRY_INV) &&
+		if ((*table & _REGION_ENTRY_INVALID) &&
 		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
 		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
 			goto out_unmap;
 			goto out_unmap;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = table + (((to + off) >> 42) & 0x7ff);
 		table = table + (((to + off) >> 42) & 0x7ff);
-		if ((*table & _REGION_ENTRY_INV) &&
+		if ((*table & _REGION_ENTRY_INVALID) &&
 		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
 		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
 			goto out_unmap;
 			goto out_unmap;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = table + (((to + off) >> 31) & 0x7ff);
 		table = table + (((to + off) >> 31) & 0x7ff);
-		if ((*table & _REGION_ENTRY_INV) &&
+		if ((*table & _REGION_ENTRY_INVALID) &&
 		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
 		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
 			goto out_unmap;
 			goto out_unmap;
 		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
@@ -363,7 +363,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 
 
 		/* Store 'from' address in an invalid segment table entry. */
 		/* Store 'from' address in an invalid segment table entry. */
 		flush |= gmap_unlink_segment(gmap, table);
 		flush |= gmap_unlink_segment(gmap, table);
-		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
+		*table =  (from + off) | (_SEGMENT_ENTRY_INVALID |
+					  _SEGMENT_ENTRY_PROTECT);
 	}
 	}
 	spin_unlock(&gmap->mm->page_table_lock);
 	spin_unlock(&gmap->mm->page_table_lock);
 	up_read(&gmap->mm->mmap_sem);
 	up_read(&gmap->mm->mmap_sem);
@@ -384,15 +385,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
 	unsigned long *table;
 	unsigned long *table;
 
 
 	table = gmap->table + ((address >> 53) & 0x7ff);
 	table = gmap->table + ((address >> 53) & 0x7ff);
-	if (unlikely(*table & _REGION_ENTRY_INV))
+	if (unlikely(*table & _REGION_ENTRY_INVALID))
 		return ERR_PTR(-EFAULT);
 		return ERR_PTR(-EFAULT);
 	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 	table = table + ((address >> 42) & 0x7ff);
 	table = table + ((address >> 42) & 0x7ff);
-	if (unlikely(*table & _REGION_ENTRY_INV))
+	if (unlikely(*table & _REGION_ENTRY_INVALID))
 		return ERR_PTR(-EFAULT);
 		return ERR_PTR(-EFAULT);
 	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 	table = table + ((address >> 31) & 0x7ff);
 	table = table + ((address >> 31) & 0x7ff);
-	if (unlikely(*table & _REGION_ENTRY_INV))
+	if (unlikely(*table & _REGION_ENTRY_INVALID))
 		return ERR_PTR(-EFAULT);
 		return ERR_PTR(-EFAULT);
 	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 	table = table + ((address >> 20) & 0x7ff);
 	table = table + ((address >> 20) & 0x7ff);
@@ -422,11 +423,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
 		return PTR_ERR(segment_ptr);
 		return PTR_ERR(segment_ptr);
 	/* Convert the gmap address to an mm address. */
 	/* Convert the gmap address to an mm address. */
 	segment = *segment_ptr;
 	segment = *segment_ptr;
-	if (!(segment & _SEGMENT_ENTRY_INV)) {
+	if (!(segment & _SEGMENT_ENTRY_INVALID)) {
 		page = pfn_to_page(segment >> PAGE_SHIFT);
 		page = pfn_to_page(segment >> PAGE_SHIFT);
 		mp = (struct gmap_pgtable *) page->index;
 		mp = (struct gmap_pgtable *) page->index;
 		return mp->vmaddr | (address & ~PMD_MASK);
 		return mp->vmaddr | (address & ~PMD_MASK);
-	} else if (segment & _SEGMENT_ENTRY_RO) {
+	} else if (segment & _SEGMENT_ENTRY_PROTECT) {
 		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
 		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
 		return vmaddr | (address & ~PMD_MASK);
 		return vmaddr | (address & ~PMD_MASK);
 	}
 	}
@@ -517,8 +518,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	mp = (struct gmap_pgtable *) page->index;
 	mp = (struct gmap_pgtable *) page->index;
 	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
 	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
-		*rmap->entry =
-			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+		*rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
+					     _SEGMENT_ENTRY_PROTECT);
 		list_del(&rmap->list);
 		list_del(&rmap->list);
 		kfree(rmap);
 		kfree(rmap);
 		flush = 1;
 		flush = 1;
@@ -545,13 +546,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
 	/* Convert the gmap address to an mm address. */
 	/* Convert the gmap address to an mm address. */
 	while (1) {
 	while (1) {
 		segment = *segment_ptr;
 		segment = *segment_ptr;
-		if (!(segment & _SEGMENT_ENTRY_INV)) {
+		if (!(segment & _SEGMENT_ENTRY_INVALID)) {
 			/* Page table is present */
 			/* Page table is present */
 			page = pfn_to_page(segment >> PAGE_SHIFT);
 			page = pfn_to_page(segment >> PAGE_SHIFT);
 			mp = (struct gmap_pgtable *) page->index;
 			mp = (struct gmap_pgtable *) page->index;
 			return mp->vmaddr | (address & ~PMD_MASK);
 			return mp->vmaddr | (address & ~PMD_MASK);
 		}
 		}
-		if (!(segment & _SEGMENT_ENTRY_RO))
+		if (!(segment & _SEGMENT_ENTRY_PROTECT))
 			/* Nothing mapped in the gmap address space. */
 			/* Nothing mapped in the gmap address space. */
 			break;
 			break;
 		rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
 		rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
@@ -586,25 +587,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
 	while (address < to) {
 	while (address < to) {
 		/* Walk the gmap address space page table */
 		/* Walk the gmap address space page table */
 		table = gmap->table + ((address >> 53) & 0x7ff);
 		table = gmap->table + ((address >> 53) & 0x7ff);
-		if (unlikely(*table & _REGION_ENTRY_INV)) {
+		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
 			address = (address + PMD_SIZE) & PMD_MASK;
 			address = (address + PMD_SIZE) & PMD_MASK;
 			continue;
 			continue;
 		}
 		}
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = table + ((address >> 42) & 0x7ff);
 		table = table + ((address >> 42) & 0x7ff);
-		if (unlikely(*table & _REGION_ENTRY_INV)) {
+		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
 			address = (address + PMD_SIZE) & PMD_MASK;
 			address = (address + PMD_SIZE) & PMD_MASK;
 			continue;
 			continue;
 		}
 		}
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = table + ((address >> 31) & 0x7ff);
 		table = table + ((address >> 31) & 0x7ff);
-		if (unlikely(*table & _REGION_ENTRY_INV)) {
+		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
 			address = (address + PMD_SIZE) & PMD_MASK;
 			address = (address + PMD_SIZE) & PMD_MASK;
 			continue;
 			continue;
 		}
 		}
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		table = table + ((address >> 20) & 0x7ff);
 		table = table + ((address >> 20) & 0x7ff);
-		if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
+		if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
 			address = (address + PMD_SIZE) & PMD_MASK;
 			address = (address + PMD_SIZE) & PMD_MASK;
 			continue;
 			continue;
 		}
 		}
@@ -687,7 +688,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
 			continue;
 			continue;
 		/* Set notification bit in the pgste of the pte */
 		/* Set notification bit in the pgste of the pte */
 		entry = *ptep;
 		entry = *ptep;
-		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
+		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
 			pgste = pgste_get_lock(ptep);
 			pgste = pgste_get_lock(ptep);
 			pgste_val(pgste) |= PGSTE_IN_BIT;
 			pgste_val(pgste) |= PGSTE_IN_BIT;
 			pgste_set_unlock(ptep, pgste);
 			pgste_set_unlock(ptep, pgste);
@@ -752,8 +753,9 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 	page->index = (unsigned long) mp;
 	page->index = (unsigned long) mp;
 	atomic_set(&page->_mapcount, 3);
 	atomic_set(&page->_mapcount, 3);
 	table = (unsigned long *) page_to_phys(page);
 	table = (unsigned long *) page_to_phys(page);
-	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
-	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+	clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
+		    PAGE_SIZE/2);
 	return table;
 	return table;
 }
 }
 
 
@@ -791,26 +793,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
 	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
 	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
 	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
 	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
 	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
-		unsigned long address, bits;
-		unsigned char skey;
+		unsigned long address, bits, skey;
 
 
 		address = pte_val(*ptep) & PAGE_MASK;
 		address = pte_val(*ptep) & PAGE_MASK;
-		skey = page_get_storage_key(address);
+		skey = (unsigned long) page_get_storage_key(address);
 		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
 		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
 		/* Set storage key ACC and FP */
 		/* Set storage key ACC and FP */
-		page_set_storage_key(address,
-				(key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)),
-				!nq);
-
+		page_set_storage_key(address, skey, !nq);
 		/* Merge host changed & referenced into pgste  */
 		/* Merge host changed & referenced into pgste  */
 		pgste_val(new) |= bits << 52;
 		pgste_val(new) |= bits << 52;
-		/* Transfer skey changed & referenced bit to kvm user bits */
-		pgste_val(new) |= bits << 45;	/* PGSTE_UR_BIT & PGSTE_UC_BIT */
 	}
 	}
 	/* changing the guest storage key is considered a change of the page */
 	/* changing the guest storage key is considered a change of the page */
 	if ((pgste_val(new) ^ pgste_val(old)) &
 	if ((pgste_val(new) ^ pgste_val(old)) &
 	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
 	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
-		pgste_val(new) |= PGSTE_UC_BIT;
+		pgste_val(new) |= PGSTE_HC_BIT;
 
 
 	pgste_set_unlock(ptep, new);
 	pgste_set_unlock(ptep, new);
 	pte_unmap_unlock(*ptep, ptl);
 	pte_unmap_unlock(*ptep, ptl);
@@ -878,7 +875,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
 		pgtable_page_ctor(page);
 		pgtable_page_ctor(page);
 		atomic_set(&page->_mapcount, 1);
 		atomic_set(&page->_mapcount, 1);
 		table = (unsigned long *) page_to_phys(page);
 		table = (unsigned long *) page_to_phys(page);
-		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
 		spin_lock_bh(&mm->context.list_lock);
 		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	} else {
 	} else {
@@ -1007,7 +1004,6 @@ void tlb_table_flush(struct mmu_gather *tlb)
 	struct mmu_table_batch **batch = &tlb->batch;
 	struct mmu_table_batch **batch = &tlb->batch;
 
 
 	if (*batch) {
 	if (*batch) {
-		__tlb_flush_mm(tlb->mm);
 		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
 		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
 		*batch = NULL;
 		*batch = NULL;
 	}
 	}
@@ -1017,11 +1013,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
 {
 	struct mmu_table_batch **batch = &tlb->batch;
 	struct mmu_table_batch **batch = &tlb->batch;
 
 
+	tlb->mm->context.flush_mm = 1;
 	if (*batch == NULL) {
 	if (*batch == NULL) {
 		*batch = (struct mmu_table_batch *)
 		*batch = (struct mmu_table_batch *)
 			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
 			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
 		if (*batch == NULL) {
 		if (*batch == NULL) {
-			__tlb_flush_mm(tlb->mm);
+			__tlb_flush_mm_lazy(tlb->mm);
 			tlb_remove_table_one(table);
 			tlb_remove_table_one(table);
 			return;
 			return;
 		}
 		}
@@ -1029,7 +1026,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 	}
 	}
 	(*batch)->tables[(*batch)->nr++] = table;
 	(*batch)->tables[(*batch)->nr++] = table;
 	if ((*batch)->nr == MAX_TABLE_BATCH)
 	if ((*batch)->nr == MAX_TABLE_BATCH)
-		tlb_table_flush(tlb);
+		tlb_flush_mmu(tlb);
 }
 }
 
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -1198,9 +1195,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 		list_del(lh);
 		list_del(lh);
 	}
 	}
 	ptep = (pte_t *) pgtable;
 	ptep = (pte_t *) pgtable;
-	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	pte_val(*ptep) = _PAGE_INVALID;
 	ptep++;
 	ptep++;
-	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	pte_val(*ptep) = _PAGE_INVALID;
 	return pgtable;
 	return pgtable;
 }
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */

+ 9 - 6
arch/s390/mm/vmem.c

@@ -69,7 +69,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
 		pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
 		pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
 	if (!pte)
 	if (!pte)
 		return NULL;
 		return NULL;
-	clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
+	clear_table((unsigned long *) pte, _PAGE_INVALID,
 		    PTRS_PER_PTE * sizeof(pte_t));
 		    PTRS_PER_PTE * sizeof(pte_t));
 	return pte;
 	return pte;
 }
 }
@@ -101,7 +101,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
 		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
 			pud_val(*pu_dir) = __pa(address) |
 			pud_val(*pu_dir) = __pa(address) |
 				_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
 				_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
-				(ro ? _REGION_ENTRY_RO : 0);
+				(ro ? _REGION_ENTRY_PROTECT : 0);
 			address += PUD_SIZE;
 			address += PUD_SIZE;
 			continue;
 			continue;
 		}
 		}
@@ -118,7 +118,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
 		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
 			pmd_val(*pm_dir) = __pa(address) |
 			pmd_val(*pm_dir) = __pa(address) |
 				_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
 				_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
-				(ro ? _SEGMENT_ENTRY_RO : 0);
+				_SEGMENT_ENTRY_YOUNG |
+				(ro ? _SEGMENT_ENTRY_PROTECT : 0);
 			address += PMD_SIZE;
 			address += PMD_SIZE;
 			continue;
 			continue;
 		}
 		}
@@ -131,7 +132,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 		}
 		}
 
 
 		pt_dir = pte_offset_kernel(pm_dir, address);
 		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0);
+		pte_val(*pt_dir) = __pa(address) |
+			pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
 		address += PAGE_SIZE;
 		address += PAGE_SIZE;
 	}
 	}
 	ret = 0;
 	ret = 0;
@@ -154,7 +156,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 	pte_t *pt_dir;
 	pte_t *pt_dir;
 	pte_t  pte;
 	pte_t  pte;
 
 
-	pte_val(pte) = _PAGE_TYPE_EMPTY;
+	pte_val(pte) = _PAGE_INVALID;
 	while (address < end) {
 	while (address < end) {
 		pg_dir = pgd_offset_k(address);
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
 		if (pgd_none(*pg_dir)) {
@@ -255,7 +257,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 			new_page =__pa(vmem_alloc_pages(0));
 			new_page =__pa(vmem_alloc_pages(0));
 			if (!new_page)
 			if (!new_page)
 				goto out;
 				goto out;
-			pte_val(*pt_dir) = __pa(new_page);
+			pte_val(*pt_dir) =
+				__pa(new_page) | pgprot_val(PAGE_KERNEL);
 		}
 		}
 		address += PAGE_SIZE;
 		address += PAGE_SIZE;
 	}
 	}

+ 1 - 1
arch/s390/pci/Makefile

@@ -2,5 +2,5 @@
 # Makefile for the s390 PCI subsystem.
 # Makefile for the s390 PCI subsystem.
 #
 #
 
 
-obj-$(CONFIG_PCI)	+= pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \
+obj-$(CONFIG_PCI)	+= pci.o pci_dma.o pci_clp.o pci_sysfs.o \
 			   pci_event.o pci_debug.o pci_insn.o
 			   pci_event.o pci_debug.o pci_insn.o

+ 219 - 356
arch/s390/pci/pci.c

@@ -42,45 +42,26 @@
 #define	SIC_IRQ_MODE_SINGLE		1
 #define	SIC_IRQ_MODE_SINGLE		1
 
 
 #define ZPCI_NR_DMA_SPACES		1
 #define ZPCI_NR_DMA_SPACES		1
-#define ZPCI_MSI_VEC_BITS		6
 #define ZPCI_NR_DEVICES			CONFIG_PCI_NR_FUNCTIONS
 #define ZPCI_NR_DEVICES			CONFIG_PCI_NR_FUNCTIONS
 
 
 /* list of all detected zpci devices */
 /* list of all detected zpci devices */
-LIST_HEAD(zpci_list);
-EXPORT_SYMBOL_GPL(zpci_list);
-DEFINE_MUTEX(zpci_list_lock);
-EXPORT_SYMBOL_GPL(zpci_list_lock);
+static LIST_HEAD(zpci_list);
+static DEFINE_SPINLOCK(zpci_list_lock);
 
 
-static struct pci_hp_callback_ops *hotplug_ops;
+static void zpci_enable_irq(struct irq_data *data);
+static void zpci_disable_irq(struct irq_data *data);
 
 
-static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
-static DEFINE_SPINLOCK(zpci_domain_lock);
-
-struct callback {
-	irq_handler_t	handler;
-	void		*data;
+static struct irq_chip zpci_irq_chip = {
+	.name = "zPCI",
+	.irq_unmask = zpci_enable_irq,
+	.irq_mask = zpci_disable_irq,
 };
 };
 
 
-struct zdev_irq_map {
-	unsigned long	aibv;		/* AI bit vector */
-	int		msi_vecs;	/* consecutive MSI-vectors used */
-	int		__unused;
-	struct callback	cb[ZPCI_NR_MSI_VECS]; /* callback handler array */
-	spinlock_t	lock;		/* protect callbacks against de-reg */
-};
-
-struct intr_bucket {
-	/* amap of adapters, one bit per dev, corresponds to one irq nr */
-	unsigned long	*alloc;
-	/* AI summary bit, global page for all devices */
-	unsigned long	*aisb;
-	/* pointer to aibv and callback data in zdev */
-	struct zdev_irq_map *imap[ZPCI_NR_DEVICES];
-	/* protects the whole bucket struct */
-	spinlock_t	lock;
-};
+static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DEFINE_SPINLOCK(zpci_domain_lock);
 
 
-static struct intr_bucket *bucket;
+static struct airq_iv *zpci_aisb_iv;
+static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
 
 
 /* Adapter interrupt definitions */
 /* Adapter interrupt definitions */
 static void zpci_irq_handler(struct airq_struct *airq);
 static void zpci_irq_handler(struct airq_struct *airq);
@@ -96,27 +77,8 @@ static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
 struct zpci_iomap_entry *zpci_iomap_start;
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
 
-/* highest irq summary bit */
-static int __read_mostly aisb_max;
-
-static struct kmem_cache *zdev_irq_cache;
 static struct kmem_cache *zdev_fmb_cache;
 static struct kmem_cache *zdev_fmb_cache;
 
 
-static inline int irq_to_msi_nr(unsigned int irq)
-{
-	return irq & ZPCI_MSI_MASK;
-}
-
-static inline int irq_to_dev_nr(unsigned int irq)
-{
-	return irq >> ZPCI_MSI_VEC_BITS;
-}
-
-static inline struct zdev_irq_map *get_imap(unsigned int irq)
-{
-	return bucket->imap[irq_to_dev_nr(irq)];
-}
-
 struct zpci_dev *get_zdev(struct pci_dev *pdev)
 struct zpci_dev *get_zdev(struct pci_dev *pdev)
 {
 {
 	return (struct zpci_dev *) pdev->sysdata;
 	return (struct zpci_dev *) pdev->sysdata;
@@ -126,22 +88,17 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
 {
 {
 	struct zpci_dev *tmp, *zdev = NULL;
 	struct zpci_dev *tmp, *zdev = NULL;
 
 
-	mutex_lock(&zpci_list_lock);
+	spin_lock(&zpci_list_lock);
 	list_for_each_entry(tmp, &zpci_list, entry) {
 	list_for_each_entry(tmp, &zpci_list, entry) {
 		if (tmp->fid == fid) {
 		if (tmp->fid == fid) {
 			zdev = tmp;
 			zdev = tmp;
 			break;
 			break;
 		}
 		}
 	}
 	}
-	mutex_unlock(&zpci_list_lock);
+	spin_unlock(&zpci_list_lock);
 	return zdev;
 	return zdev;
 }
 }
 
 
-bool zpci_fid_present(u32 fid)
-{
-	return (get_zdev_by_fid(fid) != NULL) ? true : false;
-}
-
 static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
 static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
 {
 {
 	return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
 	return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
@@ -160,8 +117,7 @@ int pci_proc_domain(struct pci_bus *bus)
 EXPORT_SYMBOL_GPL(pci_proc_domain);
 EXPORT_SYMBOL_GPL(pci_proc_domain);
 
 
 /* Modify PCI: Register adapter interruptions */
 /* Modify PCI: Register adapter interruptions */
-static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
-			      u64 aibv)
+static int zpci_set_airq(struct zpci_dev *zdev)
 {
 {
 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
 	struct zpci_fib *fib;
 	struct zpci_fib *fib;
@@ -172,14 +128,14 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
 		return -ENOMEM;
 		return -ENOMEM;
 
 
 	fib->isc = PCI_ISC;
 	fib->isc = PCI_ISC;
-	fib->noi = zdev->irq_map->msi_vecs;
 	fib->sum = 1;		/* enable summary notifications */
 	fib->sum = 1;		/* enable summary notifications */
-	fib->aibv = aibv;
-	fib->aibvo = 0;		/* every function has its own page */
-	fib->aisb = (u64) bucket->aisb + aisb / 8;
-	fib->aisbo = aisb & ZPCI_MSI_MASK;
+	fib->noi = airq_iv_end(zdev->aibv);
+	fib->aibv = (unsigned long) zdev->aibv->vector;
+	fib->aibvo = 0;		/* each zdev has its own interrupt vector */
+	fib->aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
+	fib->aisbo = zdev->aisb & 63;
 
 
-	rc = s390pci_mod_fc(req, fib);
+	rc = zpci_mod_fc(req, fib);
 	pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi);
 	pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi);
 
 
 	free_page((unsigned long) fib);
 	free_page((unsigned long) fib);
@@ -209,7 +165,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args
 	fib->iota = args->iota;
 	fib->iota = args->iota;
 	fib->fmb_addr = args->fmb_addr;
 	fib->fmb_addr = args->fmb_addr;
 
 
-	rc = s390pci_mod_fc(req, fib);
+	rc = zpci_mod_fc(req, fib);
 	free_page((unsigned long) fib);
 	free_page((unsigned long) fib);
 	return rc;
 	return rc;
 }
 }
@@ -234,7 +190,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
 }
 }
 
 
 /* Modify PCI: Unregister adapter interruptions */
 /* Modify PCI: Unregister adapter interruptions */
-static int zpci_unregister_airq(struct zpci_dev *zdev)
+static int zpci_clear_airq(struct zpci_dev *zdev)
 {
 {
 	struct mod_pci_args args = { 0, 0, 0, 0 };
 	struct mod_pci_args args = { 0, 0, 0, 0 };
 
 
@@ -283,7 +239,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
 	u64 data;
 	u64 data;
 	int rc;
 	int rc;
 
 
-	rc = s390pci_load(&data, req, offset);
+	rc = zpci_load(&data, req, offset);
 	if (!rc) {
 	if (!rc) {
 		data = data << ((8 - len) * 8);
 		data = data << ((8 - len) * 8);
 		data = le64_to_cpu(data);
 		data = le64_to_cpu(data);
@@ -301,25 +257,46 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
 
 
 	data = cpu_to_le64(data);
 	data = cpu_to_le64(data);
 	data = data >> ((8 - len) * 8);
 	data = data >> ((8 - len) * 8);
-	rc = s390pci_store(data, req, offset);
+	rc = zpci_store(data, req, offset);
 	return rc;
 	return rc;
 }
 }
 
 
-void enable_irq(unsigned int irq)
+static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
+{
+	int offset, pos;
+	u32 mask_bits;
+
+	if (msi->msi_attrib.is_msix) {
+		offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+			PCI_MSIX_ENTRY_VECTOR_CTRL;
+		msi->masked = readl(msi->mask_base + offset);
+		writel(flag, msi->mask_base + offset);
+	} else if (msi->msi_attrib.maskbit) {
+		pos = (long) msi->mask_base;
+		pci_read_config_dword(msi->dev, pos, &mask_bits);
+		mask_bits &= ~(mask);
+		mask_bits |= flag & mask;
+		pci_write_config_dword(msi->dev, pos, mask_bits);
+	} else
+		return 0;
+
+	msi->msi_attrib.maskbit = !!flag;
+	return 1;
+}
+
+static void zpci_enable_irq(struct irq_data *data)
 {
 {
-	struct msi_desc *msi = irq_get_msi_desc(irq);
+	struct msi_desc *msi = irq_get_msi_desc(data->irq);
 
 
 	zpci_msi_set_mask_bits(msi, 1, 0);
 	zpci_msi_set_mask_bits(msi, 1, 0);
 }
 }
-EXPORT_SYMBOL_GPL(enable_irq);
 
 
-void disable_irq(unsigned int irq)
+static void zpci_disable_irq(struct irq_data *data)
 {
 {
-	struct msi_desc *msi = irq_get_msi_desc(irq);
+	struct msi_desc *msi = irq_get_msi_desc(data->irq);
 
 
 	zpci_msi_set_mask_bits(msi, 1, 1);
 	zpci_msi_set_mask_bits(msi, 1, 1);
 }
 }
-EXPORT_SYMBOL_GPL(disable_irq);
 
 
 void pcibios_fixup_bus(struct pci_bus *bus)
 void pcibios_fixup_bus(struct pci_bus *bus)
 {
 {
@@ -404,152 +381,147 @@ static struct pci_ops pci_root_ops = {
 	.write = pci_write,
 	.write = pci_write,
 };
 };
 
 
-/* store the last handled bit to implement fair scheduling of devices */
-static DEFINE_PER_CPU(unsigned long, next_sbit);
-
 static void zpci_irq_handler(struct airq_struct *airq)
 static void zpci_irq_handler(struct airq_struct *airq)
 {
 {
-	unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit);
-	int rescan = 0, max = aisb_max;
-	struct zdev_irq_map *imap;
+	unsigned long si, ai;
+	struct airq_iv *aibv;
+	int irqs_on = 0;
 
 
 	inc_irq_stat(IRQIO_PCI);
 	inc_irq_stat(IRQIO_PCI);
-	sbit = start;
-
-scan:
-	/* find summary_bit */
-	for_each_set_bit_left_cont(sbit, bucket->aisb, max) {
-		clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6));
-		last = sbit;
+	for (si = 0;;) {
+		/* Scan adapter summary indicator bit vector */
+		si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
+		if (si == -1UL) {
+			if (irqs_on++)
+				/* End of second scan with interrupts on. */
+				break;
+			/* First scan complete, reenable interrupts. */
+			zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+			si = 0;
+			continue;
+		}
 
 
-		/* find vector bit */
-		imap = bucket->imap[sbit];
-		for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) {
+		/* Scan the adapter interrupt vector for this device. */
+		aibv = zpci_aibv[si];
+		for (ai = 0;;) {
+			ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+			if (ai == -1UL)
+				break;
 			inc_irq_stat(IRQIO_MSI);
 			inc_irq_stat(IRQIO_MSI);
-			clear_bit(63 - mbit, &imap->aibv);
-
-			spin_lock(&imap->lock);
-			if (imap->cb[mbit].handler)
-				imap->cb[mbit].handler(mbit,
-					imap->cb[mbit].data);
-			spin_unlock(&imap->lock);
+			airq_iv_lock(aibv, ai);
+			generic_handle_irq(airq_iv_get_data(aibv, ai));
+			airq_iv_unlock(aibv, ai);
 		}
 		}
 	}
 	}
-
-	if (rescan)
-		goto out;
-
-	/* scan the skipped bits */
-	if (start > 0) {
-		sbit = 0;
-		max = start;
-		start = 0;
-		goto scan;
-	}
-
-	/* enable interrupts again */
-	set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
-
-	/* check again to not lose initiative */
-	rmb();
-	max = aisb_max;
-	sbit = find_first_bit_left(bucket->aisb, max);
-	if (sbit != max) {
-		rescan++;
-		goto scan;
-	}
-out:
-	/* store next device bit to scan */
-	__get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last;
 }
 }
 
 
-/* msi_vecs - number of requested interrupts, 0 place function to error state */
-static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs)
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
 {
 	struct zpci_dev *zdev = get_zdev(pdev);
 	struct zpci_dev *zdev = get_zdev(pdev);
-	unsigned int aisb, msi_nr;
+	unsigned int hwirq, irq, msi_vecs;
+	unsigned long aisb;
 	struct msi_desc *msi;
 	struct msi_desc *msi;
+	struct msi_msg msg;
 	int rc;
 	int rc;
 
 
-	/* store the number of used MSI vectors */
-	zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS);
-
-	spin_lock(&bucket->lock);
-	aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE);
-	/* alloc map exhausted? */
-	if (aisb == PAGE_SIZE) {
-		spin_unlock(&bucket->lock);
-		return -EIO;
-	}
-	set_bit(aisb, bucket->alloc);
-	spin_unlock(&bucket->lock);
+	pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
+	if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
+		return -EINVAL;
+	msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
+	msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
 
 
+	/* Allocate adapter summary indicator bit */
+	rc = -EIO;
+	aisb = airq_iv_alloc_bit(zpci_aisb_iv);
+	if (aisb == -1UL)
+		goto out;
 	zdev->aisb = aisb;
 	zdev->aisb = aisb;
-	if (aisb + 1 > aisb_max)
-		aisb_max = aisb + 1;
 
 
-	/* wire up IRQ shortcut pointer */
-	bucket->imap[zdev->aisb] = zdev->irq_map;
-	pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map);
+	/* Create adapter interrupt vector */
+	rc = -ENOMEM;
+	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+	if (!zdev->aibv)
+		goto out_si;
 
 
-	/* TODO: irq number 0 wont be found if we return less than requested MSIs.
-	 * ignore it for now and fix in common code.
-	 */
-	msi_nr = aisb << ZPCI_MSI_VEC_BITS;
+	/* Wire up shortcut pointer */
+	zpci_aibv[aisb] = zdev->aibv;
 
 
+	/* Request MSI interrupts */
+	hwirq = 0;
 	list_for_each_entry(msi, &pdev->msi_list, list) {
 	list_for_each_entry(msi, &pdev->msi_list, list) {
-		rc = zpci_setup_msi_irq(zdev, msi, msi_nr,
-					  aisb << ZPCI_MSI_VEC_BITS);
+		rc = -EIO;
+		irq = irq_alloc_desc(0);	/* Alloc irq on node 0 */
+		if (irq == NO_IRQ)
+			goto out_msi;
+		rc = irq_set_msi_desc(irq, msi);
 		if (rc)
 		if (rc)
-			return rc;
-		msi_nr++;
+			goto out_msi;
+		irq_set_chip_and_handler(irq, &zpci_irq_chip,
+					 handle_simple_irq);
+		msg.data = hwirq;
+		msg.address_lo = zdev->msi_addr & 0xffffffff;
+		msg.address_hi = zdev->msi_addr >> 32;
+		write_msi_msg(irq, &msg);
+		airq_iv_set_data(zdev->aibv, hwirq, irq);
+		hwirq++;
 	}
 	}
 
 
-	rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv);
-	if (rc) {
-		clear_bit(aisb, bucket->alloc);
-		dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
-		return rc;
+	/* Enable adapter interrupts */
+	rc = zpci_set_airq(zdev);
+	if (rc)
+		goto out_msi;
+
+	return (msi_vecs == nvec) ? 0 : msi_vecs;
+
+out_msi:
+	list_for_each_entry(msi, &pdev->msi_list, list) {
+		if (hwirq-- == 0)
+			break;
+		irq_set_msi_desc(msi->irq, NULL);
+		irq_free_desc(msi->irq);
+		msi->msg.address_lo = 0;
+		msi->msg.address_hi = 0;
+		msi->msg.data = 0;
+		msi->irq = 0;
 	}
 	}
-	return (zdev->irq_map->msi_vecs == msi_vecs) ?
-		0 : zdev->irq_map->msi_vecs;
+	zpci_aibv[aisb] = NULL;
+	airq_iv_release(zdev->aibv);
+out_si:
+	airq_iv_free_bit(zpci_aisb_iv, aisb);
+out:
+	dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
+	return rc;
 }
 }
 
 
-static void zpci_teardown_msi(struct pci_dev *pdev)
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
 {
 {
 	struct zpci_dev *zdev = get_zdev(pdev);
 	struct zpci_dev *zdev = get_zdev(pdev);
 	struct msi_desc *msi;
 	struct msi_desc *msi;
-	int aisb, rc;
+	int rc;
 
 
-	rc = zpci_unregister_airq(zdev);
+	pr_info("%s: on pdev: %p\n", __func__, pdev);
+
+	/* Disable adapter interrupts */
+	rc = zpci_clear_airq(zdev);
 	if (rc) {
 	if (rc) {
 		dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc);
 		dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc);
 		return;
 		return;
 	}
 	}
 
 
-	msi = list_first_entry(&pdev->msi_list, struct msi_desc, list);
-	aisb = irq_to_dev_nr(msi->irq);
-
-	list_for_each_entry(msi, &pdev->msi_list, list)
-		zpci_teardown_msi_irq(zdev, msi);
-
-	clear_bit(aisb, bucket->alloc);
-	if (aisb + 1 == aisb_max)
-		aisb_max--;
-}
-
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
-	pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
-	if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
-		return -EINVAL;
-	return zpci_setup_msi(pdev, nvec);
-}
+	/* Release MSI interrupts */
+	list_for_each_entry(msi, &pdev->msi_list, list) {
+		zpci_msi_set_mask_bits(msi, 1, 1);
+		irq_set_msi_desc(msi->irq, NULL);
+		irq_free_desc(msi->irq);
+		msi->msg.address_lo = 0;
+		msi->msg.address_hi = 0;
+		msi->msg.data = 0;
+		msi->irq = 0;
+	}
 
 
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
-{
-	pr_info("%s: on pdev: %p\n", __func__, pdev);
-	zpci_teardown_msi(pdev);
+	zpci_aibv[zdev->aisb] = NULL;
+	airq_iv_release(zdev->aibv);
+	airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
 }
 }
 
 
 static void zpci_map_resources(struct zpci_dev *zdev)
 static void zpci_map_resources(struct zpci_dev *zdev)
@@ -564,8 +536,6 @@ static void zpci_map_resources(struct zpci_dev *zdev)
 			continue;
 			continue;
 		pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
 		pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
 		pdev->resource[i].end = pdev->resource[i].start + len - 1;
 		pdev->resource[i].end = pdev->resource[i].start + len - 1;
-		pr_debug("BAR%i: -> start: %Lx  end: %Lx\n",
-			i, pdev->resource[i].start, pdev->resource[i].end);
 	}
 	}
 }
 }
 
 
@@ -589,162 +559,47 @@ struct zpci_dev *zpci_alloc_device(void)
 
 
 	/* Alloc memory for our private pci device data */
 	/* Alloc memory for our private pci device data */
 	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
 	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
-	if (!zdev)
-		return ERR_PTR(-ENOMEM);
-
-	/* Alloc aibv & callback space */
-	zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL);
-	if (!zdev->irq_map)
-		goto error;
-	WARN_ON((u64) zdev->irq_map & 0xff);
-	return zdev;
-
-error:
-	kfree(zdev);
-	return ERR_PTR(-ENOMEM);
+	return zdev ? : ERR_PTR(-ENOMEM);
 }
 }
 
 
 void zpci_free_device(struct zpci_dev *zdev)
 void zpci_free_device(struct zpci_dev *zdev)
 {
 {
-	kmem_cache_free(zdev_irq_cache, zdev->irq_map);
 	kfree(zdev);
 	kfree(zdev);
 }
 }
 
 
-/*
- * Too late for any s390 specific setup, since interrupts must be set up
- * already which requires DMA setup too and the pci scan will access the
- * config space, which only works if the function handle is enabled.
- */
-int pcibios_enable_device(struct pci_dev *pdev, int mask)
-{
-	struct resource *res;
-	u16 cmd;
-	int i;
-
-	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
-
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
-		res = &pdev->resource[i];
-
-		if (res->flags & IORESOURCE_IO)
-			return -EINVAL;
-
-		if (res->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	pci_write_config_word(pdev, PCI_COMMAND, cmd);
-	return 0;
-}
-
 int pcibios_add_platform_entries(struct pci_dev *pdev)
 int pcibios_add_platform_entries(struct pci_dev *pdev)
 {
 {
 	return zpci_sysfs_add_device(&pdev->dev);
 	return zpci_sysfs_add_device(&pdev->dev);
 }
 }
 
 
-int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data)
-{
-	int msi_nr = irq_to_msi_nr(irq);
-	struct zdev_irq_map *imap;
-	struct msi_desc *msi;
-
-	msi = irq_get_msi_desc(irq);
-	if (!msi)
-		return -EIO;
-
-	imap = get_imap(irq);
-	spin_lock_init(&imap->lock);
-
-	pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr);
-	imap->cb[msi_nr].handler = handler;
-	imap->cb[msi_nr].data = data;
-
-	/*
-	 * The generic MSI code returns with the interrupt disabled on the
-	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
-	 * at that level, so we do it here by hand.
-	 */
-	zpci_msi_set_mask_bits(msi, 1, 0);
-	return 0;
-}
-
-void zpci_free_irq(unsigned int irq)
-{
-	struct zdev_irq_map *imap = get_imap(irq);
-	int msi_nr = irq_to_msi_nr(irq);
-	unsigned long flags;
-
-	pr_debug("%s: for irq: %d\n", __func__, irq);
-
-	spin_lock_irqsave(&imap->lock, flags);
-	imap->cb[msi_nr].handler = NULL;
-	imap->cb[msi_nr].data = NULL;
-	spin_unlock_irqrestore(&imap->lock, flags);
-}
-
-int request_irq(unsigned int irq, irq_handler_t handler,
-		unsigned long irqflags, const char *devname, void *dev_id)
-{
-	pr_debug("%s: irq: %d  handler: %p  flags: %lx  dev: %s\n",
-		__func__, irq, handler, irqflags, devname);
-
-	return zpci_request_irq(irq, handler, dev_id);
-}
-EXPORT_SYMBOL_GPL(request_irq);
-
-void free_irq(unsigned int irq, void *dev_id)
-{
-	zpci_free_irq(irq);
-}
-EXPORT_SYMBOL_GPL(free_irq);
-
 static int __init zpci_irq_init(void)
 static int __init zpci_irq_init(void)
 {
 {
-	int cpu, rc;
-
-	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL);
-	if (!bucket)
-		return -ENOMEM;
-
-	bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL);
-	if (!bucket->aisb) {
-		rc = -ENOMEM;
-		goto out_aisb;
-	}
-
-	bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL);
-	if (!bucket->alloc) {
-		rc = -ENOMEM;
-		goto out_alloc;
-	}
+	int rc;
 
 
 	rc = register_adapter_interrupt(&zpci_airq);
 	rc = register_adapter_interrupt(&zpci_airq);
 	if (rc)
 	if (rc)
-		goto out_ai;
+		goto out;
 	/* Set summary to 1 to be called every time for the ISC. */
 	/* Set summary to 1 to be called every time for the ISC. */
 	*zpci_airq.lsi_ptr = 1;
 	*zpci_airq.lsi_ptr = 1;
 
 
-	for_each_online_cpu(cpu)
-		per_cpu(next_sbit, cpu) = 0;
+	rc = -ENOMEM;
+	zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+	if (!zpci_aisb_iv)
+		goto out_airq;
 
 
-	spin_lock_init(&bucket->lock);
-	set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
 	return 0;
 	return 0;
 
 
-out_ai:
-	free_page((unsigned long) bucket->alloc);
-out_alloc:
-	free_page((unsigned long) bucket->aisb);
-out_aisb:
-	kfree(bucket);
+out_airq:
+	unregister_adapter_interrupt(&zpci_airq);
+out:
 	return rc;
 	return rc;
 }
 }
 
 
 static void zpci_irq_exit(void)
 static void zpci_irq_exit(void)
 {
 {
-	free_page((unsigned long) bucket->alloc);
-	free_page((unsigned long) bucket->aisb);
+	airq_iv_release(zpci_aisb_iv);
 	unregister_adapter_interrupt(&zpci_airq);
 	unregister_adapter_interrupt(&zpci_airq);
-	kfree(bucket);
 }
 }
 
 
 static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
 static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
@@ -801,16 +656,49 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
 int pcibios_add_device(struct pci_dev *pdev)
 int pcibios_add_device(struct pci_dev *pdev)
 {
 {
 	struct zpci_dev *zdev = get_zdev(pdev);
 	struct zpci_dev *zdev = get_zdev(pdev);
+	struct resource *res;
+	int i;
+
+	zdev->pdev = pdev;
+	zpci_map_resources(zdev);
+
+	for (i = 0; i < PCI_BAR_COUNT; i++) {
+		res = &pdev->resource[i];
+		if (res->parent || !res->flags)
+			continue;
+		pci_claim_resource(pdev, i);
+	}
+
+	return 0;
+}
+
+int pcibios_enable_device(struct pci_dev *pdev, int mask)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+	struct resource *res;
+	u16 cmd;
+	int i;
 
 
 	zdev->pdev = pdev;
 	zdev->pdev = pdev;
 	zpci_debug_init_device(zdev);
 	zpci_debug_init_device(zdev);
 	zpci_fmb_enable_device(zdev);
 	zpci_fmb_enable_device(zdev);
 	zpci_map_resources(zdev);
 	zpci_map_resources(zdev);
 
 
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+	for (i = 0; i < PCI_BAR_COUNT; i++) {
+		res = &pdev->resource[i];
+
+		if (res->flags & IORESOURCE_IO)
+			return -EINVAL;
+
+		if (res->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+	pci_write_config_word(pdev, PCI_COMMAND, cmd);
 	return 0;
 	return 0;
 }
 }
 
 
-void pcibios_release_device(struct pci_dev *pdev)
+void pcibios_disable_device(struct pci_dev *pdev)
 {
 {
 	struct zpci_dev *zdev = get_zdev(pdev);
 	struct zpci_dev *zdev = get_zdev(pdev);
 
 
@@ -898,6 +786,8 @@ int zpci_enable_device(struct zpci_dev *zdev)
 	rc = zpci_dma_init_device(zdev);
 	rc = zpci_dma_init_device(zdev);
 	if (rc)
 	if (rc)
 		goto out_dma;
 		goto out_dma;
+
+	zdev->state = ZPCI_FN_STATE_ONLINE;
 	return 0;
 	return 0;
 
 
 out_dma:
 out_dma:
@@ -926,18 +816,16 @@ int zpci_create_device(struct zpci_dev *zdev)
 		rc = zpci_enable_device(zdev);
 		rc = zpci_enable_device(zdev);
 		if (rc)
 		if (rc)
 			goto out_free;
 			goto out_free;
-
-		zdev->state = ZPCI_FN_STATE_ONLINE;
 	}
 	}
 	rc = zpci_scan_bus(zdev);
 	rc = zpci_scan_bus(zdev);
 	if (rc)
 	if (rc)
 		goto out_disable;
 		goto out_disable;
 
 
-	mutex_lock(&zpci_list_lock);
+	spin_lock(&zpci_list_lock);
 	list_add_tail(&zdev->entry, &zpci_list);
 	list_add_tail(&zdev->entry, &zpci_list);
-	if (hotplug_ops)
-		hotplug_ops->create_slot(zdev);
-	mutex_unlock(&zpci_list_lock);
+	spin_unlock(&zpci_list_lock);
+
+	zpci_init_slot(zdev);
 
 
 	return 0;
 	return 0;
 
 
@@ -967,15 +855,10 @@ static inline int barsize(u8 size)
 
 
 static int zpci_mem_init(void)
 static int zpci_mem_init(void)
 {
 {
-	zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map),
-				L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL);
-	if (!zdev_irq_cache)
-		goto error_zdev;
-
 	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
 	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
 				16, 0, NULL);
 				16, 0, NULL);
 	if (!zdev_fmb_cache)
 	if (!zdev_fmb_cache)
-		goto error_fmb;
+		goto error_zdev;
 
 
 	/* TODO: use realloc */
 	/* TODO: use realloc */
 	zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
 	zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
@@ -986,8 +869,6 @@ static int zpci_mem_init(void)
 
 
 error_iomap:
 error_iomap:
 	kmem_cache_destroy(zdev_fmb_cache);
 	kmem_cache_destroy(zdev_fmb_cache);
-error_fmb:
-	kmem_cache_destroy(zdev_irq_cache);
 error_zdev:
 error_zdev:
 	return -ENOMEM;
 	return -ENOMEM;
 }
 }
@@ -995,28 +876,10 @@ error_zdev:
 static void zpci_mem_exit(void)
 static void zpci_mem_exit(void)
 {
 {
 	kfree(zpci_iomap_start);
 	kfree(zpci_iomap_start);
-	kmem_cache_destroy(zdev_irq_cache);
 	kmem_cache_destroy(zdev_fmb_cache);
 	kmem_cache_destroy(zdev_fmb_cache);
 }
 }
 
 
-void zpci_register_hp_ops(struct pci_hp_callback_ops *ops)
-{
-	mutex_lock(&zpci_list_lock);
-	hotplug_ops = ops;
-	mutex_unlock(&zpci_list_lock);
-}
-EXPORT_SYMBOL_GPL(zpci_register_hp_ops);
-
-void zpci_deregister_hp_ops(void)
-{
-	mutex_lock(&zpci_list_lock);
-	hotplug_ops = NULL;
-	mutex_unlock(&zpci_list_lock);
-}
-EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops);
-
-unsigned int s390_pci_probe;
-EXPORT_SYMBOL_GPL(s390_pci_probe);
+static unsigned int s390_pci_probe;
 
 
 char * __init pcibios_setup(char *str)
 char * __init pcibios_setup(char *str)
 {
 {
@@ -1044,16 +907,12 @@ static int __init pci_base_init(void)
 
 
 	rc = zpci_debug_init();
 	rc = zpci_debug_init();
 	if (rc)
 	if (rc)
-		return rc;
+		goto out;
 
 
 	rc = zpci_mem_init();
 	rc = zpci_mem_init();
 	if (rc)
 	if (rc)
 		goto out_mem;
 		goto out_mem;
 
 
-	rc = zpci_msihash_init();
-	if (rc)
-		goto out_hash;
-
 	rc = zpci_irq_init();
 	rc = zpci_irq_init();
 	if (rc)
 	if (rc)
 		goto out_irq;
 		goto out_irq;
@@ -1062,7 +921,7 @@ static int __init pci_base_init(void)
 	if (rc)
 	if (rc)
 		goto out_dma;
 		goto out_dma;
 
 
-	rc = clp_find_pci_devices();
+	rc = clp_scan_pci_devices();
 	if (rc)
 	if (rc)
 		goto out_find;
 		goto out_find;
 
 
@@ -1073,11 +932,15 @@ out_find:
 out_dma:
 out_dma:
 	zpci_irq_exit();
 	zpci_irq_exit();
 out_irq:
 out_irq:
-	zpci_msihash_exit();
-out_hash:
 	zpci_mem_exit();
 	zpci_mem_exit();
 out_mem:
 out_mem:
 	zpci_debug_exit();
 	zpci_debug_exit();
+out:
 	return rc;
 	return rc;
 }
 }
-subsys_initcall(pci_base_init);
+subsys_initcall_sync(pci_base_init);
+
+void zpci_rescan(void)
+{
+	clp_rescan_pci_devices_simple();
+}

+ 99 - 47
arch/s390/pci/pci_clp.c

@@ -36,9 +36,9 @@ static inline u8 clp_instr(void *data)
 	return cc;
 	return cc;
 }
 }
 
 
-static void *clp_alloc_block(void)
+static void *clp_alloc_block(gfp_t gfp_mask)
 {
 {
-	return (void *) __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
+	return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE));
 }
 }
 
 
 static void clp_free_block(void *ptr)
 static void clp_free_block(void *ptr)
@@ -70,7 +70,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
 	struct clp_req_rsp_query_pci_grp *rrb;
 	struct clp_req_rsp_query_pci_grp *rrb;
 	int rc;
 	int rc;
 
 
-	rrb = clp_alloc_block();
+	rrb = clp_alloc_block(GFP_KERNEL);
 	if (!rrb)
 	if (!rrb)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
@@ -113,7 +113,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
 	struct clp_req_rsp_query_pci *rrb;
 	struct clp_req_rsp_query_pci *rrb;
 	int rc;
 	int rc;
 
 
-	rrb = clp_alloc_block();
+	rrb = clp_alloc_block(GFP_KERNEL);
 	if (!rrb)
 	if (!rrb)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
@@ -179,9 +179,9 @@ error:
 static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
 static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
 {
 {
 	struct clp_req_rsp_set_pci *rrb;
 	struct clp_req_rsp_set_pci *rrb;
-	int rc, retries = 1000;
+	int rc, retries = 100;
 
 
-	rrb = clp_alloc_block();
+	rrb = clp_alloc_block(GFP_KERNEL);
 	if (!rrb)
 	if (!rrb)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
@@ -199,7 +199,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
 			retries--;
 			retries--;
 			if (retries < 0)
 			if (retries < 0)
 				break;
 				break;
-			msleep(1);
+			msleep(20);
 		}
 		}
 	} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
 	} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
 
 
@@ -245,49 +245,12 @@ int clp_disable_fh(struct zpci_dev *zdev)
 	return rc;
 	return rc;
 }
 }
 
 
-static void clp_check_pcifn_entry(struct clp_fh_list_entry *entry)
+static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
+			void (*cb)(struct clp_fh_list_entry *entry))
 {
 {
-	int present, rc;
-
-	if (!entry->vendor_id)
-		return;
-
-	/* TODO: be a little bit more scalable */
-	present = zpci_fid_present(entry->fid);
-
-	if (present)
-		pr_debug("%s: device %x already present\n", __func__, entry->fid);
-
-	/* skip already used functions */
-	if (present && entry->config_state)
-		return;
-
-	/* aev 306: function moved to stand-by state */
-	if (present && !entry->config_state) {
-		/*
-		 * The handle is already disabled, that means no iota/irq freeing via
-		 * the firmware interfaces anymore. Need to free resources manually
-		 * (DMA memory, debug, sysfs)...
-		 */
-		zpci_stop_device(get_zdev_by_fid(entry->fid));
-		return;
-	}
-
-	rc = clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
-	if (rc)
-		pr_err("Failed to add fid: 0x%x\n", entry->fid);
-}
-
-int clp_find_pci_devices(void)
-{
-	struct clp_req_rsp_list_pci *rrb;
 	u64 resume_token = 0;
 	u64 resume_token = 0;
 	int entries, i, rc;
 	int entries, i, rc;
 
 
-	rrb = clp_alloc_block();
-	if (!rrb)
-		return -ENOMEM;
-
 	do {
 	do {
 		memset(rrb, 0, sizeof(*rrb));
 		memset(rrb, 0, sizeof(*rrb));
 		rrb->request.hdr.len = sizeof(rrb->request);
 		rrb->request.hdr.len = sizeof(rrb->request);
@@ -316,12 +279,101 @@ int clp_find_pci_devices(void)
 		resume_token = rrb->response.resume_token;
 		resume_token = rrb->response.resume_token;
 
 
 		for (i = 0; i < entries; i++)
 		for (i = 0; i < entries; i++)
-			clp_check_pcifn_entry(&rrb->response.fh_list[i]);
+			cb(&rrb->response.fh_list[i]);
 	} while (resume_token);
 	} while (resume_token);
 
 
 	pr_debug("Maximum number of supported PCI functions: %u\n",
 	pr_debug("Maximum number of supported PCI functions: %u\n",
 		rrb->response.max_fn);
 		rrb->response.max_fn);
 out:
 out:
+	return rc;
+}
+
+static void __clp_add(struct clp_fh_list_entry *entry)
+{
+	if (!entry->vendor_id)
+		return;
+
+	clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+}
+
+static void __clp_rescan(struct clp_fh_list_entry *entry)
+{
+	struct zpci_dev *zdev;
+
+	if (!entry->vendor_id)
+		return;
+
+	zdev = get_zdev_by_fid(entry->fid);
+	if (!zdev) {
+		clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+		return;
+	}
+
+	if (!entry->config_state) {
+		/*
+		 * The handle is already disabled, that means no iota/irq freeing via
+		 * the firmware interfaces anymore. Need to free resources manually
+		 * (DMA memory, debug, sysfs)...
+		 */
+		zpci_stop_device(zdev);
+	}
+}
+
+static void __clp_update(struct clp_fh_list_entry *entry)
+{
+	struct zpci_dev *zdev;
+
+	if (!entry->vendor_id)
+		return;
+
+	zdev = get_zdev_by_fid(entry->fid);
+	if (!zdev)
+		return;
+
+	zdev->fh = entry->fh;
+}
+
+int clp_scan_pci_devices(void)
+{
+	struct clp_req_rsp_list_pci *rrb;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	rc = clp_list_pci(rrb, __clp_add);
+
+	clp_free_block(rrb);
+	return rc;
+}
+
+int clp_rescan_pci_devices(void)
+{
+	struct clp_req_rsp_list_pci *rrb;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	rc = clp_list_pci(rrb, __clp_rescan);
+
+	clp_free_block(rrb);
+	return rc;
+}
+
+int clp_rescan_pci_devices_simple(void)
+{
+	struct clp_req_rsp_list_pci *rrb;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_NOWAIT);
+	if (!rrb)
+		return -ENOMEM;
+
+	rc = clp_list_pci(rrb, __clp_update);
+
 	clp_free_block(rrb);
 	clp_free_block(rrb);
 	return rc;
 	return rc;
 }
 }

+ 5 - 11
arch/s390/pci/pci_dma.c

@@ -10,6 +10,7 @@
 #include <linux/export.h>
 #include <linux/export.h>
 #include <linux/iommu-helper.h>
 #include <linux/iommu-helper.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
 #include <linux/pci.h>
 #include <linux/pci.h>
 #include <asm/pci_dma.h>
 #include <asm/pci_dma.h>
 
 
@@ -170,8 +171,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
 		 */
 		 */
 		goto no_refresh;
 		goto no_refresh;
 
 
-	rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
-				   nr_pages * PAGE_SIZE);
+	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
+				nr_pages * PAGE_SIZE);
 
 
 no_refresh:
 no_refresh:
 	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
 	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
@@ -407,7 +408,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 
 
 int zpci_dma_init_device(struct zpci_dev *zdev)
 int zpci_dma_init_device(struct zpci_dev *zdev)
 {
 {
-	unsigned int bitmap_order;
 	int rc;
 	int rc;
 
 
 	spin_lock_init(&zdev->iommu_bitmap_lock);
 	spin_lock_init(&zdev->iommu_bitmap_lock);
@@ -421,12 +421,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 
 
 	zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
 	zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
 	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
 	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
-	bitmap_order = get_order(zdev->iommu_pages / 8);
-	pr_info("iommu_size: 0x%lx  iommu_pages: 0x%lx  bitmap_order: %i\n",
-		 zdev->iommu_size, zdev->iommu_pages, bitmap_order);
-
-	zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-						       bitmap_order);
+	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
 	if (!zdev->iommu_bitmap) {
 	if (!zdev->iommu_bitmap) {
 		rc = -ENOMEM;
 		rc = -ENOMEM;
 		goto out_reg;
 		goto out_reg;
@@ -451,8 +446,7 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
 {
 {
 	zpci_unregister_ioat(zdev, 0);
 	zpci_unregister_ioat(zdev, 0);
 	dma_cleanup_tables(zdev);
 	dma_cleanup_tables(zdev);
-	free_pages((unsigned long) zdev->iommu_bitmap,
-		   get_order(zdev->iommu_pages / 8));
+	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
 	zdev->iommu_bitmap = NULL;
 	zdev->next_bit = 0;
 	zdev->next_bit = 0;
 }
 }

+ 1 - 1
arch/s390/pci/pci_event.c

@@ -69,7 +69,7 @@ static void zpci_event_log_avail(struct zpci_ccdf_avail *ccdf)
 		clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
 		clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
 		break;
 		break;
 	case 0x0306:
 	case 0x0306:
-		clp_find_pci_devices();
+		clp_rescan_pci_devices();
 		break;
 		break;
 	default:
 	default:
 		break;
 		break;

+ 9 - 9
arch/s390/pci/pci_insn.c

@@ -27,7 +27,7 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
 	return cc;
 	return cc;
 }
 }
 
 
-int s390pci_mod_fc(u64 req, struct zpci_fib *fib)
+int zpci_mod_fc(u64 req, struct zpci_fib *fib)
 {
 {
 	u8 cc, status;
 	u8 cc, status;
 
 
@@ -61,7 +61,7 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
 	return cc;
 	return cc;
 }
 }
 
 
-int s390pci_refresh_trans(u64 fn, u64 addr, u64 range)
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 {
 {
 	u8 cc, status;
 	u8 cc, status;
 
 
@@ -78,7 +78,7 @@ int s390pci_refresh_trans(u64 fn, u64 addr, u64 range)
 }
 }
 
 
 /* Set Interruption Controls */
 /* Set Interruption Controls */
-void set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
 {
 {
 	asm volatile (
 	asm volatile (
 		"	.insn	rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
 		"	.insn	rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
@@ -109,7 +109,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
 	return cc;
 	return cc;
 }
 }
 
 
-int s390pci_load(u64 *data, u64 req, u64 offset)
+int zpci_load(u64 *data, u64 req, u64 offset)
 {
 {
 	u8 status;
 	u8 status;
 	int cc;
 	int cc;
@@ -125,7 +125,7 @@ int s390pci_load(u64 *data, u64 req, u64 offset)
 			    __func__, cc, status, req, offset);
 			    __func__, cc, status, req, offset);
 	return (cc > 0) ? -EIO : cc;
 	return (cc > 0) ? -EIO : cc;
 }
 }
-EXPORT_SYMBOL_GPL(s390pci_load);
+EXPORT_SYMBOL_GPL(zpci_load);
 
 
 /* PCI Store */
 /* PCI Store */
 static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
 static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
@@ -147,7 +147,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
 	return cc;
 	return cc;
 }
 }
 
 
-int s390pci_store(u64 data, u64 req, u64 offset)
+int zpci_store(u64 data, u64 req, u64 offset)
 {
 {
 	u8 status;
 	u8 status;
 	int cc;
 	int cc;
@@ -163,7 +163,7 @@ int s390pci_store(u64 data, u64 req, u64 offset)
 			__func__, cc, status, req, offset);
 			__func__, cc, status, req, offset);
 	return (cc > 0) ? -EIO : cc;
 	return (cc > 0) ? -EIO : cc;
 }
 }
-EXPORT_SYMBOL_GPL(s390pci_store);
+EXPORT_SYMBOL_GPL(zpci_store);
 
 
 /* PCI Store Block */
 /* PCI Store Block */
 static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
 static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
@@ -183,7 +183,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
 	return cc;
 	return cc;
 }
 }
 
 
-int s390pci_store_block(const u64 *data, u64 req, u64 offset)
+int zpci_store_block(const u64 *data, u64 req, u64 offset)
 {
 {
 	u8 status;
 	u8 status;
 	int cc;
 	int cc;
@@ -199,4 +199,4 @@ int s390pci_store_block(const u64 *data, u64 req, u64 offset)
 			    __func__, cc, status, req, offset);
 			    __func__, cc, status, req, offset);
 	return (cc > 0) ? -EIO : cc;
 	return (cc > 0) ? -EIO : cc;
 }
 }
-EXPORT_SYMBOL_GPL(s390pci_store_block);
+EXPORT_SYMBOL_GPL(zpci_store_block);

+ 0 - 142
arch/s390/pci/pci_msi.c

@@ -1,142 +0,0 @@
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- *   Jan Glauber <jang@linux.vnet.ibm.com>
- */
-
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/rculist.h>
-#include <linux/hash.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <asm/hw_irq.h>
-
-/* mapping of irq numbers to msi_desc */
-static struct hlist_head *msi_hash;
-static const unsigned int msi_hash_bits = 8;
-#define MSI_HASH_BUCKETS (1U << msi_hash_bits)
-#define msi_hashfn(nr)	hash_long(nr, msi_hash_bits)
-
-static DEFINE_SPINLOCK(msi_map_lock);
-
-struct msi_desc *__irq_get_msi_desc(unsigned int irq)
-{
-	struct msi_map *map;
-
-	hlist_for_each_entry_rcu(map,
-			&msi_hash[msi_hashfn(irq)], msi_chain)
-		if (map->irq == irq)
-			return map->msi;
-	return NULL;
-}
-
-int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
-{
-	if (msi->msi_attrib.is_msix) {
-		int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_VECTOR_CTRL;
-		msi->masked = readl(msi->mask_base + offset);
-		writel(flag, msi->mask_base + offset);
-	} else {
-		if (msi->msi_attrib.maskbit) {
-			int pos;
-			u32 mask_bits;
-
-			pos = (long) msi->mask_base;
-			pci_read_config_dword(msi->dev, pos, &mask_bits);
-			mask_bits &= ~(mask);
-			mask_bits |= flag & mask;
-			pci_write_config_dword(msi->dev, pos, mask_bits);
-		} else {
-			return 0;
-		}
-	}
-
-	msi->msi_attrib.maskbit = !!flag;
-	return 1;
-}
-
-int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi,
-			unsigned int nr, int offset)
-{
-	struct msi_map *map;
-	struct msi_msg msg;
-	int rc;
-
-	map = kmalloc(sizeof(*map), GFP_KERNEL);
-	if (map == NULL)
-		return -ENOMEM;
-
-	map->irq = nr;
-	map->msi = msi;
-	zdev->msi_map[nr & ZPCI_MSI_MASK] = map;
-	INIT_HLIST_NODE(&map->msi_chain);
-
-	pr_debug("%s hashing irq: %u  to bucket nr: %llu\n",
-		__func__, nr, msi_hashfn(nr));
-	hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]);
-
-	spin_lock(&msi_map_lock);
-	rc = irq_set_msi_desc(nr, msi);
-	if (rc) {
-		spin_unlock(&msi_map_lock);
-		hlist_del_rcu(&map->msi_chain);
-		kfree(map);
-		zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL;
-		return rc;
-	}
-	spin_unlock(&msi_map_lock);
-
-	msg.data = nr - offset;
-	msg.address_lo = zdev->msi_addr & 0xffffffff;
-	msg.address_hi = zdev->msi_addr >> 32;
-	write_msi_msg(nr, &msg);
-	return 0;
-}
-
-void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi)
-{
-	int irq = msi->irq & ZPCI_MSI_MASK;
-	struct msi_map *map;
-
-	msi->msg.address_lo = 0;
-	msi->msg.address_hi = 0;
-	msi->msg.data = 0;
-	msi->irq = 0;
-	zpci_msi_set_mask_bits(msi, 1, 1);
-
-	spin_lock(&msi_map_lock);
-	map = zdev->msi_map[irq];
-	hlist_del_rcu(&map->msi_chain);
-	kfree(map);
-	zdev->msi_map[irq] = NULL;
-	spin_unlock(&msi_map_lock);
-}
-
-/*
- * The msi hash table has 256 entries which is good for 4..20
- * devices (a typical device allocates 10 + CPUs MSI's). Maybe make
- * the hash table size adjustable later.
- */
-int __init zpci_msihash_init(void)
-{
-	unsigned int i;
-
-	msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL);
-	if (!msi_hash)
-		return -ENOMEM;
-
-	for (i = 0; i < MSI_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&msi_hash[i]);
-	return 0;
-}
-
-void __init zpci_msihash_exit(void)
-{
-	kfree(msi_hash);
-}

+ 27 - 0
arch/s390/pci/pci_sysfs.c

@@ -48,11 +48,38 @@ static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr,
 }
 }
 static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL);
 static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL);
 
 
+static void recover_callback(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = get_zdev(pdev);
+	int ret;
+
+	pci_stop_and_remove_bus_device(pdev);
+	ret = zpci_disable_device(zdev);
+	if (ret)
+		return;
+
+	ret = zpci_enable_device(zdev);
+	if (ret)
+		return;
+
+	pci_rescan_bus(zdev->bus);
+}
+
+static ssize_t store_recover(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	int rc = device_schedule_callback(dev, recover_callback);
+	return rc ? rc : count;
+}
+static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover);
+
 static struct device_attribute *zpci_dev_attrs[] = {
 static struct device_attribute *zpci_dev_attrs[] = {
 	&dev_attr_function_id,
 	&dev_attr_function_id,
 	&dev_attr_function_handle,
 	&dev_attr_function_handle,
 	&dev_attr_pchid,
 	&dev_attr_pchid,
 	&dev_attr_pfgid,
 	&dev_attr_pfgid,
+	&dev_attr_recover,
 	NULL,
 	NULL,
 };
 };
 
 

+ 1 - 1
drivers/pci/hotplug/Kconfig

@@ -146,7 +146,7 @@ config HOTPLUG_PCI_SGI
 	  When in doubt, say N.
 	  When in doubt, say N.
 
 
 config HOTPLUG_PCI_S390
 config HOTPLUG_PCI_S390
-	tristate "System z PCI Hotplug Support"
+	bool "System z PCI Hotplug Support"
 	depends on S390 && 64BIT
 	depends on S390 && 64BIT
 	help
 	help
 	  Say Y here if you want to use the System z PCI Hotplug
 	  Say Y here if you want to use the System z PCI Hotplug

+ 2 - 61
drivers/pci/hotplug/s390_pci_hpc.c

@@ -79,8 +79,6 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
 	if (rc)
 	if (rc)
 		goto out_deconfigure;
 		goto out_deconfigure;
 
 
-	slot->zdev->state = ZPCI_FN_STATE_ONLINE;
-
 	pci_scan_slot(slot->zdev->bus, ZPCI_DEVFN);
 	pci_scan_slot(slot->zdev->bus, ZPCI_DEVFN);
 	pci_bus_add_devices(slot->zdev->bus);
 	pci_bus_add_devices(slot->zdev->bus);
 
 
@@ -148,7 +146,7 @@ static struct hotplug_slot_ops s390_hotplug_slot_ops = {
 	.get_adapter_status =	get_adapter_status,
 	.get_adapter_status =	get_adapter_status,
 };
 };
 
 
-static int init_pci_slot(struct zpci_dev *zdev)
+int zpci_init_slot(struct zpci_dev *zdev)
 {
 {
 	struct hotplug_slot *hotplug_slot;
 	struct hotplug_slot *hotplug_slot;
 	struct hotplug_slot_info *info;
 	struct hotplug_slot_info *info;
@@ -202,7 +200,7 @@ error:
 	return -ENOMEM;
 	return -ENOMEM;
 }
 }
 
 
-static void exit_pci_slot(struct zpci_dev *zdev)
+void zpci_exit_slot(struct zpci_dev *zdev)
 {
 {
 	struct list_head *tmp, *n;
 	struct list_head *tmp, *n;
 	struct slot *slot;
 	struct slot *slot;
@@ -215,60 +213,3 @@ static void exit_pci_slot(struct zpci_dev *zdev)
 		pci_hp_deregister(slot->hotplug_slot);
 		pci_hp_deregister(slot->hotplug_slot);
 	}
 	}
 }
 }
-
-static struct pci_hp_callback_ops hp_ops = {
-	.create_slot = init_pci_slot,
-	.remove_slot = exit_pci_slot,
-};
-
-static void __init init_pci_slots(void)
-{
-	struct zpci_dev *zdev;
-
-	/*
-	 * Create a structure for each slot, and register that slot
-	 * with the pci_hotplug subsystem.
-	 */
-	mutex_lock(&zpci_list_lock);
-	list_for_each_entry(zdev, &zpci_list, entry) {
-		init_pci_slot(zdev);
-	}
-	mutex_unlock(&zpci_list_lock);
-}
-
-static void __exit exit_pci_slots(void)
-{
-	struct list_head *tmp, *n;
-	struct slot *slot;
-
-	/*
-	 * Unregister all of our slots with the pci_hotplug subsystem.
-	 * Memory will be freed in release_slot() callback after slot's
-	 * lifespan is finished.
-	 */
-	list_for_each_safe(tmp, n, &s390_hotplug_slot_list) {
-		slot = list_entry(tmp, struct slot, slot_list);
-		list_del(&slot->slot_list);
-		pci_hp_deregister(slot->hotplug_slot);
-	}
-}
-
-static int __init pci_hotplug_s390_init(void)
-{
-	if (!s390_pci_probe)
-		return -EOPNOTSUPP;
-
-	zpci_register_hp_ops(&hp_ops);
-	init_pci_slots();
-
-	return 0;
-}
-
-static void __exit pci_hotplug_s390_exit(void)
-{
-	exit_pci_slots();
-	zpci_deregister_hp_ops();
-}
-
-module_init(pci_hotplug_s390_init);
-module_exit(pci_hotplug_s390_exit);

+ 4 - 4
drivers/s390/block/dasd_devmap.c

@@ -930,7 +930,7 @@ dasd_use_raw_store(struct device *dev, struct device_attribute *attr,
 	if (IS_ERR(devmap))
 	if (IS_ERR(devmap))
 		return PTR_ERR(devmap);
 		return PTR_ERR(devmap);
 
 
-	if ((strict_strtoul(buf, 10, &val) != 0) || val > 1)
+	if ((kstrtoul(buf, 10, &val) != 0) || val > 1)
 		return -EINVAL;
 		return -EINVAL;
 
 
 	spin_lock(&dasd_devmap_lock);
 	spin_lock(&dasd_devmap_lock);
@@ -1225,7 +1225,7 @@ dasd_expires_store(struct device *dev, struct device_attribute *attr,
 	if (IS_ERR(device))
 	if (IS_ERR(device))
 		return -ENODEV;
 		return -ENODEV;
 
 
-	if ((strict_strtoul(buf, 10, &val) != 0) ||
+	if ((kstrtoul(buf, 10, &val) != 0) ||
 	    (val > DASD_EXPIRES_MAX) || val == 0) {
 	    (val > DASD_EXPIRES_MAX) || val == 0) {
 		dasd_put_device(device);
 		dasd_put_device(device);
 		return -EINVAL;
 		return -EINVAL;
@@ -1265,7 +1265,7 @@ dasd_retries_store(struct device *dev, struct device_attribute *attr,
 	if (IS_ERR(device))
 	if (IS_ERR(device))
 		return -ENODEV;
 		return -ENODEV;
 
 
-	if ((strict_strtoul(buf, 10, &val) != 0) ||
+	if ((kstrtoul(buf, 10, &val) != 0) ||
 	    (val > DASD_RETRIES_MAX)) {
 	    (val > DASD_RETRIES_MAX)) {
 		dasd_put_device(device);
 		dasd_put_device(device);
 		return -EINVAL;
 		return -EINVAL;
@@ -1307,7 +1307,7 @@ dasd_timeout_store(struct device *dev, struct device_attribute *attr,
 	if (IS_ERR(device) || !device->block)
 	if (IS_ERR(device) || !device->block)
 		return -ENODEV;
 		return -ENODEV;
 
 
-	if ((strict_strtoul(buf, 10, &val) != 0) ||
+	if ((kstrtoul(buf, 10, &val) != 0) ||
 	    val > UINT_MAX / HZ) {
 	    val > UINT_MAX / HZ) {
 		dasd_put_device(device);
 		dasd_put_device(device);
 		return -EINVAL;
 		return -EINVAL;

+ 43 - 11
drivers/s390/block/dasd_eckd.c

@@ -85,6 +85,8 @@ MODULE_DEVICE_TABLE(ccw, dasd_eckd_ids);
 
 
 static struct ccw_driver dasd_eckd_driver; /* see below */
 static struct ccw_driver dasd_eckd_driver; /* see below */
 
 
+static void *rawpadpage;
+
 #define INIT_CQR_OK 0
 #define INIT_CQR_OK 0
 #define INIT_CQR_UNFORMATTED 1
 #define INIT_CQR_UNFORMATTED 1
 #define INIT_CQR_ERROR 2
 #define INIT_CQR_ERROR 2
@@ -3237,18 +3239,26 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev,
 	unsigned int seg_len, len_to_track_end;
 	unsigned int seg_len, len_to_track_end;
 	unsigned int first_offs;
 	unsigned int first_offs;
 	unsigned int cidaw, cplength, datasize;
 	unsigned int cidaw, cplength, datasize;
-	sector_t first_trk, last_trk;
+	sector_t first_trk, last_trk, sectors;
+	sector_t start_padding_sectors, end_sector_offset, end_padding_sectors;
 	unsigned int pfx_datasize;
 	unsigned int pfx_datasize;
 
 
 	/*
 	/*
 	 * raw track access needs to be mutiple of 64k and on 64k boundary
 	 * raw track access needs to be mutiple of 64k and on 64k boundary
+	 * For read requests we can fix an incorrect alignment by padding
+	 * the request with dummy pages.
 	 */
 	 */
-	if ((blk_rq_pos(req) % DASD_RAW_SECTORS_PER_TRACK) != 0) {
-		cqr = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	if (((blk_rq_pos(req) + blk_rq_sectors(req)) %
-	     DASD_RAW_SECTORS_PER_TRACK) != 0) {
+	start_padding_sectors = blk_rq_pos(req) % DASD_RAW_SECTORS_PER_TRACK;
+	end_sector_offset = (blk_rq_pos(req) + blk_rq_sectors(req)) %
+		DASD_RAW_SECTORS_PER_TRACK;
+	end_padding_sectors = (DASD_RAW_SECTORS_PER_TRACK - end_sector_offset) %
+		DASD_RAW_SECTORS_PER_TRACK;
+	basedev = block->base;
+	if ((start_padding_sectors || end_padding_sectors) &&
+	    (rq_data_dir(req) == WRITE)) {
+		DBF_DEV_EVENT(DBF_ERR, basedev,
+			      "raw write not track aligned (%lu,%lu) req %p",
+			      start_padding_sectors, end_padding_sectors, req);
 		cqr = ERR_PTR(-EINVAL);
 		cqr = ERR_PTR(-EINVAL);
 		goto out;
 		goto out;
 	}
 	}
@@ -3258,7 +3268,6 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev,
 		DASD_RAW_SECTORS_PER_TRACK;
 		DASD_RAW_SECTORS_PER_TRACK;
 	trkcount = last_trk - first_trk + 1;
 	trkcount = last_trk - first_trk + 1;
 	first_offs = 0;
 	first_offs = 0;
-	basedev = block->base;
 
 
 	if (rq_data_dir(req) == READ)
 	if (rq_data_dir(req) == READ)
 		cmd = DASD_ECKD_CCW_READ_TRACK;
 		cmd = DASD_ECKD_CCW_READ_TRACK;
@@ -3307,12 +3316,26 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev,
 	}
 	}
 
 
 	idaws = (unsigned long *)(cqr->data + pfx_datasize);
 	idaws = (unsigned long *)(cqr->data + pfx_datasize);
-
 	len_to_track_end = 0;
 	len_to_track_end = 0;
-
+	if (start_padding_sectors) {
+		ccw[-1].flags |= CCW_FLAG_CC;
+		ccw->cmd_code = cmd;
+		/* maximum 3390 track size */
+		ccw->count = 57326;
+		/* 64k map to one track */
+		len_to_track_end = 65536 - start_padding_sectors * 512;
+		ccw->cda = (__u32)(addr_t)idaws;
+		ccw->flags |= CCW_FLAG_IDA;
+		ccw->flags |= CCW_FLAG_SLI;
+		ccw++;
+		for (sectors = 0; sectors < start_padding_sectors; sectors += 8)
+			idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE);
+	}
 	rq_for_each_segment(bv, req, iter) {
 	rq_for_each_segment(bv, req, iter) {
 		dst = page_address(bv->bv_page) + bv->bv_offset;
 		dst = page_address(bv->bv_page) + bv->bv_offset;
 		seg_len = bv->bv_len;
 		seg_len = bv->bv_len;
+		if (cmd == DASD_ECKD_CCW_READ_TRACK)
+			memset(dst, 0, seg_len);
 		if (!len_to_track_end) {
 		if (!len_to_track_end) {
 			ccw[-1].flags |= CCW_FLAG_CC;
 			ccw[-1].flags |= CCW_FLAG_CC;
 			ccw->cmd_code = cmd;
 			ccw->cmd_code = cmd;
@@ -3328,7 +3351,8 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev,
 		len_to_track_end -= seg_len;
 		len_to_track_end -= seg_len;
 		idaws = idal_create_words(idaws, dst, seg_len);
 		idaws = idal_create_words(idaws, dst, seg_len);
 	}
 	}
-
+	for (sectors = 0; sectors < end_padding_sectors; sectors += 8)
+		idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE);
 	if (blk_noretry_request(req) ||
 	if (blk_noretry_request(req) ||
 	    block->base->features & DASD_FEATURE_FAILFAST)
 	    block->base->features & DASD_FEATURE_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
@@ -4479,12 +4503,19 @@ dasd_eckd_init(void)
 		kfree(dasd_reserve_req);
 		kfree(dasd_reserve_req);
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
+	rawpadpage = (void *)__get_free_page(GFP_KERNEL);
+	if (!rawpadpage) {
+		kfree(path_verification_worker);
+		kfree(dasd_reserve_req);
+		return -ENOMEM;
+	}
 	ret = ccw_driver_register(&dasd_eckd_driver);
 	ret = ccw_driver_register(&dasd_eckd_driver);
 	if (!ret)
 	if (!ret)
 		wait_for_device_probe();
 		wait_for_device_probe();
 	else {
 	else {
 		kfree(path_verification_worker);
 		kfree(path_verification_worker);
 		kfree(dasd_reserve_req);
 		kfree(dasd_reserve_req);
+		free_page((unsigned long)rawpadpage);
 	}
 	}
 	return ret;
 	return ret;
 }
 }
@@ -4495,6 +4526,7 @@ dasd_eckd_cleanup(void)
 	ccw_driver_unregister(&dasd_eckd_driver);
 	ccw_driver_unregister(&dasd_eckd_driver);
 	kfree(path_verification_worker);
 	kfree(path_verification_worker);
 	kfree(dasd_reserve_req);
 	kfree(dasd_reserve_req);
+	free_page((unsigned long)rawpadpage);
 }
 }
 
 
 module_init(dasd_eckd_init);
 module_init(dasd_eckd_init);

+ 12 - 2
drivers/s390/block/dasd_erp.c

@@ -124,10 +124,15 @@ dasd_default_erp_action(struct dasd_ccw_req *cqr)
 struct dasd_ccw_req *dasd_default_erp_postaction(struct dasd_ccw_req *cqr)
 struct dasd_ccw_req *dasd_default_erp_postaction(struct dasd_ccw_req *cqr)
 {
 {
 	int success;
 	int success;
+	unsigned long long startclk, stopclk;
+	struct dasd_device *startdev;
 
 
 	BUG_ON(cqr->refers == NULL || cqr->function == NULL);
 	BUG_ON(cqr->refers == NULL || cqr->function == NULL);
 
 
 	success = cqr->status == DASD_CQR_DONE;
 	success = cqr->status == DASD_CQR_DONE;
+	startclk = cqr->startclk;
+	stopclk = cqr->stopclk;
+	startdev = cqr->startdev;
 
 
 	/* free all ERPs - but NOT the original cqr */
 	/* free all ERPs - but NOT the original cqr */
 	while (cqr->refers != NULL) {
 	while (cqr->refers != NULL) {
@@ -142,6 +147,9 @@ struct dasd_ccw_req *dasd_default_erp_postaction(struct dasd_ccw_req *cqr)
 	}
 	}
 
 
 	/* set corresponding status to original cqr */
 	/* set corresponding status to original cqr */
+	cqr->startclk = startclk;
+	cqr->stopclk = stopclk;
+	cqr->startdev = startdev;
 	if (success)
 	if (success)
 		cqr->status = DASD_CQR_DONE;
 		cqr->status = DASD_CQR_DONE;
 	else {
 	else {
@@ -160,11 +168,13 @@ dasd_log_sense(struct dasd_ccw_req *cqr, struct irb *irb)
 
 
 	device = cqr->startdev;
 	device = cqr->startdev;
 	if (cqr->intrc == -ETIMEDOUT) {
 	if (cqr->intrc == -ETIMEDOUT) {
-		dev_err(&device->cdev->dev, "cqr %p timeout error", cqr);
+		dev_err(&device->cdev->dev,
+			"A timeout error occurred for cqr %p", cqr);
 		return;
 		return;
 	}
 	}
 	if (cqr->intrc == -ENOLINK) {
 	if (cqr->intrc == -ENOLINK) {
-		dev_err(&device->cdev->dev, "cqr %p transport error", cqr);
+		dev_err(&device->cdev->dev,
+			"A transport error occurred for cqr %p", cqr);
 		return;
 		return;
 	}
 	}
 	/* dump sense data */
 	/* dump sense data */

+ 1 - 1
drivers/s390/char/sclp_config.c

@@ -32,7 +32,7 @@ static void sclp_cpu_capability_notify(struct work_struct *work)
 	struct device *dev;
 	struct device *dev;
 
 
 	s390_adjust_jiffies();
 	s390_adjust_jiffies();
-	pr_warning("cpu capability changed.\n");
+	pr_info("CPU capability may have changed\n");
 	get_online_cpus();
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
 	for_each_online_cpu(cpu) {
 		dev = get_cpu_device(cpu);
 		dev = get_cpu_device(cpu);

+ 172 - 2
drivers/s390/cio/airq.c

@@ -81,15 +81,185 @@ void unregister_adapter_interrupt(struct airq_struct *airq)
 }
 }
 EXPORT_SYMBOL(unregister_adapter_interrupt);
 EXPORT_SYMBOL(unregister_adapter_interrupt);
 
 
-void do_adapter_IO(u8 isc)
+static irqreturn_t do_airq_interrupt(int irq, void *dummy)
 {
 {
+	struct tpi_info *tpi_info;
 	struct airq_struct *airq;
 	struct airq_struct *airq;
 	struct hlist_head *head;
 	struct hlist_head *head;
 
 
-	head = &airq_lists[isc];
+	__this_cpu_write(s390_idle.nohz_delay, 1);
+	tpi_info = (struct tpi_info *) &get_irq_regs()->int_code;
+	head = &airq_lists[tpi_info->isc];
 	rcu_read_lock();
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(airq, head, list)
 	hlist_for_each_entry_rcu(airq, head, list)
 		if ((*airq->lsi_ptr & airq->lsi_mask) != 0)
 		if ((*airq->lsi_ptr & airq->lsi_mask) != 0)
 			airq->handler(airq);
 			airq->handler(airq);
 	rcu_read_unlock();
 	rcu_read_unlock();
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction airq_interrupt = {
+	.name	 = "AIO",
+	.handler = do_airq_interrupt,
+};
+
+void __init init_airq_interrupts(void)
+{
+	irq_set_chip_and_handler(THIN_INTERRUPT,
+				 &dummy_irq_chip, handle_percpu_irq);
+	setup_irq(THIN_INTERRUPT, &airq_interrupt);
+}
+
+/**
+ * airq_iv_create - create an interrupt vector
+ * @bits: number of bits in the interrupt vector
+ * @flags: allocation flags
+ *
+ * Returns a pointer to an interrupt vector structure
+ */
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags)
+{
+	struct airq_iv *iv;
+	unsigned long size;
+
+	iv = kzalloc(sizeof(*iv), GFP_KERNEL);
+	if (!iv)
+		goto out;
+	iv->bits = bits;
+	size = BITS_TO_LONGS(bits) * sizeof(unsigned long);
+	iv->vector = kzalloc(size, GFP_KERNEL);
+	if (!iv->vector)
+		goto out_free;
+	if (flags & AIRQ_IV_ALLOC) {
+		iv->avail = kmalloc(size, GFP_KERNEL);
+		if (!iv->avail)
+			goto out_free;
+		memset(iv->avail, 0xff, size);
+		iv->end = 0;
+	} else
+		iv->end = bits;
+	if (flags & AIRQ_IV_BITLOCK) {
+		iv->bitlock = kzalloc(size, GFP_KERNEL);
+		if (!iv->bitlock)
+			goto out_free;
+	}
+	if (flags & AIRQ_IV_PTR) {
+		size = bits * sizeof(unsigned long);
+		iv->ptr = kzalloc(size, GFP_KERNEL);
+		if (!iv->ptr)
+			goto out_free;
+	}
+	if (flags & AIRQ_IV_DATA) {
+		size = bits * sizeof(unsigned int);
+		iv->data = kzalloc(size, GFP_KERNEL);
+		if (!iv->data)
+			goto out_free;
+	}
+	spin_lock_init(&iv->lock);
+	return iv;
+
+out_free:
+	kfree(iv->ptr);
+	kfree(iv->bitlock);
+	kfree(iv->avail);
+	kfree(iv->vector);
+	kfree(iv);
+out:
+	return NULL;
+}
+EXPORT_SYMBOL(airq_iv_create);
+
+/**
+ * airq_iv_release - release an interrupt vector
+ * @iv: pointer to interrupt vector structure
+ */
+void airq_iv_release(struct airq_iv *iv)
+{
+	kfree(iv->data);
+	kfree(iv->ptr);
+	kfree(iv->bitlock);
+	kfree(iv->vector);
+	kfree(iv->avail);
+	kfree(iv);
+}
+EXPORT_SYMBOL(airq_iv_release);
+
+/**
+ * airq_iv_alloc_bit - allocate an irq bit from an interrupt vector
+ * @iv: pointer to an interrupt vector structure
+ *
+ * Returns the bit number of the allocated irq, or -1UL if no bit
+ * is available or the AIRQ_IV_ALLOC flag has not been specified
+ */
+unsigned long airq_iv_alloc_bit(struct airq_iv *iv)
+{
+	const unsigned long be_to_le = BITS_PER_LONG - 1;
+	unsigned long bit;
+
+	if (!iv->avail)
+		return -1UL;
+	spin_lock(&iv->lock);
+	bit = find_first_bit_left(iv->avail, iv->bits);
+	if (bit < iv->bits) {
+		clear_bit(bit ^ be_to_le, iv->avail);
+		if (bit >= iv->end)
+			iv->end = bit + 1;
+	} else
+		bit = -1UL;
+	spin_unlock(&iv->lock);
+	return bit;
+
+}
+EXPORT_SYMBOL(airq_iv_alloc_bit);
+
+/**
+ * airq_iv_free_bit - free an irq bit of an interrupt vector
+ * @iv: pointer to interrupt vector structure
+ * @bit: number of the irq bit to free
+ */
+void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit)
+{
+	const unsigned long be_to_le = BITS_PER_LONG - 1;
+
+	if (!iv->avail)
+		return;
+	spin_lock(&iv->lock);
+	/* Clear (possibly left over) interrupt bit */
+	clear_bit(bit ^ be_to_le, iv->vector);
+	/* Make the bit position available again */
+	set_bit(bit ^ be_to_le, iv->avail);
+	if (bit == iv->end - 1) {
+		/* Find new end of bit-field */
+		while (--iv->end > 0)
+			if (!test_bit((iv->end - 1) ^ be_to_le, iv->avail))
+				break;
+	}
+	spin_unlock(&iv->lock);
+}
+EXPORT_SYMBOL(airq_iv_free_bit);
+
+/**
+ * airq_iv_scan - scan interrupt vector for non-zero bits
+ * @iv: pointer to interrupt vector structure
+ * @start: bit number to start the search
+ * @end: bit number to end the search
+ *
+ * Returns the bit number of the next non-zero interrupt bit, or
+ * -1UL if the scan completed without finding any more any non-zero bits.
+ */
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+			   unsigned long end)
+{
+	const unsigned long be_to_le = BITS_PER_LONG - 1;
+	unsigned long bit;
+
+	/* Find non-zero bit starting from 'ivs->next'. */
+	bit = find_next_bit_left(iv->vector, end, start);
+	if (bit >= end)
+		return -1UL;
+	/* Clear interrupt bit (find left uses big-endian bit numbers) */
+	clear_bit(bit ^ be_to_le, iv->vector);
+	return bit;
 }
 }
+EXPORT_SYMBOL(airq_iv_scan);

+ 1 - 1
drivers/s390/cio/ccwgroup.c

@@ -137,7 +137,7 @@ static ssize_t ccwgroup_online_store(struct device *dev,
 	if (!try_module_get(gdrv->driver.owner))
 	if (!try_module_get(gdrv->driver.owner))
 		return -EINVAL;
 		return -EINVAL;
 
 
-	ret = strict_strtoul(buf, 0, &value);
+	ret = kstrtoul(buf, 0, &value);
 	if (ret)
 	if (ret)
 		goto out;
 		goto out;
 
 

+ 23 - 23
drivers/s390/cio/cio.c

@@ -561,37 +561,23 @@ out:
 }
 }
 
 
 /*
 /*
- * do_IRQ() handles all normal I/O device IRQ's (the special
- *	    SMP cross-CPU interrupts have their own specific
- *	    handlers).
- *
+ * do_cio_interrupt() handles all normal I/O device IRQ's
  */
  */
-void __irq_entry do_IRQ(struct pt_regs *regs)
+static irqreturn_t do_cio_interrupt(int irq, void *dummy)
 {
 {
-	struct tpi_info *tpi_info = (struct tpi_info *) &regs->int_code;
+	struct tpi_info *tpi_info;
 	struct subchannel *sch;
 	struct subchannel *sch;
 	struct irb *irb;
 	struct irb *irb;
-	struct pt_regs *old_regs;
 
 
-	old_regs = set_irq_regs(regs);
-	irq_enter();
 	__this_cpu_write(s390_idle.nohz_delay, 1);
 	__this_cpu_write(s390_idle.nohz_delay, 1);
-	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
-		/* Serve timer interrupts first. */
-		clock_comparator_work();
-
-	kstat_incr_irqs_this_cpu(IO_INTERRUPT, NULL);
+	tpi_info = (struct tpi_info *) &get_irq_regs()->int_code;
 	irb = (struct irb *) &S390_lowcore.irb;
 	irb = (struct irb *) &S390_lowcore.irb;
-	if (tpi_info->adapter_IO) {
-		do_adapter_IO(tpi_info->isc);
-		goto out;
-	}
 	sch = (struct subchannel *)(unsigned long) tpi_info->intparm;
 	sch = (struct subchannel *)(unsigned long) tpi_info->intparm;
 	if (!sch) {
 	if (!sch) {
 		/* Clear pending interrupt condition. */
 		/* Clear pending interrupt condition. */
 		inc_irq_stat(IRQIO_CIO);
 		inc_irq_stat(IRQIO_CIO);
 		tsch(tpi_info->schid, irb);
 		tsch(tpi_info->schid, irb);
-		goto out;
+		return IRQ_HANDLED;
 	}
 	}
 	spin_lock(sch->lock);
 	spin_lock(sch->lock);
 	/* Store interrupt response block to lowcore. */
 	/* Store interrupt response block to lowcore. */
@@ -606,9 +592,23 @@ void __irq_entry do_IRQ(struct pt_regs *regs)
 	} else
 	} else
 		inc_irq_stat(IRQIO_CIO);
 		inc_irq_stat(IRQIO_CIO);
 	spin_unlock(sch->lock);
 	spin_unlock(sch->lock);
-out:
-	irq_exit();
-	set_irq_regs(old_regs);
+
+	return IRQ_HANDLED;
+}
+
+static struct irq_desc *irq_desc_io;
+
+static struct irqaction io_interrupt = {
+	.name	 = "IO",
+	.handler = do_cio_interrupt,
+};
+
+void __init init_cio_interrupts(void)
+{
+	irq_set_chip_and_handler(IO_INTERRUPT,
+				 &dummy_irq_chip, handle_percpu_irq);
+	setup_irq(IO_INTERRUPT, &io_interrupt);
+	irq_desc_io = irq_to_desc(IO_INTERRUPT);
 }
 }
 
 
 #ifdef CONFIG_CCW_CONSOLE
 #ifdef CONFIG_CCW_CONSOLE
@@ -635,7 +635,7 @@ void cio_tsch(struct subchannel *sch)
 		local_bh_disable();
 		local_bh_disable();
 		irq_enter();
 		irq_enter();
 	}
 	}
-	kstat_incr_irqs_this_cpu(IO_INTERRUPT, NULL);
+	kstat_incr_irqs_this_cpu(IO_INTERRUPT, irq_desc_io);
 	if (sch->driver && sch->driver->irq)
 	if (sch->driver && sch->driver->irq)
 		sch->driver->irq(sch);
 		sch->driver->irq(sch);
 	else
 	else

+ 0 - 3
drivers/s390/cio/cio.h

@@ -121,9 +121,6 @@ extern int cio_commit_config(struct subchannel *sch);
 int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key);
 int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key);
 int cio_tm_intrg(struct subchannel *sch);
 int cio_tm_intrg(struct subchannel *sch);
 
 
-void do_adapter_IO(u8 isc);
-void do_IRQ(struct pt_regs *);
-
 /* Use with care. */
 /* Use with care. */
 #ifdef CONFIG_CCW_CONSOLE
 #ifdef CONFIG_CCW_CONSOLE
 extern struct subchannel *cio_probe_console(void);
 extern struct subchannel *cio_probe_console(void);

+ 1 - 1
drivers/s390/cio/cmf.c

@@ -1182,7 +1182,7 @@ static ssize_t cmb_enable_store(struct device *dev,
 	int ret;
 	int ret;
 	unsigned long val;
 	unsigned long val;
 
 
-	ret = strict_strtoul(buf, 16, &val);
+	ret = kstrtoul(buf, 16, &val);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 

+ 3 - 1
drivers/s390/cio/css.c

@@ -546,7 +546,9 @@ static int slow_eval_unknown_fn(struct subchannel_id schid, void *data)
 		case -ENOMEM:
 		case -ENOMEM:
 		case -EIO:
 		case -EIO:
 			/* These should abort looping */
 			/* These should abort looping */
+			spin_lock_irq(&slow_subchannel_lock);
 			idset_sch_del_subseq(slow_subchannel_set, schid);
 			idset_sch_del_subseq(slow_subchannel_set, schid);
+			spin_unlock_irq(&slow_subchannel_lock);
 			break;
 			break;
 		default:
 		default:
 			rc = 0;
 			rc = 0;
@@ -740,7 +742,7 @@ css_cm_enable_store(struct device *dev, struct device_attribute *attr,
 	int ret;
 	int ret;
 	unsigned long val;
 	unsigned long val;
 
 
-	ret = strict_strtoul(buf, 16, &val);
+	ret = kstrtoul(buf, 16, &val);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 	mutex_lock(&css->mutex);
 	mutex_lock(&css->mutex);

+ 0 - 2
drivers/s390/cio/css.h

@@ -130,8 +130,6 @@ struct channel_subsystem {
 
 
 extern struct channel_subsystem *channel_subsystems[];
 extern struct channel_subsystem *channel_subsystems[];
 
 
-void channel_subsystem_reinit(void);
-
 /* Helper functions to build lists for the slow path. */
 /* Helper functions to build lists for the slow path. */
 void css_schedule_eval(struct subchannel_id schid);
 void css_schedule_eval(struct subchannel_id schid);
 void css_schedule_eval_all(void);
 void css_schedule_eval_all(void);

+ 1 - 1
drivers/s390/cio/device.c

@@ -564,7 +564,7 @@ static ssize_t online_store (struct device *dev, struct device_attribute *attr,
 		ret = 0;
 		ret = 0;
 	} else {
 	} else {
 		force = 0;
 		force = 0;
-		ret = strict_strtoul(buf, 16, &i);
+		ret = kstrtoul(buf, 16, &i);
 	}
 	}
 	if (ret)
 	if (ret)
 		goto out;
 		goto out;

+ 1 - 1
drivers/s390/net/qeth_l3_sys.c

@@ -208,7 +208,7 @@ static ssize_t qeth_l3_dev_sniffer_store(struct device *dev,
 		goto out;
 		goto out;
 	}
 	}
 
 
-	rc = strict_strtoul(buf, 16, &i);
+	rc = kstrtoul(buf, 16, &i);
 	if (rc) {
 	if (rc) {
 		rc = -EINVAL;
 		rc = -EINVAL;
 		goto out;
 		goto out;

+ 0 - 4
include/asm-generic/pgtable.h

@@ -208,10 +208,6 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 #endif
 
 
-#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
-#define page_test_and_clear_young(pfn) (0)
-#endif
-
 #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
 #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
 #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
 #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
 #endif
 #endif

+ 0 - 3
mm/rmap.c

@@ -873,9 +873,6 @@ int page_referenced(struct page *page,
 								vm_flags);
 								vm_flags);
 		if (we_locked)
 		if (we_locked)
 			unlock_page(page);
 			unlock_page(page);
-
-		if (page_test_and_clear_young(page_to_pfn(page)))
-			referenced++;
 	}
 	}
 out:
 out:
 	return referenced;
 	return referenced;