Merge tag 'drm-intel-next-2013-07-26-fixed' of git://people.freedesktop.org/~danvet/drm-intel into drm-next

Neat that QA (and Ben) keeps on humming along while I'm on vacation, so
you already get the next feature pull request:
- proper eLLC support for HSW from Ben
- more interrupt refactoring
- add w/a tags where we implement them already (Damien)
- hangcheck fixes (Chris) + hangcheck stats (Mika)
- flesh out the new vm structs for ppgtt and ggtt (Ben)
- PSR for Haswell, still disabled by default (Rodrigo et al.)
- pc8+ refclock sequence code from Paulo
- more interrupt refactoring from Paulo, unifying ilk/snb with the ivb/hsw
  interrupt code
- full solution for the Haswell concurrent reg access issues (Chris)
- fix racy object accounting, used by some new leak tests
- fix sync polarity settings on ch7xxx dvo encoder
- random bits&pieces, little fixes and better debug output all over

[airlied: fix conflict with drm_mm cleanups]

* tag 'drm-intel-next-2013-07-26-fixed' of git://people.freedesktop.org/~danvet/drm-intel: (289 commits)
  drm/i915: Do not dereference NULL crtc or fb until after checking
  drm/i915: fix pnv display core clock readout out
  drm/i915: Replace open-coded offset_in_page()
  drm/i915: Retry DP aux_ch communications with a different clock after failure
  drm/i915: Add messages useful for HPD storm detection debugging (v2)
  drm/i915: dvo_ch7xxx: fix vsync polarity setting
  drm/i915: fix the racy object accounting
  drm/i915: Convert the register access tracepoint to be conditional
  drm/i915: Squash gen lookup through multiple indirections inside GT access
  drm/i915: Use the common register access functions for NOTRACE variants
  drm/i915: Use a private interface for register access within GT
  drm/i915: Colocate all GT access routines in the same file
  drm/i915: fix reference counting in i915_gem_create
  drm/i915: Use Graphics Base of Stolen Memory on all gen3+
  drm/i915: disable stolen mem for OVERLAY_NEEDS_PHYSICAL
  drm/i915: add functions to disable and restore LCPLL
  drm/i915: disable CLKOUT_DP when it's not needed
  drm/i915: extend lpt_enable_clkout_dp
  drm/i915: fix up error cleanup in i915_gem_object_bind_to_gtt
  drm/i915: Add some debug breadcrumbs to connector detection
  ...
Dave Airlie 12 years ago
parent
commit
32c913e436
100 changed files with 4444 additions and 2145 deletions
  1. Documentation/ABI/testing/sysfs-driver-xen-blkback (+17 -0)
  2. Documentation/ABI/testing/sysfs-driver-xen-blkfront (+10 -0)
  3. Documentation/bcache.txt (+24 -13)
  4. MAINTAINERS (+2 -2)
  5. Makefile (+1 -1)
  6. arch/alpha/Kconfig (+1 -0)
  7. arch/alpha/include/asm/atomic.h (+65 -23)
  8. arch/alpha/include/asm/param.h (+5 -3)
  9. arch/alpha/include/asm/spinlock.h (+0 -4)
  10. arch/alpha/include/asm/unistd.h (+1 -2)
  11. arch/alpha/include/uapi/asm/param.h (+0 -7)
  12. arch/alpha/include/uapi/asm/unistd.h (+2 -0)
  13. arch/alpha/kernel/entry.S (+288 -111)
  14. arch/alpha/kernel/irq_alpha.c (+1 -1)
  15. arch/alpha/kernel/smp.c (+3 -2)
  16. arch/alpha/kernel/sys_dp264.c (+0 -8)
  17. arch/alpha/kernel/sys_marvel.c (+2 -1)
  18. arch/alpha/kernel/systbls.S (+2 -0)
  19. arch/alpha/kernel/time.c (+1 -3)
  20. arch/alpha/kernel/traps.c (+3 -5)
  21. arch/arm/mach-s3c24xx/Kconfig (+1 -1)
  22. arch/arm64/include/asm/debug-monitors.h (+0 -7)
  23. arch/arm64/include/asm/system_misc.h (+2 -1)
  24. arch/arm64/kernel/process.c (+1 -1)
  25. arch/arm64/kernel/smp.c (+8 -7)
  26. arch/arm64/mm/fault.c (+20 -26)
  27. arch/mips/Kconfig (+1 -0)
  28. arch/mips/cavium-octeon/octeon-platform.c (+3 -2)
  29. arch/mips/kernel/smp-bmips.c (+1 -1)
  30. arch/mips/kernel/traps.c (+0 -1)
  31. arch/mips/kvm/Kconfig (+0 -1)
  32. arch/mips/mm/tlbex.c (+2 -2)
  33. arch/mips/netlogic/common/irq.c (+57 -11)
  34. arch/mips/netlogic/dts/xlp_evp.dts (+2 -1)
  35. arch/mips/netlogic/dts/xlp_svp.dts (+2 -1)
  36. arch/mips/netlogic/xlp/usb-init.c (+1 -1)
  37. arch/s390/include/asm/processor.h (+9 -1)
  38. arch/s390/include/asm/switch_to.h (+2 -2)
  39. arch/s390/include/uapi/asm/ptrace.h (+1 -0)
  40. arch/s390/kernel/crash_dump.c (+47 -4)
  41. arch/s390/kernel/ptrace.c (+39 -11)
  42. arch/s390/net/bpf_jit_comp.c (+88 -25)
  43. arch/um/include/shared/frame_kern.h (+4 -4)
  44. arch/um/kernel/signal.c (+2 -2)
  45. arch/um/kernel/skas/mmu.c (+1 -1)
  46. arch/um/kernel/skas/uaccess.c (+1 -1)
  47. arch/um/os-Linux/mem.c (+189 -41)
  48. arch/um/os-Linux/signal.c (+4 -4)
  49. arch/um/os-Linux/skas/process.c (+12 -7)
  50. arch/x86/kvm/mmu.c (+7 -0)
  51. arch/x86/um/signal.c (+0 -1)
  52. drivers/acpi/acpi_memhotplug.c (+1 -0)
  53. drivers/acpi/acpica/aclocal.h (+0 -13)
  54. drivers/acpi/internal.h (+11 -0)
  55. drivers/acpi/scan.c (+9 -4)
  56. drivers/acpi/video.c (+85 -13)
  57. drivers/acpi/video_detect.c (+21 -0)
  58. drivers/block/Kconfig (+2 -2)
  59. drivers/block/drbd/drbd_actlog.c (+21 -0)
  60. drivers/block/drbd/drbd_int.h (+13 -2)
  61. drivers/block/drbd/drbd_main.c (+34 -27)
  62. drivers/block/drbd/drbd_nl.c (+143 -42)
  63. drivers/block/drbd/drbd_receiver.c (+6 -6)
  64. drivers/block/drbd/drbd_state.c (+3 -1)
  65. drivers/block/rsxx/core.c (+348 -11)
  66. drivers/block/rsxx/cregs.c (+14 -0)
  67. drivers/block/rsxx/dev.c (+12 -21)
  68. drivers/block/rsxx/dma.c (+96 -89)
  69. drivers/block/rsxx/rsxx_priv.h (+9 -1)
  70. drivers/block/xen-blkback/blkback.c (+552 -320)
  71. drivers/block/xen-blkback/common.h (+145 -2)
  72. drivers/block/xen-blkback/xenbus.c (+85 -0)
  73. drivers/block/xen-blkfront.c (+432 -100)
  74. drivers/cpufreq/cpufreq.c (+3 -1)
  75. drivers/cpufreq/cpufreq_governor.c (+0 -3)
  76. drivers/cpufreq/cpufreq_stats.c (+2 -4)
  77. drivers/cpufreq/s3c24xx-cpufreq.c (+2 -2)
  78. drivers/gpio/gpio-msm-v2.c (+1 -1)
  79. drivers/gpio/gpio-omap.c (+70 -14)
  80. drivers/gpu/drm/drm_crtc_helper.c (+11 -16)
  81. drivers/gpu/drm/i915/Makefile (+1 -0)
  82. drivers/gpu/drm/i915/dvo_ch7xxx.c (+1 -1)
  83. drivers/gpu/drm/i915/i915_debugfs.c (+163 -15)
  84. drivers/gpu/drm/i915/i915_dma.c (+34 -31)
  85. drivers/gpu/drm/i915/i915_drv.c (+11 -266)
  86. drivers/gpu/drm/i915/i915_drv.h (+158 -103)
  87. drivers/gpu/drm/i915/i915_gem.c (+117 -56)
  88. drivers/gpu/drm/i915/i915_gem_context.c (+8 -15)
  89. drivers/gpu/drm/i915/i915_gem_debug.c (+1 -1)
  90. drivers/gpu/drm/i915/i915_gem_evict.c (+19 -15)
  91. drivers/gpu/drm/i915/i915_gem_execbuffer.c (+9 -7)
  92. drivers/gpu/drm/i915/i915_gem_gtt.c (+129 -84)
  93. drivers/gpu/drm/i915/i915_gem_stolen.c (+30 -38)
  94. drivers/gpu/drm/i915/i915_gpu_error.c (+5 -3)
  95. drivers/gpu/drm/i915/i915_irq.c (+224 -328)
  96. drivers/gpu/drm/i915/i915_reg.h (+103 -1)
  97. drivers/gpu/drm/i915/i915_trace.h (+5 -3)
  98. drivers/gpu/drm/i915/intel_crt.c (+4 -0)
  99. drivers/gpu/drm/i915/intel_ddi.c (+8 -4)
  100. drivers/gpu/drm/i915/intel_display.c (+348 -125)

+ 17 - 0
Documentation/ABI/testing/sysfs-driver-xen-blkback

@@ -0,0 +1,17 @@
+What:           /sys/module/xen_blkback/parameters/max_buffer_pages
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of free pages to keep in each block
+                backend buffer.
+
+What:           /sys/module/xen_blkback/parameters/max_persistent_grants
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of grants to map persistently in
+                blkback. If the frontend tries to use more than
+                max_persistent_grants, the LRU kicks in and starts
+                removing 5% of max_persistent_grants every 100ms.

+ 10 - 0
Documentation/ABI/testing/sysfs-driver-xen-blkfront

@@ -0,0 +1,10 @@
+What:           /sys/module/xen_blkfront/parameters/max
+Date:           June 2013
+KernelVersion:  3.11
+Contact:        Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Maximum number of segments that the frontend will negotiate
+                with the backend for indirect descriptors. The default value
+                is 32 - higher value means more potential throughput but more
+                memory usage. The backend picks the minimum of the frontend
+                and its default backend value.

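Both files above are plain module parameters, so their current values can be
read back like any other sysfs attribute; a small sketch (only the paths from
the two documents above are taken as given):

#include <stdio.h>

static long read_param(const char *path)
{
	long val = -1;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)	/* single integer value */
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	printf("blkback max_buffer_pages: %ld\n",
	       read_param("/sys/module/xen_blkback/parameters/max_buffer_pages"));
	printf("blkfront max segments: %ld\n",
	       read_param("/sys/module/xen_blkfront/parameters/max"));
	return 0;
}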
+ 24 - 13
Documentation/bcache.txt

@@ -46,29 +46,33 @@ you format your backing devices and cache device at the same time, you won't
 have to manually attach:
   make-bcache -B /dev/sda /dev/sdb -C /dev/sdc
 
-To make bcache devices known to the kernel, echo them to /sys/fs/bcache/register:
+bcache-tools now ships udev rules, and bcache devices are known to the kernel
+immediately.  Without udev, you can manually register devices like this:
 
   echo /dev/sdb > /sys/fs/bcache/register
   echo /dev/sdc > /sys/fs/bcache/register
 
-To register your bcache devices automatically, you could add something like
-this to an init script:
+Registering the backing device makes the bcache device show up in /dev; you can
+now format it and use it as normal. But the first time using a new bcache
+device, it'll be running in passthrough mode until you attach it to a cache.
+See the section on attaching.
 
-  echo /dev/sd* > /sys/fs/bcache/register_quiet
+The devices show up as:
 
-It'll look for bcache superblocks and ignore everything that doesn't have one.
+  /dev/bcache<N>
 
-Registering the backing device makes the bcache show up in /dev; you can now
-format it and use it as normal. But the first time using a new bcache device,
-it'll be running in passthrough mode until you attach it to a cache. See the
-section on attaching.
+As well as (with udev):
 
-The devices show up at /dev/bcacheN, and can be controlled via sysfs from
-/sys/block/bcacheN/bcache:
+  /dev/bcache/by-uuid/<uuid>
+  /dev/bcache/by-label/<label>
+
+To get started:
 
   mkfs.ext4 /dev/bcache0
   mount /dev/bcache0 /mnt
 
+You can control bcache devices through sysfs at /sys/block/bcache<N>/bcache .
+
 Cache devices are managed as sets; multiple caches per set isn't supported yet
 but will allow for mirroring of metadata and dirty data in the future. Your new
 cache set shows up as /sys/fs/bcache/<UUID>
@@ -80,11 +84,11 @@ must be attached to your cache set to enable caching. Attaching a backing
 device to a cache set is done thusly, with the UUID of the cache set in
 /sys/fs/bcache:
 
-  echo <UUID> > /sys/block/bcache0/bcache/attach
+  echo <CSET-UUID> > /sys/block/bcache0/bcache/attach
 
 This only has to be done once. The next time you reboot, just reregister all
 your bcache devices. If a backing device has data in a cache somewhere, the
-/dev/bcache# device won't be created until the cache shows up - particularly
+/dev/bcache<N> device won't be created until the cache shows up - particularly
 important if you have writeback caching turned on.
 
 If you're booting up and your cache device is gone and never coming back, you
@@ -191,6 +195,9 @@ want for getting the best possible numbers when benchmarking.
 
 SYSFS - BACKING DEVICE:
 
+Available at /sys/block/<bdev>/bcache, /sys/block/bcache*/bcache and
+(if attached) /sys/fs/bcache/<cset-uuid>/bdev*
+
 attach
   Echo the UUID of a cache set to this file to enable caching.
 
@@ -300,6 +307,8 @@ cache_readaheads
 
 SYSFS - CACHE SET:
 
+Available at /sys/fs/bcache/<cset-uuid>
+
 average_key_size
   Average data per key in the btree.
 
@@ -390,6 +399,8 @@ trigger_gc
 
 SYSFS - CACHE DEVICE:
 
+Available at /sys/block/<cdev>/bcache
+
 block_size
   Minimum granularity of writes - should match hardware sector size.
 

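For readers following the updated bcache.txt above, a minimal user-space
sketch of the register-and-attach flow it documents (the device nodes and the
cache-set UUID below are placeholders, not values from this merge):

#include <stdio.h>

static int write_sysfs(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	if (fputs(val, f) == EOF) {	/* sysfs takes the value in one write */
		fclose(f);
		return -1;
	}
	return fclose(f);
}

int main(void)
{
	/* Without udev, register backing and cache devices by hand. */
	write_sysfs("/sys/fs/bcache/register", "/dev/sdb");
	write_sysfs("/sys/fs/bcache/register", "/dev/sdc");
	/* One-time attach: echo the cache set's UUID into the bdev. */
	write_sysfs("/sys/block/bcache0/bcache/attach",
		    "<CSET-UUID>");	/* placeholder UUID */
	return 0;
}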
+ 2 - 2
MAINTAINERS

@@ -1642,7 +1642,7 @@ S:	Maintained
 F:	drivers/net/hamradio/baycom*
 
 BCACHE (BLOCK LAYER CACHE)
-M:	Kent Overstreet <koverstreet@google.com>
+M:	Kent Overstreet <kmo@daterainc.com>
 L:	linux-bcache@vger.kernel.org
 W:	http://bcache.evilpiepirate.org
 S:	Maintained:
@@ -3346,7 +3346,7 @@ F:	Documentation/firmware_class/
 F:	drivers/base/firmware*.c
 F:	include/linux/firmware.h
 
-FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card)
+FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card)
 M:	Joshua Morris <josh.h.morris@us.ibm.com>
 M:	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
 S:	Maintained

+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Linux for Workgroups
 
 # *DOCUMENTATION*

+ 1 - 0
arch/alpha/Kconfig

@@ -15,6 +15,7 @@ config ALPHA
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER

+ 65 - 23
arch/alpha/include/asm/atomic.h

@@ -186,17 +186,24 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
  */
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 {
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
+	int c, new, old;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldl_l	%[old],%[mem]\n"
+	"	cmpeq	%[old],%[u],%[c]\n"
+	"	addl	%[old],%[a],%[new]\n"
+	"	bne	%[c],2f\n"
+	"	stl_c	%[new],%[mem]\n"
+	"	beq	%[new],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c)
+	: [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u)
+	: "memory");
+	smp_mb();
+	return old;
 }
 
 
@@ -207,21 +214,56 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
  * @u: ...unless v is equal to u.
  *
  * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
+ * Returns true iff @v was not @u.
  */
 static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
+	long c, tmp;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldq_l	%[tmp],%[mem]\n"
+	"	cmpeq	%[tmp],%[u],%[c]\n"
+	"	addq	%[tmp],%[a],%[tmp]\n"
+	"	bne	%[c],2f\n"
+	"	stq_c	%[tmp],%[mem]\n"
+	"	beq	%[tmp],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [tmp] "=&r"(tmp), [c] "=&r"(c)
+	: [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u)
+	: "memory");
+	smp_mb();
+	return !c;
+}
+
+/*
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long old, tmp;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldq_l	%[old],%[mem]\n"
+	"	subq	%[old],1,%[tmp]\n"
+	"	ble	%[old],2f\n"
+	"	stq_c	%[tmp],%[mem]\n"
+	"	beq	%[tmp],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [old] "=&r"(old), [tmp] "=&r"(tmp)
+	: [mem] "m"(*v)
+	: "memory");
+	smp_mb();
+	return old - 1;
 }
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)

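A reading aid for the two primitives above -- a plain-C model of their return
conventions only (not the LL/SC implementation, and ignoring the smp_mb()
ordering the real code provides):

/* Single-threaded model of the semantics documented above. */
typedef struct { long counter; } atomic64_model_t;

/* Returns true iff *v was not u (and only then adds a). */
static int model_add_unless(atomic64_model_t *v, long a, long u)
{
	if (v->counter == u)
		return 0;
	v->counter += a;
	return 1;
}

/* Returns the old value minus 1 even when nothing was decremented,
 * so a negative result tells the caller the count was already <= 0. */
static long model_dec_if_positive(atomic64_model_t *v)
{
	long old = v->counter;

	if (old > 0)
		v->counter = old - 1;
	return old - 1;
}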
+ 5 - 3
arch/alpha/include/asm/param.h

@@ -3,7 +3,9 @@
 
 #include <uapi/asm/param.h>
 
-#define HZ		CONFIG_HZ
-#define USER_HZ		HZ
-# define CLOCKS_PER_SEC	HZ	/* frequency at which times() counts */
+# undef HZ
+# define HZ		CONFIG_HZ
+# define USER_HZ	1024
+# define CLOCKS_PER_SEC	USER_HZ	/* frequency at which times() counts */
+
 #endif /* _ASM_ALPHA_PARAM_H */

+ 0 - 4
arch/alpha/include/asm/spinlock.h

@@ -168,8 +168,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock)
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ALPHA_SPINLOCK_H */

+ 1 - 2
arch/alpha/include/asm/unistd.h

@@ -3,8 +3,7 @@
 
 #include <uapi/asm/unistd.h>
 
-
-#define NR_SYSCALLS			506
+#define NR_SYSCALLS			508
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64

+ 0 - 7
arch/alpha/include/uapi/asm/param.h

@@ -1,13 +1,7 @@
 #ifndef _UAPI_ASM_ALPHA_PARAM_H
 #define _UAPI_ASM_ALPHA_PARAM_H
 
-/* ??? Gross.  I don't want to parameterize this, and supposedly the
-   hardware ignores reprogramming.  We also need userland buy-in to the 
-   change in HZ, since this is visible in the wait4 resources etc.  */
-
-#ifndef __KERNEL__
 #define HZ		1024
-#endif
 
 #define EXEC_PAGESIZE	8192
 
@@ -17,5 +11,4 @@
 
 #define MAXHOSTNAMELEN	64	/* max length of hostname */
 
-
 #endif /* _UAPI_ASM_ALPHA_PARAM_H */

+ 2 - 0
arch/alpha/include/uapi/asm/unistd.h

@@ -467,5 +467,7 @@
 #define __NR_sendmmsg			503
 #define __NR_process_vm_readv		504
 #define __NR_process_vm_writev		505
+#define __NR_kcmp			506
+#define __NR_finit_module		507
 
 #endif /* _UAPI_ALPHA_UNISTD_H */

+ 288 - 111
arch/alpha/kernel/entry.S

@@ -12,11 +12,32 @@
 
 	.text
 	.set noat
+	.cfi_sections	.debug_frame
 
 /* Stack offsets.  */
 #define SP_OFF			184
 #define SWITCH_STACK_SIZE	320
 
+.macro	CFI_START_OSF_FRAME	func
+	.align	4
+	.globl	\func
+	.type	\func,@function
+\func:
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa	$sp, 48
+	.cfi_rel_offset	64, 8
+	.cfi_rel_offset	$gp, 16
+	.cfi_rel_offset	$16, 24
+	.cfi_rel_offset	$17, 32
+	.cfi_rel_offset	$18, 40
+.endm
+
+.macro	CFI_END_OSF_FRAME	func
+	.cfi_endproc
+	.size	\func, . - \func
+.endm
+
 /*
  * This defines the normal kernel pt-regs layout.
  *
@@ -27,100 +48,158 @@
  * the palcode-provided values are available to the signal handler.
  */
 
-#define SAVE_ALL			\
-	subq	$sp, SP_OFF, $sp;	\
-	stq	$0, 0($sp);		\
-	stq	$1, 8($sp);		\
-	stq	$2, 16($sp);		\
-	stq	$3, 24($sp);		\
-	stq	$4, 32($sp);		\
-	stq	$28, 144($sp);		\
-	lda	$2, alpha_mv;		\
-	stq	$5, 40($sp);		\
-	stq	$6, 48($sp);		\
-	stq	$7, 56($sp);		\
-	stq	$8, 64($sp);		\
-	stq	$19, 72($sp);		\
-	stq	$20, 80($sp);		\
-	stq	$21, 88($sp);		\
-	ldq	$2, HAE_CACHE($2);	\
-	stq	$22, 96($sp);		\
-	stq	$23, 104($sp);		\
-	stq	$24, 112($sp);		\
-	stq	$25, 120($sp);		\
-	stq	$26, 128($sp);		\
-	stq	$27, 136($sp);		\
-	stq	$2, 152($sp);		\
-	stq	$16, 160($sp);		\
-	stq	$17, 168($sp);		\
+.macro	SAVE_ALL
+	subq	$sp, SP_OFF, $sp
+	.cfi_adjust_cfa_offset	SP_OFF
+	stq	$0, 0($sp)
+	stq	$1, 8($sp)
+	stq	$2, 16($sp)
+	stq	$3, 24($sp)
+	stq	$4, 32($sp)
+	stq	$28, 144($sp)
+	.cfi_rel_offset	$0, 0
+	.cfi_rel_offset $1, 8
+	.cfi_rel_offset	$2, 16
+	.cfi_rel_offset	$3, 24
+	.cfi_rel_offset	$4, 32
+	.cfi_rel_offset	$28, 144
+	lda	$2, alpha_mv
+	stq	$5, 40($sp)
+	stq	$6, 48($sp)
+	stq	$7, 56($sp)
+	stq	$8, 64($sp)
+	stq	$19, 72($sp)
+	stq	$20, 80($sp)
+	stq	$21, 88($sp)
+	ldq	$2, HAE_CACHE($2)
+	stq	$22, 96($sp)
+	stq	$23, 104($sp)
+	stq	$24, 112($sp)
+	stq	$25, 120($sp)
+	stq	$26, 128($sp)
+	stq	$27, 136($sp)
+	stq	$2, 152($sp)
+	stq	$16, 160($sp)
+	stq	$17, 168($sp)
 	stq	$18, 176($sp)
+	.cfi_rel_offset	$5, 40
+	.cfi_rel_offset	$6, 48
+	.cfi_rel_offset	$7, 56
+	.cfi_rel_offset	$8, 64
+	.cfi_rel_offset $19, 72
+	.cfi_rel_offset	$20, 80
+	.cfi_rel_offset	$21, 88
+	.cfi_rel_offset $22, 96
+	.cfi_rel_offset	$23, 104
+	.cfi_rel_offset	$24, 112
+	.cfi_rel_offset	$25, 120
+	.cfi_rel_offset	$26, 128
+	.cfi_rel_offset	$27, 136
+.endm
 
-#define RESTORE_ALL			\
-	lda	$19, alpha_mv;		\
-	ldq	$0, 0($sp);		\
-	ldq	$1, 8($sp);		\
-	ldq	$2, 16($sp);		\
-	ldq	$3, 24($sp);		\
-	ldq	$21, 152($sp);		\
-	ldq	$20, HAE_CACHE($19);	\
-	ldq	$4, 32($sp);		\
-	ldq	$5, 40($sp);		\
-	ldq	$6, 48($sp);		\
-	ldq	$7, 56($sp);		\
-	subq	$20, $21, $20;		\
-	ldq	$8, 64($sp);		\
-	beq	$20, 99f;		\
-	ldq	$20, HAE_REG($19);	\
-	stq	$21, HAE_CACHE($19);	\
-	stq	$21, 0($20);		\
-99:;					\
-	ldq	$19, 72($sp);		\
-	ldq	$20, 80($sp);		\
-	ldq	$21, 88($sp);		\
-	ldq	$22, 96($sp);		\
-	ldq	$23, 104($sp);		\
-	ldq	$24, 112($sp);		\
-	ldq	$25, 120($sp);		\
-	ldq	$26, 128($sp);		\
-	ldq	$27, 136($sp);		\
-	ldq	$28, 144($sp);		\
+.macro	RESTORE_ALL
+	lda	$19, alpha_mv
+	ldq	$0, 0($sp)
+	ldq	$1, 8($sp)
+	ldq	$2, 16($sp)
+	ldq	$3, 24($sp)
+	ldq	$21, 152($sp)
+	ldq	$20, HAE_CACHE($19)
+	ldq	$4, 32($sp)
+	ldq	$5, 40($sp)
+	ldq	$6, 48($sp)
+	ldq	$7, 56($sp)
+	subq	$20, $21, $20
+	ldq	$8, 64($sp)
+	beq	$20, 99f
+	ldq	$20, HAE_REG($19)
+	stq	$21, HAE_CACHE($19)
+	stq	$21, 0($20)
+99:	ldq	$19, 72($sp)
+	ldq	$20, 80($sp)
+	ldq	$21, 88($sp)
+	ldq	$22, 96($sp)
+	ldq	$23, 104($sp)
+	ldq	$24, 112($sp)
+	ldq	$25, 120($sp)
+	ldq	$26, 128($sp)
+	ldq	$27, 136($sp)
+	ldq	$28, 144($sp)
 	addq	$sp, SP_OFF, $sp
+	.cfi_restore	$0
+	.cfi_restore	$1
+	.cfi_restore	$2
+	.cfi_restore	$3
+	.cfi_restore	$4
+	.cfi_restore	$5
+	.cfi_restore	$6
+	.cfi_restore	$7
+	.cfi_restore	$8
+	.cfi_restore	$19
+	.cfi_restore	$20
+	.cfi_restore	$21
+	.cfi_restore	$22
+	.cfi_restore	$23
+	.cfi_restore	$24
+	.cfi_restore	$25
+	.cfi_restore	$26
+	.cfi_restore	$27
+	.cfi_restore	$28
+	.cfi_adjust_cfa_offset	-SP_OFF
+.endm
+
+.macro	DO_SWITCH_STACK
+	bsr	$1, do_switch_stack
+	.cfi_adjust_cfa_offset	SWITCH_STACK_SIZE
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
+	/* We don't really care about the FP registers for debugging.  */
+.endm
+
+.macro	UNDO_SWITCH_STACK
+	bsr	$1, undo_switch_stack
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-SWITCH_STACK_SIZE
+.endm
 
 /*
  * Non-syscall kernel entry points.
 */
 
-	.align	4
-	.globl	entInt
-	.ent	entInt
-entInt:
+CFI_START_OSF_FRAME entInt
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $19
 	jsr	$31, do_entInt
-.end entInt
+CFI_END_OSF_FRAME entInt
 
-	.align	4
-	.globl	entArith
-	.ent	entArith
-entArith:
+CFI_START_OSF_FRAME entArith
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $18
 	jsr	$31, do_entArith
-.end entArith
+CFI_END_OSF_FRAME entArith
 
-	.align	4
-	.globl	entMM
-	.ent	entMM
-entMM:
+CFI_START_OSF_FRAME entMM
 	SAVE_ALL
 /* save $9 - $15 so the inline exception code can manipulate them.  */
 	subq	$sp, 56, $sp
+	.cfi_adjust_cfa_offset	56
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -128,6 +207,13 @@ entMM:
 	stq	$13, 32($sp)
 	stq	$14, 40($sp)
 	stq	$15, 48($sp)
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
 	addq	$sp, 56, $19
 /* handle the fault */
 	lda	$8, 0x3fff
@@ -142,28 +228,33 @@ entMM:
 	ldq	$14, 40($sp)
 	ldq	$15, 48($sp)
 	addq	$sp, 56, $sp
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-56
 /* finish up the syscall as normal.  */
 	br	ret_from_sys_call
-.end entMM
+CFI_END_OSF_FRAME entMM
 
-	.align	4
-	.globl	entIF
-	.ent	entIF
-entIF:
+CFI_START_OSF_FRAME entIF
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $17
 	jsr	$31, do_entIF
-.end entIF
+CFI_END_OSF_FRAME entIF
 
-	.align	4
-	.globl	entUna
-	.ent	entUna
-entUna:
+CFI_START_OSF_FRAME entUna
 	lda	$sp, -256($sp)
+	.cfi_adjust_cfa_offset	256
 	stq	$0, 0($sp)
+	.cfi_rel_offset	$0, 0
+	.cfi_remember_state
 	ldq	$0, 256($sp)	/* get PS */
 	stq	$1, 8($sp)
 	stq	$2, 16($sp)
@@ -195,6 +286,32 @@ entUna:
 	stq	$28, 224($sp)
 	mov	$sp, $19
 	stq	$gp, 232($sp)
+	.cfi_rel_offset	$1, 1*8
+	.cfi_rel_offset	$2, 2*8
+	.cfi_rel_offset	$3, 3*8
+	.cfi_rel_offset	$4, 4*8
+	.cfi_rel_offset	$5, 5*8
+	.cfi_rel_offset	$6, 6*8
+	.cfi_rel_offset	$7, 7*8
+	.cfi_rel_offset	$8, 8*8
+	.cfi_rel_offset	$9, 9*8
+	.cfi_rel_offset	$10, 10*8
+	.cfi_rel_offset	$11, 11*8
+	.cfi_rel_offset	$12, 12*8
+	.cfi_rel_offset	$13, 13*8
+	.cfi_rel_offset	$14, 14*8
+	.cfi_rel_offset	$15, 15*8
+	.cfi_rel_offset	$19, 19*8
+	.cfi_rel_offset	$20, 20*8
+	.cfi_rel_offset	$21, 21*8
+	.cfi_rel_offset	$22, 22*8
+	.cfi_rel_offset	$23, 23*8
+	.cfi_rel_offset	$24, 24*8
+	.cfi_rel_offset	$25, 25*8
+	.cfi_rel_offset	$26, 26*8
+	.cfi_rel_offset	$27, 27*8
+	.cfi_rel_offset	$28, 28*8
+	.cfi_rel_offset	$29, 29*8
 	lda	$8, 0x3fff
 	stq	$31, 248($sp)
 	bic	$sp, $8, $8
@@ -228,16 +345,45 @@ entUna:
 	ldq	$28, 224($sp)
 	ldq	$gp, 232($sp)
 	lda	$sp, 256($sp)
+	.cfi_restore	$1
+	.cfi_restore	$2
+	.cfi_restore	$3
+	.cfi_restore	$4
+	.cfi_restore	$5
+	.cfi_restore	$6
+	.cfi_restore	$7
+	.cfi_restore	$8
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_restore	$19
+	.cfi_restore	$20
+	.cfi_restore	$21
+	.cfi_restore	$22
+	.cfi_restore	$23
+	.cfi_restore	$24
+	.cfi_restore	$25
+	.cfi_restore	$26
+	.cfi_restore	$27
+	.cfi_restore	$28
+	.cfi_restore	$29
+	.cfi_adjust_cfa_offset	-256
 	call_pal PAL_rti
-.end entUna
 
 	.align	4
-	.ent	entUnaUser
 entUnaUser:
+	.cfi_restore_state
 	ldq	$0, 0($sp)	/* restore original $0 */
 	lda	$sp, 256($sp)	/* pop entUna's stack frame */
+	.cfi_restore	$0
+	.cfi_adjust_cfa_offset	-256
 	SAVE_ALL		/* setup normal kernel stack */
 	lda	$sp, -56($sp)
+	.cfi_adjust_cfa_offset	56
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -245,6 +391,13 @@ entUnaUser:
 	stq	$13, 32($sp)
 	stq	$14, 40($sp)
 	stq	$15, 48($sp)
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
 	lda	$8, 0x3fff
 	addq	$sp, 56, $19
 	bic	$sp, $8, $8
@@ -257,20 +410,25 @@ entUnaUser:
 	ldq	$14, 40($sp)
 	ldq	$15, 48($sp)
 	lda	$sp, 56($sp)
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-56
 	br	ret_from_sys_call
-.end entUnaUser
+CFI_END_OSF_FRAME entUna
 
-	.align	4
-	.globl	entDbg
-	.ent	entDbg
-entDbg:
+CFI_START_OSF_FRAME entDbg
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $16
 	jsr	$31, do_entDbg
-.end entDbg
+CFI_END_OSF_FRAME entDbg
 
 /*
  * The system call entry point is special.  Most importantly, it looks
@@ -285,8 +443,12 @@ entDbg:
 
 	.align	4
 	.globl	entSys
-	.globl	ret_from_sys_call
-	.ent	entSys
+	.type	entSys, @function
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa	$sp, 48
+	.cfi_rel_offset	64, 8
+	.cfi_rel_offset	$gp, 16
 entSys:
 	SAVE_ALL
 	lda	$8, 0x3fff
@@ -300,6 +462,9 @@ entSys:
 	stq	$17, SP_OFF+32($sp)
 	s8addq	$0, $5, $5
 	stq	$18, SP_OFF+40($sp)
+	.cfi_rel_offset	$16, SP_OFF+24
+	.cfi_rel_offset	$17, SP_OFF+32
+	.cfi_rel_offset	$18, SP_OFF+40
 	blbs	$3, strace
 	beq	$4, 1f
 	ldq	$27, 0($5)
@@ -310,6 +475,7 @@ entSys:
 	stq	$31, 72($sp)		/* a3=0 => no error */
 
 	.align	4
+	.globl	ret_from_sys_call
 ret_from_sys_call:
 	cmovne	$26, 0, $18		/* $18 = 0 => non-restartable */
 	ldq	$0, SP_OFF($sp)
@@ -324,10 +490,12 @@ ret_to_user:
 	and	$17, _TIF_WORK_MASK, $2
 	bne	$2, work_pending
 restore_all:
+	.cfi_remember_state
 	RESTORE_ALL
 	call_pal PAL_rti
 
 ret_to_kernel:
+	.cfi_restore_state
 	lda	$16, 7
 	call_pal PAL_swpipl
 	br restore_all
@@ -356,7 +524,6 @@ $ret_success:
 	stq	$0, 0($sp)
 	stq	$31, 72($sp)	/* a3=0 => no error */
 	br	ret_from_sys_call
-.end entSys
 
 /*
  * Do all cleanup when returning from all interrupts and system calls.
@@ -370,7 +537,7 @@ $ret_success:
  */
 
 	.align	4
-	.ent	work_pending
+	.type	work_pending, @function
 work_pending:
 	and	$17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2
 	bne	$2, $work_notifysig
@@ -387,23 +554,22 @@ $work_resched:
 
 $work_notifysig:
 	mov	$sp, $16
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, do_work_pending
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	br	restore_all
-.end work_pending
 
 /*
  * PTRACE syscall handler
 */
 
	.align	4
-	.ent	strace
+	.type	strace, @function
 strace:
 	/* set up signal stack, call syscall_trace */
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, syscall_trace_enter /* returns the syscall number */
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 
 	/* get the arguments back.. */
 	ldq	$16, SP_OFF+24($sp)
@@ -431,9 +597,9 @@ ret_from_straced:
 $strace_success:
 	stq	$0, 0($sp)		/* save return value */
 
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, syscall_trace_leave
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	br	$31, ret_from_sys_call
 
 	.align	3
@@ -447,26 +613,31 @@ $strace_error:
 	stq	$0, 0($sp)
 	stq	$1, 72($sp)	/* a3 for return */
 
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	mov	$18, $9		/* save old syscall number */
 	mov	$19, $10	/* save old a3 */
 	jsr	$26, syscall_trace_leave
 	mov	$9, $18
 	mov	$10, $19
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 
 	mov	$31, $26	/* tell "ret_from_sys_call" we can restart */
 	br	ret_from_sys_call
-.end strace
+CFI_END_OSF_FRAME entSys
 
 /*
  * Save and restore the switch stack -- aka the balance of the user context.
 */
 
	.align	4
-	.ent	do_switch_stack
+	.type	do_switch_stack, @function
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa $sp, 0
+	.cfi_register 64, $1
 do_switch_stack:
 	lda	$sp, -SWITCH_STACK_SIZE($sp)
+	.cfi_adjust_cfa_offset	SWITCH_STACK_SIZE
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -510,10 +681,14 @@ do_switch_stack:
 	stt	$f0, 312($sp)	# save fpcr in slot of $f31
 	ldt	$f0, 64($sp)	# dont let "do_switch_stack" change fp state.
 	ret	$31, ($1), 1
-.end do_switch_stack
+	.cfi_endproc
+	.size	do_switch_stack, .-do_switch_stack
 
 	.align	4
-	.ent	undo_switch_stack
+	.type	undo_switch_stack, @function
+	.cfi_startproc simple
+	.cfi_def_cfa $sp, 0
+	.cfi_register 64, $1
 undo_switch_stack:
 	ldq	$9, 0($sp)
 	ldq	$10, 8($sp)
@@ -558,7 +733,8 @@ undo_switch_stack:
 	ldt	$f30, 304($sp)
 	lda	$sp, SWITCH_STACK_SIZE($sp)
 	ret	$31, ($1), 1
-.end undo_switch_stack
+	.cfi_endproc
+	.size	undo_switch_stack, .-undo_switch_stack
 
 /*
  * The meat of the context switch code.
@@ -566,17 +742,18 @@ undo_switch_stack:
 
 	.align	4
 	.globl	alpha_switch_to
-	.ent	alpha_switch_to
+	.type	alpha_switch_to, @function
+	.cfi_startproc
 alpha_switch_to:
-	.prologue 0
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	call_pal PAL_swpctx
 	lda	$8, 0x3fff
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	bic	$sp, $8, $8
 	mov	$17, $0
 	ret
-.end alpha_switch_to
+	.cfi_endproc
+	.size	alpha_switch_to, .-alpha_switch_to
 
 /*
  * New processes begin life here.

+ 1 - 1
arch/alpha/kernel/irq_alpha.c

@@ -236,7 +236,7 @@ void __init
 init_rtc_irq(void)
 {
 	irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip,
-				      handle_simple_irq, "RTC");
+				      handle_percpu_irq, "RTC");
 	setup_irq(RTC_IRQ, &timer_irqaction);
 }
 

+ 3 - 2
arch/alpha/kernel/smp.c

@@ -264,9 +264,10 @@ recv_secondary_console_msg(void)
 		if (cnt <= 0 || cnt >= 80)
 			strcpy(buf, "<<< BOGUS MSG >>>");
 		else {
-			cp1 = (char *) &cpu->ipc_buffer[11];
+			cp1 = (char *) &cpu->ipc_buffer[1];
 			cp2 = buf;
-			strcpy(cp2, cp1);
+			memcpy(cp2, cp1, cnt);
+			cp2[cnt] = '\0';
 			
 			while ((cp2 = strchr(cp2, '\r')) != 0) {
 				*cp2 = ' ';

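The smp.c hunk above replaces an unbounded strcpy() of the secondary console
message with a length-checked memcpy() plus explicit NUL termination; the same
pattern as a standalone sketch (buffer bounds taken from the hunk):

#include <string.h>

/* dst must hold at least 81 bytes; src need not be NUL-terminated. */
static void copy_console_msg(char *dst, const char *src, int cnt)
{
	if (cnt <= 0 || cnt >= 80) {
		strcpy(dst, "<<< BOGUS MSG >>>");
		return;
	}
	memcpy(dst, src, cnt);	/* bounded copy */
	dst[cnt] = '\0';	/* terminate explicitly */
}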
+ 0 - 8
arch/alpha/kernel/sys_dp264.c

@@ -190,9 +190,6 @@ static struct irq_chip clipper_irq_type = {
 static void
 dp264_device_interrupt(unsigned long vector)
 {
-#if 1
-	printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n");
-#else
 	unsigned long pld;
 	unsigned int i;
 
@@ -210,12 +207,7 @@ dp264_device_interrupt(unsigned long vector)
 			isa_device_interrupt(vector);
 		else
 			handle_irq(16 + i);
-#if 0
-		TSUNAMI_cchip->dir0.csr = 1UL << i; mb();
-		tmp = TSUNAMI_cchip->dir0.csr;
-#endif
 	}
-#endif
 }
 
 static void 

+ 2 - 1
arch/alpha/kernel/sys_marvel.c

@@ -317,8 +317,9 @@ marvel_init_irq(void)
 }
 
 static int 
-marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
 {
+	struct pci_dev *dev = (struct pci_dev *)cdev;
 	struct pci_controller *hose = dev->sysdata;
 	struct io7_port *io7_port = hose->sysdata;
 	struct io7 *io7 = io7_port->io7;

+ 2 - 0
arch/alpha/kernel/systbls.S

@@ -524,6 +524,8 @@ sys_call_table:
 	.quad sys_sendmmsg
 	.quad sys_process_vm_readv
 	.quad sys_process_vm_writev		/* 505 */
+	.quad sys_kcmp
+	.quad sys_finit_module
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object

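With the two table slots above and the numbers added to uapi/asm/unistd.h,
user space can reach the new entry points via syscall(2); a hedged sketch
(the module path below is a placeholder):

#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_finit_module
#define __NR_finit_module 507	/* alpha number from the hunk above */
#endif

int main(void)
{
	int fd = open("example.ko", O_RDONLY);	/* placeholder module */

	if (fd < 0)
		return 1;
	/* finit_module(fd, param_values, flags) */
	return syscall(__NR_finit_module, fd, "", 0) ? 1 : 0;
}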
+ 1 - 3
arch/alpha/kernel/time.c

@@ -105,9 +105,7 @@ void arch_irq_work_raise(void)
 
 static inline __u32 rpcc(void)
 {
-    __u32 result;
-    asm volatile ("rpcc %0" : "=r"(result));
-    return result;
+	return __builtin_alpha_rpcc();
 }
 
 int update_persistent_clock(struct timespec now)

+ 3 - 5
arch/alpha/kernel/traps.c

@@ -66,8 +66,8 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15)
 {
 	printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx    %s\n",
 	       regs->pc, regs->r26, regs->ps, print_tainted());
-	print_symbol("pc is at %s\n", regs->pc);
-	print_symbol("ra is at %s\n", regs->r26 );
+	printk("pc is at %pSR\n", (void *)regs->pc);
+	printk("ra is at %pSR\n", (void *)regs->r26);
 	printk("v0 = %016lx  t0 = %016lx  t1 = %016lx\n",
 	       regs->r0, regs->r1, regs->r2);
 	printk("t2 = %016lx  t3 = %016lx  t4 = %016lx\n",
@@ -132,9 +132,7 @@ dik_show_trace(unsigned long *sp)
 			continue;
 		if (tmp >= (unsigned long) &_etext)
 			continue;
-		printk("[<%lx>]", tmp);
-		print_symbol(" %s", tmp);
-		printk("\n");
+		printk("[<%lx>] %pSR\n", tmp, (void *)tmp);
 		if (i > 40) {
 			printk(" ...");
 			break;

+ 1 - 1
arch/arm/mach-s3c24xx/Kconfig

@@ -208,7 +208,7 @@ config S3C24XX_GPIO_EXTRA128
 
 config S3C24XX_PLL
 	bool "Support CPUfreq changing of PLL frequency (EXPERIMENTAL)"
-	depends on ARM_S3C24XX
+	depends on ARM_S3C24XX_CPUFREQ
 	help
 	  Compile in support for changing the PLL frequency from the
 	  S3C24XX series CPUfreq driver. The PLL takes time to settle

+ 0 - 7
arch/arm64/include/asm/debug-monitors.h

@@ -83,14 +83,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs)
 }
 #endif
 
-#ifdef CONFIG_COMPAT
 int aarch32_break_handler(struct pt_regs *regs);
-#else
-static int aarch32_break_handler(struct pt_regs *regs)
-{
-	return -EFAULT;
-}
-#endif
 
 #endif	/* __ASSEMBLY */
 #endif	/* __KERNEL__ */

+ 2 - 1
arch/arm64/include/asm/system_misc.h

@@ -23,6 +23,7 @@
 #include <linux/compiler.h>
 #include <linux/linkage.h>
 #include <linux/irqflags.h>
+#include <linux/reboot.h>
 
 struct pt_regs;
 
@@ -41,7 +42,7 @@ extern void show_pte(struct mm_struct *mm, unsigned long addr);
 extern void __show_regs(struct pt_regs *);
 
 void soft_restart(unsigned long);
-extern void (*arm_pm_restart)(char str, const char *cmd);
+extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 
 #define UDBG_UNDEFINED	(1 << 0)
 #define UDBG_SYSCALL	(1 << 1)

+ 1 - 1
arch/arm64/kernel/process.c

@@ -132,7 +132,7 @@ void machine_restart(char *cmd)
 
 	/* Now call the architecture specific reboot code. */
 	if (arm_pm_restart)
-		arm_pm_restart('h', cmd);
+		arm_pm_restart(reboot_mode, cmd);
 
 	/*
 	 * Whoops - the architecture was unable to reboot.

+ 8 - 7
arch/arm64/kernel/smp.c

@@ -199,13 +199,6 @@ asmlinkage void secondary_start_kernel(void)
 	raw_spin_lock(&boot_lock);
 	raw_spin_unlock(&boot_lock);
 
-	/*
-	 * Enable local interrupts.
-	 */
-	notify_cpu_starting(cpu);
-	local_irq_enable();
-	local_fiq_enable();
-
 	/*
 	 * OK, now it's safe to let the boot CPU continue.  Wait for
 	 * the CPU migration code to notice that the CPU is online
@@ -214,6 +207,14 @@ asmlinkage void secondary_start_kernel(void)
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);
 
+	/*
+	 * Enable GIC and timers.
+	 */
+	notify_cpu_starting(cpu);
+
+	local_irq_enable();
+	local_fiq_enable();
+
 	/*
 	 * OK, it's off to the idle thread for us
 	 */

+ 20 - 26
arch/arm64/mm/fault.c

@@ -152,25 +152,8 @@ void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 #define ESR_CM			(1 << 8)
 #define ESR_LNX_EXEC		(1 << 24)
 
-/*
- * Check that the permissions on the VMA allow for the fault which occurred.
- * If we encountered a write fault, we must have write permission, otherwise
- * we allow any permission.
- */
-static inline bool access_error(unsigned int esr, struct vm_area_struct *vma)
-{
-	unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
-
-	if (esr & ESR_WRITE)
-		mask = VM_WRITE;
-	if (esr & ESR_LNX_EXEC)
-		mask = VM_EXEC;
-
-	return vma->vm_flags & mask ? false : true;
-}
-
 static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
-			   unsigned int esr, unsigned int flags,
+			   unsigned int mm_flags, unsigned long vm_flags,
 			   struct task_struct *tsk)
 {
 	struct vm_area_struct *vma;
@@ -188,12 +171,17 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
 	 * it.
 	 */
 good_area:
-	if (access_error(esr, vma)) {
+	/*
+	 * Check that the permissions on the VMA allow for the fault which
+	 * occurred. If we encountered a write or exec fault, we must have
+	 * appropriate permissions, otherwise we allow any permission.
+	 */
+	if (!(vma->vm_flags & vm_flags)) {
 		fault = VM_FAULT_BADACCESS;
 		goto out;
 	}
 
-	return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags);
+	return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags);
 
 check_stack:
 	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
@@ -208,9 +196,15 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	int fault, sig, code;
-	bool write = (esr & ESR_WRITE) && !(esr & ESR_CM);
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-		(write ? FAULT_FLAG_WRITE : 0);
+	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
+	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+	if (esr & ESR_LNX_EXEC) {
+		vm_flags = VM_EXEC;
+	} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
+		vm_flags = VM_WRITE;
+		mm_flags |= FAULT_FLAG_WRITE;
+	}
 
 	tsk = current;
 	mm  = tsk->mm;
@@ -248,7 +242,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 #endif
 	}
 
-	fault = __do_page_fault(mm, addr, esr, flags, tsk);
+	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
 
 	/*
 	 * If we need to retry but a fatal signal is pending, handle the
@@ -265,7 +259,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	 */
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
-	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+	if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
 		if (fault & VM_FAULT_MAJOR) {
 			tsk->maj_flt++;
 			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
@@ -280,7 +274,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
 			 * starvation.
 			 */
-			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
 			goto retry;
 		}
 	}

+ 1 - 0
arch/mips/Kconfig

@@ -1702,6 +1702,7 @@ endchoice
 
 config KVM_GUEST
 	bool "KVM Guest Kernel"
+	depends on BROKEN_ON_SMP
 	help
 	  Select this option if building a guest kernel for KVM (Trap & Emulate) mode
 

+ 3 - 2
arch/mips/cavium-octeon/octeon-platform.c

@@ -334,9 +334,10 @@ static void __init octeon_fdt_pip_iface(int pip, int idx, u64 *pmac)
 	char name_buffer[20];
 	int iface;
 	int p;
-	int count;
+	int count = 0;
 
-	count = cvmx_helper_interface_enumerate(idx);
+	if (cvmx_helper_interface_enumerate(idx) == 0)
+		count = cvmx_helper_ports_on_interface(idx);
 
 	snprintf(name_buffer, sizeof(name_buffer), "interface@%d", idx);
 	iface = fdt_subnode_offset(initial_boot_params, pip, name_buffer);

+ 1 - 1
arch/mips/kernel/smp-bmips.c

@@ -173,7 +173,7 @@ static void bmips_boot_secondary(int cpu, struct task_struct *idle)
 	else {
 #if defined(CONFIG_CPU_BMIPS4350) || defined(CONFIG_CPU_BMIPS4380)
 		/* Reset slave TP1 if booting from TP0 */
-		if (cpu_logical_map(cpu) == 0)
+		if (cpu_logical_map(cpu) == 1)
 			set_c0_brcm_cmt_ctrl(0x01);
 #elif defined(CONFIG_CPU_BMIPS5000)
 		if (cpu & 0x01)

+ 0 - 1
arch/mips/kernel/traps.c

@@ -1242,7 +1242,6 @@ asmlinkage void do_mcheck(struct pt_regs *regs)
 	panic("Caught Machine Check exception - %scaused by multiple "
 	      "matching entries in the TLB.",
 	      (multi_match) ? "" : "not ");
-	exception_exit(prev_state);
 }
 
 asmlinkage void do_mt(struct pt_regs *regs)

+ 0 - 1
arch/mips/kvm/Kconfig

@@ -5,7 +5,6 @@ source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
-	depends on HAVE_KVM
 	---help---
 	  Say Y here to get to see options for using your Linux host to run
 	  other operating systems inside virtual machines (guests).

+ 2 - 2
arch/mips/mm/tlbex.c

@@ -1456,7 +1456,7 @@ static void build_r4000_setup_pgd(void)
 {
 	const int a0 = 4;
 	const int a1 = 5;
-	u32 *p = tlbmiss_handler_setup_pgd_array;
+	u32 *p = tlbmiss_handler_setup_pgd;
 	const int tlbmiss_handler_setup_pgd_size =
 		tlbmiss_handler_setup_pgd_end - tlbmiss_handler_setup_pgd;
 	struct uasm_label *l = labels;
@@ -1793,7 +1793,7 @@ static void build_r3000_tlb_store_handler(void)
 	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
 	uasm_i_nop(&p);
 
-	if (p >= handle_tlbs)
+	if (p >= handle_tlbs_end)
 		panic("TLB store handler fastpath space exceeded");
 
 	uasm_resolve_relocs(relocs, labels);

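The tlbex.c fix above corrects an emit-and-check guard to compare the write
pointer against the end of the handler buffer rather than its start; the shape
of that guard, sketched with made-up buffer names:

#include <stdio.h>
#include <stdlib.h>

#define HANDLER_WORDS 128	/* hypothetical fastpath budget */

static unsigned int handler[HANDLER_WORDS];
static unsigned int *const handler_end = handler + HANDLER_WORDS;

static void emit(unsigned int **p, unsigned int insn)
{
	*(*p)++ = insn;	/* append one encoded instruction */
}

int main(void)
{
	unsigned int *p = handler;

	emit(&p, 0x0);	/* placeholder instructions */
	emit(&p, 0x1);
	if (p >= handler_end) {	/* compare against the end, not the start */
		fprintf(stderr, "handler fastpath space exceeded\n");
		abort();
	}
	return 0;
}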
+ 57 - 11
arch/mips/netlogic/common/irq.c

@@ -40,6 +40,10 @@
 #include <linux/slab.h>
 #include <linux/irq.h>
 
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
 #include <asm/errno.h>
 #include <asm/signal.h>
 #include <asm/ptrace.h>
@@ -223,17 +227,6 @@ static void nlm_init_node_irqs(int node)
 	nodep->irqmask = irqmask;
 }
 
-void __init arch_init_irq(void)
-{
-	/* Initialize the irq descriptors */
-	nlm_init_percpu_irqs();
-	nlm_init_node_irqs(0);
-	write_c0_eimr(nlm_current_node()->irqmask);
-#if defined(CONFIG_CPU_XLR)
-	nlm_setup_fmn_irq();
-#endif
-}
-
 void nlm_smp_irq_init(int hwcpuid)
 {
 	int node, cpu;
@@ -266,3 +259,56 @@ asmlinkage void plat_irq_dispatch(void)
 	/* top level irq handling */
 	do_IRQ(nlm_irq_to_xirq(node, i));
 }
+
+#ifdef CONFIG_OF
+static struct irq_domain *xlp_pic_domain;
+
+static const struct irq_domain_ops xlp_pic_irq_domain_ops = {
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+static int __init xlp_of_pic_init(struct device_node *node,
+					struct device_node *parent)
+{
+	const int n_picirqs = PIC_IRT_LAST_IRQ - PIC_IRQ_BASE + 1;
+	struct resource res;
+	int socid, ret;
+
+	/* we need a hack to get the PIC's SoC chip id */
+	ret = of_address_to_resource(node, 0, &res);
+	if (ret < 0) {
+		pr_err("PIC %s: reg property not found!\n", node->name);
+		return -EINVAL;
+	}
+	socid = (res.start >> 18) & 0x3;
+	xlp_pic_domain = irq_domain_add_legacy(node, n_picirqs,
+		nlm_irq_to_xirq(socid, PIC_IRQ_BASE), PIC_IRQ_BASE,
+		&xlp_pic_irq_domain_ops, NULL);
+	if (xlp_pic_domain == NULL) {
+		pr_err("PIC %s: Creating legacy domain failed!\n", node->name);
+		return -EINVAL;
+	}
+	pr_info("Node %d: IRQ domain created for PIC@%pa\n", socid,
+							&res.start);
+	return 0;
+}
+
+static struct of_device_id __initdata xlp_pic_irq_ids[] = {
+	{ .compatible = "netlogic,xlp-pic", .data = xlp_of_pic_init },
+	{},
+};
+#endif
+
+void __init arch_init_irq(void)
+{
+	/* Initialize the irq descriptors */
+	nlm_init_percpu_irqs();
+	nlm_init_node_irqs(0);
+	write_c0_eimr(nlm_current_node()->irqmask);
+#if defined(CONFIG_CPU_XLR)
+	nlm_setup_fmn_irq();
+#endif
+#if defined(CONFIG_OF)
+	of_irq_init(xlp_pic_irq_ids);
+#endif
+}

+ 2 - 1
arch/mips/netlogic/dts/xlp_evp.dts

@@ -76,10 +76,11 @@
 			};
 		};
 		pic: pic@4000 {
-			interrupt-controller;
+			compatible = "netlogic,xlp-pic";
 			#address-cells = <0>;
 			#interrupt-cells = <1>;
 			reg = <0 0x4000 0x200>;
+			interrupt-controller;
 		};
 
 		nor_flash@1,0 {

+ 2 - 1
arch/mips/netlogic/dts/xlp_svp.dts

@@ -76,10 +76,11 @@
 			};
 		};
 		pic: pic@4000 {
-			interrupt-controller;
+			compatible = "netlogic,xlp-pic";
 			#address-cells = <0>;
 			#interrupt-cells = <1>;
 			reg = <0 0x4000 0x200>;
+			interrupt-controller;
 		};
 
 		nor_flash@1,0 {

+ 1 - 1
arch/mips/netlogic/xlp/usb-init.c

@@ -119,7 +119,7 @@ static u64 xlp_usb_dmamask = ~(u32)0;
 static void nlm_usb_fixup_final(struct pci_dev *dev)
 static void nlm_usb_fixup_final(struct pci_dev *dev)
 {
 {
 	dev->dev.dma_mask		= &xlp_usb_dmamask;
 	dev->dev.dma_mask		= &xlp_usb_dmamask;
-	dev->dev.coherent_dma_mask	= DMA_BIT_MASK(64);
+	dev->dev.coherent_dma_mask	= DMA_BIT_MASK(32);
 	switch (dev->devfn) {
 	switch (dev->devfn) {
 	case 0x10:
 	case 0x10:
 		dev->irq = PIC_EHCI_0_IRQ;
 		dev->irq = PIC_EHCI_0_IRQ;

+ 9 - 1
arch/s390/include/asm/processor.h

@@ -91,7 +91,15 @@ struct thread_struct {
 #endif
 };
 
-#define PER_FLAG_NO_TE		1UL	/* Flag to disable transactions. */
+/* Flag to disable transactions. */
+#define PER_FLAG_NO_TE			1UL
+/* Flag to enable random transaction aborts. */
+#define PER_FLAG_TE_ABORT_RAND		2UL
+/* Flag to specify random transaction abort mode:
+ * - abort each transaction at a random instruction before TEND if set.
+ * - abort random transactions at a random instruction if cleared.
+ */
+#define PER_FLAG_TE_ABORT_RAND_TEND	4UL
 
 typedef struct thread_struct thread_struct;
 

+ 2 - 2
arch/s390/include/asm/switch_to.h

@@ -10,7 +10,7 @@
 #include <linux/thread_info.h>
 
 extern struct task_struct *__switch_to(void *, void *);
-extern void update_per_regs(struct task_struct *task);
+extern void update_cr_regs(struct task_struct *task);
 
 static inline void save_fp_regs(s390_fp_regs *fpregs)
 {
@@ -86,7 +86,7 @@ static inline void restore_access_regs(unsigned int *acrs)
 		restore_fp_regs(&next->thread.fp_regs);			\
 		restore_access_regs(&next->thread.acrs[0]);		\
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
-		update_per_regs(next);					\
+		update_cr_regs(next);					\
 	}								\
 	prev = __switch_to(prev,next);					\
 } while (0)

+ 1 - 0
arch/s390/include/uapi/asm/ptrace.h

@@ -400,6 +400,7 @@ typedef struct
 #define PTRACE_POKE_SYSTEM_CALL	      0x5008
 #define PTRACE_ENABLE_TE	      0x5009
 #define PTRACE_DISABLE_TE	      0x5010
+#define PTRACE_TE_ABORT_RAND	      0x5011
 
 /*
  * PT_PROT definition is loosely based on hppa bsd definition in

+ 47 - 4
arch/s390/kernel/crash_dump.c

@@ -21,6 +21,48 @@
 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
 #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
 
+
+/*
+ * Return physical address for virtual address
+ */
+static inline void *load_real_addr(void *addr)
+{
+	unsigned long real_addr;
+
+	asm volatile(
+		   "	lra     %0,0(%1)\n"
+		   "	jz	0f\n"
+		   "	la	%0,0\n"
+		   "0:"
+		   : "=a" (real_addr) : "a" (addr) : "cc");
+	return (void *)real_addr;
+}
+
+/*
+ * Copy up to one page to vmalloc or real memory
+ */
+static ssize_t copy_page_real(void *buf, void *src, size_t csize)
+{
+	size_t size;
+
+	if (is_vmalloc_addr(buf)) {
+		BUG_ON(csize >= PAGE_SIZE);
+		/* If buf is not page aligned, copy first part */
+		size = min(roundup(__pa(buf), PAGE_SIZE) - __pa(buf), csize);
+		if (size) {
+			if (memcpy_real(load_real_addr(buf), src, size))
+				return -EFAULT;
+			buf += size;
+			src += size;
+		}
+		/* Copy second part */
+		size = csize - size;
+		return (size) ? memcpy_real(load_real_addr(buf), src, size) : 0;
+	} else {
+		return memcpy_real(buf, src, csize);
+	}
+}
+
 /*
  * Copy one page from "oldmem"
  *
@@ -32,6 +74,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 			 size_t csize, unsigned long offset, int userbuf)
 {
 	unsigned long src;
+	int rc;
 
 	if (!csize)
 		return 0;
@@ -43,11 +86,11 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 		 src < OLDMEM_BASE + OLDMEM_SIZE)
 		src -= OLDMEM_BASE;
 	if (userbuf)
-		copy_to_user_real((void __force __user *) buf, (void *) src,
-				  csize);
+		rc = copy_to_user_real((void __force __user *) buf,
+				       (void *) src, csize);
 	else
-		memcpy_real(buf, (void *) src, csize);
-	return csize;
+		rc = copy_page_real(buf, (void *) src, csize);
+	return (rc == 0) ? csize : rc;
 }
 
 /*

+ 39 - 11
arch/s390/kernel/ptrace.c

@@ -47,7 +47,7 @@ enum s390_regset {
 	REGSET_GENERAL_EXTENDED,
 };
 
-void update_per_regs(struct task_struct *task)
+void update_cr_regs(struct task_struct *task)
 {
 	struct pt_regs *regs = task_pt_regs(task);
 	struct thread_struct *thread = &task->thread;
@@ -56,17 +56,25 @@ void update_per_regs(struct task_struct *task)
 #ifdef CONFIG_64BIT
 	/* Take care of the enable/disable of transactional execution. */
 	if (MACHINE_HAS_TE) {
-		unsigned long cr0, cr0_new;
+		unsigned long cr[3], cr_new[3];
 
-		__ctl_store(cr0, 0, 0);
-		/* set or clear transaction execution bits 8 and 9. */
+		__ctl_store(cr, 0, 2);
+		cr_new[1] = cr[1];
+		/* Set or clear transaction execution TXC/PIFO bits 8 and 9. */
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
-			cr0_new = cr0 & ~(3UL << 54);
+			cr_new[0] = cr[0] & ~(3UL << 54);
 		else
-			cr0_new = cr0 | (3UL << 54);
-		/* Only load control register 0 if necessary. */
-		if (cr0 != cr0_new)
-			__ctl_load(cr0_new, 0, 0);
+			cr_new[0] = cr[0] | (3UL << 54);
+		/* Set or clear transaction execution TDC bits 62 and 63. */
+		cr_new[2] = cr[2] & ~3UL;
+		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+				cr_new[2] |= 1UL;
+			else
+				cr_new[2] |= 2UL;
+		}
+		if (memcmp(&cr_new, &cr, sizeof(cr)))
+			__ctl_load(cr_new, 0, 2);
 	}
 #endif
 	/* Copy user specified PER registers */
@@ -100,14 +108,14 @@ void user_enable_single_step(struct task_struct *task)
 {
 	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
 	if (task == current)
-		update_per_regs(task);
+		update_cr_regs(task);
 }
 
 void user_disable_single_step(struct task_struct *task)
 {
 	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
 	if (task == current)
-		update_per_regs(task);
+		update_cr_regs(task);
 }
 
 /*
@@ -447,6 +455,26 @@ long arch_ptrace(struct task_struct *child, long request,
 		if (!MACHINE_HAS_TE)
 			return -EIO;
 		child->thread.per_flags |= PER_FLAG_NO_TE;
+		child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+		return 0;
+	case PTRACE_TE_ABORT_RAND:
+		if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+			return -EIO;
+		switch (data) {
+		case 0UL:
+			child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+			break;
+		case 1UL:
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+			break;
+		case 2UL:
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+			child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+			break;
+		default:
+			return -EINVAL;
+		}
 		return 0;
 	default:
 		/* Removing high order bit from addr (only for 31 bit). */
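
For completeness, a hedged userspace sketch of driving the new request; the fork/stop scaffolding is illustrative, while the request number and the data values 0, 1 and 2 come straight from the hunks above:

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

#ifndef PTRACE_TE_ABORT_RAND
#define PTRACE_TE_ABORT_RAND 0x5011	/* matches the uapi value above */
#endif

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		/* ... transactional-execution workload under test ... */
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	/* data 1: abort each transaction before TEND; 2: abort random
	 * transactions; 0: switch the mode off again. */
	if (ptrace((enum __ptrace_request) PTRACE_TE_ABORT_RAND, pid, 0, 1) < 0)
		perror("PTRACE_TE_ABORT_RAND");	/* -EIO if !MACHINE_HAS_TE */
	ptrace(PTRACE_CONT, pid, 0, 0);
	waitpid(pid, NULL, 0);
	return 0;
}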

+ 88 - 25
arch/s390/net/bpf_jit_comp.c

@@ -9,6 +9,8 @@
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/filter.h>
+#include <linux/random.h>
+#include <linux/init.h>
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
 #include <asm/facility.h>
@@ -221,6 +223,37 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 	EMIT2(0x07fe);
 }
 
+/* Helper to find the offset of pkt_type in sk_buff
+ * Make sure it's still a 3-bit field starting at the MSBs within a byte.
+ */
+#define PKT_TYPE_MAX 0xe0
+static int pkt_type_offset;
+
+static int __init bpf_pkt_type_offset_init(void)
+{
+	struct sk_buff skb_probe = {
+		.pkt_type = ~0,
+	};
+	char *ct = (char *)&skb_probe;
+	int off;
+
+	pkt_type_offset = -1;
+	for (off = 0; off < sizeof(struct sk_buff); off++) {
+		if (!ct[off])
+			continue;
+		if (ct[off] == PKT_TYPE_MAX)
+			pkt_type_offset = off;
+		else {
+			/* Found non matching bit pattern, fix needed. */
+			WARN_ON_ONCE(1);
+			pkt_type_offset = -1;
+			return -1;
+		}
+	}
+	return 0;
+}
+device_initcall(bpf_pkt_type_offset_init);
+
 /*
  * make sure we dont leak kernel information to user
  */
@@ -720,6 +753,16 @@ load_abs:	if ((int) K < 0)
 			EMIT4_DISP(0x88500000, 12);
 		}
 		break;
+	case BPF_S_ANC_PKTTYPE:
+		if (pkt_type_offset < 0)
+			goto out;
+		/* lhi %r5,0 */
+		EMIT4(0xa7580000);
+		/* ic %r5,<d(pkt_type_offset)>(%r2) */
+		EMIT4_DISP(0x43502000, pkt_type_offset);
+		/* srl %r5,5 */
+		EMIT4_DISP(0x88500000, 5);
+		break;
 	case BPF_S_ANC_CPU: /* A = smp_processor_id() */
 #ifdef CONFIG_SMP
 		/* l %r5,<d(cpu_nr)> */
@@ -738,8 +781,41 @@ load_abs:	if ((int) K < 0)
 	return -1;
 }
 
+/*
+ * Note: for security reasons, bpf code will follow a randomly
+ *	 sized amount of illegal instructions.
+ */
+struct bpf_binary_header {
+	unsigned int pages;
+	u8 image[];
+};
+
+static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
+						  u8 **image_ptr)
+{
+	struct bpf_binary_header *header;
+	unsigned int sz, hole;
+
+	/* Most BPF filters are really small, but if some of them fill a page,
+	 * allow at least 128 extra bytes for illegal instructions.
+	 */
+	sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE);
+	header = module_alloc(sz);
+	if (!header)
+		return NULL;
+	memset(header, 0, sz);
+	header->pages = sz / PAGE_SIZE;
+	hole = sz - bpfsize + sizeof(*header);
+	/* Insert random number of illegal instructions before BPF code
+	 * and make sure the first instruction starts at an even address.
+	 */
+	*image_ptr = &header->image[(prandom_u32() % hole) & -2];
+	return header;
+}
+
 void bpf_jit_compile(struct sk_filter *fp)
 {
+	struct bpf_binary_header *header = NULL;
 	unsigned long size, prg_len, lit_len;
 	struct bpf_jit jit, cjit;
 	unsigned int *addrs;
@@ -772,12 +848,11 @@ void bpf_jit_compile(struct sk_filter *fp)
 		} else if (jit.prg == cjit.prg && jit.lit == cjit.lit) {
 			prg_len = jit.prg - jit.start;
 			lit_len = jit.lit - jit.mid;
-			size = max_t(unsigned long, prg_len + lit_len,
-				     sizeof(struct work_struct));
+			size = prg_len + lit_len;
 			if (size >= BPF_SIZE_MAX)
 				goto out;
-			jit.start = module_alloc(size);
-			if (!jit.start)
+			header = bpf_alloc_binary(size, &jit.start);
+			if (!header)
 				goto out;
 			jit.prg = jit.mid = jit.start + prg_len;
 			jit.lit = jit.end = jit.start + prg_len + lit_len;
@@ -788,37 +863,25 @@ void bpf_jit_compile(struct sk_filter *fp)
 		cjit = jit;
 	}
 	if (bpf_jit_enable > 1) {
-		pr_err("flen=%d proglen=%lu pass=%d image=%p\n",
-		       fp->len, jit.end - jit.start, pass, jit.start);
-		if (jit.start) {
-			printk(KERN_ERR "JIT code:\n");
+		bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start);
+		if (jit.start)
 			print_fn_code(jit.start, jit.mid - jit.start);
-			print_hex_dump(KERN_ERR, "JIT literals:\n",
-				       DUMP_PREFIX_ADDRESS, 16, 1,
-				       jit.mid, jit.end - jit.mid, false);
-		}
 	}
-	if (jit.start)
+	if (jit.start) {
+		set_memory_ro((unsigned long)header, header->pages);
 		fp->bpf_func = (void *) jit.start;
+	}
 out:
 	kfree(addrs);
 }
 
-static void jit_free_defer(struct work_struct *arg)
-{
-	module_free(NULL, arg);
-}
-
-/* run from softirq, we must use a work_struct to call
- * module_free() from process context
- */
 void bpf_jit_free(struct sk_filter *fp)
 {
-	struct work_struct *work;
+	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+	struct bpf_binary_header *header = (void *)addr;
 
 	if (fp->bpf_func == sk_run_filter)
 		return;
-	work = (struct work_struct *)fp->bpf_func;
-	INIT_WORK(work, jit_free_defer);
-	schedule_work(work);
+	set_memory_rw(addr, header->pages);
+	module_free(NULL, header);
 }
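
A quick userspace sketch of the start-offset math in bpf_alloc_binary() above, with rand() standing in for prandom_u32() and illustrative sizes; it mirrors the hunk's hole expression verbatim:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096

int main(void)
{
	unsigned int bpfsize = 300, header = 4;	/* illustrative sizes */
	/* round_up(bpfsize + header + 128, PAGE_SIZE) */
	unsigned int sz = (bpfsize + header + 128 + PAGE_SIZE - 1)
			  & ~(PAGE_SIZE - 1u);
	unsigned int hole = sz - bpfsize + header;	/* as in the hunk */
	/* "& -2" in the hunk clears bit 0, so the image starts even-aligned */
	unsigned int off = ((unsigned int)rand() % hole) & ~1u;

	printf("sz=%u hole=%u image offset=%u\n", sz, hole, off);
	return 0;
}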

+ 4 - 4
arch/um/include/shared/frame_kern.h

@@ -6,13 +6,13 @@
 #ifndef __FRAME_KERN_H_
 #define __FRAME_KERN_H_
 
-extern int setup_signal_stack_sc(unsigned long stack_top, int sig, 
+extern int setup_signal_stack_sc(unsigned long stack_top, int sig,
 				 struct k_sigaction *ka,
-				 struct pt_regs *regs, 
+				 struct pt_regs *regs,
 				 sigset_t *mask);
-extern int setup_signal_stack_si(unsigned long stack_top, int sig, 
+extern int setup_signal_stack_si(unsigned long stack_top, int sig,
 				 struct k_sigaction *ka,
-				 struct pt_regs *regs, siginfo_t *info, 
+				 struct pt_regs *regs, struct siginfo *info,
 				 sigset_t *mask);
 
 #endif

+ 2 - 2
arch/um/kernel/signal.c

@@ -19,7 +19,7 @@ EXPORT_SYMBOL(unblock_signals);
  * OK, we're invoking a handler
  */
 static void handle_signal(struct pt_regs *regs, unsigned long signr,
-			 struct k_sigaction *ka, siginfo_t *info)
+			 struct k_sigaction *ka, struct siginfo *info)
 {
 	sigset_t *oldset = sigmask_to_save();
 	int singlestep = 0;
@@ -71,7 +71,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 static int kern_do_signal(struct pt_regs *regs)
 {
 	struct k_sigaction ka_copy;
-	siginfo_t info;
+	struct siginfo info;
 	int sig, handled_sig = 0;
 
 	while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) {

+ 1 - 1
arch/um/kernel/skas/mmu.c

@@ -123,7 +123,7 @@ void uml_setup_stubs(struct mm_struct *mm)
 	/* dup_mmap already holds mmap_sem */
 	err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
 				      VM_READ | VM_MAYREAD | VM_EXEC |
-				      VM_MAYEXEC | VM_DONTCOPY,
+				      VM_MAYEXEC | VM_DONTCOPY | VM_PFNMAP,
 				      mm->context.stub_pages);
 	if (err) {
 		printk(KERN_ERR "install_special_mapping returned %d\n", err);

+ 1 - 1
arch/um/kernel/skas/uaccess.c

@@ -254,6 +254,6 @@ int strnlen_user(const void __user *str, int len)
 	n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count);
 	if (n == 0)
 		return count + 1;
-	return -EFAULT;
+	return 0;
 }
 EXPORT_SYMBOL(strnlen_user);
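
The return-value change matters because the kernel-wide contract for strnlen_user() is: 0 on fault, otherwise the length including the terminating NUL. A hedged sketch of a typical caller (the function and variable names are illustrative, not from the patch):

static int example_get_len(const char __user *ustr, int max)
{
	int n = strnlen_user(ustr, max);

	if (n == 0)
		return -EFAULT;		/* faulted while scanning */
	if (n > max)
		return -EINVAL;		/* no NUL within max bytes */
	return n;			/* length including the NUL */
}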

+ 189 - 41
arch/um/os-Linux/mem.c

@@ -52,6 +52,25 @@ static void __init find_tempdir(void)
 	strcat(tempdir, "/");
 }
 
+/*
+ * Remove bytes from the front of the buffer and refill it so that if there's a
+ * partial string that we care about, it will be completed, and we can recognize
+ * it.
+ */
+static int pop(int fd, char *buf, size_t size, size_t npop)
+{
+	ssize_t n;
+	size_t len = strlen(&buf[npop]);
+
+	memmove(buf, &buf[npop], len + 1);
+	n = read(fd, &buf[len], size - len - 1);
+	if (n < 0)
+		return -errno;
+
+	buf[len + n] = '\0';
+	return 1;
+}
+
 /*
  * This will return 1, with the first character in buf being the
  * character following the next instance of c in the file.  This will
@@ -61,7 +80,6 @@ static void __init find_tempdir(void)
 static int next(int fd, char *buf, size_t size, char c)
 {
 	ssize_t n;
-	size_t len;
 	char *ptr;
 
 	while ((ptr = strchr(buf, c)) == NULL) {
@@ -74,20 +92,129 @@ static int next(int fd, char *buf, size_t size, char c)
 		buf[n] = '\0';
 	}
 
-	ptr++;
-	len = strlen(ptr);
-	memmove(buf, ptr, len + 1);
+	return pop(fd, buf, size, ptr - buf + 1);
+}
+
+/*
+ * Decode an octal-escaped and space-terminated path of the form used by
+ * /proc/mounts. May be used to decode a path in-place. "out" must be at least
+ * as large as the input. The output is always null-terminated. "len" gets the
+ * length of the output, excluding the trailing null. Returns 0 if a full path
+ * was successfully decoded, otherwise an error.
+ */
+static int decode_path(const char *in, char *out, size_t *len)
+{
+	char *first = out;
+	int c;
+	int i;
+	int ret = -EINVAL;
+	while (1) {
+		switch (*in) {
+		case '\0':
+			goto out;
+
+		case ' ':
+			ret = 0;
+			goto out;
+
+		case '\\':
+			in++;
+			c = 0;
+			for (i = 0; i < 3; i++) {
+				if (*in < '0' || *in > '7')
+					goto out;
+				c = (c << 3) | (*in++ - '0');
+			}
+			*(unsigned char *)out++ = (unsigned char) c;
+			break;
+
+		default:
+			*out++ = *in++;
+			break;
+		}
+	}
+
+out:
+	*out = '\0';
+	*len = out - first;
+	return ret;
+}
+
+/*
+ * Computes the length of s when encoded with three-digit octal escape sequences
+ * for the characters in chars.
+ */
+static size_t octal_encoded_length(const char *s, const char *chars)
+{
+	size_t len = strlen(s);
+	while ((s = strpbrk(s, chars)) != NULL) {
+		len += 3;
+		s++;
+	}
+
+	return len;
+}
+
+enum {
+	OUTCOME_NOTHING_MOUNTED,
+	OUTCOME_TMPFS_MOUNT,
+	OUTCOME_NON_TMPFS_MOUNT,
+};
+
+/* Read a line of /proc/mounts data looking for a tmpfs mount at "path". */
+static int read_mount(int fd, char *buf, size_t bufsize, const char *path,
+		      int *outcome)
+{
+	int found;
+	int match;
+	char *space;
+	size_t len;
+
+	enum {
+		MATCH_NONE,
+		MATCH_EXACT,
+		MATCH_PARENT,
+	};
+
+	found = next(fd, buf, bufsize, ' ');
+	if (found != 1)
+		return found;
 
 	/*
-	 * Refill the buffer so that if there's a partial string that we care
-	 * about, it will be completed, and we can recognize it.
+	 * If there's no following space in the buffer, then this path is
+	 * truncated, so it can't be the one we're looking for.
 	 */
-	n = read(fd, &buf[len], size - len - 1);
-	if (n < 0)
-		return -errno;
+	space = strchr(buf, ' ');
+	if (space) {
+		match = MATCH_NONE;
+		if (!decode_path(buf, buf, &len)) {
+			if (!strcmp(buf, path))
+				match = MATCH_EXACT;
+			else if (!strncmp(buf, path, len)
+				 && (path[len] == '/' || !strcmp(buf, "/")))
+				match = MATCH_PARENT;
+		}
+
+		found = pop(fd, buf, bufsize, space - buf + 1);
+		if (found != 1)
+			return found;
+
+		switch (match) {
+		case MATCH_EXACT:
+			if (!strncmp(buf, "tmpfs", strlen("tmpfs")))
+				*outcome = OUTCOME_TMPFS_MOUNT;
+			else
+				*outcome = OUTCOME_NON_TMPFS_MOUNT;
+			break;
 
 
-	buf[len + n] = '\0';
-	return 1;
+		case MATCH_PARENT:
+			/* This mount obscures any previous ones. */
+			*outcome = OUTCOME_NOTHING_MOUNTED;
+			break;
+		}
+	}
+
+	return next(fd, buf, bufsize, '\n');
 }
 
 /* which_tmpdir is called only during early boot */
@@ -106,8 +233,12 @@ static int checked_tmpdir = 0;
  */
 static void which_tmpdir(void)
 {
-	int fd, found;
-	char buf[128] = { '\0' };
+	int fd;
+	int found;
+	int outcome;
+	char *path;
+	char *buf;
+	size_t bufsize;
 
 	if (checked_tmpdir)
 		return;
@@ -116,49 +247,66 @@ static void which_tmpdir(void)
 
 	printf("Checking for tmpfs mount on /dev/shm...");
 
+	path = realpath("/dev/shm", NULL);
+	if (!path) {
+		printf("failed to check real path, errno = %d\n", errno);
+		return;
+	}
+	printf("%s...", path);
+
+	/*
+	 * The buffer needs to be able to fit the full octal-escaped path, a
+	 * space, and a trailing null in order to successfully decode it.
+	 */
+	bufsize = octal_encoded_length(path, " \t\n\\") + 2;
+
+	if (bufsize < 128)
+		bufsize = 128;
+
+	buf = malloc(bufsize);
+	if (!buf) {
+		printf("malloc failed, errno = %d\n", errno);
+		goto out;
+	}
+	buf[0] = '\0';
+
 	fd = open("/proc/mounts", O_RDONLY);
 	fd = open("/proc/mounts", O_RDONLY);
 	if (fd < 0) {
 		printf("failed to open /proc/mounts, errno = %d\n", errno);
-		return;
+		goto out1;
 	}
 
 	while (1) {
 	while (1) {
-		if (found != 1)
-			break;
-
-		if (!strncmp(buf, "/dev/shm", strlen("/dev/shm")))
-			goto found;
-
-		found = next(fd, buf, ARRAY_SIZE(buf), '\n');
+		found = read_mount(fd, buf, bufsize, path, &outcome);
 		if (found != 1)
 			break;
 	}
 
-err:
-	if (found == 0)
-		printf("nothing mounted on /dev/shm\n");
-	else if (found < 0)
+	if (found < 0) {
 		printf("read returned errno %d\n", -found);
+	} else {
+		switch (outcome) {
+		case OUTCOME_TMPFS_MOUNT:
+			printf("OK\n");
+			default_tmpdir = "/dev/shm";
+			break;
 
-out:
-	close(fd);
-
-	return;
-
-found:
-	found = next(fd, buf, ARRAY_SIZE(buf), ' ');
-	if (found != 1)
-		goto err;
+		case OUTCOME_NON_TMPFS_MOUNT:
+			printf("not tmpfs\n");
+			break;
 
-	if (strncmp(buf, "tmpfs", strlen("tmpfs"))) {
-		printf("not tmpfs\n");
-		goto out;
+		default:
+			printf("nothing mounted on /dev/shm\n");
+			break;
+		}
 	}
 
-	printf("OK\n");
-	default_tmpdir = "/dev/shm";
-	goto out;
+	close(fd);
+out1:
+	free(buf);
+out:
+	free(path);
 }
 
 static int __init make_tempfile(const char *template, char **out_tempname,
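
Since /proc/mounts octal-escapes spaces, tabs, newlines and backslashes in mount points, the decode_path() helper above is easy to exercise in isolation. A hedged standalone harness re-stating the same decode loop (the sample string is illustrative):

#include <stdio.h>

/* Standalone re-statement of the decode loop above: turns the field
 * "/dev/my\040shm " into "/dev/my shm". */
static int decode(const char *in, char *out)
{
	char c;
	int i;

	while (*in && *in != ' ') {
		if (*in == '\\') {
			in++;
			c = 0;
			for (i = 0; i < 3; i++) {
				if (*in < '0' || *in > '7')
					return -1;
				c = (c << 3) | (*in++ - '0');
			}
			*out++ = c;
		} else {
			*out++ = *in++;
		}
	}
	*out = '\0';
	return (*in == ' ') ? 0 : -1;	/* a full field ends in a space */
}

int main(void)
{
	char buf[64];

	if (decode("/dev/my\\040shm ", buf) == 0)
		printf("decoded: '%s'\n", buf);	/* -> /dev/my shm */
	return 0;
}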

+ 4 - 4
arch/um/os-Linux/signal.c

@@ -25,7 +25,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
 	[SIGIO]		= sigio_handler,
 	[SIGVTALRM]	= timer_handler };
 
-static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc)
+static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
 {
 	struct uml_pt_regs r;
 	int save_errno = errno;
@@ -61,7 +61,7 @@ static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc)
 static int signals_enabled;
 static unsigned int signals_pending;
 
-void sig_handler(int sig, siginfo_t *si, mcontext_t *mc)
+void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
 {
 	int enabled;
 
@@ -120,7 +120,7 @@ void set_sigstack(void *sig_stack, int size)
 		panic("enabling signal stack failed, errno = %d\n", errno);
 }
 
-static void (*handlers[_NSIG])(int sig, siginfo_t *si, mcontext_t *mc) = {
+static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
 	[SIGSEGV] = sig_handler,
 	[SIGBUS] = sig_handler,
 	[SIGILL] = sig_handler,
@@ -162,7 +162,7 @@ static void hard_handler(int sig, siginfo_t *si, void *p)
 		while ((sig = ffs(pending)) != 0){
 			sig--;
 			pending &= ~(1 << sig);
-			(*handlers[sig])(sig, si, mc);
+			(*handlers[sig])(sig, (struct siginfo *)si, mc);
 		}
 
 		/*

+ 12 - 7
arch/um/os-Linux/skas/process.c

@@ -54,7 +54,7 @@ static int ptrace_dump_regs(int pid)
 
 void wait_stub_done(int pid)
 {
-	int n, status, err;
+	int n, status, err, bad_stop = 0;
 
 	while (1) {
 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
@@ -74,6 +74,8 @@ void wait_stub_done(int pid)
 
 	if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0)
 		return;
+	else
+		bad_stop = 1;
 
 bad_wait:
 	err = ptrace_dump_regs(pid);
@@ -83,7 +85,10 @@ void wait_stub_done(int pid)
 	printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, "
 	       "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno,
 	       status);
-	fatal_sigsegv();
+	if (bad_stop)
+		kill(pid, SIGKILL);
+	else
+		fatal_sigsegv();
 }
 
 extern unsigned long current_stub_stack(void);
@@ -409,7 +414,7 @@ void userspace(struct uml_pt_regs *regs)
 		if (WIFSTOPPED(status)) {
 			int sig = WSTOPSIG(status);
 
-			ptrace(PTRACE_GETSIGINFO, pid, 0, &si);
+			ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
 
 			switch (sig) {
 			case SIGSEGV:
@@ -417,7 +422,7 @@ void userspace(struct uml_pt_regs *regs)
 				    !ptrace_faultinfo) {
 					get_skas_faultinfo(pid,
 							   &regs->faultinfo);
-					(*sig_info[SIGSEGV])(SIGSEGV, &si,
+					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
 							     regs);
 				}
 				else handle_segv(pid, regs);
@@ -426,14 +431,14 @@ void userspace(struct uml_pt_regs *regs)
 			        handle_trap(pid, regs, local_using_sysemu);
 				break;
 			case SIGTRAP:
-				relay_signal(SIGTRAP, &si, regs);
+				relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
 				break;
 			case SIGVTALRM:
 				now = os_nsecs();
 				if (now < nsecs)
 					break;
 				block_signals();
-				(*sig_info[sig])(sig, &si, regs);
+				(*sig_info[sig])(sig, (struct siginfo *)&si, regs);
 				unblock_signals();
 				nsecs = timer.it_value.tv_sec *
 					UM_NSEC_PER_SEC +
@@ -447,7 +452,7 @@ void userspace(struct uml_pt_regs *regs)
 			case SIGFPE:
 			case SIGWINCH:
 				block_signals();
-				(*sig_info[sig])(sig, &si, regs);
+				(*sig_info[sig])(sig, (struct siginfo *)&si, regs);
 				unblock_signals();
 				break;
 			default:

+ 7 - 0
arch/x86/kvm/mmu.c

@@ -2810,6 +2810,13 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 
 static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code)
 {
+	/*
+	 * Do not fix the mmio spte with invalid generation number which
+	 * need to be updated by slow page fault path.
+	 */
+	if (unlikely(error_code & PFERR_RSVD_MASK))
+		return false;
+
 	/*
 	 * #PF can be fast only if the shadow page table is present and it
 	 * is caused by write-protect, that means we just need change the

+ 0 - 1
arch/x86/um/signal.c

@@ -508,7 +508,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 {
 	struct rt_sigframe __user *frame;
 	int err = 0;
-	struct task_struct *me = current;
 
 	frame = (struct rt_sigframe __user *)
 		round_down(stack_top - sizeof(struct rt_sigframe), 16);

+ 1 - 0
drivers/acpi/acpi_memhotplug.c

@@ -323,6 +323,7 @@ static int acpi_memory_device_add(struct acpi_device *device,
 	/* Get the range from the _CRS */
 	result = acpi_memory_get_device_resources(mem_device);
 	if (result) {
+		device->driver_data = NULL;
 		kfree(mem_device);
 		return result;
 	}
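
A minimal sketch of the bug class this one-liner addresses (the type and function names here are illustrative): on a failed add, mem_device is freed while device->driver_data still points at it, so any later acpi_driver_data() user would touch freed memory unless the back-pointer is cleared first.

#include <stdlib.h>

struct example_device { void *driver_data; };

/* Illustrative cleanup order: drop the stale reference before freeing
 * the object it refers to. */
static void example_fail_add(struct example_device *dev, void *mem_device)
{
	dev->driver_data = NULL;	/* no dangling back-pointer */
	free(mem_device);
}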

+ 0 - 13
drivers/acpi/acpica/aclocal.h

@@ -931,19 +931,6 @@ struct acpi_bit_register_info {
 
 /* Structs and definitions for _OSI support and I/O port validation */
 
-#define ACPI_OSI_WIN_2000               0x01
-#define ACPI_OSI_WIN_XP                 0x02
-#define ACPI_OSI_WIN_XP_SP1             0x03
-#define ACPI_OSI_WINSRV_2003            0x04
-#define ACPI_OSI_WIN_XP_SP2             0x05
-#define ACPI_OSI_WINSRV_2003_SP1        0x06
-#define ACPI_OSI_WIN_VISTA              0x07
-#define ACPI_OSI_WINSRV_2008            0x08
-#define ACPI_OSI_WIN_VISTA_SP1          0x09
-#define ACPI_OSI_WIN_VISTA_SP2          0x0A
-#define ACPI_OSI_WIN_7                  0x0B
-#define ACPI_OSI_WIN_8                  0x0C
-
 #define ACPI_ALWAYS_ILLEGAL             0x00
 
 struct acpi_interface_info {

+ 11 - 0
drivers/acpi/internal.h

@@ -164,4 +164,15 @@ struct platform_device;
 int acpi_create_platform_device(struct acpi_device *adev,
 				const struct acpi_device_id *id);
 
+/*--------------------------------------------------------------------------
+					Video
+  -------------------------------------------------------------------------- */
+#if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
+bool acpi_video_backlight_quirks(void);
+bool acpi_video_verify_backlight_support(void);
+#else
+static inline bool acpi_video_backlight_quirks(void) { return false; }
+static inline bool acpi_video_verify_backlight_support(void) { return false; }
+#endif
+
 #endif /* _ACPI_INTERNAL_H_ */

+ 9 - 4
drivers/acpi/scan.c

@@ -352,10 +352,12 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source)
 	mutex_lock(&acpi_scan_lock);
 	lock_device_hotplug();
 
-	acpi_bus_get_device(handle, &device);
-	if (device) {
-		dev_warn(&device->dev, "Attempt to re-insert\n");
-		goto out;
+	if (ost_source != ACPI_NOTIFY_BUS_CHECK) {
+		acpi_bus_get_device(handle, &device);
+		if (device) {
+			dev_warn(&device->dev, "Attempt to re-insert\n");
+			goto out;
+		}
 	}
 	acpi_evaluate_hotplug_ost(handle, ost_source,
 				  ACPI_OST_SC_INSERT_IN_PROGRESS, NULL);
@@ -1981,6 +1983,9 @@ static acpi_status acpi_bus_device_attach(acpi_handle handle, u32 lvl_not_used,
 	if (acpi_bus_get_device(handle, &device))
 		return AE_CTRL_DEPTH;
 
+	if (device->handler)
+		return AE_OK;
+
 	ret = acpi_scan_attach_handler(device);
 	if (ret)
 		return ret > 0 ? AE_OK : AE_CTRL_DEPTH;

+ 85 - 13
drivers/acpi/video.c

@@ -44,6 +44,8 @@
 #include <linux/suspend.h>
 #include <acpi/video.h>
 
+#include "internal.h"
+
 #define PREFIX "ACPI: "
 
 #define ACPI_VIDEO_BUS_NAME		"Video Bus"
@@ -448,6 +450,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13 - 2000 Notebook PC"),
 		},
 	},
+	{
+	 .callback = video_ignore_initial_backlight,
+	 .ident = "Fujitsu E753",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_VENDOR, "FUJITSU"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E753"),
+		},
+	},
 	{
 	 .callback = video_ignore_initial_backlight,
 	 .ident = "HP Pavilion dm4",
@@ -898,7 +908,10 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 		device->cap._DDC = 1;
 	}
 
-	if (acpi_video_backlight_support()) {
+	if (acpi_video_init_brightness(device))
+		return;
+
+	if (acpi_video_verify_backlight_support()) {
 		struct backlight_properties props;
 		struct pci_dev *pdev;
 		acpi_handle acpi_parent;
@@ -907,9 +920,6 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 		static int count = 0;
 		char *name;
 
-		result = acpi_video_init_brightness(device);
-		if (result)
-			return;
 		name = kasprintf(GFP_KERNEL, "acpi_video%d", count);
 		if (!name)
 			return;
@@ -969,6 +979,11 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 		if (result)
 			printk(KERN_ERR PREFIX "Create sysfs link\n");
 
+	} else {
+		/* Remove the brightness object. */
+		kfree(device->brightness->levels);
+		kfree(device->brightness);
+		device->brightness = NULL;
 	}
 }
 
@@ -1351,8 +1366,8 @@ acpi_video_switch_brightness(struct acpi_video_device *device, int event)
 	unsigned long long level_current, level_next;
 	int result = -EINVAL;
 
-	/* no warning message if acpi_backlight=vendor is used */
-	if (!acpi_video_backlight_support())
+	/* no warning message if acpi_backlight=vendor or a quirk is used */
+	if (!acpi_video_verify_backlight_support())
 		return 0;
 
 	if (!device->brightness)
@@ -1532,14 +1547,20 @@ static int acpi_video_bus_put_devices(struct acpi_video_bus *video)
 
 /* acpi_video interface */
 
+/*
+ * Win8 requires setting bit2 of _DOS to let firmware know it shouldn't
+ * perform any automatic brightness change on receiving a notification.
+ */
 static int acpi_video_bus_start_devices(struct acpi_video_bus *video)
 {
-	return acpi_video_bus_DOS(video, 0, 0);
+	return acpi_video_bus_DOS(video, 0,
+				  acpi_video_backlight_quirks() ? 1 : 0);
 }
 
 static int acpi_video_bus_stop_devices(struct acpi_video_bus *video)
 {
-	return acpi_video_bus_DOS(video, 0, 1);
+	return acpi_video_bus_DOS(video, 0,
+				  acpi_video_backlight_quirks() ? 0 : 1);
 }
 
 static void acpi_video_bus_notify(struct acpi_device *device, u32 event)
@@ -1854,6 +1875,46 @@ static int acpi_video_bus_remove(struct acpi_device *device)
 	return 0;
 }
 
+static acpi_status video_unregister_backlight(acpi_handle handle, u32 lvl,
+					      void *context, void **rv)
+{
+	struct acpi_device *acpi_dev;
+	struct acpi_video_bus *video;
+	struct acpi_video_device *dev, *next;
+
+	if (acpi_bus_get_device(handle, &acpi_dev))
+		return AE_OK;
+
+	if (acpi_match_device_ids(acpi_dev, video_device_ids))
+		return AE_OK;
+
+	video = acpi_driver_data(acpi_dev);
+	if (!video)
+		return AE_OK;
+
+	acpi_video_bus_stop_devices(video);
+	mutex_lock(&video->device_list_lock);
+	list_for_each_entry_safe(dev, next, &video->video_device_list, entry) {
+		if (dev->backlight) {
+			backlight_device_unregister(dev->backlight);
+			dev->backlight = NULL;
+			kfree(dev->brightness->levels);
+			kfree(dev->brightness);
+		}
+		if (dev->cooling_dev) {
+			sysfs_remove_link(&dev->dev->dev.kobj,
+					  "thermal_cooling");
+			sysfs_remove_link(&dev->cooling_dev->device.kobj,
+					  "device");
+			thermal_cooling_device_unregister(dev->cooling_dev);
+			dev->cooling_dev = NULL;
+		}
+	}
+	mutex_unlock(&video->device_list_lock);
+	acpi_video_bus_start_devices(video);
+	return AE_OK;
+}
+
 static int __init is_i740(struct pci_dev *dev)
 {
 	if (dev->device == 0x00D1)
@@ -1885,14 +1946,25 @@ static int __init intel_opregion_present(void)
 	return opregion;
 }
 
-int acpi_video_register(void)
+int __acpi_video_register(bool backlight_quirks)
 {
-	int result = 0;
+	bool no_backlight;
+	int result;
+
+	no_backlight = backlight_quirks ? acpi_video_backlight_quirks() : false;
+
 	if (register_count) {
 		/*
-		 * if the function of acpi_video_register is already called,
-		 * don't register the acpi_vide_bus again and return no error.
+		 * If acpi_video_register() has been called already, don't try
+		 * to register acpi_video_bus, but unregister backlight devices
+		 * if no backlight support is requested.
 		 */
+		if (no_backlight)
+			acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+					    ACPI_UINT32_MAX,
+					    video_unregister_backlight,
+					    NULL, NULL, NULL);
+
 		return 0;
 	}
 
@@ -1908,7 +1980,7 @@ int acpi_video_register(void)
 
 	return 0;
 }
-EXPORT_SYMBOL(acpi_video_register);
+EXPORT_SYMBOL(__acpi_video_register);
 
 void acpi_video_unregister(void)
 {

+ 21 - 0
drivers/acpi/video_detect.c

@@ -38,6 +38,8 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 
+#include "internal.h"
+
 #define PREFIX "ACPI: "
 
 ACPI_MODULE_NAME("video");
@@ -234,6 +236,17 @@ static void acpi_video_caps_check(void)
 		acpi_video_get_capabilities(NULL);
 }
 
+bool acpi_video_backlight_quirks(void)
+{
+	if (acpi_gbl_osi_data >= ACPI_OSI_WIN_8) {
+		acpi_video_caps_check();
+		acpi_video_support |= ACPI_VIDEO_SKIP_BACKLIGHT;
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL(acpi_video_backlight_quirks);
+
 /* Promote the vendor interface instead of the generic video module.
  * This function allow DMI blacklists to be implemented by externals
  * platform drivers instead of putting a big blacklist in video_detect.c
@@ -278,6 +291,14 @@ int acpi_video_backlight_support(void)
 }
 EXPORT_SYMBOL(acpi_video_backlight_support);
 
+/* For the ACPI video driver use only. */
+bool acpi_video_verify_backlight_support(void)
+{
+	return (acpi_video_support & ACPI_VIDEO_SKIP_BACKLIGHT) ?
+		false : acpi_video_backlight_support();
+}
+EXPORT_SYMBOL(acpi_video_verify_backlight_support);
+
 /*
  * Use acpi_backlight=vendor/video to force that backlight switching
  * is processed by vendor specific acpi drivers or video.ko driver.

+ 2 - 2
drivers/block/Kconfig

@@ -532,11 +532,11 @@ config BLK_DEV_RBD
 	  If unsure, say N.
 
 config BLK_DEV_RSXX
-	tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver"
+	tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
 	depends on PCI
 	help
 	  Device driver for IBM's high speed PCIe SSD
-	  storage devices: FlashSystem-70 and FlashSystem-80.
+	  storage device: Flash Adapter 900GB Full Height.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rsxx.

+ 21 - 0
drivers/block/drbd/drbd_actlog.c

@@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev)
 	wake_up(&mdev->al_wait);
 }
 
+int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
+{
+	struct al_transaction_on_disk *al = buffer;
+	struct drbd_md *md = &mdev->ldev->md;
+	sector_t al_base = md->md_offset + md->al_offset;
+	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
+	int i;
+
+	memset(al, 0, 4096);
+	al->magic = cpu_to_be32(DRBD_AL_MAGIC);
+	al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
+	al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+
+	for (i = 0; i < al_size_4k; i++) {
+		int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int w_update_odbm(struct drbd_work *w, int unused)
 {
 	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
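
Worth spelling out the arithmetic in drbd_initialize_al() above: each activity-log transaction block is 4096 bytes, i.e. eight 512-byte sectors, hence the al_base + i * 8 stride. A hedged standalone sketch of the layout math (the base sector and stripe values are illustrative):

#include <stdio.h>

int main(void)
{
	const unsigned int block_bytes = 4096;	/* one AL transaction */
	const unsigned int sector_bytes = 512;
	const unsigned int stride = block_bytes / sector_bytes;	/* = 8 */
	unsigned long long al_base = 2048;	/* illustrative sector */
	int al_stripes = 1, al_stripe_size_4k = 8;	/* illustrative */
	int al_size_4k = al_stripes * al_stripe_size_4k;
	int i;

	for (i = 0; i < al_size_4k; i++)
		printf("AL block %d -> sector %llu\n", i, al_base + i * stride);
	return 0;
}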

+ 13 - 2
drivers/block/drbd/drbd_int.h

@@ -832,6 +832,7 @@ struct drbd_tconn {			/* is a resource from the config file */
 	unsigned susp_nod:1;		/* IO suspended because no data */
 	unsigned susp_fen:1;		/* IO suspended because fence peer handler runs */
 	struct mutex cstate_mutex;	/* Protects graceful disconnects */
+	unsigned int connect_cnt;	/* Inc each time a connection is established */
 
 	unsigned long flags;
 	struct net_conf *net_conf;	/* content protected by rcu */
@@ -1132,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
 void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
 extern void conn_md_sync(struct drbd_tconn *tconn);
+extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
@@ -1466,8 +1468,16 @@ extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
-enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+enum determine_dev_size {
+	DS_ERROR_SHRINK = -3,
+	DS_ERROR_SPACE_MD = -2,
+	DS_ERROR = -1,
+	DS_UNCHANGED = 0,
+	DS_SHRUNK = 1,
+	DS_GREW = 2
+};
+extern enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
@@ -1633,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_shrink(struct drbd_conf *mdev);
+extern int drbd_initialize_al(struct drbd_conf *, void *);
 
 /* drbd_nl.c */
 /* state info broadcast */

+ 34 - 27
drivers/block/drbd/drbd_main.c

@@ -2762,8 +2762,6 @@ int __init drbd_init(void)
 	/*
 	 * allocate all necessary structs
 	 */
-	err = -ENOMEM;
-
 	init_waitqueue_head(&drbd_pp_wait);
 
 	drbd_proc = NULL; /* play safe for drbd_cleanup */
@@ -2773,6 +2771,7 @@ int __init drbd_init(void)
 	if (err)
 		goto fail;
 
+	err = -ENOMEM;
 	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc)	{
 		printk(KERN_ERR "drbd: unable to register proc file\n");
@@ -2803,7 +2802,6 @@ int __init drbd_init(void)
 fail:
 	drbd_cleanup();
 	if (err == -ENOMEM)
-		/* currently always the case */
 		printk(KERN_ERR "drbd: ran out of memory\n");
 	else
 		printk(KERN_ERR "drbd: initialization failure\n");
@@ -2881,34 +2879,14 @@ struct meta_data_on_disk {
 	u8 reserved_u8[4096 - (7*8 + 10*4)];
 } __packed;
 
-/**
- * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
- * @mdev:	DRBD device.
- */
-void drbd_md_sync(struct drbd_conf *mdev)
+
+
+void drbd_md_write(struct drbd_conf *mdev, void *b)
 {
-	struct meta_data_on_disk *buffer;
+	struct meta_data_on_disk *buffer = b;
 	sector_t sector;
 	int i;
 
-	/* Don't accidentally change the DRBD meta data layout. */
-	BUILD_BUG_ON(UI_SIZE != 4);
-	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
-
-	del_timer(&mdev->md_sync_timer);
-	/* timer may be rearmed by drbd_md_mark_dirty() now. */
-	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
-		return;
-
-	/* We use here D_FAILED and not D_ATTACHING because we try to write
-	 * metadata even if we detach due to a disk failure! */
-	if (!get_ldev_if_state(mdev, D_FAILED))
-		return;
-
-	buffer = drbd_md_get_buffer(mdev);
-	if (!buffer)
-		goto out;
-
 	memset(buffer, 0, sizeof(*buffer));
 
 	buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -2937,6 +2915,35 @@ void drbd_md_sync(struct drbd_conf *mdev)
 		dev_err(DEV, "meta data update failed!\n");
 		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
 	}
+}
+
+/**
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev:	DRBD device.
+ */
+void drbd_md_sync(struct drbd_conf *mdev)
+{
+	struct meta_data_on_disk *buffer;
+
+	/* Don't accidentally change the DRBD meta data layout. */
+	BUILD_BUG_ON(UI_SIZE != 4);
+	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
+
+	del_timer(&mdev->md_sync_timer);
+	/* timer may be rearmed by drbd_md_mark_dirty() now. */
+	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
+		return;
+
+	/* We use here D_FAILED and not D_ATTACHING because we try to write
+	 * metadata even if we detach due to a disk failure! */
+	if (!get_ldev_if_state(mdev, D_FAILED))
+		return;
+
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
+
+	drbd_md_write(mdev, buffer);
 
 	/* Update mdev->ldev->md.la_size_sect,
 	 * since we updated it on metadata. */

+ 143 - 42
drivers/block/drbd/drbd_nl.c

@@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
 
 bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 {
+	unsigned int connect_cnt;
 	union drbd_state mask = { };
 	union drbd_state val = { };
 	enum drbd_fencing_p fp;
@@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 		return false;
 	}
 
+	spin_lock_irq(&tconn->req_lock);
+	connect_cnt = tconn->connect_cnt;
+	spin_unlock_irq(&tconn->req_lock);
+
 	fp = highest_fencing_policy(tconn);
 	switch (fp) {
 	case FP_NOT_AVAIL:
@@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 	   here, because we might have been able to re-establish the connection in the
 	   meantime. */
 	spin_lock_irq(&tconn->req_lock);
-	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
-		_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) {
+		if (tconn->connect_cnt != connect_cnt)
+			/* In case the connection was established and dropped
+			   while the fence-peer handler was running, ignore it */
+			conn_info(tconn, "Ignoring fence-peer exit code\n");
+		else
+			_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	}
 	spin_unlock_irq(&tconn->req_lock);
 
 	return conn_highest_pdsk(tconn) <= D_OUTDATED;
@@ -816,15 +827,20 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size_sect, u_size;
+	struct drbd_md *md = &mdev->ldev->md;
+	u32 prev_al_stripe_size_4k;
+	u32 prev_al_stripes;
 	sector_t size;
 	char ppb[10];
+	void *buffer;
 
 	int md_moved, la_size_changed;
-	enum determine_dev_size rv = unchanged;
+	enum determine_dev_size rv = DS_UNCHANGED;
 
 	/* race:
 	 * application request passes inc_ap_bio,
@@ -836,6 +852,11 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	 * still lock the act_log to not trigger ASSERTs there.
 	 */
 	drbd_suspend_io(mdev);
+	buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */
+	if (!buffer) {
+		drbd_resume_io(mdev);
+		return DS_ERROR;
+	}
 
 	/* no wait necessary anymore, actually we could assert that */
 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
@@ -844,7 +865,17 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	prev_size = mdev->ldev->md.md_size_sect;
 	la_size_sect = mdev->ldev->md.la_size_sect;
 
-	/* TODO: should only be some assert here, not (re)init... */
+	if (rs) {
+		/* rs is non NULL if we should change the AL layout only */
+
+		prev_al_stripes = md->al_stripes;
+		prev_al_stripe_size_4k = md->al_stripe_size_4k;
+
+		md->al_stripes = rs->al_stripes;
+		md->al_stripe_size_4k = rs->al_stripe_size / 4;
+		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
+	}
+
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
 	rcu_read_lock();
@@ -852,6 +883,21 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	rcu_read_unlock();
 	size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
+	if (size < la_size_sect) {
+		if (rs && u_size == 0) {
+			/* Remove "rs &&" later. This check should always be active, but
+			   right now the receiver expects the permissive behavior */
+			dev_warn(DEV, "Implicit shrink not allowed. "
+				 "Use --size=%llus for explicit shrink.\n",
+				 (unsigned long long)size);
+			rv = DS_ERROR_SHRINK;
+		}
+		if (u_size > size)
+			rv = DS_ERROR_SPACE_MD;
+		if (rv != DS_UNCHANGED)
+			goto err_out;
+	}
+
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
 		int err;
@@ -867,7 +913,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 				    "Leaving size unchanged at size = %lu KB\n",
 				    (unsigned long)size);
 			}
-			rv = dev_size_error;
+			rv = DS_ERROR;
 		}
 		/* racy, see comments above. */
 		drbd_set_my_capacity(mdev, size);
@@ -875,38 +921,57 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 		     (unsigned long long)size>>1);
 		     (unsigned long long)size>>1);
 	}
 	}
-	if (rv == dev_size_error)
-		goto out;
+	if (rv <= DS_ERROR)
+		goto err_out;
 
 
 	la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
 	la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
 
 
 	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
 	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
 		|| prev_size	   != mdev->ldev->md.md_size_sect;
 		|| prev_size	   != mdev->ldev->md.md_size_sect;
 
 
-	if (la_size_changed || md_moved) {
-		int err;
+	if (la_size_changed || md_moved || rs) {
+		u32 prev_flags;
 
 
 		drbd_al_shrink(mdev); /* All extents inactive. */
 		drbd_al_shrink(mdev); /* All extents inactive. */
+
+		prev_flags = md->flags;
+		md->flags &= ~MDF_PRIMARY_IND;
+		drbd_md_write(mdev, buffer);
+
 		dev_info(DEV, "Writing the whole bitmap, %s\n",
 		dev_info(DEV, "Writing the whole bitmap, %s\n",
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed ? "size changed" : "md moved");
 			 la_size_changed ? "size changed" : "md moved");
 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
-		err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
-				     "size changed", BM_LOCKED_MASK);
-		if (err) {
-			rv = dev_size_error;
-			goto out;
-		}
-		drbd_md_mark_dirty(mdev);
+		drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+			       "size changed", BM_LOCKED_MASK);
+		drbd_initialize_al(mdev, buffer);
+
+		md->flags = prev_flags;
+		drbd_md_write(mdev, buffer);
+
+		if (rs)
+			dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
+				 md->al_stripes, md->al_stripe_size_4k * 4);
 	}
 	}
 
 
 	if (size > la_size_sect)
 	if (size > la_size_sect)
-		rv = grew;
+		rv = DS_GREW;
 	if (size < la_size_sect)
 	if (size < la_size_sect)
-		rv = shrunk;
-out:
+		rv = DS_SHRUNK;
+
+	if (0) {
+	err_out:
+		if (rs) {
+			md->al_stripes = prev_al_stripes;
+			md->al_stripe_size_4k = prev_al_stripe_size_4k;
+			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
+
+			drbd_md_set_sector_offsets(mdev, mdev->ldev);
+		}
+	}
 	lc_unlock(mdev->act_log);
 	lc_unlock(mdev->act_log);
 	wake_up(&mdev->al_wait);
 	wake_up(&mdev->al_wait);
+	drbd_md_put_buffer(mdev);
 	drbd_resume_io(mdev);
 	drbd_resume_io(mdev);
 
 
 	return rv;
 	return rv;
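
The error handling above funnels every failure into an if (0) { err_out: ... } block: the rollback code is unreachable on the success path, can only be entered via goto, and then falls through into the cleanup (lc_unlock, wake_up, drbd_md_put_buffer, drbd_resume_io) that both paths share. A stand-alone sketch of the idiom, with hypothetical names:

    #include <stdio.h>

    static int change_applied;

    static int determine_size(int fail)
    {
            int rv = 0;

            change_applied = 1;             /* like the md->al_stripes update */
            if (fail) {
                    rv = -1;
                    goto err_out;
            }

            if (0) {
            err_out:
                    change_applied = 0;     /* reachable only via goto */
            }
            /* common cleanup runs on success and error alike */
            printf("applied: %d, rv: %d\n", change_applied, rv);
            return rv;
    }

    int main(void)
    {
            determine_size(0);
            determine_size(1);
            return 0;
    }
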
@@ -1607,11 +1672,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);

-	dd = drbd_determine_dev_size(mdev, 0);
-	if (dd == dev_size_error) {
+	dd = drbd_determine_dev_size(mdev, 0, NULL);
+	if (dd <= DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
-	} else if (dd == grew)
+	} else if (dd == DS_GREW)
 		set_bit(RESYNC_AFTER_NEG, &mdev->flags);

 	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
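
The dd <= DS_ERROR comparison above (and rv <= DS_ERROR in drbd_determine_dev_size) only works if enum determine_dev_size orders every error value at or below DS_ERROR. The enum itself lives in drbd_int.h and is not part of this diff; based on the values used here it presumably reads roughly as follows (an assumption, not a quote of the header):

    enum determine_dev_size {
    	DS_ERROR_SHRINK = -3,	/* implicit shrink rejected */
    	DS_ERROR_SPACE_MD = -2,	/* meta data does not fit */
    	DS_ERROR = -1,		/* generic failure */
    	DS_UNCHANGED = 0,
    	DS_SHRUNK = 1,
    	DS_GREW = 2,
    };

With that ordering, dd <= DS_ERROR catches the generic error and both specific ones, while the success values stay non-negative.
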
@@ -2305,6 +2370,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
+	bool change_al_layout = false;
 	enum dds_flags ddsf;
 	sector_t u_size;
 	int err;
@@ -2315,31 +2381,33 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto fail;

+	mdev = adm_ctx.mdev;
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto fail;
+	}
+
 	memset(&rs, 0, sizeof(struct resize_parms));
+	rs.al_stripes = mdev->ldev->md.al_stripes;
+	rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4;
 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
 		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
-			goto fail;
+			goto fail_ldev;
 		}
 	}

-	mdev = adm_ctx.mdev;
 	if (mdev->state.conn > C_CONNECTED) {
 		retcode = ERR_RESIZE_RESYNC;
-		goto fail;
+		goto fail_ldev;
 	}

 	if (mdev->state.role == R_SECONDARY &&
 	    mdev->state.peer == R_SECONDARY) {
 		retcode = ERR_NO_PRIMARY;
-		goto fail;
-	}
-
-	if (!get_ldev(mdev)) {
-		retcode = ERR_NO_DISK;
-		goto fail;
+		goto fail_ldev;
 	}

 	if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
@@ -2358,6 +2426,28 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 		}
 	}

+	if (mdev->ldev->md.al_stripes != rs.al_stripes ||
+	    mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
+		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
+
+		if (al_size_k > (16 * 1024 * 1024)) {
+			retcode = ERR_MD_LAYOUT_TOO_BIG;
+			goto fail_ldev;
+		}
+
+		if (al_size_k < MD_32kB_SECT/2) {
+			retcode = ERR_MD_LAYOUT_TOO_SMALL;
+			goto fail_ldev;
+		}
+
+		if (mdev->state.conn != C_CONNECTED) {
+			retcode = ERR_MD_LAYOUT_CONNECTED;
+			goto fail_ldev;
+		}
+
+		change_al_layout = true;
+	}
+
 	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
 		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);

@@ -2373,16 +2463,22 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	}

 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-	dd = drbd_determine_dev_size(mdev, ddsf);
+	dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
-	if (dd == dev_size_error) {
+	if (dd == DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto fail;
+	} else if (dd == DS_ERROR_SPACE_MD) {
+		retcode = ERR_MD_LAYOUT_NO_FIT;
+		goto fail;
+	} else if (dd == DS_ERROR_SHRINK) {
+		retcode = ERR_IMPLICIT_SHRINK;
+		goto fail;
 	}

 	if (mdev->state.conn == C_CONNECTED) {
-		if (dd == grew)
+		if (dd == DS_GREW)
 			set_bit(RESIZE_PENDING, &mdev->flags);

 		drbd_send_uuids(mdev);
@@ -2658,7 +2754,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		const struct sib_info *sib)
 {
 	struct state_info *si = NULL; /* for sizeof(si->member); */
-	struct net_conf *nc;
 	struct nlattr *nla;
 	int got_ldev;
 	int err = 0;
@@ -2688,13 +2783,19 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		goto nla_put_failure;

 	rcu_read_lock();
-	if (got_ldev)
-		if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
-			goto nla_put_failure;
+	if (got_ldev) {
+		struct disk_conf *disk_conf;

-	nc = rcu_dereference(mdev->tconn->net_conf);
-	if (nc)
-		err = net_conf_to_skb(skb, nc, exclude_sensitive);
+		disk_conf = rcu_dereference(mdev->ldev->disk_conf);
+		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
+	}
+	if (!err) {
+		struct net_conf *nc;
+
+		nc = rcu_dereference(mdev->tconn->net_conf);
+		if (nc)
+			err = net_conf_to_skb(skb, nc, exclude_sensitive);
+	}
 	rcu_read_unlock();
 	if (err)
 		goto nla_put_failure;
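
The rewrite above fixes a subtle shape problem: the old code could goto nla_put_failure while still inside the RCU read-side section. The new code records err under the lock and only branches after rcu_read_unlock(). The same shape in a stand-alone sketch (the lock stubs and fill functions are stand-ins, not the drbd API):

    #include <stdio.h>

    static void rcu_read_lock(void)   { }   /* stub */
    static void rcu_read_unlock(void) { }   /* stub */

    static int fill_disk_conf(void) { return 0; }    /* may fail */
    static int fill_net_conf(void)  { return -1; }   /* may fail */

    int main(void)
    {
            int err;

            rcu_read_lock();
            err = fill_disk_conf();
            if (!err)
                    err = fill_net_conf();
            rcu_read_unlock();      /* always unlock before branching out */
            if (err) {
                    printf("bailed out after unlock: %d\n", err);
                    return 1;
            }
            return 0;
    }
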

+ 6 - 6
drivers/block/drbd/drbd_receiver.c

@@ -1039,6 +1039,8 @@ static int conn_connect(struct drbd_tconn *tconn)
 	rcu_read_lock();
 	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 		kref_get(&mdev->kref);
+		rcu_read_unlock();
+
 		/* Prevent a race between resync-handshake and
 		 * being promoted to Primary.
 		 *
@@ -1049,8 +1051,6 @@ static int conn_connect(struct drbd_tconn *tconn)
 		mutex_lock(mdev->state_mutex);
 		mutex_unlock(mdev->state_mutex);

-		rcu_read_unlock();
-
 		if (discard_my_data)
 			set_bit(DISCARD_MY_DATA, &mdev->flags);
 		else
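
Moving rcu_read_unlock() up matters because mutex_lock() may sleep, and sleeping is not allowed inside an RCU read-side critical section; the kref_get() taken just before keeps mdev alive once the RCU lock is dropped. The pattern in miniature (stubs, not the drbd types):

    #include <stdio.h>

    struct obj { int refs; };

    static void rcu_read_lock(void)   { }   /* stub */
    static void rcu_read_unlock(void) { }   /* stub */
    static void mutex_lock(void)      { }   /* stand-in: may sleep */
    static void mutex_unlock(void)    { }

    int main(void)
    {
            struct obj o = { .refs = 1 };

            rcu_read_lock();
            o.refs++;               /* kref_get(): pin before dropping RCU */
            rcu_read_unlock();      /* drop RCU before any sleeping lock */

            mutex_lock();
            mutex_unlock();

            o.refs--;               /* kref_put() equivalent */
            printf("refs: %d\n", o.refs);
            return 0;
    }
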
@@ -3545,7 +3545,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 {
 	struct drbd_conf *mdev;
 	struct p_sizes *p = pi->data;
-	enum determine_dev_size dd = unchanged;
+	enum determine_dev_size dd = DS_UNCHANGED;
 	sector_t p_size, p_usize, my_usize;
 	int ldsc = 0; /* local disk size changed */
 	enum dds_flags ddsf;
@@ -3617,9 +3617,9 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)

 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(mdev)) {
-		dd = drbd_determine_dev_size(mdev, ddsf);
+		dd = drbd_determine_dev_size(mdev, ddsf, NULL);
 		put_ldev(mdev);
-		if (dd == dev_size_error)
+		if (dd == DS_ERROR)
 			return -EIO;
 		drbd_md_sync(mdev);
 	} else {
@@ -3647,7 +3647,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 			drbd_send_sizes(mdev, 0, ddsf);
 		}
 		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
-		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
+		    (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) {
 			if (mdev->state.pdsk >= D_INCONSISTENT &&
 			    mdev->state.disk >= D_INCONSISTENT) {
 				if (ddsf & DDSF_NO_RESYNC)

+ 3 - 1
drivers/block/drbd/drbd_state.c

@@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 		drbd_thread_restart_nowait(&mdev->tconn->receiver);

 	/* Resume AL writing if we get a connection */
-	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
 		drbd_resume_al(mdev);
+		mdev->tconn->connect_cnt++;
+	}

 	/* remember last attach time so request_timer_fn() won't
 	 * kill newly established sessions while we are still trying to thaw

+ 348 - 11
drivers/block/rsxx/core.c

@@ -31,6 +31,8 @@
 #include <linux/slab.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>

 #include <linux/genhd.h>
 #include <linux/idr.h>
@@ -39,8 +41,9 @@
 #include "rsxx_cfg.h"

 #define NO_LEGACY 0
+#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */

-MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver");
+MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver");
 MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRIVER_VERSION);
@@ -49,9 +52,282 @@ static unsigned int force_legacy = NO_LEGACY;
 module_param(force_legacy, uint, 0444);
 MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");

+static unsigned int sync_start = 1;
+module_param(sync_start, uint, 0444);
+MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete "
+			     "until the card startup has completed.");
+
 static DEFINE_IDA(rsxx_disk_ida);
 static DEFINE_SPINLOCK(rsxx_ida_lock);

+/* --------------------Debugfs Setup ------------------- */
+
+struct rsxx_cram {
+	u32 f_pos;
+	u32 offset;
+	void *i_private;
+};
+
+static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p)
+{
+	struct rsxx_cardinfo *card = m->private;
+
+	seq_printf(m, "HWID		0x%08x\n",
+					ioread32(card->regmap + HWID));
+	seq_printf(m, "SCRATCH		0x%08x\n",
+					ioread32(card->regmap + SCRATCH));
+	seq_printf(m, "IER		0x%08x\n",
+					ioread32(card->regmap + IER));
+	seq_printf(m, "IPR		0x%08x\n",
+					ioread32(card->regmap + IPR));
+	seq_printf(m, "CREG_CMD		0x%08x\n",
+					ioread32(card->regmap + CREG_CMD));
+	seq_printf(m, "CREG_ADD		0x%08x\n",
+					ioread32(card->regmap + CREG_ADD));
+	seq_printf(m, "CREG_CNT		0x%08x\n",
+					ioread32(card->regmap + CREG_CNT));
+	seq_printf(m, "CREG_STAT	0x%08x\n",
+					ioread32(card->regmap + CREG_STAT));
+	seq_printf(m, "CREG_DATA0	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA0));
+	seq_printf(m, "CREG_DATA1	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA1));
+	seq_printf(m, "CREG_DATA2	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA2));
+	seq_printf(m, "CREG_DATA3	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA3));
+	seq_printf(m, "CREG_DATA4	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA4));
+	seq_printf(m, "CREG_DATA5	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA5));
+	seq_printf(m, "CREG_DATA6	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA6));
+	seq_printf(m, "CREG_DATA7	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA7));
+	seq_printf(m, "INTR_COAL	0x%08x\n",
+					ioread32(card->regmap + INTR_COAL));
+	seq_printf(m, "HW_ERROR		0x%08x\n",
+					ioread32(card->regmap + HW_ERROR));
+	seq_printf(m, "DEBUG0		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG0));
+	seq_printf(m, "DEBUG1		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG1));
+	seq_printf(m, "DEBUG2		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG2));
+	seq_printf(m, "DEBUG3		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG3));
+	seq_printf(m, "DEBUG4		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG4));
+	seq_printf(m, "DEBUG5		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG5));
+	seq_printf(m, "DEBUG6		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG6));
+	seq_printf(m, "DEBUG7		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG7));
+	seq_printf(m, "RECONFIG		0x%08x\n",
+					ioread32(card->regmap + PCI_RECONFIG));
+
+	return 0;
+}
+
+static int rsxx_attr_stats_show(struct seq_file *m, void *p)
+{
+	struct rsxx_cardinfo *card = m->private;
+	int i;
+
+	for (i = 0; i < card->n_targets; i++) {
+		seq_printf(m, "Ctrl %d CRC Errors	= %d\n",
+				i, card->ctrl[i].stats.crc_errors);
+		seq_printf(m, "Ctrl %d Hard Errors	= %d\n",
+				i, card->ctrl[i].stats.hard_errors);
+		seq_printf(m, "Ctrl %d Soft Errors	= %d\n",
+				i, card->ctrl[i].stats.soft_errors);
+		seq_printf(m, "Ctrl %d Writes Issued	= %d\n",
+				i, card->ctrl[i].stats.writes_issued);
+		seq_printf(m, "Ctrl %d Writes Failed	= %d\n",
+				i, card->ctrl[i].stats.writes_failed);
+		seq_printf(m, "Ctrl %d Reads Issued	= %d\n",
+				i, card->ctrl[i].stats.reads_issued);
+		seq_printf(m, "Ctrl %d Reads Failed	= %d\n",
+				i, card->ctrl[i].stats.reads_failed);
+		seq_printf(m, "Ctrl %d Reads Retried	= %d\n",
+				i, card->ctrl[i].stats.reads_retried);
+		seq_printf(m, "Ctrl %d Discards Issued	= %d\n",
+				i, card->ctrl[i].stats.discards_issued);
+		seq_printf(m, "Ctrl %d Discards Failed	= %d\n",
+				i, card->ctrl[i].stats.discards_failed);
+		seq_printf(m, "Ctrl %d DMA SW Errors	= %d\n",
+				i, card->ctrl[i].stats.dma_sw_err);
+		seq_printf(m, "Ctrl %d DMA HW Faults	= %d\n",
+				i, card->ctrl[i].stats.dma_hw_fault);
+		seq_printf(m, "Ctrl %d DMAs Cancelled	= %d\n",
+				i, card->ctrl[i].stats.dma_cancelled);
+		seq_printf(m, "Ctrl %d SW Queue Depth	= %d\n",
+				i, card->ctrl[i].stats.sw_q_depth);
+		seq_printf(m, "Ctrl %d HW Queue Depth	= %d\n",
+			i, atomic_read(&card->ctrl[i].stats.hw_q_depth));
+	}
+
+	return 0;
+}
+
+static int rsxx_attr_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rsxx_attr_stats_show, inode->i_private);
+}
+
+static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rsxx_attr_pci_regs_show, inode->i_private);
+}
+
+static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
+			      size_t cnt, loff_t *ppos)
+{
+	struct rsxx_cram *info = fp->private_data;
+	struct rsxx_cardinfo *card = info->i_private;
+	char *buf;
+	int st;
+
+	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	info->f_pos = (u32)*ppos + info->offset;
+
+	st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+	if (st) {
+		kfree(buf);
+		return st;
+	}
+
+	st = copy_to_user(ubuf, buf, cnt);
+	kfree(buf);
+	if (st)
+		return -EFAULT;
+
+	info->offset += cnt;
+
+	return cnt;
+}
+
+static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf,
+			       size_t cnt, loff_t *ppos)
+{
+	struct rsxx_cram *info = fp->private_data;
+	struct rsxx_cardinfo *card = info->i_private;
+	char *buf;
+	int st;
+
+	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	st = copy_from_user(buf, ubuf, cnt);
+	if (st) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	info->f_pos = (u32)*ppos + info->offset;
+
+	st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+	kfree(buf);
+	if (st)
+		return st;
+
+	info->offset += cnt;
+
+	return cnt;
+}
+
+static int rsxx_cram_open(struct inode *inode, struct file *file)
+{
+	struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->i_private = inode->i_private;
+	info->f_pos = file->f_pos;
+	file->private_data = info;
+
+	return 0;
+}
+
+static int rsxx_cram_release(struct inode *inode, struct file *file)
+{
+	struct rsxx_cram *info = file->private_data;
+
+	if (!info)
+		return 0;
+
+	kfree(info);
+	file->private_data = NULL;
+
+	return 0;
+}
+
+static const struct file_operations debugfs_cram_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_cram_open,
+	.read		= rsxx_cram_read,
+	.write		= rsxx_cram_write,
+	.release	= rsxx_cram_release,
+};
+
+static const struct file_operations debugfs_stats_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_attr_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations debugfs_pci_regs_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_attr_pci_regs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
+{
+	struct dentry *debugfs_stats;
+	struct dentry *debugfs_pci_regs;
+	struct dentry *debugfs_cram;
+
+	card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL);
+	if (IS_ERR_OR_NULL(card->debugfs_dir))
+		goto failed_debugfs_dir;
+
+	debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+					    card->debugfs_dir, card,
+					    &debugfs_stats_fops);
+	if (IS_ERR_OR_NULL(debugfs_stats))
+		goto failed_debugfs_stats;
+
+	debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+					       card->debugfs_dir, card,
+					       &debugfs_pci_regs_fops);
+	if (IS_ERR_OR_NULL(debugfs_pci_regs))
+		goto failed_debugfs_pci_regs;
+
+	debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+					   card->debugfs_dir, card,
+					   &debugfs_cram_fops);
+	if (IS_ERR_OR_NULL(debugfs_cram))
+		goto failed_debugfs_cram;
+
+	return;
+failed_debugfs_cram:
+	debugfs_remove(debugfs_pci_regs);
+failed_debugfs_pci_regs:
+	debugfs_remove(debugfs_stats);
+failed_debugfs_stats:
+	debugfs_remove(card->debugfs_dir);
+failed_debugfs_dir:
+	card->debugfs_dir = NULL;
+}
+
 /*----------------- Interrupt Control & Handling -------------------*/

 static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
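
The debugfs attributes added above all follow the stock seq_file single_open() pattern. A minimal module using the same idiom, for reference (the "demo" names are mine, not part of the rsxx driver):

    #include <linux/debugfs.h>
    #include <linux/err.h>
    #include <linux/module.h>
    #include <linux/seq_file.h>

    static struct dentry *demo_dir;

    static int demo_show(struct seq_file *m, void *p)
    {
            /* m->private carries whatever was passed at create time */
            seq_printf(m, "value\t0x%08x\n", 0x12345678);
            return 0;
    }

    static int demo_open(struct inode *inode, struct file *file)
    {
            return single_open(file, demo_show, inode->i_private);
    }

    static const struct file_operations demo_fops = {
            .owner   = THIS_MODULE,
            .open    = demo_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

    static int __init demo_init(void)
    {
            demo_dir = debugfs_create_dir("demo", NULL);
            if (IS_ERR_OR_NULL(demo_dir))
                    return -ENODEV;
            debugfs_create_file("value", S_IRUGO, demo_dir, NULL, &demo_fops);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            debugfs_remove_recursive(demo_dir);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
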
@@ -163,12 +439,13 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
 		}

 		if (isr & CR_INTR_CREG) {
-			schedule_work(&card->creg_ctrl.done_work);
+			queue_work(card->creg_ctrl.creg_wq,
+				   &card->creg_ctrl.done_work);
 			handled++;
 		}

 		if (isr & CR_INTR_EVENT) {
-			schedule_work(&card->event_work);
+			queue_work(card->event_wq, &card->event_work);
 			rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
 			handled++;
 		}
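
Both handlers move from schedule_work(), which targets the shared system workqueue, to queue_work() on private single-threaded queues (created in rsxx_creg_setup() and rsxx_pci_probe() later in this series). That serializes the driver's own work and keeps long-running card work off the system queue. The shape of the conversion, as a module sketch with illustrative names:

    #include <linux/module.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *demo_wq;

    static void demo_fn(struct work_struct *w)
    {
            /* device work runs single-threaded on demo_wq */
    }
    static DECLARE_WORK(demo_work, demo_fn);

    static int __init demo_init(void)
    {
            demo_wq = create_singlethread_workqueue("demo_wq");
            if (!demo_wq)
                    return -ENOMEM;
            queue_work(demo_wq, &demo_work);   /* instead of schedule_work() */
            return 0;
    }

    static void __exit demo_exit(void)
    {
            flush_workqueue(demo_wq);
            destroy_workqueue(demo_wq);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
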
@@ -329,7 +606,7 @@ static int rsxx_eeh_frozen(struct pci_dev *dev)
 	int i;
 	int st;

-	dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n");
+	dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n");

 	card->eeh_state = 1;
 	rsxx_mask_interrupts(card);
@@ -367,15 +644,26 @@ static void rsxx_eeh_failure(struct pci_dev *dev)
 {
 	struct rsxx_cardinfo *card = pci_get_drvdata(dev);
 	int i;
+	int cnt = 0;

-	dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");
+	dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n");

 	card->eeh_state = 1;
+	card->halt = 1;

-	for (i = 0; i < card->n_targets; i++)
-		del_timer_sync(&card->ctrl[i].activity_timer);
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_bh(&card->ctrl[i].queue_lock);
+		cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
+					     &card->ctrl[i].queue);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
+
+		cnt += rsxx_dma_cancel(&card->ctrl[i]);

-	rsxx_eeh_cancel_dmas(card);
+		if (cnt)
+			dev_info(CARD_TO_DEV(card),
+				"Freed %d queued DMAs on channel %d\n",
+				cnt, card->ctrl[i].id);
+	}
 }

 static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
@@ -432,7 +720,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
 	int st;

 	dev_warn(&dev->dev,
-		"IBM FlashSystem PCI: recovering from slot reset.\n");
+		"IBM Flash Adapter PCI: recovering from slot reset.\n");

 	st = pci_enable_device(dev);
 	if (st)
@@ -485,7 +773,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
 				&card->ctrl[i].issue_dma_work);
 	}

-	dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n");
+	dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n");

 	return PCI_ERS_RESULT_RECOVERED;

@@ -528,6 +816,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 {
 	struct rsxx_cardinfo *card;
 	int st;
+	unsigned int sync_timeout;

 	dev_info(&dev->dev, "PCI-Flash SSD discovered\n");

@@ -610,7 +899,11 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 	}

 	/************* Setup Processor Command Interface *************/
-	rsxx_creg_setup(card);
+	st = rsxx_creg_setup(card);
+	if (st) {
+		dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n");
+		goto failed_creg_setup;
+	}

 	spin_lock_irq(&card->irq_lock);
 	rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
@@ -650,6 +943,12 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 	}

 	/************* Setup Card Event Handler *************/
+	card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
+	if (!card->event_wq) {
+		dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
+		goto failed_event_handler;
+	}
+
 	INIT_WORK(&card->event_work, card_event_handler);

 	st = rsxx_setup_dev(card);
@@ -676,6 +975,33 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 		if (st)
 			dev_crit(CARD_TO_DEV(card),
 				"Failed issuing card startup\n");
+		if (sync_start) {
+			sync_timeout = SYNC_START_TIMEOUT;
+
+			dev_info(CARD_TO_DEV(card),
+				 "Waiting for card to startup\n");
+
+			do {
+				ssleep(1);
+				sync_timeout--;
+
+				rsxx_get_card_state(card, &card->state);
+			} while (sync_timeout &&
+				(card->state == CARD_STATE_STARTING));
+
+			if (card->state == CARD_STATE_STARTING) {
+				dev_warn(CARD_TO_DEV(card),
+					 "Card startup timed out\n");
+				card->size8 = 0;
+			} else {
+				dev_info(CARD_TO_DEV(card),
+					"card state: %s\n",
+					rsxx_card_state_to_str(card->state));
+				st = rsxx_get_card_size8(card, &card->size8);
+				if (st)
+					card->size8 = 0;
+			}
+		}
 	} else if (card->state == CARD_STATE_GOOD ||
 		   card->state == CARD_STATE_RD_ONLY_FAULT) {
 		st = rsxx_get_card_size8(card, &card->size8);
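
With sync_start enabled (the default), probe blocks in the loop above for up to SYNC_START_TIMEOUT one-second polls, i.e. 10 * 60 = 600 iterations or ten minutes, before giving up and zeroing the card size. The bounded-poll shape, reduced to a user-space sketch:

    #include <stdio.h>
    #include <unistd.h>

    enum { TIMEOUT_SECS = 10 * 60 };

    static int still_starting(void) { return 0; }  /* stand-in for the state read */

    int main(void)
    {
            unsigned int left = TIMEOUT_SECS;

            do {
                    sleep(1);
                    left--;
            } while (left && still_starting());

            printf("%s\n", left ? "came up" : "timed out");
            return 0;
    }
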
@@ -685,12 +1011,21 @@ static int rsxx_pci_probe(struct pci_dev *dev,

 	rsxx_attach_dev(card);

+	/************* Setup Debugfs *************/
+	rsxx_debugfs_dev_new(card);
+
 	return 0;

 failed_create_dev:
+	destroy_workqueue(card->event_wq);
+	card->event_wq = NULL;
+failed_event_handler:
 	rsxx_dma_destroy(card);
 failed_dma_setup:
 failed_compatiblity_check:
+	destroy_workqueue(card->creg_ctrl.creg_wq);
+	card->creg_ctrl.creg_wq = NULL;
+failed_creg_setup:
 	spin_lock_irq(&card->irq_lock);
 	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
 	spin_unlock_irq(&card->irq_lock);
@@ -756,6 +1091,8 @@ static void rsxx_pci_remove(struct pci_dev *dev)
 	/* Prevent work_structs from re-queuing themselves. */
 	card->halt = 1;

+	debugfs_remove_recursive(card->debugfs_dir);
+
 	free_irq(dev->irq, card);

 	if (!force_legacy)

+ 14 - 0
drivers/block/rsxx/cregs.c

@@ -431,6 +431,15 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card,
 	*hw_stat = completion.creg_status;

 	if (completion.st) {
+		/*
+		* This read is needed to verify that no extreme error, such
+		* as an EEH, has occurred. The function iowrite32 will not
+		* detect EEH errors, so it is necessary that we recover if
+		* such an error is the reason for the timeout. This is a
+		* dummy read.
+		*/
+		ioread32(card->regmap + SCRATCH);
+
 		dev_warn(CARD_TO_DEV(card),
 			"creg command failed(%d x%08x)\n",
 			completion.st, addr);
@@ -727,6 +736,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card)
 {
 	card->creg_ctrl.active_cmd = NULL;

+	card->creg_ctrl.creg_wq =
+			create_singlethread_workqueue(DRIVER_NAME"_creg");
+	if (!card->creg_ctrl.creg_wq)
+		return -ENOMEM;
+
 	INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
 	mutex_init(&card->creg_ctrl.reset_lock);
 	INIT_LIST_HEAD(&card->creg_ctrl.queue);
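
The dummy ioread32() matters on EEH-capable platforms (PowerPC): MMIO writes are posted and cannot report a frozen slot, but a read both returns all-ones on a frozen slot and gives the EEH core a chance to notice the failure and start recovery. A sketch of the idiom (driver and register names are illustrative, not the rsxx API):

    #include <linux/errno.h>
    #include <linux/io.h>

    int demo_cmd_timed_out(void __iomem *regmap, unsigned long scratch_off)
    {
            /*
             * A frozen slot reads back as all-ones; the read itself lets
             * the EEH machinery detect the failure and schedule recovery.
             */
            if (ioread32(regmap + scratch_off) == 0xffffffff)
                    return -EIO;        /* likely frozen, recovery pending */
            return -ETIMEDOUT;          /* plain command timeout */
    }
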

+ 12 - 21
drivers/block/rsxx/dev.c

@@ -155,7 +155,8 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
 		atomic_set(&meta->error, 1);

 	if (atomic_dec_and_test(&meta->pending_dmas)) {
-		disk_stats_complete(card, meta->bio, meta->start_time);
+		if (!card->eeh_state && card->gendisk)
+			disk_stats_complete(card, meta->bio, meta->start_time);

 		bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
 		kmem_cache_free(bio_meta_pool, meta);
@@ -170,6 +171,12 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)

 	might_sleep();

+	if (!card)
+		goto req_err;
+
+	if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk))
+		goto req_err;
+
 	if (unlikely(card->halt)) {
 		st = -EFAULT;
 		goto req_err;
@@ -196,7 +203,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
 	atomic_set(&bio_meta->pending_dmas, 0);
 	bio_meta->start_time = jiffies;

-	disk_stats_start(card, bio);
+	if (likely(!card->halt))
+		disk_stats_start(card, bio);

 	dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
 		 bio_data_dir(bio) ? 'W' : 'R', bio_meta,
@@ -225,24 +233,6 @@ static bool rsxx_discard_supported(struct rsxx_cardinfo *card)
 	return (pci_rev >= RSXX_DISCARD_SUPPORT);
 }

-static unsigned short rsxx_get_logical_block_size(
-					struct rsxx_cardinfo *card)
-{
-	u32 capabilities = 0;
-	int st;
-
-	st = rsxx_get_card_capabilities(card, &capabilities);
-	if (st)
-		dev_warn(CARD_TO_DEV(card),
-			"Failed reading card capabilities register\n");
-
-	/* Earlier firmware did not have support for 512 byte accesses */
-	if (capabilities & CARD_CAP_SUBPAGE_WRITES)
-		return 512;
-	else
-		return RSXX_HW_BLK_SIZE;
-}
-
 int rsxx_attach_dev(struct rsxx_cardinfo *card)
 {
 	mutex_lock(&card->dev_lock);
@@ -305,7 +295,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
 		return -ENOMEM;
 	}

-	blk_size = rsxx_get_logical_block_size(card);
+	blk_size = card->config.data.block_size;

 	blk_queue_make_request(card->queue, rsxx_make_request);
 	blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
@@ -347,6 +337,7 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
 	card->gendisk = NULL;

 	blk_cleanup_queue(card->queue);
+	card->queue->queuedata = NULL;
 	unregister_blkdev(card->major, DRIVER_NAME);
 }

+ 96 - 89
drivers/block/rsxx/dma.c

@@ -245,6 +245,22 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
 	kmem_cache_free(rsxx_dma_pool, dma);
 }

+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
+			   struct list_head *q)
+{
+	struct rsxx_dma *dma;
+	struct rsxx_dma *tmp;
+	int cnt = 0;
+
+	list_for_each_entry_safe(dma, tmp, q, list) {
+		list_del(&dma->list);
+		rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+		cnt++;
+	}
+
+	return cnt;
+}
+
 static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
 				 struct rsxx_dma *dma)
 {
@@ -252,9 +268,10 @@ static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
 	 * Requeued DMAs go to the front of the queue so they are issued
 	 * first.
 	 */
-	spin_lock(&ctrl->queue_lock);
+	spin_lock_bh(&ctrl->queue_lock);
+	ctrl->stats.sw_q_depth++;
 	list_add(&dma->list, &ctrl->queue);
-	spin_unlock(&ctrl->queue_lock);
+	spin_unlock_bh(&ctrl->queue_lock);
 }

 static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
@@ -329,6 +346,7 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
 static void dma_engine_stalled(unsigned long data)
 {
 	struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data;
+	int cnt;

 	if (atomic_read(&ctrl->stats.hw_q_depth) == 0 ||
 	    unlikely(ctrl->card->eeh_state))
@@ -349,18 +367,28 @@ static void dma_engine_stalled(unsigned long data)
 			"DMA channel %d has stalled, faulting interface.\n",
 			ctrl->id);
 		ctrl->card->dma_fault = 1;
+
+		/* Clean up the DMA queue */
+		spin_lock(&ctrl->queue_lock);
+		cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+		spin_unlock(&ctrl->queue_lock);
+
+		cnt += rsxx_dma_cancel(ctrl);
+
+		if (cnt)
+			dev_info(CARD_TO_DEV(ctrl->card),
+				"Freed %d queued DMAs on channel %d\n",
+				cnt, ctrl->id);
 	}
 }

+static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
 {
 {
-	struct rsxx_dma_ctrl *ctrl;
 	struct rsxx_dma *dma;
 	int tag;
 	int cmds_pending = 0;
 	struct hw_cmd *hw_cmd_buf;

-	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
 	hw_cmd_buf = ctrl->cmd.buf;

 	if (unlikely(ctrl->card->halt) ||
@@ -368,22 +396,22 @@ static void rsxx_issue_dmas(struct work_struct *work)
 		return;

 	while (1) {
-		spin_lock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
 		if (list_empty(&ctrl->queue)) {
-			spin_unlock(&ctrl->queue_lock);
+			spin_unlock_bh(&ctrl->queue_lock);
 			break;
 		}
-		spin_unlock(&ctrl->queue_lock);
+		spin_unlock_bh(&ctrl->queue_lock);

 		tag = pop_tracker(ctrl->trackers);
 		if (tag == -1)
 			break;

-		spin_lock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
 		dma = list_entry(ctrl->queue.next, struct rsxx_dma, list);
 		list_del(&dma->list);
 		ctrl->stats.sw_q_depth--;
-		spin_unlock(&ctrl->queue_lock);
+		spin_unlock_bh(&ctrl->queue_lock);

 		/*
 		 * This will catch any DMAs that slipped in right before the
@@ -440,9 +468,8 @@ static void rsxx_issue_dmas(struct work_struct *work)
 	}
 }

+static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl)
 {
 {
-	struct rsxx_dma_ctrl *ctrl;
 	struct rsxx_dma *dma;
 	unsigned long flags;
 	u16 count;
@@ -450,7 +477,6 @@ static void rsxx_dma_done(struct work_struct *work)
 	u8 tag;
 	struct hw_status *hw_st_buf;

-	ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
 	hw_st_buf = ctrl->status.buf;

 	if (unlikely(ctrl->card->halt) ||
@@ -520,33 +546,32 @@ static void rsxx_dma_done(struct work_struct *work)
 	rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id));
 	spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);

-	spin_lock(&ctrl->queue_lock);
+	spin_lock_bh(&ctrl->queue_lock);
 	if (ctrl->stats.sw_q_depth)
 		queue_work(ctrl->issue_wq, &ctrl->issue_dma_work);
-	spin_unlock(&ctrl->queue_lock);
+	spin_unlock_bh(&ctrl->queue_lock);
 }

-				      struct list_head *q)
+static void rsxx_schedule_issue(struct work_struct *work)
 {
 {
-	struct rsxx_dma *dma;
-	struct rsxx_dma *tmp;
-	int cnt = 0;
+	struct rsxx_dma_ctrl *ctrl;

-	list_for_each_entry_safe(dma, tmp, q, list) {
-		list_del(&dma->list);
+	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);

-		if (dma->dma_addr)
-			pci_unmap_page(card->dev, dma->dma_addr,
-				       get_dma_size(dma),
-				       (dma->cmd == HW_CMD_BLK_WRITE) ?
-				       PCI_DMA_TODEVICE :
-				       PCI_DMA_FROMDEVICE);
-		kmem_cache_free(rsxx_dma_pool, dma);
-		cnt++;
-	}
+	mutex_lock(&ctrl->work_lock);
+	rsxx_issue_dmas(ctrl);
+	mutex_unlock(&ctrl->work_lock);
+}

-	return cnt;
+static void rsxx_schedule_done(struct work_struct *work)
+{
+	struct rsxx_dma_ctrl *ctrl;
+
+	ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
+
+	mutex_lock(&ctrl->work_lock);
+	rsxx_dma_done(ctrl);
+	mutex_unlock(&ctrl->work_lock);
 }

 static int rsxx_queue_discard(struct rsxx_cardinfo *card,
 
 
 	for (i = 0; i < card->n_targets; i++) {
 	for (i = 0; i < card->n_targets; i++) {
 		if (!list_empty(&dma_list[i])) {
 		if (!list_empty(&dma_list[i])) {
-			spin_lock(&card->ctrl[i].queue_lock);
+			spin_lock_bh(&card->ctrl[i].queue_lock);
 			card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
 			card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
 			list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
 			list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
-			spin_unlock(&card->ctrl[i].queue_lock);
+			spin_unlock_bh(&card->ctrl[i].queue_lock);
 
 
 			queue_work(card->ctrl[i].issue_wq,
 			queue_work(card->ctrl[i].issue_wq,
 				   &card->ctrl[i].issue_dma_work);
 				   &card->ctrl[i].issue_dma_work);
@@ -711,8 +736,11 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 	return 0;
 	return 0;
 
 
 bvec_err:
 bvec_err:
-	for (i = 0; i < card->n_targets; i++)
-		rsxx_cleanup_dma_queue(card, &dma_list[i]);
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_bh(&card->ctrl[i].queue_lock);
+		rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
+	}
 
 
 	return st;
 	return st;
 }
 }
@@ -780,6 +808,7 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
 	spin_lock_init(&ctrl->trackers->lock);

 	spin_lock_init(&ctrl->queue_lock);
+	mutex_init(&ctrl->work_lock);
 	INIT_LIST_HEAD(&ctrl->queue);

 	setup_timer(&ctrl->activity_timer, dma_engine_stalled,
@@ -793,8 +822,8 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
 	if (!ctrl->done_wq)
 		return -ENOMEM;

-	INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);
-	INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done);
+	INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue);
+	INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done);

 	st = rsxx_hw_buffers_init(dev, ctrl);
 	if (st)
@@ -918,13 +947,30 @@ int rsxx_dma_setup(struct rsxx_cardinfo *card)
 	return st;
 }

+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl)
+{
+	struct rsxx_dma *dma;
+	int i;
+	int cnt = 0;
+
+	/* Clean up issued DMAs */
+	for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
+		dma = get_tracker_dma(ctrl->trackers, i);
+		if (dma) {
+			atomic_dec(&ctrl->stats.hw_q_depth);
+			rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+			push_tracker(ctrl->trackers, i);
+			cnt++;
+		}
+	}
+
+	return cnt;
+}

 void rsxx_dma_destroy(struct rsxx_cardinfo *card)
 {
 	struct rsxx_dma_ctrl *ctrl;
-	struct rsxx_dma *dma;
-	int i, j;
-	int cnt = 0;
+	int i;

 	for (i = 0; i < card->n_targets; i++) {
 		ctrl = &card->ctrl[i];
@@ -943,33 +989,11 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
 			del_timer_sync(&ctrl->activity_timer);

 		/* Clean up the DMA queue */
-		spin_lock(&ctrl->queue_lock);
-		cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue);
-		spin_unlock(&ctrl->queue_lock);
-
-		if (cnt)
-			dev_info(CARD_TO_DEV(card),
-				"Freed %d queued DMAs on channel %d\n",
-				cnt, i);
-
-		/* Clean up issued DMAs */
-		for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
-			dma = get_tracker_dma(ctrl->trackers, j);
-			if (dma) {
-				pci_unmap_page(card->dev, dma->dma_addr,
-					       get_dma_size(dma),
-					       (dma->cmd == HW_CMD_BLK_WRITE) ?
-					       PCI_DMA_TODEVICE :
-					       PCI_DMA_FROMDEVICE);
-				kmem_cache_free(rsxx_dma_pool, dma);
-				cnt++;
-			}
-		}
+		spin_lock_bh(&ctrl->queue_lock);
+		rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+		spin_unlock_bh(&ctrl->queue_lock);

-		if (cnt)
-			dev_info(CARD_TO_DEV(card),
-				"Freed %d pending DMAs on channel %d\n",
-				cnt, i);
+		rsxx_dma_cancel(ctrl);

 		vfree(ctrl->trackers);

@@ -1013,7 +1037,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
 			cnt++;
 		}

-		spin_lock(&card->ctrl[i].queue_lock);
+		spin_lock_bh(&card->ctrl[i].queue_lock);
 		list_splice(&issued_dmas[i], &card->ctrl[i].queue);

 		atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
@@ -1028,7 +1052,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
 					       PCI_DMA_TODEVICE :
 					       PCI_DMA_FROMDEVICE);
 		}
-		spin_unlock(&card->ctrl[i].queue_lock);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 	}

 	kfree(issued_dmas);
@@ -1036,30 +1060,13 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
 	return 0;
 }

-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card)
-{
-	struct rsxx_dma *dma;
-	struct rsxx_dma *tmp;
-	int i;
-
-	for (i = 0; i < card->n_targets; i++) {
-		spin_lock(&card->ctrl[i].queue_lock);
-		list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) {
-			list_del(&dma->list);
-
-			rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED);
-		}
-		spin_unlock(&card->ctrl[i].queue_lock);
-	}
-}
-
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
 {
 	struct rsxx_dma *dma;
 	int i;

 	for (i = 0; i < card->n_targets; i++) {
-		spin_lock(&card->ctrl[i].queue_lock);
+		spin_lock_bh(&card->ctrl[i].queue_lock);
 		list_for_each_entry(dma, &card->ctrl[i].queue, list) {
 			dma->dma_addr = pci_map_page(card->dev, dma->page,
 					dma->pg_off, get_dma_size(dma),
@@ -1067,12 +1074,12 @@ int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
 					PCI_DMA_TODEVICE :
 					PCI_DMA_FROMDEVICE);
 			if (!dma->dma_addr) {
-				spin_unlock(&card->ctrl[i].queue_lock);
+				spin_unlock_bh(&card->ctrl[i].queue_lock);
 				kmem_cache_free(rsxx_dma_pool, dma);
 				return -ENOMEM;
 			}
 		}
-		spin_unlock(&card->ctrl[i].queue_lock);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 	}

 	return 0;
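
The blanket spin_lock() to spin_lock_bh() conversion in this file is driven by dma_engine_stalled(): it is a timer callback, so it runs in softirq context and takes queue_lock there (plain spin_lock is fine inside the timer itself). Every process-context taker must now disable bottom halves, or the timer could fire on the same CPU while the lock is held and deadlock. The rule in miniature (illustrative names, the 3.x timer API this driver uses):

    #include <linux/jiffies.h>
    #include <linux/module.h>
    #include <linux/spinlock.h>
    #include <linux/timer.h>

    static DEFINE_SPINLOCK(demo_lock);
    static struct timer_list demo_timer;

    static void demo_timer_fn(unsigned long data)
    {
            spin_lock(&demo_lock);      /* already in softirq context */
            spin_unlock(&demo_lock);
    }

    static int __init demo_init(void)
    {
            setup_timer(&demo_timer, demo_timer_fn, 0);
            mod_timer(&demo_timer, jiffies + HZ);

            spin_lock_bh(&demo_lock);   /* process context: block the timer */
            spin_unlock_bh(&demo_lock);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            del_timer_sync(&demo_timer);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
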

+ 9 - 1
drivers/block/rsxx/rsxx_priv.h

@@ -39,6 +39,7 @@
 #include <linux/vmalloc.h>
 #include <linux/timer.h>
 #include <linux/ioctl.h>
+#include <linux/delay.h>

 #include "rsxx.h"
 #include "rsxx_cfg.h"
@@ -114,6 +115,7 @@ struct rsxx_dma_ctrl {
 	struct timer_list		activity_timer;
 	struct dma_tracker_list		*trackers;
 	struct rsxx_dma_stats		stats;
+	struct mutex			work_lock;
 };

 struct rsxx_cardinfo {
@@ -134,6 +136,7 @@ struct rsxx_cardinfo {
 		spinlock_t		lock;
 		bool			active;
 		struct creg_cmd		*active_cmd;
+		struct workqueue_struct	*creg_wq;
 		struct work_struct	done_work;
 		struct list_head	queue;
 		unsigned int		q_depth;
@@ -154,6 +157,7 @@ struct rsxx_cardinfo {
 		int buf_len;
 	} log;

+	struct workqueue_struct	*event_wq;
 	struct work_struct	event_work;
 	unsigned int		state;
 	u64			size8;
@@ -181,6 +185,8 @@ struct rsxx_cardinfo {

 	int			n_targets;
 	struct rsxx_dma_ctrl	*ctrl;
+
+	struct dentry		*debugfs_dir;
 };

 enum rsxx_pci_regmap {
@@ -283,6 +289,7 @@ enum rsxx_creg_addr {
 	CREG_ADD_CAPABILITIES		= 0x80001050,
 	CREG_ADD_LOG			= 0x80002000,
 	CREG_ADD_NUM_TARGETS		= 0x80003000,
+	CREG_ADD_CRAM			= 0xA0000000,
 	CREG_ADD_CONFIG			= 0xB0000000,
 };

@@ -372,6 +379,8 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
 int rsxx_dma_setup(struct rsxx_cardinfo *card);
 void rsxx_dma_destroy(struct rsxx_cardinfo *card);
 int rsxx_dma_init(void);
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q);
+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
 void rsxx_dma_cleanup(void);
 void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
 int rsxx_dma_configure(struct rsxx_cardinfo *card);
@@ -382,7 +391,6 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 			   void *cb_data);
 int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl);
 int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card);
-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card);
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);

 /***** cregs.c *****/

+ 552 - 320
drivers/block/xen-blkback/blkback.c

@@ -50,110 +50,118 @@
 #include "common.h"
 #include "common.h"
 
 
 /*
 /*
- * These are rather arbitrary. They are fairly large because adjacent requests
- * pulled from a communication ring are quite likely to end up being part of
- * the same scatter/gather request at the disc.
+ * Maximum number of unused free pages to keep in the internal buffer.
+ * Setting this to a value too low will reduce memory used in each backend,
+ * but can have a performance penalty.
  *
  *
- * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
- *
- * This will increase the chances of being able to write whole tracks.
- * 64 should be enough to keep us competitive with Linux.
+ * A sane value is xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST, but can
+ * be set to a lower value that might degrade performance on some intensive
+ * IO workloads.
  */
  */
-static int xen_blkif_reqs = 64;
-module_param_named(reqs, xen_blkif_reqs, int, 0);
-MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
 
 
-/* Run-time switchable: /sys/module/blkback/parameters/ */
-static unsigned int log_stats;
-module_param(log_stats, int, 0644);
+static int xen_blkif_max_buffer_pages = 1024;
+module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
+MODULE_PARM_DESC(max_buffer_pages,
+"Maximum number of free pages to keep in each block backend buffer");
 
 
 /*
 /*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it. Each buffer_head that completes decrements
- * the pendcnt towards zero. When it hits zero, the specified domain has a
- * response queued for it, with the saved 'id' passed back.
+ * Maximum number of grants to map persistently in blkback. For maximum
+ * performance this should be the total numbers of grants that can be used
+ * to fill the ring, but since this might become too high, specially with
+ * the use of indirect descriptors, we set it to a value that provides good
+ * performance without using too much memory.
+ *
+ * When the list of persistent grants is full we clean it up using a LRU
+ * algorithm.
  */
  */
-struct pending_req {
-	struct xen_blkif	*blkif;
-	u64			id;
-	int			nr_pages;
-	atomic_t		pendcnt;
-	unsigned short		operation;
-	int			status;
-	struct list_head	free_list;
-	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-};
 
 
-#define BLKBACK_INVALID_HANDLE (~0)
+static int xen_blkif_max_pgrants = 1056;
+module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+MODULE_PARM_DESC(max_persistent_grants,
+                 "Maximum number of grants to map persistently");
 
 
-struct xen_blkbk {
-	struct pending_req	*pending_reqs;
-	/* List of all 'pending_req' available */
-	struct list_head	pending_free;
-	/* And its spinlock. */
-	spinlock_t		pending_free_lock;
-	wait_queue_head_t	pending_free_wq;
-	/* The list of all pages that are available. */
-	struct page		**pending_pages;
-	/* And the grant handles that are available. */
-	grant_handle_t		*pending_grant_handles;
-};
-
-static struct xen_blkbk *blkbk;
+/*
+ * The LRU mechanism to clean the lists of persistent grants needs to
+ * be executed periodically. The time interval between consecutive executions
+ * of the purge mechanism is set in ms.
+ */
+#define LRU_INTERVAL 100
 
 
 /*
 /*
- * Maximum number of grant pages that can be mapped in blkback.
- * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
- * pages that blkback will persistently map.
- * Currently, this is:
- * RING_SIZE = 32 (for all known ring types)
- * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
- * sizeof(struct persistent_gnt) = 48
- * So the maximum memory used to store the grants is:
- * 32 * 11 * 48 = 16896 bytes
+ * When the persistent grants list is full we will remove unused grants
+ * from the list. The percent number of grants to be removed at each LRU
+ * execution.
  */
  */
-static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+#define LRU_PERCENT_CLEAN 5
+
+/* Run-time switchable: /sys/module/blkback/parameters/ */
+static unsigned int log_stats;
+module_param(log_stats, int, 0644);
+
+#define BLKBACK_INVALID_HANDLE (~0)
+
+/* Number of free pages to remove on each call to free_xenballooned_pages */
+#define NUM_BATCH_FREE_PAGES 10
+
+static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
 {
 {
-	switch (protocol) {
-	case BLKIF_PROTOCOL_NATIVE:
-		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	case BLKIF_PROTOCOL_X86_32:
-		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	case BLKIF_PROTOCOL_X86_64:
-		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	default:
-		BUG();
+	unsigned long flags;
+
+	spin_lock_irqsave(&blkif->free_pages_lock, flags);
+	if (list_empty(&blkif->free_pages)) {
+		BUG_ON(blkif->free_pages_num != 0);
+		spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+		return alloc_xenballooned_pages(1, page, false);
 	}
 	}
+	BUG_ON(blkif->free_pages_num == 0);
+	page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+	list_del(&page[0]->lru);
+	blkif->free_pages_num--;
+	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+
 	return 0;
 	return 0;
 }
 }
 
 
-
-/*
- * Little helpful macro to figure out the index and virtual address of the
- * pending_pages[..]. For each 'pending_req' we have have up to
- * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
- * 10 and would index in the pending_pages[..].
- */
-static inline int vaddr_pagenr(struct pending_req *req, int seg)
+static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+                                  int num)
 {
 {
-	return (req - blkbk->pending_reqs) *
-		BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
-}
+	unsigned long flags;
+	int i;
 
 
-#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
+	spin_lock_irqsave(&blkif->free_pages_lock, flags);
+	for (i = 0; i < num; i++)
+		list_add(&page[i]->lru, &blkif->free_pages);
+	blkif->free_pages_num += num;
+	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+}
 
 
-static inline unsigned long vaddr(struct pending_req *req, int seg)
+static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
 {
 {
-	unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
-	return (unsigned long)pfn_to_kaddr(pfn);
-}
+	/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
+	struct page *page[NUM_BATCH_FREE_PAGES];
+	unsigned int num_pages = 0;
+	unsigned long flags;
 
 
-#define pending_handle(_req, _seg) \
-	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
+	spin_lock_irqsave(&blkif->free_pages_lock, flags);
+	while (blkif->free_pages_num > num) {
+		BUG_ON(list_empty(&blkif->free_pages));
+		page[num_pages] = list_first_entry(&blkif->free_pages,
+		                                   struct page, lru);
+		list_del(&page[num_pages]->lru);
+		blkif->free_pages_num--;
+		if (++num_pages == NUM_BATCH_FREE_PAGES) {
+			spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+			free_xenballooned_pages(num_pages, page);
+			spin_lock_irqsave(&blkif->free_pages_lock, flags);
+			num_pages = 0;
+		}
+	}
+	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+	if (num_pages != 0)
+		free_xenballooned_pages(num_pages, page);
+}
 
 
+#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
 
 
 static int do_block_io_op(struct xen_blkif *blkif);
 static int dispatch_rw_block_io(struct xen_blkif *blkif,
@@ -170,13 +178,29 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 	     (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
 
 
 
 
-static void add_persistent_gnt(struct rb_root *root,
+/*
+ * We don't need locking around the persistent grant helpers
+ * because blkback uses a single thread for each backend, so we
+ * can be sure that these functions will never be called recursively.
+ *
+ * The only exception to that is put_persistent_gnt, which can be called
+ * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
+ * bit operations to modify the flags of a persistent grant and to count
+ * the number of used grants.
+ */
+static int add_persistent_gnt(struct xen_blkif *blkif,
 			       struct persistent_gnt *persistent_gnt)
 {
-	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct rb_node **new = NULL, *parent = NULL;
 	struct persistent_gnt *this;
 
 
+	if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+		if (!blkif->vbd.overflow_max_grants)
+			blkif->vbd.overflow_max_grants = 1;
+		return -EBUSY;
+	}
 	/* Figure out where to put new node */
+	new = &blkif->persistent_gnts.rb_node;
 	while (*new) {
 		this = container_of(*new, struct persistent_gnt, node);
 
 
@@ -186,22 +210,28 @@ static void add_persistent_gnt(struct rb_root *root,
 		else if (persistent_gnt->gnt > this->gnt)
 			new = &((*new)->rb_right);
 		else {
-			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
-			BUG();
+			pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n");
+			return -EINVAL;
 		}
 	}
 
 
+	bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
+	set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
 	/* Add new node and rebalance tree. */
 	rb_link_node(&(persistent_gnt->node), parent, new);
-	rb_insert_color(&(persistent_gnt->node), root);
+	rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
+	blkif->persistent_gnt_c++;
+	atomic_inc(&blkif->persistent_gnt_in_use);
+	return 0;
 }
 
 
-static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
 						 grant_ref_t gref)
 {
 	struct persistent_gnt *data;
-	struct rb_node *node = root->rb_node;
+	struct rb_node *node = NULL;
 
 
+	node = blkif->persistent_gnts.rb_node;
 	while (node) {
 		data = container_of(node, struct persistent_gnt, node);
 
 
@@ -209,13 +239,31 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
 			node = node->rb_left;
 		else if (gref > data->gnt)
 			node = node->rb_right;
-		else
+		else {
+			if (test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+				pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n");
+				return NULL;
+			}
+			set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+			atomic_inc(&blkif->persistent_gnt_in_use);
 			return data;
+		}
 	}
 	return NULL;
 }
 
 
-static void free_persistent_gnts(struct rb_root *root, unsigned int num)
+static void put_persistent_gnt(struct xen_blkif *blkif,
+                               struct persistent_gnt *persistent_gnt)
+{
+	if (!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+		pr_alert_ratelimited(DRV_PFX " freeing a grant already unused\n");
+	set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+	clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+	atomic_dec(&blkif->persistent_gnt_in_use);
+}
+
+static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+                                 unsigned int num)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -240,7 +288,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
 			ret = gnttab_unmap_refs(unmap, NULL, pages,
 				segs_to_unmap);
 			BUG_ON(ret);
-			free_xenballooned_pages(segs_to_unmap, pages);
+			put_free_pages(blkif, pages, segs_to_unmap);
 			segs_to_unmap = 0;
 		}
 
 
@@ -251,21 +299,148 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
 	BUG_ON(num != 0);
 }
 
 
+static void unmap_purged_grants(struct work_struct *work)
+{
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	int ret, segs_to_unmap = 0;
+	struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+
+	while (!list_empty(&blkif->persistent_purge_list)) {
+		persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+		                                  struct persistent_gnt,
+		                                  remove_node);
+		list_del(&persistent_gnt->remove_node);
+
+		gnttab_set_unmap_op(&unmap[segs_to_unmap],
+			vaddr(persistent_gnt->page),
+			GNTMAP_host_map,
+			persistent_gnt->handle);
+
+		pages[segs_to_unmap] = persistent_gnt->page;
+
+		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			ret = gnttab_unmap_refs(unmap, NULL, pages,
+				segs_to_unmap);
+			BUG_ON(ret);
+			put_free_pages(blkif, pages, segs_to_unmap);
+			segs_to_unmap = 0;
+		}
+		kfree(persistent_gnt);
+	}
+	if (segs_to_unmap > 0) {
+		ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
+		BUG_ON(ret);
+		put_free_pages(blkif, pages, segs_to_unmap);
+	}
+}
+
+static void purge_persistent_gnt(struct xen_blkif *blkif)
+{
+	struct persistent_gnt *persistent_gnt;
+	struct rb_node *n;
+	unsigned int num_clean, total;
+	bool scan_used = false, clean_used = false;
+	struct rb_root *root;
+
+	if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
+	    (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
+	    !blkif->vbd.overflow_max_grants)) {
+		return;
+	}
+
+	if (work_pending(&blkif->persistent_purge_work)) {
+		pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n");
+		return;
+	}
+
+	num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+	num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+	num_clean = min(blkif->persistent_gnt_c, num_clean);
+	if ((num_clean == 0) ||
+	    (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
+		return;
+
+	/*
+	 * At this point we can be sure that there will be no calls
+	 * to get_persistent_gnt (because we are executing this code from
+	 * xen_blkif_schedule); there can only be calls to put_persistent_gnt,
+	 * which means that the number of currently used grants will go down,
+	 * but never up, so we will always be able to remove the requested
+	 * number of grants.
+	 */
+
+	total = num_clean;
+
+	pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
+
+	INIT_LIST_HEAD(&blkif->persistent_purge_list);
+	root = &blkif->persistent_gnts;
+purge_list:
+	foreach_grant_safe(persistent_gnt, n, root, node) {
+		BUG_ON(persistent_gnt->handle ==
+			BLKBACK_INVALID_HANDLE);
+
+		if (clean_used) {
+			clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+			continue;
+		}
+
+		if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+			continue;
+		if (!scan_used &&
+		    (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+			continue;
+
+		rb_erase(&persistent_gnt->node, root);
+		list_add(&persistent_gnt->remove_node,
+		         &blkif->persistent_purge_list);
+		if (--num_clean == 0)
+			goto finished;
+	}
+	/*
+	 * If we get here it means we also need to start cleaning
+	 * grants that were used since the last purge in order to reach
+	 * the requested number.
+	 */
+	if (!scan_used && !clean_used) {
+		pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean);
+		scan_used = true;
+		goto purge_list;
+	}
+finished:
+	if (!clean_used) {
+		pr_debug(DRV_PFX "Finished scanning for grants to clean, removing used flag\n");
+		clean_used = true;
+		goto purge_list;
+	}
+
+	blkif->persistent_gnt_c -= (total - num_clean);
+	blkif->vbd.overflow_max_grants = 0;
+
+	/* We can defer this work */
+	INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants);
+	schedule_work(&blkif->persistent_purge_work);
+	pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
+	return;
+}
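For reference, the sizing rule above reduces to a few lines of arithmetic. A minimal standalone sketch, assuming the defaults used elsewhere in this series (xen_blkif_max_pgrants = 352, LRU_PERCENT_CLEAN = 5); the helper name and the numbers in main() are invented for illustration:

    #include <stdio.h>

    #define LRU_PERCENT_CLEAN 5

    /* Mirrors the num_clean computation in purge_persistent_gnt(); the
     * caller guarantees persistent_gnt_c >= max_pgrants when this runs. */
    static unsigned int purge_target(unsigned int persistent_gnt_c,
                                     unsigned int max_pgrants,
                                     unsigned int in_use)
    {
        unsigned int num_clean;

        /* 5% of the limit, plus however far we are over the limit */
        num_clean = (max_pgrants / 100) * LRU_PERCENT_CLEAN;
        num_clean = persistent_gnt_c - max_pgrants + num_clean;
        if (num_clean > persistent_gnt_c)
            num_clean = persistent_gnt_c;

        /* only grants that are not currently in use may be purged */
        if (num_clean == 0 || num_clean > persistent_gnt_c - in_use)
            return 0;
        return num_clean;
    }

    int main(void)
    {
        /* tree full (352/352), 100 grants mapped into in-flight requests */
        printf("would purge %u grants\n", purge_target(352, 352, 100));
        return 0;
    }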
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
-static struct pending_req *alloc_req(void)
+static struct pending_req *alloc_req(struct xen_blkif *blkif)
 {
 	struct pending_req *req = NULL;
 	unsigned long flags;
 
 
-	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-	if (!list_empty(&blkbk->pending_free)) {
-		req = list_entry(blkbk->pending_free.next, struct pending_req,
+	spin_lock_irqsave(&blkif->pending_free_lock, flags);
+	if (!list_empty(&blkif->pending_free)) {
+		req = list_entry(blkif->pending_free.next, struct pending_req,
 				 free_list);
 		list_del(&req->free_list);
 	}
-	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+	spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
 	return req;
 }
 
 
@@ -273,17 +448,17 @@ static struct pending_req *alloc_req(void)
  * Return the 'pending_req' structure back to the freepool. We also
  * wake up the thread if it was waiting for a free page.
  */
-static void free_req(struct pending_req *req)
+static void free_req(struct xen_blkif *blkif, struct pending_req *req)
 {
 	unsigned long flags;
 	int was_empty;
 
 
-	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-	was_empty = list_empty(&blkbk->pending_free);
-	list_add(&req->free_list, &blkbk->pending_free);
-	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+	spin_lock_irqsave(&blkif->pending_free_lock, flags);
+	was_empty = list_empty(&blkif->pending_free);
+	list_add(&req->free_list, &blkif->pending_free);
+	spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
 	if (was_empty)
-		wake_up(&blkbk->pending_free_wq);
+		wake_up(&blkif->pending_free_wq);
 }
 
 
 /*
@@ -382,10 +557,12 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 static void print_stats(struct xen_blkif *blkif)
 {
 	pr_info("xen-blkback (%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
 	pr_info("xen-blkback (%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
-		 "  |  ds %4llu\n",
+		 "  |  ds %4llu | pg: %4u/%4d\n",
 		 current->comm, blkif->st_oo_req,
 		 blkif->st_rd_req, blkif->st_wr_req,
-		 blkif->st_f_req, blkif->st_ds_req);
+		 blkif->st_f_req, blkif->st_ds_req,
+		 blkif->persistent_gnt_c,
+		 xen_blkif_max_pgrants);
 	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
 	blkif->st_rd_req = 0;
 	blkif->st_wr_req = 0;
@@ -397,6 +574,8 @@ int xen_blkif_schedule(void *arg)
 {
 	struct xen_blkif *blkif = arg;
 	struct xen_vbd *vbd = &blkif->vbd;
+	unsigned long timeout;
+	int ret;
 
 
 	xen_blkif_get(blkif);
 
 
@@ -406,27 +585,52 @@ int xen_blkif_schedule(void *arg)
 		if (unlikely(vbd->size != vbd_sz(vbd)))
 			xen_vbd_resize(blkif);
 
 
-		wait_event_interruptible(
+		timeout = msecs_to_jiffies(LRU_INTERVAL);
+
+		timeout = wait_event_interruptible_timeout(
 			blkif->wq,
-			blkif->waiting_reqs || kthread_should_stop());
-		wait_event_interruptible(
-			blkbk->pending_free_wq,
-			!list_empty(&blkbk->pending_free) ||
-			kthread_should_stop());
+			blkif->waiting_reqs || kthread_should_stop(),
+			timeout);
+		if (timeout == 0)
+			goto purge_gnt_list;
+		timeout = wait_event_interruptible_timeout(
+			blkif->pending_free_wq,
+			!list_empty(&blkif->pending_free) ||
+			kthread_should_stop(),
+			timeout);
+		if (timeout == 0)
+			goto purge_gnt_list;
 
 
 		blkif->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
 
 
-		if (do_block_io_op(blkif))
+		ret = do_block_io_op(blkif);
+		if (ret > 0)
 			blkif->waiting_reqs = 1;
+		if (ret == -EACCES)
+			wait_event_interruptible(blkif->shutdown_wq,
+						 kthread_should_stop());
+
+purge_gnt_list:
+		if (blkif->vbd.feature_gnt_persistent &&
+		    time_after(jiffies, blkif->next_lru)) {
+			purge_persistent_gnt(blkif);
+			blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+		}
+
+		/* Shrink if we have more than xen_blkif_max_buffer_pages */
+		shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
 
 
 		if (log_stats && time_after(jiffies, blkif->st_print))
 		if (log_stats && time_after(jiffies, blkif->st_print))
 			print_stats(blkif);
 			print_stats(blkif);
 	}
 	}
 
 
+	/* Since we are shutting down remove all pages from the buffer */
+	shrink_free_pagepool(blkif, 0 /* All */);
+
 	/* Free all persistent grant pages */
 	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
-		free_persistent_gnts(&blkif->persistent_gnts,
+		free_persistent_gnts(blkif, &blkif->persistent_gnts,
 			blkif->persistent_gnt_c);
 
 
 	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
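A behavioural note on the loop above: do_block_io_op() now distinguishes "more requests pending" (a positive return) from "the frontend published a bogus ring producer" (-EACCES), which parks the thread on shutdown_wq until teardown. A standalone summary of the three cases; react() is an invented helper, only the constants mirror the patch:

    #include <stdio.h>
    #include <errno.h>

    static const char *react(int ret)
    {
        if (ret > 0)
            return "more requests queued: loop again";
        if (ret == -EACCES)
            return "bogus ring index: stop processing, wait for shutdown";
        return "ring drained: sleep until kicked or LRU timeout";
    }

    int main(void)
    {
        printf("%4d -> %s\n", 1, react(1));
        printf("%4d -> %s\n", 0, react(0));
        printf("%4d -> %s\n", -EACCES, react(-EACCES));
        return 0;
    }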
@@ -441,148 +645,98 @@ int xen_blkif_schedule(void *arg)
 	return 0;
 }
 
 
-struct seg_buf {
-	unsigned int offset;
-	unsigned int nsec;
-};
 /*
  * Unmap the grant references, and also remove the M2P over-rides
  * used in the 'pending_req'.
  */
-static void xen_blkbk_unmap(struct pending_req *req)
+static void xen_blkbk_unmap(struct xen_blkif *blkif,
+                            struct grant_page *pages[],
+                            int num)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int i, invcount = 0;
-	grant_handle_t handle;
 	int ret;
 
 
-	for (i = 0; i < req->nr_pages; i++) {
-		if (!test_bit(i, req->unmap_seg))
+	for (i = 0; i < num; i++) {
+		if (pages[i]->persistent_gnt != NULL) {
+			put_persistent_gnt(blkif, pages[i]->persistent_gnt);
 			continue;
 			continue;
-		handle = pending_handle(req, i);
-		if (handle == BLKBACK_INVALID_HANDLE)
+		}
+		if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
 			continue;
 			continue;
-		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
-				    GNTMAP_host_map, handle);
-		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
-		pages[invcount] = virt_to_page(vaddr(req, i));
-		invcount++;
+		unmap_pages[invcount] = pages[i]->page;
+		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page),
+				    GNTMAP_host_map, pages[i]->handle);
+		pages[i]->handle = BLKBACK_INVALID_HANDLE;
+		if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
+			                        invcount);
+			BUG_ON(ret);
+			put_free_pages(blkif, unmap_pages, invcount);
+			invcount = 0;
+		}
+	}
+	if (invcount) {
+		ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
+		BUG_ON(ret);
+		put_free_pages(blkif, unmap_pages, invcount);
 	}
-
-	ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
-	BUG_ON(ret);
 }
 
 
-static int xen_blkbk_map(struct blkif_request *req,
-			 struct pending_req *pending_req,
-			 struct seg_buf seg[],
-			 struct page *pages[])
+static int xen_blkbk_map(struct xen_blkif *blkif,
+			 struct grant_page *pages[],
+			 int num, bool ro)
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct persistent_gnt *persistent_gnt = NULL;
-	struct xen_blkif *blkif = pending_req->blkif;
 	phys_addr_t addr = 0;
-	int i, j;
-	bool new_map;
-	int nseg = req->u.rw.nr_segments;
+	int i, seg_idx, new_map_idx;
 	int segs_to_map = 0;
 	int ret = 0;
+	int last_map = 0, map_until = 0;
 	int use_persistent_gnts;
 
 
 	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
 
-	BUG_ON(blkif->persistent_gnt_c >
-		   max_mapped_grant_pages(pending_req->blkif->blk_protocol));
-
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
 	 * assign map[..] with the PFN of the page in our domain with the
 	 * corresponding grant reference for each page.
 	 */
-	for (i = 0; i < nseg; i++) {
+again:
+	for (i = map_until; i < num; i++) {
 		uint32_t flags;
 
 
 		if (use_persistent_gnts)
 			persistent_gnt = get_persistent_gnt(
-				&blkif->persistent_gnts,
-				req->u.rw.seg[i].gref);
+				blkif,
+				pages[i]->gref);
 
 
 		if (persistent_gnt) {
 			/*
 			 * We are using persistent grants and
 			 * the grant is already mapped
 			 */
-			new_map = false;
-		} else if (use_persistent_gnts &&
-			   blkif->persistent_gnt_c <
-			   max_mapped_grant_pages(blkif->blk_protocol)) {
-			/*
-			 * We are using persistent grants, the grant is
-			 * not mapped but we have room for it
-			 */
-			new_map = true;
-			persistent_gnt = kmalloc(
-				sizeof(struct persistent_gnt),
-				GFP_KERNEL);
-			if (!persistent_gnt)
-				return -ENOMEM;
-			if (alloc_xenballooned_pages(1, &persistent_gnt->page,
-			    false)) {
-				kfree(persistent_gnt);
-				return -ENOMEM;
-			}
-			persistent_gnt->gnt = req->u.rw.seg[i].gref;
-			persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
-
-			pages_to_gnt[segs_to_map] =
-				persistent_gnt->page;
-			addr = (unsigned long) pfn_to_kaddr(
-				page_to_pfn(persistent_gnt->page));
-
-			add_persistent_gnt(&blkif->persistent_gnts,
-				persistent_gnt);
-			blkif->persistent_gnt_c++;
-			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
-				 persistent_gnt->gnt, blkif->persistent_gnt_c,
-				 max_mapped_grant_pages(blkif->blk_protocol));
+			pages[i]->page = persistent_gnt->page;
+			pages[i]->persistent_gnt = persistent_gnt;
 		} else {
-			/*
-			 * We are either using persistent grants and
-			 * hit the maximum limit of grants mapped,
-			 * or we are not using persistent grants.
-			 */
-			if (use_persistent_gnts &&
-				!blkif->vbd.overflow_max_grants) {
-				blkif->vbd.overflow_max_grants = 1;
-				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
-					 blkif->domid, blkif->vbd.handle);
-			}
-			new_map = true;
-			pages[i] = blkbk->pending_page(pending_req, i);
-			addr = vaddr(pending_req, i);
-			pages_to_gnt[segs_to_map] =
-				blkbk->pending_page(pending_req, i);
-		}
-
-		if (persistent_gnt) {
-			pages[i] = persistent_gnt->page;
-			persistent_gnts[i] = persistent_gnt;
-		} else {
-			persistent_gnts[i] = NULL;
-		}
-
-		if (new_map) {
+			if (get_free_page(blkif, &pages[i]->page))
+				goto out_of_memory;
+			addr = vaddr(pages[i]->page);
+			pages_to_gnt[segs_to_map] = pages[i]->page;
+			pages[i]->persistent_gnt = NULL;
 			flags = GNTMAP_host_map;
-			if (!persistent_gnt &&
-			    (pending_req->operation != BLKIF_OP_READ))
+			if (!use_persistent_gnts && ro)
 				flags |= GNTMAP_readonly;
 			gnttab_set_map_op(&map[segs_to_map++], addr,
-					  flags, req->u.rw.seg[i].gref,
+					  flags, pages[i]->gref,
 					  blkif->domid);
 		}
+		map_until = i + 1;
+		if (segs_to_map == BLKIF_MAX_SEGMENTS_PER_REQUEST)
+			break;
 	}
 
 
 	if (segs_to_map) {
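Because an indirect request can carry far more grants than one gnttab batch takes, the rewritten xen_blkbk_map() above walks its input through the again: label in BLKIF_MAX_SEGMENTS_PER_REQUEST-sized chunks. The batching skeleton in isolation; process() stands in for the map hypercall, and the persistent-grant hits that skip mapping entirely are left out:

    #include <stdio.h>

    #define BATCH 11 /* BLKIF_MAX_SEGMENTS_PER_REQUEST */

    static void process(int first, int count)
    {
        printf("mapping segments [%d, %d)\n", first, first + count);
    }

    static void map_in_batches(int num)
    {
        int i, segs_to_map = 0, last_map = 0, map_until = 0;

    again:
        for (i = map_until; i < num; i++) {
            segs_to_map++;
            map_until = i + 1;
            if (segs_to_map == BATCH)
                break;
        }
        process(last_map, segs_to_map);
        segs_to_map = 0;
        last_map = map_until;
        if (map_until != num)
            goto again;
    }

    int main(void)
    {
        map_in_batches(25); /* e.g. an indirect request with 25 segments */
        return 0;
    }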
@@ -595,49 +749,133 @@ static int xen_blkbk_map(struct blkif_request *req,
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	for (i = 0, j = 0; i < nseg; i++) {
-		if (!persistent_gnts[i] ||
-		    persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
+	for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) {
+		if (!pages[seg_idx]->persistent_gnt) {
 			/* This is a newly mapped grant */
-			BUG_ON(j >= segs_to_map);
-			if (unlikely(map[j].status != 0)) {
+			BUG_ON(new_map_idx >= segs_to_map);
+			if (unlikely(map[new_map_idx].status != 0)) {
 				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
 				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-				map[j].handle = BLKBACK_INVALID_HANDLE;
+				pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
 				ret |= 1;
 				ret |= 1;
-				if (persistent_gnts[i]) {
-					rb_erase(&persistent_gnts[i]->node,
-						 &blkif->persistent_gnts);
-					blkif->persistent_gnt_c--;
-					kfree(persistent_gnts[i]);
-					persistent_gnts[i] = NULL;
-				}
+				goto next;
 			}
+			pages[seg_idx]->handle = map[new_map_idx].handle;
+		} else {
+			continue;
 		}
 		}
-		if (persistent_gnts[i]) {
-			if (persistent_gnts[i]->handle ==
-			    BLKBACK_INVALID_HANDLE) {
+		if (use_persistent_gnts &&
+		    blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+			/*
+			 * We are using persistent grants, the grant is
+			 * not mapped but we might have room for it.
+			 */
+			persistent_gnt = kmalloc(sizeof(struct persistent_gnt),
+				                 GFP_KERNEL);
+			if (!persistent_gnt) {
 				/*
 				/*
-				 * If this is a new persistent grant
-				 * save the handler
+				 * If we don't have enough memory to
+				 * allocate the persistent_gnt struct
+				 * map this grant non-persistenly
 				 */
 				 */
-				persistent_gnts[i]->handle = map[j++].handle;
+				goto next;
 			}
 			}
-			pending_handle(pending_req, i) =
-				persistent_gnts[i]->handle;
+			persistent_gnt->gnt = map[new_map_idx].ref;
+			persistent_gnt->handle = map[new_map_idx].handle;
+			persistent_gnt->page = pages[seg_idx]->page;
+			if (add_persistent_gnt(blkif,
+			                       persistent_gnt)) {
+				kfree(persistent_gnt);
+				persistent_gnt = NULL;
+				goto next;
+			}
+			pages[seg_idx]->persistent_gnt = persistent_gnt;
+			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+				 persistent_gnt->gnt, blkif->persistent_gnt_c,
+				 xen_blkif_max_pgrants);
+			goto next;
+		}
+		if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
+			blkif->vbd.overflow_max_grants = 1;
+			pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+			         blkif->domid, blkif->vbd.handle);
+		}
+		/*
+		 * We could not map this grant persistently, so use it as
+		 * a non-persistent grant.
+		 */
+next:
+		new_map_idx++;
+	}
+	segs_to_map = 0;
+	last_map = map_until;
+	if (map_until != num)
+		goto again;
 
 
-			if (ret)
-				continue;
-		} else {
-			pending_handle(pending_req, i) = map[j++].handle;
-			bitmap_set(pending_req->unmap_seg, i, 1);
+	return ret;
+
+out_of_memory:
+	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
+	put_free_pages(blkif, pages_to_gnt, segs_to_map);
+	return -ENOMEM;
+}
+
+static int xen_blkbk_map_seg(struct pending_req *pending_req)
+{
+	int rc;
+
+	rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
+			   pending_req->nr_pages,
+	                   (pending_req->operation != BLKIF_OP_READ));
+
+	return rc;
+}
 
 
-			if (ret)
-				continue;
+static int xen_blkbk_parse_indirect(struct blkif_request *req,
+				    struct pending_req *pending_req,
+				    struct seg_buf seg[],
+				    struct phys_req *preq)
+{
+	struct grant_page **pages = pending_req->indirect_pages;
+	struct xen_blkif *blkif = pending_req->blkif;
+	int indirect_grefs, rc, n, nseg, i;
+	struct blkif_request_segment_aligned *segments = NULL;
+
+	nseg = pending_req->nr_pages;
+	indirect_grefs = INDIRECT_PAGES(nseg);
+	BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
+
+	for (i = 0; i < indirect_grefs; i++)
+		pages[i]->gref = req->u.indirect.indirect_grefs[i];
+
+	rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
+	if (rc)
+		goto unmap;
+
+	for (n = 0, i = 0; n < nseg; n++) {
+		if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
+			/* Map indirect segments */
+			if (segments)
+				kunmap_atomic(segments);
+			segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
+		}
+		i = n % SEGS_PER_INDIRECT_FRAME;
+		pending_req->segments[n]->gref = segments[i].gref;
+		seg[n].nsec = segments[i].last_sect -
+			segments[i].first_sect + 1;
+		seg[n].offset = (segments[i].first_sect << 9);
+		if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) ||
+		    (segments[i].last_sect < segments[i].first_sect)) {
+			rc = -EINVAL;
+			goto unmap;
 		}
 		}
-		seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+		preq->nr_sects += seg[n].nsec;
 	}
 	}
-	return ret;
+
+unmap:
+	if (segments)
+		kunmap_atomic(segments);
+	xen_blkbk_unmap(blkif, pages, indirect_grefs);
+	return rc;
 }
 }
 
 
 static int dispatch_discard_io(struct xen_blkif *blkif,
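On the segment bookkeeping in xen_blkbk_parse_indirect() above: each indirect page holds PAGE_SIZE / sizeof(struct blkif_request_segment_aligned) entries, i.e. 512 with 4 KiB pages and the 8-byte aligned segment layout this series introduces (the 8-byte size is an assumption based on that struct), so the backend's 256-segment ceiling fits in a single indirect page. Checked standalone:

    #include <stdio.h>

    #define PAGE_SZ 4096
    #define SEG_ENTRY_SZ 8 /* assumed sizeof(struct blkif_request_segment_aligned) */
    #define SEGS_PER_INDIRECT_FRAME (PAGE_SZ / SEG_ENTRY_SZ)
    #define INDIRECT_PAGES(segs) \
        (((segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)

    int main(void)
    {
        printf("segments per indirect page: %d\n", SEGS_PER_INDIRECT_FRAME);
        printf("pages for 256 segments: %d\n", INDIRECT_PAGES(256));
        printf("pages for 512 segments: %d\n", INDIRECT_PAGES(512));
        printf("pages for 513 segments: %d\n", INDIRECT_PAGES(513));
        return 0;
    }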
@@ -647,7 +885,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
 	int status = BLKIF_RSP_OKAY;
 	struct block_device *bdev = blkif->vbd.bdev;
 	unsigned long secure;
+	struct phys_req preq;
+
+	preq.sector_number = req->u.discard.sector_number;
+	preq.nr_sects      = req->u.discard.nr_sectors;
 
 
+	err = xen_vbd_translate(&preq, blkif, WRITE);
+	if (err) {
+		pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n",
+			preq.sector_number,
+			preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
+		goto fail_response;
+	}
 	blkif->st_ds_req++;
 
 
 	xen_blkif_get(blkif);
@@ -658,7 +907,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
 	err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
 				   req->u.discard.nr_sectors,
 				   GFP_KERNEL, secure);
-
+fail_response:
 	if (err == -EOPNOTSUPP) {
 		pr_debug(DRV_PFX "discard op failed, not supported\n");
 		pr_debug(DRV_PFX "discard op failed, not supported\n");
 		status = BLKIF_RSP_EOPNOTSUPP;
@@ -674,7 +923,7 @@ static int dispatch_other_io(struct xen_blkif *blkif,
 			     struct blkif_request *req,
 			     struct pending_req *pending_req)
 {
-	free_req(pending_req);
+	free_req(blkif, pending_req);
 	make_response(blkif, req->u.other.id, req->operation,
 		      BLKIF_RSP_EOPNOTSUPP);
 	return -EIO;
@@ -726,7 +975,9 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 	 * the proper response on the ring.
 	 */
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		xen_blkbk_unmap(pending_req);
+		xen_blkbk_unmap(pending_req->blkif,
+		                pending_req->segments,
+		                pending_req->nr_pages);
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
 		xen_blkif_put(pending_req->blkif);
@@ -734,7 +985,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 			if (atomic_read(&pending_req->blkif->drain))
 				complete(&pending_req->blkif->drain_complete);
 		}
-		free_req(pending_req);
+		free_req(pending_req->blkif, pending_req);
 	}
 }
 
 
@@ -767,6 +1018,12 @@ __do_block_io_op(struct xen_blkif *blkif)
 	rp = blk_rings->common.sring->req_prod;
 	rmb(); /* Ensure we see queued requests up to 'rp'. */
 
 
+	if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
+		rc = blk_rings->common.rsp_prod_pvt;
+		pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
+			rp, rc, rp - rc, blkif->vbd.pdevice);
+		return -EACCES;
+	}
 	while (rc != rp) {
 
 
 		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
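The RING_REQUEST_PROD_OVERFLOW() check added above rejects a frontend whose published request producer runs more than a full ring ahead of the backend's private response producer, which can only mean a buggy or hostile guest. Its core test modeled standalone; the real macro lives in the companion xen/interface/io/ring.h change, so treat this as an approximation:

    #include <stdio.h>
    #include <stdbool.h>

    #define RING_SIZE 32 /* native-protocol requests per ring page */

    static bool prod_overflow(unsigned int req_prod, unsigned int rsp_prod_pvt)
    {
        /* unsigned subtraction also handles index wrap-around */
        return (req_prod - rsp_prod_pvt) > RING_SIZE;
    }

    int main(void)
    {
        printf("prod 40, pvt 10 -> %s\n", prod_overflow(40, 10) ? "reject" : "ok");
        printf("prod 42, pvt 10 -> %s\n", prod_overflow(42, 10) ? "reject" : "ok");
        printf("prod 43, pvt 10 -> %s\n", prod_overflow(43, 10) ? "reject" : "ok");
        return 0;
    }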
@@ -777,7 +1034,7 @@ __do_block_io_op(struct xen_blkif *blkif)
 			break;
 		}
 
 
-		pending_req = alloc_req();
+		pending_req = alloc_req(blkif);
 		if (NULL == pending_req) {
 			blkif->st_oo_req++;
 			more_to_do = 1;
@@ -807,11 +1064,12 @@ __do_block_io_op(struct xen_blkif *blkif)
 		case BLKIF_OP_WRITE:
 		case BLKIF_OP_WRITE_BARRIER:
 		case BLKIF_OP_FLUSH_DISKCACHE:
+		case BLKIF_OP_INDIRECT:
 			if (dispatch_rw_block_io(blkif, &req, pending_req))
 				goto done;
 			break;
 		case BLKIF_OP_DISCARD:
-			free_req(pending_req);
+			free_req(blkif, pending_req);
 			if (dispatch_discard_io(blkif, &req))
 				goto done;
 			break;
@@ -853,17 +1111,28 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 				struct pending_req *pending_req)
 {
 	struct phys_req preq;
-	struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct seg_buf *seg = pending_req->seg;
 	unsigned int nseg;
 	struct bio *bio = NULL;
-	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct bio **biolist = pending_req->biolist;
 	int i, nbio = 0;
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
-	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant_page **pages = pending_req->segments;
+	unsigned short req_operation;
+
+	req_operation = req->operation == BLKIF_OP_INDIRECT ?
+			req->u.indirect.indirect_op : req->operation;
+	if ((req->operation == BLKIF_OP_INDIRECT) &&
+	    (req_operation != BLKIF_OP_READ) &&
+	    (req_operation != BLKIF_OP_WRITE)) {
+		pr_debug(DRV_PFX "Invalid indirect operation (%u)\n",
+			 req_operation);
+		goto fail_response;
+	}
 
 
-	switch (req->operation) {
+	switch (req_operation) {
 	case BLKIF_OP_READ:
 		blkif->st_rd_req++;
 		operation = READ;
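An indirect request wraps the real transfer: req->operation is BLKIF_OP_INDIRECT and the actual READ or WRITE sits in req->u.indirect.indirect_op, which is what the req_operation unwrapping at the top of dispatch_rw_block_io() recovers (and restricts to READ/WRITE). In isolation, with abbreviated operation codes; the value 6 for BLKIF_OP_INDIRECT is an assumption based on the interface additions in this series:

    #include <stdio.h>

    enum { BLKIF_OP_READ = 0, BLKIF_OP_WRITE = 1, BLKIF_OP_INDIRECT = 6 };

    struct req {
        unsigned char operation;
        unsigned char indirect_op; /* only valid for BLKIF_OP_INDIRECT */
    };

    static unsigned short real_op(const struct req *r)
    {
        return r->operation == BLKIF_OP_INDIRECT ?
               r->indirect_op : r->operation;
    }

    int main(void)
    {
        struct req direct = { BLKIF_OP_WRITE, 0 };
        struct req wrapped = { BLKIF_OP_INDIRECT, BLKIF_OP_READ };

        printf("direct request   -> op %u\n", real_op(&direct));
        printf("indirect request -> op %u\n", real_op(&wrapped));
        return 0;
    }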
@@ -885,33 +1154,47 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	}
 
 
 	/* Check that the number of segments is sane. */
-	nseg = req->u.rw.nr_segments;
+	nseg = req->operation == BLKIF_OP_INDIRECT ?
+	       req->u.indirect.nr_segments : req->u.rw.nr_segments;
 
 
 	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
-	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+	    unlikely((req->operation != BLKIF_OP_INDIRECT) &&
+		     (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) ||
+	    unlikely((req->operation == BLKIF_OP_INDIRECT) &&
+		     (nseg > MAX_INDIRECT_SEGMENTS))) {
 		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
 		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
 			 nseg);
 		/* Haven't submitted any bio's yet. */
 		goto fail_response;
 	}
 
 
-	preq.sector_number = req->u.rw.sector_number;
 	preq.nr_sects      = 0;
 
 
 	pending_req->blkif     = blkif;
 	pending_req->id        = req->u.rw.id;
-	pending_req->operation = req->operation;
+	pending_req->operation = req_operation;
 	pending_req->status    = BLKIF_RSP_OKAY;
 	pending_req->nr_pages  = nseg;
 
 
-	for (i = 0; i < nseg; i++) {
-		seg[i].nsec = req->u.rw.seg[i].last_sect -
-			req->u.rw.seg[i].first_sect + 1;
-		if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
-		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
+	if (req->operation != BLKIF_OP_INDIRECT) {
+		preq.dev               = req->u.rw.handle;
+		preq.sector_number     = req->u.rw.sector_number;
+		for (i = 0; i < nseg; i++) {
+			pages[i]->gref = req->u.rw.seg[i].gref;
+			seg[i].nsec = req->u.rw.seg[i].last_sect -
+				req->u.rw.seg[i].first_sect + 1;
+			seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+			if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+			    (req->u.rw.seg[i].last_sect <
+			     req->u.rw.seg[i].first_sect))
+				goto fail_response;
+			preq.nr_sects += seg[i].nsec;
+		}
+	} else {
+		preq.dev               = req->u.indirect.handle;
+		preq.sector_number     = req->u.indirect.sector_number;
+		if (xen_blkbk_parse_indirect(req, pending_req, seg, &preq))
 			goto fail_response;
-		preq.nr_sects += seg[i].nsec;
-
 	}
 
 
 	if (xen_vbd_translate(&preq, blkif, operation) != 0) {
@@ -948,7 +1231,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg, pages))
+	if (xen_blkbk_map_seg(pending_req))
 		goto fail_flush;
 
 
 	/*
@@ -960,11 +1243,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     pages[i],
+				     pages[i]->page,
 				     seg[i].nsec << 9,
 				     seg[i].offset) == 0)) {
 
 
-			bio = bio_alloc(GFP_KERNEL, nseg-i);
+			int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES);
+			bio = bio_alloc(GFP_KERNEL, nr_iovecs);
 			if (unlikely(bio == NULL))
 				goto fail_put_bio;
 
 
@@ -1009,11 +1293,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	return 0;
 
 
  fail_flush:
-	xen_blkbk_unmap(pending_req);
+	xen_blkbk_unmap(blkif, pending_req->segments,
+	                pending_req->nr_pages);
  fail_response:
 	/* Haven't submitted any bio's yet. */
-	make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
-	free_req(pending_req);
+	make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
+	free_req(blkif, pending_req);
 	msleep(1); /* back off a bit */
 	return -EIO;
 
 
@@ -1070,73 +1355,20 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 
 
 static int __init xen_blkif_init(void)
 {
-	int i, mmap_pages;
 	int rc = 0;
 
 
 	if (!xen_domain())
 		return -ENODEV;
 
 
-	blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
-	if (!blkbk) {
-		pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
-		return -ENOMEM;
-	}
-
-	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
-	blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
-					xen_blkif_reqs, GFP_KERNEL);
-	blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
-					mmap_pages, GFP_KERNEL);
-	blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
-					mmap_pages, GFP_KERNEL);
-
-	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
-	    !blkbk->pending_pages) {
-		rc = -ENOMEM;
-		goto out_of_memory;
-	}
-
-	for (i = 0; i < mmap_pages; i++) {
-		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-		blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
-		if (blkbk->pending_pages[i] == NULL) {
-			rc = -ENOMEM;
-			goto out_of_memory;
-		}
-	}
 	rc = xen_blkif_interface_init();
 	if (rc)
 		goto failed_init;
 
 
-	INIT_LIST_HEAD(&blkbk->pending_free);
-	spin_lock_init(&blkbk->pending_free_lock);
-	init_waitqueue_head(&blkbk->pending_free_wq);
-
-	for (i = 0; i < xen_blkif_reqs; i++)
-		list_add_tail(&blkbk->pending_reqs[i].free_list,
-			      &blkbk->pending_free);
-
 	rc = xen_blkif_xenbus_init();
 	if (rc)
 		goto failed_init;
 
 
-	return 0;
-
- out_of_memory:
-	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
  failed_init:
-	kfree(blkbk->pending_reqs);
-	kfree(blkbk->pending_grant_handles);
-	if (blkbk->pending_pages) {
-		for (i = 0; i < mmap_pages; i++) {
-			if (blkbk->pending_pages[i])
-				__free_page(blkbk->pending_pages[i]);
-		}
-		kfree(blkbk->pending_pages);
-	}
-	kfree(blkbk);
-	blkbk = NULL;
 	return rc;
 	return rc;
 }
 }
 
 

+ 145 - 2
drivers/block/xen-blkback/common.h

@@ -50,6 +50,19 @@
 		 __func__, __LINE__, ##args)
 
 
 
 
+/*
+ * This is the maximum number of segments that would be allowed in indirect
+ * requests. This value will also be passed to the frontend.
+ */
+#define MAX_INDIRECT_SEGMENTS 256
+
+#define SEGS_PER_INDIRECT_FRAME \
+	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define MAX_INDIRECT_PAGES \
+	((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) \
+	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
 /* Not a real protocol.  Used to generate ring structs which contain
  * the elements common to all protocols only.  This way we get a
  * compiler-checkable way to use common struct elements, so we can
@@ -83,12 +96,31 @@ struct blkif_x86_32_request_other {
 	uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
 
+struct blkif_x86_32_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad1;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	/*
+	 * The maximum number of indirect segments (and pages) that will
+	 * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+	 * is also exported to the guest (via xenstore
+	 * feature-max-indirect-segments entry), so the frontend knows how
+	 * many indirect segments the backend supports.
+	 */
+	uint64_t       _pad2;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_32_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_x86_32_request_rw rw;
 		struct blkif_x86_32_request_discard discard;
 		struct blkif_x86_32_request_other other;
+		struct blkif_x86_32_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
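The _pad fields in the indirect layouts exist to keep every union member inside the fixed 64-byte ring slot: the enclosing request struct contributes the leading operation byte, so the members themselves must stay within 63 bytes. A standalone re-check of the 32-bit layout above, assuming BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST is 8 as elsewhere in this series:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    typedef uint32_t grant_ref_t;
    typedef uint64_t blkif_sector_t;
    typedef uint16_t blkif_vdev_t;

    struct indirect32 { /* mirrors blkif_x86_32_request_indirect */
        uint8_t        indirect_op;
        uint16_t       nr_segments;
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad1;
        grant_ref_t    indirect_grefs[8];
        uint64_t       _pad2;
    } __attribute__((__packed__));

    int main(void)
    {
        printf("sizeof = %zu (must stay <= 63)\n", sizeof(struct indirect32));
        printf("offsetof(id) = %zu\n", offsetof(struct indirect32, id));
        return 0;
    }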
 
 
@@ -127,12 +159,32 @@ struct blkif_x86_64_request_other {
 	uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
 
+struct blkif_x86_64_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+	uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8   */
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad2;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	/*
+	 * The maximum number of indirect segments (and pages) that will
+	 * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+	 * is also exported to the guest (via xenstore
+	 * feature-max-indirect-segments entry), so the frontend knows how
+	 * many indirect segments the backend supports.
+	 */
+	uint32_t       _pad3;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_64_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_x86_64_request_rw rw;
 		struct blkif_x86_64_request_discard discard;
 		struct blkif_x86_64_request_other other;
+		struct blkif_x86_64_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 
 
@@ -182,12 +234,26 @@ struct xen_vbd {
 
 
 struct backend_info;
 
 
+/* Number of available flags */
+#define PERSISTENT_GNT_FLAGS_SIZE	2
+/* This persistent grant is currently in use */
+#define PERSISTENT_GNT_ACTIVE		0
+/*
+ * This persistent grant has been used, this flag is set when we remove the
+ * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
+ */
+#define PERSISTENT_GNT_WAS_ACTIVE	1
+
+/* Number of requests that we can fit in a ring */
+#define XEN_BLKIF_REQS			32
 
 
 struct persistent_gnt {
 	struct page *page;
 	grant_ref_t gnt;
 	grant_handle_t handle;
+	DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
 	struct rb_node node;
+	struct list_head remove_node;
 };
 
 
 struct xen_blkif {
@@ -219,6 +285,23 @@ struct xen_blkif {
 	/* tree to store persistent grants */
 	struct rb_root		persistent_gnts;
 	unsigned int		persistent_gnt_c;
+	atomic_t		persistent_gnt_in_use;
+	unsigned long           next_lru;
+
+	/* used by the kworker that offload work from the persistent purge */
+	struct list_head	persistent_purge_list;
+	struct work_struct	persistent_purge_work;
+
+	/* buffer of free pages to map grant refs */
+	spinlock_t		free_pages_lock;
+	int			free_pages_num;
+	struct list_head	free_pages;
+
+	/* List of all 'pending_req' available */
+	struct list_head	pending_free;
+	/* And its spinlock. */
+	spinlock_t		pending_free_lock;
+	wait_queue_head_t	pending_free_wq;
 
 
 	/* statistics */
 	unsigned long		st_print;
@@ -231,6 +314,41 @@ struct xen_blkif {
 	unsigned long long			st_wr_sect;
 
 
 	wait_queue_head_t	waiting_to_free;
+	/* Thread shutdown wait queue. */
+	wait_queue_head_t	shutdown_wq;
+};
+
+struct seg_buf {
+	unsigned long offset;
+	unsigned int nsec;
+};
+
+struct grant_page {
+	struct page 		*page;
+	struct persistent_gnt	*persistent_gnt;
+	grant_handle_t		handle;
+	grant_ref_t		gref;
+};
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+	struct xen_blkif	*blkif;
+	u64			id;
+	int			nr_pages;
+	atomic_t		pendcnt;
+	unsigned short		operation;
+	int			status;
+	struct list_head	free_list;
+	struct grant_page	*segments[MAX_INDIRECT_SEGMENTS];
+	/* Indirect descriptors */
+	struct grant_page	*indirect_pages[MAX_INDIRECT_PAGES];
+	struct seg_buf		seg[MAX_INDIRECT_SEGMENTS];
+	struct bio		*biolist[MAX_INDIRECT_SEGMENTS];
 };
 };
 
 
 
 
@@ -257,6 +375,7 @@ int xen_blkif_xenbus_init(void);
 
 
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
 int xen_blkif_schedule(void *arg);
+int xen_blkif_purge_persistent(void *arg);
 
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
 			      struct backend_info *be, int state);
@@ -268,7 +387,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 					struct blkif_x86_32_request *src)
 					struct blkif_x86_32_request *src)
 {
 {
-	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
 	dst->operation = src->operation;
 	dst->operation = src->operation;
 	switch (src->operation) {
 	switch (src->operation) {
 	case BLKIF_OP_READ:
 	case BLKIF_OP_READ:
@@ -291,6 +410,18 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		break;
 		break;
+	case BLKIF_OP_INDIRECT:
+		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+		dst->u.indirect.handle = src->u.indirect.handle;
+		dst->u.indirect.id = src->u.indirect.id;
+		dst->u.indirect.sector_number = src->u.indirect.sector_number;
+		barrier();
+		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+		for (i = 0; i < j; i++)
+			dst->u.indirect.indirect_grefs[i] =
+				src->u.indirect.indirect_grefs[i];
+		break;
 	default:
 	default:
 		/*
 		/*
 		 * Don't know how to translate this op. Only get the
 		 * Don't know how to translate this op. Only get the
@@ -304,7 +435,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 					struct blkif_x86_64_request *src)
 					struct blkif_x86_64_request *src)
 {
 {
-	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
 	dst->operation = src->operation;
 	dst->operation = src->operation;
 	switch (src->operation) {
 	switch (src->operation) {
 	case BLKIF_OP_READ:
 	case BLKIF_OP_READ:
@@ -327,6 +458,18 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		break;
 		break;
+	case BLKIF_OP_INDIRECT:
+		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+		dst->u.indirect.handle = src->u.indirect.handle;
+		dst->u.indirect.id = src->u.indirect.id;
+		dst->u.indirect.sector_number = src->u.indirect.sector_number;
+		barrier();
+		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+		for (i = 0; i < j; i++)
+			dst->u.indirect.indirect_grefs[i] =
+				src->u.indirect.indirect_grefs[i];
+		break;
 	default:
 	default:
 		/*
 		/*
 		 * Don't know how to translate this op. Only get the
 		 * Don't know how to translate this op. Only get the

+ 85 - 0
drivers/block/xen-blkback/xenbus.c

@@ -98,12 +98,17 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 		err = PTR_ERR(blkif->xenblkd);
 		err = PTR_ERR(blkif->xenblkd);
 		blkif->xenblkd = NULL;
 		blkif->xenblkd = NULL;
 		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
 		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
+		return;
 	}
 	}
 }
 
 
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 {
 {
 	struct xen_blkif *blkif;
 	struct xen_blkif *blkif;
+	struct pending_req *req, *n;
+	int i, j;
+
+	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
 
 	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
 	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
 	if (!blkif)
 	if (!blkif)
@@ -118,8 +123,57 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->st_print = jiffies;
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
 	init_waitqueue_head(&blkif->waiting_to_free);
 	blkif->persistent_gnts.rb_node = NULL;
 	blkif->persistent_gnts.rb_node = NULL;
+	spin_lock_init(&blkif->free_pages_lock);
+	INIT_LIST_HEAD(&blkif->free_pages);
+	blkif->free_pages_num = 0;
+	atomic_set(&blkif->persistent_gnt_in_use, 0);
+
+	INIT_LIST_HEAD(&blkif->pending_free);
+
+	for (i = 0; i < XEN_BLKIF_REQS; i++) {
+		req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req)
+			goto fail;
+		list_add_tail(&req->free_list,
+		              &blkif->pending_free);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			req->segments[j] = kzalloc(sizeof(*req->segments[0]),
+			                           GFP_KERNEL);
+			if (!req->segments[j])
+				goto fail;
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+			                                 GFP_KERNEL);
+			if (!req->indirect_pages[j])
+				goto fail;
+		}
+	}
+	spin_lock_init(&blkif->pending_free_lock);
+	init_waitqueue_head(&blkif->pending_free_wq);
+	init_waitqueue_head(&blkif->shutdown_wq);
 
 
 	return blkif;
 	return blkif;
+
+fail:
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			if (!req->segments[j])
+				break;
+			kfree(req->segments[j]);
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			if (!req->indirect_pages[j])
+				break;
+			kfree(req->indirect_pages[j]);
+		}
+		kfree(req);
+	}
+
+	kmem_cache_free(xen_blkif_cachep, blkif);
+
+	return ERR_PTR(-ENOMEM);
 }
 }
 
 
 static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
 static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
@@ -178,6 +232,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 {
 {
 	if (blkif->xenblkd) {
 	if (blkif->xenblkd) {
 		kthread_stop(blkif->xenblkd);
 		kthread_stop(blkif->xenblkd);
+		wake_up(&blkif->shutdown_wq);
 		blkif->xenblkd = NULL;
 		blkif->xenblkd = NULL;
 	}
 	}
 
 
@@ -198,8 +253,28 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
 {
+	struct pending_req *req, *n;
+	int i = 0, j;
+
 	if (!atomic_dec_and_test(&blkif->refcnt))
 	if (!atomic_dec_and_test(&blkif->refcnt))
 		BUG();
 		BUG();
+
+	/* Check that there is no request in use */
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+			kfree(req->segments[j]);
+
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+			kfree(req->indirect_pages[j]);
+
+		kfree(req);
+		i++;
+	}
+
+	WARN_ON(i != XEN_BLKIF_REQS);
+
 	kmem_cache_free(xen_blkif_cachep, blkif);
 }
 
 
@@ -678,6 +753,11 @@ static void connect(struct backend_info *be)
 				 dev->nodename);
 		goto abort;
 	}
+	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
+			    MAX_INDIRECT_SEGMENTS);
+	if (err)
+		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
+			 dev->nodename, err);
 
 
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
@@ -704,6 +784,11 @@ static void connect(struct backend_info *be)
 				 dev->nodename);
 				 dev->nodename);
 		goto abort;
 		goto abort;
 	}
 	}
+	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
+			    bdev_physical_block_size(be->blkif->vbd.bdev));
+	if (err)
+		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
+				 dev->nodename);
 
 
 	err = xenbus_transaction_end(xbt, 0);
 	err = xenbus_transaction_end(xbt, 0);
 	if (err == -EAGAIN)
 	if (err == -EAGAIN)

+ 432 - 100
drivers/block/xen-blkfront.c

@@ -74,12 +74,30 @@ struct grant {
 struct blk_shadow {
 struct blk_shadow {
 	struct blkif_request req;
 	struct blkif_request req;
 	struct request *request;
 	struct request *request;
-	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant **grants_used;
+	struct grant **indirect_grants;
+	struct scatterlist *sg;
+};
+
+struct split_bio {
+	struct bio *bio;
+	atomic_t pending;
+	int err;
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
 
+/*
+ * Maximum number of segments in indirect requests; the actual value used by
+ * the frontend driver is the minimum of this value and the value provided
+ * by the backend driver.
+ */
+
+static unsigned int xen_blkif_max_segments = 32;
+module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
+MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
+
 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
 
 /*
@@ -98,7 +116,6 @@ struct blkfront_info
 	enum blkif_state connected;
 	int ring_ref;
 	struct blkif_front_ring ring;
-	struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int evtchn, irq;
 	struct request_queue *rq;
 	struct work_struct work;
@@ -114,6 +131,7 @@ struct blkfront_info
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	unsigned int feature_persistent:1;
+	unsigned int max_indirect_segments;
 	int is_ready;
 };
 
@@ -142,6 +160,13 @@ static DEFINE_SPINLOCK(minor_lock);
 
 #define DEV_NAME	"xvd"	/* name in /dev */
 
+#define SEGS_PER_INDIRECT_FRAME \
+	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define INDIRECT_GREFS(_segs) \
+	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
+static int blkfront_setup_indirect(struct blkfront_info *info);
+
 static int get_id_from_freelist(struct blkfront_info *info)
 {
 	unsigned long free = info->shadow_free;
@@ -358,7 +383,8 @@ static int blkif_queue_request(struct request *req)
 	struct blkif_request *ring_req;
 	unsigned long id;
 	unsigned int fsect, lsect;
-	int i, ref;
+	int i, ref, n;
+	struct blkif_request_segment_aligned *segments = NULL;
 
 	/*
 	 * Used to store if we are able to queue the request by just using
@@ -369,21 +395,27 @@ static int blkif_queue_request(struct request *req)
 	grant_ref_t gref_head;
 	struct grant *gnt_list_entry = NULL;
 	struct scatterlist *sg;
+	int nseg, max_grefs;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	/* Check if we have enought grants to allocate a requests */
-	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+	max_grefs = info->max_indirect_segments ?
+		    info->max_indirect_segments +
+		    INDIRECT_GREFS(info->max_indirect_segments) :
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	/* Check if we have enough grants to allocate a request */
+	if (info->persistent_gnts_c < max_grefs) {
 		new_persistent_gnts = 1;
 		if (gnttab_alloc_grant_references(
-		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+		    max_grefs - info->persistent_gnts_c,
 		    &gref_head) < 0) {
 			gnttab_request_free_callback(
 				&info->callback,
 				blkif_restart_queue_callback,
 				info,
-				BLKIF_MAX_SEGMENTS_PER_REQUEST);
+				max_grefs);
 			return 1;
 		}
 	} else
@@ -394,42 +426,67 @@ static int blkif_queue_request(struct request *req)
 	id = get_id_from_freelist(info);
 	info->shadow[id].request = req;
 
-	ring_req->u.rw.id = id;
-	ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
-	ring_req->u.rw.handle = info->handle;
-
-	ring_req->operation = rq_data_dir(req) ?
-		BLKIF_OP_WRITE : BLKIF_OP_READ;
-
-	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
-		/*
-		 * Ideally we can do an unordered flush-to-disk. In case the
-		 * backend onlysupports barriers, use that. A barrier request
-		 * a superset of FUA, so we can implement it the same
-		 * way.  (It's also a FLUSH+FUA, since it is
-		 * guaranteed ordered WRT previous writes.)
-		 */
-		ring_req->operation = info->flush_op;
-	}
-
 	if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
-		/* id, sector_number and handle are set above. */
 		ring_req->operation = BLKIF_OP_DISCARD;
 		ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+		ring_req->u.discard.id = id;
+		ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
 		if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
 			ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
 		else
 			ring_req->u.discard.flag = 0;
 	} else {
-		ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
-							   info->sg);
-		BUG_ON(ring_req->u.rw.nr_segments >
-		       BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-		for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
+		BUG_ON(info->max_indirect_segments == 0 &&
+		       req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+		BUG_ON(info->max_indirect_segments &&
+		       req->nr_phys_segments > info->max_indirect_segments);
+		nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
+		ring_req->u.rw.id = id;
+		if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			/*
+			 * The indirect operation can only be a BLKIF_OP_READ or
+			 * BLKIF_OP_WRITE
+			 */
+			BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
+			ring_req->operation = BLKIF_OP_INDIRECT;
+			ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
+				BLKIF_OP_WRITE : BLKIF_OP_READ;
+			ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req);
+			ring_req->u.indirect.handle = info->handle;
+			ring_req->u.indirect.nr_segments = nseg;
+		} else {
+			ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
+			ring_req->u.rw.handle = info->handle;
+			ring_req->operation = rq_data_dir(req) ?
+				BLKIF_OP_WRITE : BLKIF_OP_READ;
+			if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+				/*
+				 * Ideally we can do an unordered flush-to-disk. In case the
+				 * backend only supports barriers, use that. A barrier request
+				 * is a superset of FUA, so we can implement it the same
+				 * way.  (It's also a FLUSH+FUA, since it is
+				 * guaranteed ordered WRT previous writes.)
+				 */
+				ring_req->operation = info->flush_op;
+			}
+			ring_req->u.rw.nr_segments = nseg;
+		}
+		for_each_sg(info->shadow[id].sg, sg, nseg, i) {
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
 
+			if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
+			    (i % SEGS_PER_INDIRECT_FRAME == 0)) {
+				if (segments)
+					kunmap_atomic(segments);
+
+				n = i / SEGS_PER_INDIRECT_FRAME;
+				gnt_list_entry = get_grant(&gref_head, info);
+				info->shadow[id].indirect_grants[n] = gnt_list_entry;
+				segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
+				ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
+			}
+
 			gnt_list_entry = get_grant(&gref_head, info);
 			ref = gnt_list_entry->gref;
 
@@ -441,8 +498,7 @@ static int blkif_queue_request(struct request *req)
 
 				BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
-				shared_data = kmap_atomic(
-					pfn_to_page(gnt_list_entry->pfn));
+				shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
 				bvec_data = kmap_atomic(sg_page(sg));
 
 				/*
@@ -461,13 +517,23 @@ static int blkif_queue_request(struct request *req)
 				kunmap_atomic(bvec_data);
 				kunmap_atomic(shared_data);
 			}
-
-			ring_req->u.rw.seg[i] =
-					(struct blkif_request_segment) {
-						.gref       = ref,
-						.first_sect = fsect,
-						.last_sect  = lsect };
+			if (ring_req->operation != BLKIF_OP_INDIRECT) {
+				ring_req->u.rw.seg[i] =
+						(struct blkif_request_segment) {
+							.gref       = ref,
+							.first_sect = fsect,
+							.last_sect  = lsect };
+			} else {
+				n = i % SEGS_PER_INDIRECT_FRAME;
+				segments[n] =
+					(struct blkif_request_segment_aligned) {
+							.gref       = ref,
+							.first_sect = fsect,
+							.last_sect  = lsect };
+			}
 		}
+		if (segments)
+			kunmap_atomic(segments);
 	}
 
 	info->ring.req_prod_pvt++;
@@ -542,7 +608,9 @@ static void do_blkif_request(struct request_queue *rq)
 		flush_requests(info);
 }
 
-static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
+				unsigned int physical_sector_size,
+				unsigned int segments)
 {
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;
@@ -564,14 +632,15 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
-	blk_queue_max_hw_sectors(rq, 512);
+	blk_queue_physical_block_size(rq, physical_sector_size);
+	blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
 	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
 	blk_queue_max_segment_size(rq, PAGE_SIZE);
 
 	/* Ensure a merged request will fit in a single I/O ring slot. */
-	blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	blk_queue_max_segments(rq, segments);
 
 	/* Make sure buffer addresses are sector-aligned. */
 	blk_queue_dma_alignment(rq, 511);
@@ -588,13 +657,16 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s %s %s %s %s\n",
 	       info->gd->disk_name,
 	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
 		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
 		"flush diskcache" : "barrier or flush"),
-	       info->feature_flush ? "enabled" : "disabled",
-	       info->feature_persistent ? "using persistent grants" : "");
+	       info->feature_flush ? "enabled;" : "disabled;",
+	       "persistent grants:",
+	       info->feature_persistent ? "enabled;" : "disabled;",
+	       "indirect descriptors:",
+	       info->max_indirect_segments ? "enabled;" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -667,7 +739,8 @@ static char *encode_disk_name(char *ptr, unsigned int n)
 
 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 			       struct blkfront_info *info,
-			       u16 vdisk_info, u16 sector_size)
+			       u16 vdisk_info, u16 sector_size,
+			       unsigned int physical_sector_size)
 {
 	struct gendisk *gd;
 	int nr_minors = 1;
@@ -734,7 +807,9 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	gd->driverfs_dev = &(info->xbdev->dev);
 	set_capacity(gd, capacity);
 
-	if (xlvbd_init_blk_queue(gd, sector_size)) {
+	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
+				 info->max_indirect_segments ? :
+				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		del_gendisk(gd);
 		goto release;
 	}
@@ -818,6 +893,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 {
 	struct grant *persistent_gnt;
 	struct grant *n;
+	int i, j, segs;
 
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
@@ -843,6 +919,47 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	}
 	BUG_ON(info->persistent_gnts_c != 0);
 
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		/*
+		 * Clear persistent grants present in requests already
+		 * on the shared ring
+		 */
+		if (!info->shadow[i].request)
+			goto free_shadow;
+
+		segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
+		       info->shadow[i].req.u.indirect.nr_segments :
+		       info->shadow[i].req.u.rw.nr_segments;
+		for (j = 0; j < segs; j++) {
+			persistent_gnt = info->shadow[i].grants_used[j];
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+
+		if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+			/*
+			 * If this is not an indirect operation don't try to
+			 * free indirect segments
+			 */
+			goto free_shadow;
+
+		for (j = 0; j < INDIRECT_GREFS(segs); j++) {
+			persistent_gnt = info->shadow[i].indirect_grants[j];
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+
+free_shadow:
+		kfree(info->shadow[i].grants_used);
+		info->shadow[i].grants_used = NULL;
+		kfree(info->shadow[i].indirect_grants);
+		info->shadow[i].indirect_grants = NULL;
+		kfree(info->shadow[i].sg);
+		info->shadow[i].sg = NULL;
+	}
+
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
 	spin_unlock_irq(&info->io_lock);
@@ -867,12 +984,13 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 			     struct blkif_response *bret)
 {
 	int i = 0;
-	struct bio_vec *bvec;
-	struct req_iterator iter;
-	unsigned long flags;
+	struct scatterlist *sg;
 	char *bvec_data;
 	void *shared_data;
-	unsigned int offset = 0;
+	int nseg;
+
+	nseg = s->req.operation == BLKIF_OP_INDIRECT ?
+		s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
 
 	if (bret->operation == BLKIF_OP_READ) {
 		/*
@@ -881,26 +999,29 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 		 * than PAGE_SIZE, we have to keep track of the current offset,
 		 * to be sure we are copying the data from the right shared page.
 		 */
-		rq_for_each_segment(bvec, s->request, iter) {
-			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
-			if (bvec->bv_offset < offset)
-				i++;
-			BUG_ON(i >= s->req.u.rw.nr_segments);
+		for_each_sg(s->sg, sg, nseg, i) {
+			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 			shared_data = kmap_atomic(
 				pfn_to_page(s->grants_used[i]->pfn));
-			bvec_data = bvec_kmap_irq(bvec, &flags);
-			memcpy(bvec_data, shared_data + bvec->bv_offset,
-				bvec->bv_len);
-			bvec_kunmap_irq(bvec_data, &flags);
+			bvec_data = kmap_atomic(sg_page(sg));
+			memcpy(bvec_data   + sg->offset,
+			       shared_data + sg->offset,
+			       sg->length);
+			kunmap_atomic(bvec_data);
 			kunmap_atomic(shared_data);
-			offset = bvec->bv_offset + bvec->bv_len;
 		}
 	}
 	/* Add the persistent grant into the list of free grants */
-	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+	for (i = 0; i < nseg; i++) {
 		list_add(&s->grants_used[i]->node, &info->persistent_gnts);
 		info->persistent_gnts_c++;
 	}
+	if (s->req.operation == BLKIF_OP_INDIRECT) {
+		for (i = 0; i < INDIRECT_GREFS(nseg); i++) {
+			list_add(&s->indirect_grants[i]->node, &info->persistent_gnts);
+			info->persistent_gnts_c++;
+		}
+	}
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1034,14 +1155,6 @@ static int setup_blkring(struct xenbus_device *dev,
 	SHARED_RING_INIT(sring);
 	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
-	sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-	/* Allocate memory for grants */
-	err = fill_grant_buffer(info, BLK_RING_SIZE *
-	                              BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	if (err)
-		goto fail;
-
 	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
 	if (err < 0) {
 		free_page((unsigned long)sring);
@@ -1223,13 +1336,84 @@ static int blkfront_probe(struct xenbus_device *dev,
 	return 0;
 }
 
+/*
+ * This is a clone of md_trim_bio, used to split a bio into smaller ones
+ */
+static void trim_bio(struct bio *bio, int offset, int size)
+{
+	/* 'bio' is a cloned bio which we need to trim to match
+	 * the given offset and size.
+	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
+	 */
+	int i;
+	struct bio_vec *bvec;
+	int sofar = 0;
+
+	size <<= 9;
+	if (offset == 0 && size == bio->bi_size)
+		return;
+
+	bio->bi_sector += offset;
+	bio->bi_size = size;
+	offset <<= 9;
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+	while (bio->bi_idx < bio->bi_vcnt &&
+	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+		/* remove this whole bio_vec */
+		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+		bio->bi_idx++;
+	}
+	if (bio->bi_idx < bio->bi_vcnt) {
+		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+	}
+	/* avoid any complications with bi_idx being non-zero*/
+	if (bio->bi_idx) {
+		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+		bio->bi_vcnt -= bio->bi_idx;
+		bio->bi_idx = 0;
+	}
+	/* Make sure vcnt and last bv are not too big */
+	bio_for_each_segment(bvec, bio, i) {
+		if (sofar + bvec->bv_len > size)
+			bvec->bv_len = size - sofar;
+		if (bvec->bv_len == 0) {
+			bio->bi_vcnt = i;
+			break;
+		}
+		sofar += bvec->bv_len;
+	}
+}
+
+static void split_bio_end(struct bio *bio, int error)
+{
+	struct split_bio *split_bio = bio->bi_private;
+
+	if (error)
+		split_bio->err = error;
+
+	if (atomic_dec_and_test(&split_bio->pending)) {
+		split_bio->bio->bi_phys_segments = 0;
+		bio_endio(split_bio->bio, split_bio->err);
+		kfree(split_bio);
+	}
+	bio_put(bio);
+}
 
 static int blkif_recover(struct blkfront_info *info)
 {
 	int i;
-	struct blkif_request *req;
+	struct request *req, *n;
 	struct blk_shadow *copy;
-	int j;
+	int rc;
+	struct bio *bio, *cloned_bio;
+	struct bio_list bio_list, merge_bio;
+	unsigned int segs, offset;
+	int pending, size;
+	struct split_bio *split_bio;
+	struct list_head requests;
 
 
 	/* Stage 1: Make a safe copy of the shadow state. */
 	copy = kmemdup(info->shadow, sizeof(info->shadow),
 	info->shadow_free = info->ring.req_prod_pvt;
 	info->shadow_free = info->ring.req_prod_pvt;
 	info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
 
+	rc = blkfront_setup_indirect(info);
+	if (rc) {
+		kfree(copy);
+		return rc;
+	}
+
+	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	blk_queue_max_segments(info->rq, segs);
+	bio_list_init(&bio_list);
+	INIT_LIST_HEAD(&requests);
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 		/* Not in use? */
 		if (!copy[i].request)
 			continue;
 
-		req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-		*req = copy[i].req;
-
-		/* We get a new request id, and must reset the shadow state. */
-		req->u.rw.id = get_id_from_freelist(info);
-		memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
-
-		if (req->operation != BLKIF_OP_DISCARD) {
-		/* Rewrite any grant references invalidated by susp/resume. */
-			for (j = 0; j < req->u.rw.nr_segments; j++)
-				gnttab_grant_foreign_access_ref(
-					req->u.rw.seg[j].gref,
-					info->xbdev->otherend_id,
-					pfn_to_mfn(copy[i].grants_used[j]->pfn),
-					0);
+		/*
+		 * Get the bios in the request so we can re-queue them.
+		 */
+		if (copy[i].request->cmd_flags &
+		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+			/*
+			 * Flush operations don't contain bios, so
+			 * we need to requeue the whole request
+			 */
+			list_add(&copy[i].request->queuelist, &requests);
+			continue;
 		}
 		}
-
-		info->ring.req_prod_pvt++;
+		merge_bio.head = copy[i].request->bio;
+		merge_bio.tail = copy[i].request->biotail;
+		bio_list_merge(&bio_list, &merge_bio);
+		copy[i].request->bio = NULL;
+		blk_put_request(copy[i].request);
 	}
 	}
 
 	kfree(copy);
 
+	 * Empty the queue, this is important because we might have
+	 * requests in the queue with more segments than what we
+	 * can handle now.
+	 */
+	spin_lock_irq(&info->io_lock);
+	while ((req = blk_fetch_request(info->rq)) != NULL) {
+		if (req->cmd_flags &
+		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+			list_add(&req->queuelist, &requests);
+			continue;
+		}
+		merge_bio.head = req->bio;
+		merge_bio.tail = req->biotail;
+		bio_list_merge(&bio_list, &merge_bio);
+		req->bio = NULL;
+		if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
+			pr_alert("diskcache flush request found!\n");
+		__blk_put_request(info->rq, req);
+	}
+	spin_unlock_irq(&info->io_lock);
+
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
 	spin_lock_irq(&info->io_lock);
@@ -1281,14 +1493,50 @@ static int blkif_recover(struct blkfront_info *info)
 	/* Now safe for us to use the shared ring */
 	info->connected = BLKIF_STATE_CONNECTED;
 
-	/* Send off requeued requests */
-	flush_requests(info);
-
 	/* Kick any other new requests queued since we resumed */
 	kick_pending_request_queues(info);
 
+	list_for_each_entry_safe(req, n, &requests, queuelist) {
+		/* Requeue pending requests (flush or discard) */
+		list_del_init(&req->queuelist);
+		BUG_ON(req->nr_phys_segments > segs);
+		blk_requeue_request(info->rq, req);
+	}
 	spin_unlock_irq(&info->io_lock);
 
+	while ((bio = bio_list_pop(&bio_list)) != NULL) {
+		/* Traverse the list of pending bios and re-queue them */
+		if (bio_segments(bio) > segs) {
+			/*
+			 * This bio has more segments than what we can
+			 * handle, so we have to split it.
+			 */
+			pending = (bio_segments(bio) + segs - 1) / segs;
+			split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
+			BUG_ON(split_bio == NULL);
+			atomic_set(&split_bio->pending, pending);
+			split_bio->bio = bio;
+			for (i = 0; i < pending; i++) {
+				offset = (i * segs * PAGE_SIZE) >> 9;
+				size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
+					   (unsigned int)(bio->bi_size >> 9) - offset);
+				cloned_bio = bio_clone(bio, GFP_NOIO);
+				BUG_ON(cloned_bio == NULL);
+				trim_bio(cloned_bio, offset, size);
+				cloned_bio->bi_private = split_bio;
+				cloned_bio->bi_end_io = split_bio_end;
+				submit_bio(cloned_bio->bi_rw, cloned_bio);
+			}
+			/*
+			 * Now we have to wait for all those smaller bios to
+			 * end, so we can also end the "parent" bio.
+			 */
+			continue;
+		}
+		/* We don't need to split this bio */
+		submit_bio(bio->bi_rw, bio);
+	}
+
 	return 0;
 }
 
@@ -1308,8 +1556,12 @@ static int blkfront_resume(struct xenbus_device *dev)
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
 	err = talk_to_blkback(dev, info);
-	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
-		err = blkif_recover(info);
+
+	/*
+	 * We have to wait for the backend to switch to
+	 * connected state, since we want to read which
+	 * features it supports.
+	 */
 
 	return err;
 }
@@ -1387,6 +1639,60 @@ static void blkfront_setup_discard(struct blkfront_info *info)
 	kfree(type);
 }
 
+static int blkfront_setup_indirect(struct blkfront_info *info)
+{
+	unsigned int indirect_segments, segs;
+	int err, i;
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-max-indirect-segments", "%u", &indirect_segments,
+			    NULL);
+	if (err) {
+		info->max_indirect_segments = 0;
+		segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	} else {
+		info->max_indirect_segments = min(indirect_segments,
+						  xen_blkif_max_segments);
+		segs = info->max_indirect_segments;
+	}
+
+	err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+	if (err)
+		goto out_of_memory;
+
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		info->shadow[i].grants_used = kzalloc(
+			sizeof(info->shadow[i].grants_used[0]) * segs,
+			GFP_NOIO);
+		info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO);
+		if (info->max_indirect_segments)
+			info->shadow[i].indirect_grants = kzalloc(
+				sizeof(info->shadow[i].indirect_grants[0]) *
+				INDIRECT_GREFS(segs),
+				GFP_NOIO);
+		if ((info->shadow[i].grants_used == NULL) ||
+			(info->shadow[i].sg == NULL) ||
+		     (info->max_indirect_segments &&
+		     (info->shadow[i].indirect_grants == NULL)))
+			goto out_of_memory;
+		sg_init_table(info->shadow[i].sg, segs);
+	}
+
+
+	return 0;
+
+out_of_memory:
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		kfree(info->shadow[i].grants_used);
+		info->shadow[i].grants_used = NULL;
+		kfree(info->shadow[i].sg);
+		info->shadow[i].sg = NULL;
+		kfree(info->shadow[i].indirect_grants);
+		info->shadow[i].indirect_grants = NULL;
+	}
+	return -ENOMEM;
+}
+
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
@@ -1395,6 +1701,7 @@ static void blkfront_connect(struct blkfront_info *info)
 {
 	unsigned long long sectors;
 	unsigned long sector_size;
+	unsigned int physical_sector_size;
 	unsigned int binfo;
 	int err;
 	int barrier, flush, discard, persistent;
@@ -1414,8 +1721,15 @@ static void blkfront_connect(struct blkfront_info *info)
 		set_capacity(info->gd, sectors);
 		revalidate_disk(info->gd);
 
-		/* fall through */
+		return;
 	case BLKIF_STATE_SUSPENDED:
+		/*
+		 * If we are recovering from suspension, we need to wait
+		 * for the backend to announce its features before
+		 * reconnecting; at the very least we need to know whether the backend
+		 * supports indirect descriptors, and how many.
+		 */
+		blkif_recover(info);
 		return;
 
 	default:
@@ -1437,6 +1751,16 @@ static void blkfront_connect(struct blkfront_info *info)
 		return;
 	}
 
+	/*
+	 * physical-sector-size is a newer field, so old backends may not
+	 * provide this. Assume physical sector size to be the same as
+	 * sector_size in that case.
+	 */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "physical-sector-size", "%u", &physical_sector_size);
+	if (err != 1)
+		physical_sector_size = sector_size;
+
 	info->feature_flush = 0;
 	info->flush_op = 0;
 
@@ -1483,7 +1807,15 @@ static void blkfront_connect(struct blkfront_info *info)
 	else
 		info->feature_persistent = persistent;
 
-	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
+	err = blkfront_setup_indirect(info);
+	if (err) {
+		xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+				 info->xbdev->otherend);
+		return;
+	}
+
+	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
+				  physical_sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
 				 info->xbdev->otherend);

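The arithmetic driving the frontend changes above: each indirect grant page holds PAGE_SIZE / sizeof(struct blkif_request_segment_aligned) segment descriptors, INDIRECT_GREFS() is the matching ceiling division, and a request needs one grant per data segment plus one per indirect frame. A standalone sketch, assuming 4 KiB pages and 8-byte descriptors (the x86 values; the sizes are assumptions, not taken from the ABI headers here):

/* Standalone illustration of SEGS_PER_INDIRECT_FRAME/INDIRECT_GREFS.
 * PAGE_SIZE and the 8-byte descriptor size are assumed x86 values. */
#include <stdio.h>

#define PAGE_SIZE 4096
#define SEG_DESC_SIZE 8	/* assumed sizeof(struct blkif_request_segment_aligned) */
#define SEGS_PER_INDIRECT_FRAME (PAGE_SIZE / SEG_DESC_SIZE)
#define INDIRECT_GREFS(segs) \
	(((segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)

int main(void)
{
	unsigned int segs = 32;	/* the frontend's default xen_blkif_max_segments */
	/* one grant per data segment plus one per indirect frame */
	unsigned int max_grefs = segs + INDIRECT_GREFS(segs);

	printf("segs=%u indirect frames=%u total grefs=%u\n",
	       segs, INDIRECT_GREFS(segs), max_grefs);
	/* a request can then carry segs pages of data */
	printf("max request size: %u KiB\n", segs * PAGE_SIZE / 1024);
	return 0;
}

With the default of 32 segments that works out to 33 grants per request and a 128 KiB maximum request, compared with the 11-segment (44 KiB) ceiling of a plain ring request; this is also why blkif_recover() above must split over-sized bios when resuming against a backend that negotiated fewer segments.
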
+ 3 - 1
drivers/cpufreq/cpufreq.c

@@ -1942,13 +1942,15 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 	if (dev) {
 		switch (action) {
 		case CPU_ONLINE:
+		case CPU_ONLINE_FROZEN:
 			cpufreq_add_dev(dev, NULL);
 			break;
 		case CPU_DOWN_PREPARE:
-		case CPU_UP_CANCELED_FROZEN:
+		case CPU_DOWN_PREPARE_FROZEN:
 			__cpufreq_remove_dev(dev, NULL);
 			break;
 		case CPU_DOWN_FAILED:
+		case CPU_DOWN_FAILED_FROZEN:
 			cpufreq_add_dev(dev, NULL);
 			break;
 		}

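The cpufreq.c hunk pairs each hotplug action with its _FROZEN twin so that CPUs taken down and brought back during suspend/resume run the same add/remove paths as runtime hotplug. A hedged sketch of that notifier shape (the callback bodies are placeholders, not cpufreq's):

/* Sketch of a hotplug notifier that handles both runtime and
 * suspend-time (_FROZEN) transitions; handler bodies are placeholders. */
#include <linux/cpu.h>
#include <linux/notifier.h>

static int demo_cpu_callback(struct notifier_block *nfb,
			     unsigned long action, void *hcpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		/* bring per-CPU state up */
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		/* tear per-CPU state down */
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		/* the CPU stays online: restore its state */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block demo_cpu_notifier = {
	.notifier_call = demo_cpu_callback,
};

Missing the _FROZEN variants is a common bug class: devices then come back from hibernation without their sysfs state, which is what the cpufreq_stats.c hunk below also addresses.
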
+ 0 - 3
drivers/cpufreq/cpufreq_governor.c

@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
-#include <linux/cpu.h>
 
 #include "cpufreq_governor.h"
 
@@ -137,10 +136,8 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
 	if (!all_cpus) {
 		__gov_queue_work(smp_processor_id(), dbs_data, delay);
 	} else {
-		get_online_cpus();
 		for_each_cpu(i, policy->cpus)
 			__gov_queue_work(i, dbs_data, delay);
-		put_online_cpus();
 	}
 }
 EXPORT_SYMBOL_GPL(gov_queue_work);

+ 2 - 4
drivers/cpufreq/cpufreq_stats.c

@@ -353,13 +353,11 @@ static int cpufreq_stat_cpu_callback(struct notifier_block *nfb,
 		cpufreq_update_policy(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
 		cpufreq_stats_free_sysfs(cpu);
 		break;
 	case CPU_DEAD:
-		cpufreq_stats_free_table(cpu);
-		break;
-	case CPU_UP_CANCELED_FROZEN:
-		cpufreq_stats_free_sysfs(cpu);
+	case CPU_DEAD_FROZEN:
 		cpufreq_stats_free_table(cpu);
 		break;
 	}

+ 2 - 2
drivers/cpufreq/s3c24xx-cpufreq.c

@@ -49,7 +49,7 @@ static struct clk *clk_hclk;
 static struct clk *clk_pclk;
 static struct clk *clk_arm;
 
-#ifdef CONFIG_CPU_FREQ_S3C24XX_DEBUGFS
+#ifdef CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS
 struct s3c_cpufreq_config *s3c_cpufreq_getconfig(void)
 {
 	return &cpu_cur;
@@ -59,7 +59,7 @@ struct s3c_iotimings *s3c_cpufreq_getiotimings(void)
 {
 	return &s3c24xx_iotiming;
 }
-#endif /* CONFIG_CPU_FREQ_S3C24XX_DEBUGFS */
+#endif /* CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS */
 
 static void s3c_cpufreq_getcur(struct s3c_cpufreq_config *cfg)
 {

+ 1 - 1
drivers/gpio/gpio-msm-v2.c

@@ -378,7 +378,7 @@ static int msm_gpio_probe(struct platform_device *pdev)
 	int ret, ngpio;
 	struct resource *res;
 
-	if (!of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
+	if (of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
 		dev_err(&pdev->dev, "%s: ngpio property missing\n", __func__);
 		return -EINVAL;
 	}

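The gpio-msm-v2.c one-liner fixes an inverted error check: of_property_read_u32() returns 0 on success and a negative errno on failure, so the old !of_property_read_u32(...) condition made probe bail out exactly when the "ngpio" property was present. The convention, in isolation (demo_read_ngpio is a hypothetical helper):

/* of_property_read_u32() returns 0 on success, so the error path
 * must trigger on a non-zero return, never on a negated one. */
#include <linux/of.h>

static int demo_read_ngpio(struct device_node *np)
{
	u32 ngpio;

	if (of_property_read_u32(np, "ngpio", &ngpio))
		return -EINVAL;	/* property missing or malformed */

	return ngpio;	/* success: 0 was returned and ngpio is filled in */
}
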
+ 70 - 14
drivers/gpio/gpio-omap.c

@@ -1037,6 +1037,18 @@ omap_mpuio_alloc_gc(struct gpio_bank *bank, unsigned int irq_start,
 			       IRQ_NOREQUEST | IRQ_NOPROBE, 0);
 }
 
+#if defined(CONFIG_OF_GPIO)
+static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip)
+{
+	return chip->of_node != NULL;
+}
+#else
+static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip)
+{
+	return false;
+}
+#endif
+
 static void omap_gpio_chip_init(struct gpio_bank *bank)
 {
 	int j;
@@ -1068,24 +1080,68 @@ static void omap_gpio_chip_init(struct gpio_bank *bank)
 
 	gpiochip_add(&bank->chip);
 
-	for (j = 0; j < bank->width; j++) {
-		int irq = irq_create_mapping(bank->domain, j);
-		irq_set_lockdep_class(irq, &gpio_lock_class);
-		irq_set_chip_data(irq, bank);
-		if (bank->is_mpuio) {
-			omap_mpuio_alloc_gc(bank, irq, bank->width);
-		} else {
-			irq_set_chip_and_handler(irq, &gpio_irq_chip,
-						 handle_simple_irq);
-			set_irq_flags(irq, IRQF_VALID);
-		}
-	}
+	/*
+	 * REVISIT these explicit calls to irq_create_mapping()
+	 * to do the GPIO to IRQ domain mapping for each GPIO in
+	 * the bank can be removed once all OMAP platforms have
+	 * been migrated to Device Tree boot only.
+	 * Since in DT boot irq_create_mapping() is called from
+	 * irq_create_of_mapping() only for the GPIO lines that
+	 * are used as interrupts.
+	 */
+	if (!omap_gpio_chip_boot_dt(&bank->chip))
+		for (j = 0; j < bank->width; j++)
+			irq_create_mapping(bank->domain, j);
 	irq_set_chained_handler(bank->irq, gpio_irq_handler);
 	irq_set_handler_data(bank->irq, bank);
 }
 
 static const struct of_device_id omap_gpio_match[];
 
+static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq,
+			     irq_hw_number_t hwirq)
+{
+	struct gpio_bank *bank = d->host_data;
+	int gpio;
+	int ret;
+
+	if (!bank)
+		return -EINVAL;
+
+	irq_set_lockdep_class(virq, &gpio_lock_class);
+	irq_set_chip_data(virq, bank);
+	if (bank->is_mpuio) {
+		omap_mpuio_alloc_gc(bank, virq, bank->width);
+	} else {
+		irq_set_chip_and_handler(virq, &gpio_irq_chip,
+					 handle_simple_irq);
+		set_irq_flags(virq, IRQF_VALID);
+	}
+
+	/*
+	 * REVISIT most GPIO IRQ chip drivers need to call
+	 * gpio_request() before a GPIO line can be used as an
+	 * IRQ. Ideally this should be handled by the IRQ core
+	 * but until then this has to be done on a per driver
+	 * basis. Remove this once this is managed by the core.
+	 */
+	if (omap_gpio_chip_boot_dt(&bank->chip)) {
+		gpio = irq_to_gpio(bank, hwirq);
+		ret = gpio_request_one(gpio, GPIOF_IN, NULL);
+		if (ret) {
+			dev_err(bank->dev, "Could not request GPIO%d\n", gpio);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static struct irq_domain_ops omap_gpio_irq_ops = {
+	.xlate  = irq_domain_xlate_onetwocell,
+	.map    = omap_gpio_irq_map,
+};
+
 static int omap_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -1151,10 +1207,10 @@ static int omap_gpio_probe(struct platform_device *pdev)
 	}
 
 	bank->domain = irq_domain_add_legacy(node, bank->width, irq_base,
-					     0, &irq_domain_simple_ops, NULL);
+					     0, &omap_gpio_irq_ops, bank);
 #else
 	bank->domain = irq_domain_add_linear(node, bank->width,
-					     &irq_domain_simple_ops, NULL);
+					     &omap_gpio_irq_ops, bank);
 #endif
 	if (!bank->domain) {
 		dev_err(dev, "Couldn't register an IRQ domain\n");

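The gpio-omap.c rework moves per-GPIO interrupt setup out of probe and into a custom irq_domain .map hook, so it runs lazily, once per line that is actually mapped (for DT boots, only the lines used as interrupts). A condensed sketch of that shape; the bank type, chip and names are placeholders, not the OMAP driver's:

/* Condensed sketch of an irq_domain with a custom .map callback.
 * struct demo_bank and the chip/handler names are placeholders. */
#include <linux/irq.h>
#include <linux/irqdomain.h>

struct demo_bank {
	int width;
};

static struct irq_chip demo_irq_chip;

static int demo_irq_map(struct irq_domain *d, unsigned int virq,
			irq_hw_number_t hwirq)
{
	struct demo_bank *bank = d->host_data;	/* passed at registration */

	if (!bank)
		return -EINVAL;
	irq_set_chip_data(virq, bank);
	irq_set_chip_and_handler(virq, &demo_irq_chip, handle_simple_irq);
	return 0;
}

static const struct irq_domain_ops demo_irq_ops = {
	.map   = demo_irq_map,
	.xlate = irq_domain_xlate_onetwocell,
};

/* registration, e.g. in probe():
 *	domain = irq_domain_add_linear(np, bank->width, &demo_irq_ops, bank);
 */

Passing the bank as host_data is what lets the .map hook reach driver state without globals; the omap patch uses the same trick to call gpio_request_one() per mapped line until the IRQ core handles that itself.
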
+ 11 - 16
drivers/gpu/drm/drm_crtc_helper.c

@@ -677,6 +677,11 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 					/* don't break so fail path works correct */
 					fail = 1;
 				break;
+
+				if (connector->dpms != DRM_MODE_DPMS_ON) {
+					DRM_DEBUG_KMS("connector dpms not on, full mode switch\n");
+					mode_changed = true;
+				}
 			}
 		}
 
@@ -754,6 +759,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 				ret = -EINVAL;
 				goto fail;
 			}
+			DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
+			for (i = 0; i < set->num_connectors; i++) {
+				DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
+					      drm_get_connector_name(set->connectors[i]));
+				set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
+			}
 		}
 		drm_helper_disable_unused_functions(dev);
 	} else if (fb_changed) {
@@ -771,22 +782,6 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		}
 	}
 
-	/*
-	 * crtc set_config helpers implicit set the crtc and all connected
-	 * encoders to DPMS on for a full mode set. But for just an fb update it
-	 * doesn't do that. To not confuse userspace, do an explicit DPMS_ON
-	 * unconditionally. This will also ensure driver internal dpms state is
-	 * consistent again.
-	 */
-	if (set->crtc->enabled) {
-		DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
-		for (i = 0; i < set->num_connectors; i++) {
-			DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
-				      drm_get_connector_name(set->connectors[i]));
-			set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
-		}
-	}
-
 	kfree(save_connectors);
 	kfree(save_encoders);
 	kfree(save_crtcs);

+ 1 - 0
drivers/gpu/drm/i915/Makefile

@@ -38,6 +38,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  intel_sprite.o \
 	  intel_opregion.o \
 	  intel_sideband.o \
+	  intel_uncore.o \
 	  dvo_ch7xxx.o \
 	  dvo_ch7017.o \
 	  dvo_ivch.o \

+ 1 - 1
drivers/gpu/drm/i915/dvo_ch7xxx.c

@@ -307,7 +307,7 @@ static void ch7xxx_mode_set(struct intel_dvo_device *dvo,
 		idf |= CH7xxx_IDF_HSP;
 
 	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
-		idf |= CH7xxx_IDF_HSP;
+		idf |= CH7xxx_IDF_VSP;
 
 	ch7xxx_writeb(dvo, CH7xxx_IDF, idf);
 }

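The dvo_ch7xxx.c fix is a classic copy-paste bug: both polarity checks set the HSP bit, so a positive-vsync mode never reached the VSP bit of the IDF register. The intended mapping in isolation (the bit values below are illustrative; only the names mirror the driver's):

/* Isolated sketch of the corrected flag-to-bit mapping;
 * bit positions are illustrative assumptions. */
#define DRM_MODE_FLAG_PHSYNC (1 << 0)
#define DRM_MODE_FLAG_PVSYNC (1 << 2)
#define CH7xxx_IDF_HSP (1 << 3)
#define CH7xxx_IDF_VSP (1 << 4)

static unsigned char ch7xxx_idf_bits(unsigned int mode_flags)
{
	unsigned char idf = 0;

	if (mode_flags & DRM_MODE_FLAG_PHSYNC)
		idf |= CH7xxx_IDF_HSP;
	if (mode_flags & DRM_MODE_FLAG_PVSYNC)
		idf |= CH7xxx_IDF_VSP;	/* was CH7xxx_IDF_HSP before the fix */

	return idf;
}
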
+ 163 - 15
drivers/gpu/drm/i915/i915_debugfs.c

@@ -135,7 +135,8 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 	uintptr_t list = (uintptr_t) node->info_ent->data;
 	struct list_head *head;
 	struct drm_device *dev = node->minor->dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	size_t total_obj_size, total_gtt_size;
 	int count, ret;
@@ -147,11 +148,11 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 	switch (list) {
 	case ACTIVE_LIST:
 		seq_puts(m, "Active:\n");
-		head = &dev_priv->mm.active_list;
+		head = &vm->active_list;
 		break;
 	case INACTIVE_LIST:
 		seq_puts(m, "Inactive:\n");
-		head = &dev_priv->mm.inactive_list;
+		head = &vm->inactive_list;
 		break;
 	default:
 		mutex_unlock(&dev->struct_mutex);
@@ -219,6 +220,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	u32 count, mappable_count, purgeable_count;
 	size_t size, mappable_size, purgeable_size;
 	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_file *file;
 	int ret;
 
@@ -236,12 +238,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		   count, mappable_count, size, mappable_size);
 
 	size = count = mappable_size = mappable_count = 0;
-	count_objects(&dev_priv->mm.active_list, mm_list);
+	count_objects(&vm->active_list, mm_list);
 	seq_printf(m, "  %u [%u] active objects, %zu [%zu] bytes\n",
 	seq_printf(m, "  %u [%u] active objects, %zu [%zu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 		   count, mappable_count, size, mappable_size);
 
 
 	size = count = mappable_size = mappable_count = 0;
 	size = count = mappable_size = mappable_count = 0;
-	count_objects(&dev_priv->mm.inactive_list, mm_list);
+	count_objects(&vm->inactive_list, mm_list);
 	seq_printf(m, "  %u [%u] inactive objects, %zu [%zu] bytes\n",
 	seq_printf(m, "  %u [%u] inactive objects, %zu [%zu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 		   count, mappable_count, size, mappable_size);
 
 
@@ -276,8 +278,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		   count, size);
 
 	seq_printf(m, "%zu [%lu] gtt total\n",
-		   dev_priv->gtt.total,
-		   dev_priv->gtt.mappable_end - dev_priv->gtt.start);
+		   dev_priv->gtt.base.total,
+		   dev_priv->gtt.mappable_end - dev_priv->gtt.base.start);
 
 	seq_putc(m, '\n');
 	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
@@ -987,9 +989,9 @@ static int gen6_drpc_info(struct seq_file *m)
 	if (ret)
 		return ret;
 
-	spin_lock_irq(&dev_priv->gt_lock);
-	forcewake_count = dev_priv->forcewake_count;
-	spin_unlock_irq(&dev_priv->gt_lock);
+	spin_lock_irq(&dev_priv->uncore.lock);
+	forcewake_count = dev_priv->uncore.forcewake_count;
+	spin_unlock_irq(&dev_priv->uncore.lock);
 
 	if (forcewake_count) {
 		seq_puts(m, "RC information inaccurate because somebody "
@@ -1002,7 +1004,7 @@ static int gen6_drpc_info(struct seq_file *m)
 	}
 
 	gt_core_status = readl(dev_priv->regs + GEN6_GT_CORE_STATUS);
-	trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4);
+	trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true);
 
 	rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
 	rcctl1 = I915_READ(GEN6_RC_CONTROL);
@@ -1373,9 +1375,9 @@ static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned forcewake_count;
 
-	spin_lock_irq(&dev_priv->gt_lock);
-	forcewake_count = dev_priv->forcewake_count;
-	spin_unlock_irq(&dev_priv->gt_lock);
+	spin_lock_irq(&dev_priv->uncore.lock);
+	forcewake_count = dev_priv->uncore.forcewake_count;
+	spin_unlock_irq(&dev_priv->uncore.lock);
 
 	seq_printf(m, "forcewake count = %u\n", forcewake_count);
 
@@ -1530,6 +1532,148 @@ static int i915_dpio_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_llc(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* Size calculation for LLC is a bit of a pain. Ignore for now. */
+	seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(dev)));
+	seq_printf(m, "eLLC: %zuMB\n", dev_priv->ellc_size);
+
+	return 0;
+}
+
+static int i915_edp_psr_status(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 psrstat, psrperf;
+
+	if (!IS_HASWELL(dev)) {
+		seq_puts(m, "PSR not supported on this platform\n");
+	} else if (IS_HASWELL(dev) && I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE) {
+		seq_puts(m, "PSR enabled\n");
+	} else {
+		seq_puts(m, "PSR disabled: ");
+		switch (dev_priv->no_psr_reason) {
+		case PSR_NO_SOURCE:
+			seq_puts(m, "not supported on this platform");
+			break;
+		case PSR_NO_SINK:
+			seq_puts(m, "not supported by panel");
+			break;
+		case PSR_MODULE_PARAM:
+			seq_puts(m, "disabled by flag");
+			break;
+		case PSR_CRTC_NOT_ACTIVE:
+			seq_puts(m, "crtc not active");
+			break;
+		case PSR_PWR_WELL_ENABLED:
+			seq_puts(m, "power well enabled");
+			break;
+		case PSR_NOT_TILED:
+			seq_puts(m, "not tiled");
+			break;
+		case PSR_SPRITE_ENABLED:
+			seq_puts(m, "sprite enabled");
+			break;
+		case PSR_S3D_ENABLED:
+			seq_puts(m, "stereo 3d enabled");
+			break;
+		case PSR_INTERLACED_ENABLED:
+			seq_puts(m, "interlaced enabled");
+			break;
+		case PSR_HSW_NOT_DDIA:
+			seq_puts(m, "HSW ties PSR to DDI A (eDP)");
+			break;
+		default:
+			seq_puts(m, "unknown reason");
+		}
+		seq_puts(m, "\n");
+		return 0;
+	}
+
+	psrstat = I915_READ(EDP_PSR_STATUS_CTL);
+
+	seq_puts(m, "PSR Current State: ");
+	switch (psrstat & EDP_PSR_STATUS_STATE_MASK) {
+	case EDP_PSR_STATUS_STATE_IDLE:
+		seq_puts(m, "Reset state\n");
+		break;
+	case EDP_PSR_STATUS_STATE_SRDONACK:
+		seq_puts(m, "Wait for TG/Stream to send on frame of data after SRD conditions are met\n");
+		break;
+	case EDP_PSR_STATUS_STATE_SRDENT:
+		seq_puts(m, "SRD entry\n");
+		break;
+	case EDP_PSR_STATUS_STATE_BUFOFF:
+		seq_puts(m, "Wait for buffer turn off\n");
+		break;
+	case EDP_PSR_STATUS_STATE_BUFON:
+		seq_puts(m, "Wait for buffer turn on\n");
+		break;
+	case EDP_PSR_STATUS_STATE_AUXACK:
+		seq_puts(m, "Wait for AUX to acknowledge on SRD exit\n");
+		break;
+	case EDP_PSR_STATUS_STATE_SRDOFFACK:
+		seq_puts(m, "Wait for TG/Stream to acknowledge the SRD VDM exit\n");
+		break;
+	default:
+		seq_puts(m, "Unknown\n");
+		break;
+	}
+
+	seq_puts(m, "Link Status: ");
+	switch (psrstat & EDP_PSR_STATUS_LINK_MASK) {
+	case EDP_PSR_STATUS_LINK_FULL_OFF:
+		seq_puts(m, "Link is fully off\n");
+		break;
+	case EDP_PSR_STATUS_LINK_FULL_ON:
+		seq_puts(m, "Link is fully on\n");
+		break;
+	case EDP_PSR_STATUS_LINK_STANDBY:
+		seq_puts(m, "Link is in standby\n");
+		break;
+	default:
+		seq_puts(m, "Unknown\n");
+		break;
+	}
+
+	seq_printf(m, "PSR Entry Count: %u\n",
+		   psrstat >> EDP_PSR_STATUS_COUNT_SHIFT &
+		   EDP_PSR_STATUS_COUNT_MASK);
+
+	seq_printf(m, "Max Sleep Timer Counter: %u\n",
+		   psrstat >> EDP_PSR_STATUS_MAX_SLEEP_TIMER_SHIFT &
+		   EDP_PSR_STATUS_MAX_SLEEP_TIMER_MASK);
+
+	seq_printf(m, "Had AUX error: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_AUX_ERROR));
+
+	seq_printf(m, "Sending AUX: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_AUX_SENDING));
+
+	seq_printf(m, "Sending Idle: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_SENDING_IDLE));
+
+	seq_printf(m, "Sending TP2 TP3: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_SENDING_TP2_TP3));
+
+	seq_printf(m, "Sending TP1: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_SENDING_TP1));
+
+	seq_printf(m, "Idle Count: %u\n",
+		   psrstat & EDP_PSR_STATUS_IDLE_MASK);
+
+	psrperf = (I915_READ(EDP_PSR_PERF_CNT)) & EDP_PSR_PERF_CNT_MASK;
+	seq_printf(m, "Performance Counter: %u\n", psrperf);
+
+	return 0;
+}
+
 static int
 i915_wedged_get(void *data, u64 *val)
 {
@@ -1612,6 +1756,7 @@ i915_drop_caches_set(void *data, u64 val)
 	struct drm_device *dev = data;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj, *next;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	int ret;
 
 	DRM_DEBUG_DRIVER("Dropping caches: 0x%08llx\n", val);
@@ -1632,7 +1777,8 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_gem_retire_requests(dev);
 
 	if (val & DROP_BOUND) {
-		list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, mm_list)
+		list_for_each_entry_safe(obj, next, &vm->inactive_list,
+					 mm_list)
 			if (obj->pin_count == 0) {
 				ret = i915_gem_object_unbind(obj);
 				if (ret)
@@ -1959,6 +2105,8 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_swizzle_info", i915_swizzle_info, 0},
 	{"i915_swizzle_info", i915_swizzle_info, 0},
 	{"i915_ppgtt_info", i915_ppgtt_info, 0},
 	{"i915_ppgtt_info", i915_ppgtt_info, 0},
 	{"i915_dpio", i915_dpio_info, 0},
 	{"i915_dpio", i915_dpio_info, 0},
+	{"i915_llc", i915_llc, 0},
+	{"i915_edp_psr_status", i915_edp_psr_status, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 

+ 34 - 31
drivers/gpu/drm/i915/i915_dma.c

@@ -1358,7 +1358,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 	i915_gem_cleanup_aliasing_ppgtt(dev);
-	drm_mm_takedown(&dev_priv->mm.gtt_space);
+	drm_mm_takedown(&dev_priv->gtt.base.mm);
 cleanup_irq:
 	drm_irq_uninstall(dev);
 cleanup_gem_stolen:
@@ -1435,22 +1435,6 @@ static void i915_dump_device_info(struct drm_i915_private *dev_priv)
 #undef SEP_COMMA
 }
 
-/**
- * intel_early_sanitize_regs - clean up BIOS state
- * @dev: DRM device
- *
- * This function must be called before we do any I915_READ or I915_WRITE. Its
- * purpose is to clean up any state left by the BIOS that may affect us when
- * reading and/or writing registers.
- */
-static void intel_early_sanitize_regs(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (HAS_FPGA_DBG_UNCLAIMED(dev))
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-}
-
 /**
  * i915_driver_load - setup chip and create an initial config
  * @dev: DRM device
@@ -1490,8 +1474,21 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	dev_priv->dev = dev;
 	dev_priv->info = info;
 
+	spin_lock_init(&dev_priv->irq_lock);
+	spin_lock_init(&dev_priv->gpu_error.lock);
+	spin_lock_init(&dev_priv->backlight.lock);
+	spin_lock_init(&dev_priv->uncore.lock);
+	spin_lock_init(&dev_priv->mm.object_stat_lock);
+	mutex_init(&dev_priv->dpio_lock);
+	mutex_init(&dev_priv->rps.hw_lock);
+	mutex_init(&dev_priv->modeset_restore_lock);
+
 	i915_dump_device_info(dev_priv);
 
+	INIT_LIST_HEAD(&dev_priv->vm_list);
+	INIT_LIST_HEAD(&dev_priv->gtt.base.global_link);
+	list_add(&dev_priv->gtt.base.global_link, &dev_priv->vm_list);
+
 	if (i915_get_bridge_dev(dev)) {
 		ret = -EIO;
 		goto free_priv;
@@ -1517,7 +1514,17 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		goto put_bridge;
 	}
 
-	intel_early_sanitize_regs(dev);
+	intel_uncore_early_sanitize(dev);
+
+	if (IS_HASWELL(dev) && (I915_READ(HSW_EDRAM_PRESENT) == 1)) {
+		/* The docs do not explain exactly how the calculation can be
+		 * made. It is somewhat guessable, but for now, it's always
+		 * 128MB.
+		 * NB: We can't write IDICR yet because we do not have gt funcs
+		 * set up */
+		dev_priv->ellc_size = 128;
+		DRM_INFO("Found %zuMB of eLLC\n", dev_priv->ellc_size);
+	}
 
 	ret = i915_gem_gtt_init(dev);
 	if (ret)
@@ -1580,7 +1587,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	intel_detect_pch(dev);
 
 	intel_irq_init(dev);
-	intel_gt_init(dev);
+	intel_pm_init(dev);
+	intel_uncore_sanitize(dev);
+	intel_uncore_init(dev);
 
 	/* Try to make sure MCHBAR is enabled before poking at it */
 	intel_setup_mchbar(dev);
@@ -1605,14 +1614,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (!IS_I945G(dev) && !IS_I945GM(dev))
 		pci_enable_msi(dev->pdev);
 
-	spin_lock_init(&dev_priv->irq_lock);
-	spin_lock_init(&dev_priv->gpu_error.lock);
-	spin_lock_init(&dev_priv->backlight.lock);
-	mutex_init(&dev_priv->dpio_lock);
-
-	mutex_init(&dev_priv->rps.hw_lock);
-	mutex_init(&dev_priv->modeset_restore_lock);
-
 	dev_priv->num_plane = 1;
 	if (IS_VALLEYVIEW(dev))
 		dev_priv->num_plane = 2;
@@ -1642,7 +1643,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (INTEL_INFO(dev)->num_pipes) {
 	if (INTEL_INFO(dev)->num_pipes) {
 		/* Must be done after probing outputs */
 		/* Must be done after probing outputs */
 		intel_opregion_init(dev);
 		intel_opregion_init(dev);
-		acpi_video_register();
+		acpi_video_register_with_quirks();
 	}
 	}
 
 
 	if (IS_GEN5(dev))
 	if (IS_GEN5(dev))
@@ -1663,7 +1664,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 out_mtrrfree:
 out_mtrrfree:
 	arch_phys_wc_del(dev_priv->gtt.mtrr);
 	arch_phys_wc_del(dev_priv->gtt.mtrr);
 	io_mapping_free(dev_priv->gtt.mappable);
 	io_mapping_free(dev_priv->gtt.mappable);
-	dev_priv->gtt.gtt_remove(dev);
+	dev_priv->gtt.base.cleanup(&dev_priv->gtt.base);
 out_rmmap:
 out_rmmap:
 	pci_iounmap(dev->pdev, dev_priv->regs);
 	pci_iounmap(dev->pdev, dev_priv->regs);
 put_bridge:
 put_bridge:
@@ -1748,7 +1749,9 @@ int i915_driver_unload(struct drm_device *dev)
 			i915_free_hws(dev);
 			i915_free_hws(dev);
 	}
 	}
 
 
-	drm_mm_takedown(&dev_priv->mm.gtt_space);
+	list_del(&dev_priv->gtt.base.global_link);
+	WARN_ON(!list_empty(&dev_priv->vm_list));
+	drm_mm_takedown(&dev_priv->gtt.base.mm);
 	if (dev_priv->regs != NULL)
 	if (dev_priv->regs != NULL)
 		pci_iounmap(dev->pdev, dev_priv->regs);
 		pci_iounmap(dev->pdev, dev_priv->regs);
 
 
@@ -1758,7 +1761,7 @@ int i915_driver_unload(struct drm_device *dev)
 	destroy_workqueue(dev_priv->wq);
 	destroy_workqueue(dev_priv->wq);
 	pm_qos_remove_request(&dev_priv->pm_qos);
 	pm_qos_remove_request(&dev_priv->pm_qos);
 
 
-	dev_priv->gtt.gtt_remove(dev);
+	dev_priv->gtt.base.cleanup(&dev_priv->gtt.base);
 
 
 	if (dev_priv->slab)
 	if (dev_priv->slab)
 		kmem_cache_destroy(dev_priv->slab);
 		kmem_cache_destroy(dev_priv->slab);

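A note on the reordering visible above: i915_driver_load() now initializes every spinlock and mutex up front, before the first register access or interrupt handler can possibly take one of them, instead of halfway through load as before. A minimal userspace sketch of the same init-before-concurrency discipline, with pthreads standing in for kernel spinlocks (all names here are invented for the example, not taken from the driver):

#include <pthread.h>
#include <stdio.h>

struct device_state {
	pthread_mutex_t stat_lock;	/* plays the role of mm.object_stat_lock */
	int object_count;
};

/* Workers may touch the lock as soon as they exist, so the lock must be
 * initialized before pthread_create() is ever called. */
static void *worker(void *arg)
{
	struct device_state *dev = arg;

	pthread_mutex_lock(&dev->stat_lock);
	dev->object_count++;
	pthread_mutex_unlock(&dev->stat_lock);
	return NULL;
}

int main(void)
{
	struct device_state dev = { .object_count = 0 };
	pthread_t threads[4];

	/* Initialize every lock first, exactly once ... */
	pthread_mutex_init(&dev.stat_lock, NULL);

	/* ... and only then start anything that can run concurrently. */
	for (int i = 0; i < 4; i++)
		pthread_create(&threads[i], NULL, worker, &dev);
	for (int i = 0; i < 4; i++)
		pthread_join(threads[i], NULL);

	printf("object_count = %d\n", dev.object_count);
	pthread_mutex_destroy(&dev.stat_lock);
	return 0;
}
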
+ 11 - 266
drivers/gpu/drm/i915/i915_drv.c

@@ -118,6 +118,10 @@ module_param_named(i915_enable_ppgtt, i915_enable_ppgtt, int, 0600);
 MODULE_PARM_DESC(i915_enable_ppgtt,
 		"Enable PPGTT (default: true)");
 
+int i915_enable_psr __read_mostly = 0;
+module_param_named(enable_psr, i915_enable_psr, int, 0600);
+MODULE_PARM_DESC(enable_psr, "Enable PSR (default: false)");
+
 unsigned int i915_preliminary_hw_support __read_mostly = 0;
 module_param_named(preliminary_hw_support, i915_preliminary_hw_support, int, 0600);
 MODULE_PARM_DESC(preliminary_hw_support,
@@ -137,6 +141,11 @@ module_param_named(fastboot, i915_fastboot, bool, 0600);
 MODULE_PARM_DESC(fastboot, "Try to skip unnecessary mode sets at boot time "
 		 "(default: false)");
 
+bool i915_prefault_disable __read_mostly;
+module_param_named(prefault_disable, i915_prefault_disable, bool, 0600);
+MODULE_PARM_DESC(prefault_disable,
+		"Disable page prefaulting for pread/pwrite/reloc (default:false). For developers only.");
+
 static struct drm_driver driver;
 extern int intel_agp_enabled;
 
@@ -714,7 +723,7 @@ static int i915_drm_thaw(struct drm_device *dev)
 {
 	int error = 0;
 
-	intel_gt_reset(dev);
+	intel_uncore_sanitize(dev);
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		mutex_lock(&dev->struct_mutex);
@@ -740,7 +749,7 @@ int i915_resume(struct drm_device *dev)
 
 	pci_set_master(dev->pdev);
 
-	intel_gt_reset(dev);
+	intel_uncore_sanitize(dev);
 
 	/*
 	 * Platforms with opregion should have sane BIOS, older ones (gen3 and
@@ -761,140 +770,6 @@ int i915_resume(struct drm_device *dev)
 	return 0;
 }
 
-static int i8xx_do_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (IS_I85X(dev))
-		return -ENODEV;
-
-	I915_WRITE(D_STATE, I915_READ(D_STATE) | DSTATE_GFX_RESET_I830);
-	POSTING_READ(D_STATE);
-
-	if (IS_I830(dev) || IS_845G(dev)) {
-		I915_WRITE(DEBUG_RESET_I830,
-			   DEBUG_RESET_DISPLAY |
-			   DEBUG_RESET_RENDER |
-			   DEBUG_RESET_FULL);
-		POSTING_READ(DEBUG_RESET_I830);
-		msleep(1);
-
-		I915_WRITE(DEBUG_RESET_I830, 0);
-		POSTING_READ(DEBUG_RESET_I830);
-	}
-
-	msleep(1);
-
-	I915_WRITE(D_STATE, I915_READ(D_STATE) & ~DSTATE_GFX_RESET_I830);
-	POSTING_READ(D_STATE);
-
-	return 0;
-}
-
-static int i965_reset_complete(struct drm_device *dev)
-{
-	u8 gdrst;
-	pci_read_config_byte(dev->pdev, I965_GDRST, &gdrst);
-	return (gdrst & GRDOM_RESET_ENABLE) == 0;
-}
-
-static int i965_do_reset(struct drm_device *dev)
-{
-	int ret;
-
-	/*
-	 * Set the domains we want to reset (GRDOM/bits 2 and 3) as
-	 * well as the reset bit (GR/bit 0).  Setting the GR bit
-	 * triggers the reset; when done, the hardware will clear it.
-	 */
-	pci_write_config_byte(dev->pdev, I965_GDRST,
-			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret =  wait_for(i965_reset_complete(dev), 500);
-	if (ret)
-		return ret;
-
-	/* We can't reset render&media without also resetting display ... */
-	pci_write_config_byte(dev->pdev, I965_GDRST,
-			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-
-	ret =  wait_for(i965_reset_complete(dev), 500);
-	if (ret)
-		return ret;
-
-	pci_write_config_byte(dev->pdev, I965_GDRST, 0);
-
-	return 0;
-}
-
-static int ironlake_do_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 gdrst;
-	int ret;
-
-	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
-	gdrst &= ~GRDOM_MASK;
-	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
-		   gdrst | GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret = wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
-	if (ret)
-		return ret;
-
-	/* We can't reset render&media without also resetting display ... */
-	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
-	gdrst &= ~GRDOM_MASK;
-	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
-		   gdrst | GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-	return wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
-}
-
-static int gen6_do_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int	ret;
-	unsigned long irqflags;
-
-	/* Hold gt_lock across reset to prevent any register access
-	 * with forcewake not set correctly
-	 */
-	spin_lock_irqsave(&dev_priv->gt_lock, irqflags);
-
-	/* Reset the chip */
-
-	/* GEN6_GDRST is not in the gt power well, no need to check
-	 * for fifo space for the write or forcewake the chip for
-	 * the read
-	 */
-	I915_WRITE_NOTRACE(GEN6_GDRST, GEN6_GRDOM_FULL);
-
-	/* Spin waiting for the device to ack the reset request */
-	ret = wait_for((I915_READ_NOTRACE(GEN6_GDRST) & GEN6_GRDOM_FULL) == 0, 500);
-
-	/* If reset with a user forcewake, try to restore, otherwise turn it off */
-	if (dev_priv->forcewake_count)
-		dev_priv->gt.force_wake_get(dev_priv);
-	else
-		dev_priv->gt.force_wake_put(dev_priv);
-
-	/* Restore fifo count */
-	dev_priv->gt_fifo_count = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
-
-	spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags);
-	return ret;
-}
-
-int intel_gpu_reset(struct drm_device *dev)
-{
-	switch (INTEL_INFO(dev)->gen) {
-	case 7:
-	case 6: return gen6_do_reset(dev);
-	case 5: return ironlake_do_reset(dev);
-	case 4: return i965_do_reset(dev);
-	case 2: return i8xx_do_reset(dev);
-	default: return -ENODEV;
-	}
-}
-
 /**
  * i915_reset - reset chip after a hang
  * @dev: drm device to reset
@@ -1224,133 +1099,3 @@ module_exit(i915_exit);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
-
-/* We give fast paths for the really cool registers */
-#define NEEDS_FORCE_WAKE(dev_priv, reg) \
-	((HAS_FORCE_WAKE((dev_priv)->dev)) && \
-	 ((reg) < 0x40000) &&            \
-	 ((reg) != FORCEWAKE))
-static void
-ilk_dummy_write(struct drm_i915_private *dev_priv)
-{
-	/* WaIssueDummyWriteToWakeupFromRC6:ilk Issue a dummy write to wake up
-	 * the chip from rc6 before touching it for real. MI_MODE is masked,
-	 * hence harmless to write 0 into. */
-	I915_WRITE_NOTRACE(MI_MODE, 0);
-}
-
-static void
-hsw_unclaimed_reg_clear(struct drm_i915_private *dev_priv, u32 reg)
-{
-	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) &&
-	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unknown unclaimed register before writing to %x\n",
-			  reg);
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-	}
-}
-
-static void
-hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg)
-{
-	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) &&
-	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unclaimed write to %x\n", reg);
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-	}
-}
-
-#define __i915_read(x, y) \
-u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = 0; \
-	if (IS_GEN5(dev_priv->dev)) \
-		ilk_dummy_write(dev_priv); \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		unsigned long irqflags; \
-		spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
-		if (dev_priv->forcewake_count == 0) \
-			dev_priv->gt.force_wake_get(dev_priv); \
-		val = read##y(dev_priv->regs + reg); \
-		if (dev_priv->forcewake_count == 0) \
-			dev_priv->gt.force_wake_put(dev_priv); \
-		spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \
-	} else { \
-		val = read##y(dev_priv->regs + reg); \
-	} \
-	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
-	return val; \
-}
-
-__i915_read(8, b)
-__i915_read(16, w)
-__i915_read(32, l)
-__i915_read(64, q)
-#undef __i915_read
-
-#define __i915_write(x, y) \
-void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
-	u32 __fifo_ret = 0; \
-	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
-	} \
-	if (IS_GEN5(dev_priv->dev)) \
-		ilk_dummy_write(dev_priv); \
-	hsw_unclaimed_reg_clear(dev_priv, reg); \
-	write##y(val, dev_priv->regs + reg); \
-	if (unlikely(__fifo_ret)) { \
-		gen6_gt_check_fifodbg(dev_priv); \
-	} \
-	hsw_unclaimed_reg_check(dev_priv, reg); \
-}
-__i915_write(8, b)
-__i915_write(16, w)
-__i915_write(32, l)
-__i915_write(64, q)
-#undef __i915_write
-
-static const struct register_whitelist {
-	uint64_t offset;
-	uint32_t size;
-	uint32_t gen_bitmask; /* support gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
-} whitelist[] = {
-	{ RING_TIMESTAMP(RENDER_RING_BASE), 8, 0xF0 },
-};
-
-int i915_reg_read_ioctl(struct drm_device *dev,
-			void *data, struct drm_file *file)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_reg_read *reg = data;
-	struct register_whitelist const *entry = whitelist;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
-		if (entry->offset == reg->offset &&
-		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
-			break;
-	}
-
-	if (i == ARRAY_SIZE(whitelist))
-		return -EINVAL;
-
-	switch (entry->size) {
-	case 8:
-		reg->val = I915_READ64(reg->offset);
-		break;
-	case 4:
-		reg->val = I915_READ(reg->offset);
-		break;
-	case 2:
-		reg->val = I915_READ16(reg->offset);
-		break;
-	case 1:
-		reg->val = I915_READ8(reg->offset);
-		break;
-	default:
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	return 0;
-}

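The large removal above (the i8xx/i965/ilk/gen6 reset handlers, the forcewake-guarded __i915_read/__i915_write accessors, and the reg_read ioctl) is a move, not a deletion: per the commit list these land in the new intel_uncore.c. The core of the removed read path is the forcewake bracket around GT registers. A standalone sketch of that shape, under the assumption that a plain mutex models the uncore lock and the register value is faked; none of this is the driver's actual API:

#include <pthread.h>
#include <stdio.h>

struct uncore {
	pthread_mutex_t lock;		/* stand-in for the new uncore.lock */
	unsigned int forcewake_count;	/* held by explicit user forcewake */
};

static void force_wake_get(struct uncore *u) { printf("GT woken\n"); }
static void force_wake_put(struct uncore *u) { printf("GT may sleep\n"); }

/* Same shape as the removed __i915_read body: if nobody already holds
 * forcewake, wake the GT just for the duration of the read, then release
 * it; an existing holder keeps the GT awake throughout. */
static unsigned int read_gt_reg(struct uncore *u, unsigned int reg)
{
	unsigned int val;

	pthread_mutex_lock(&u->lock);
	if (u->forcewake_count == 0)
		force_wake_get(u);
	val = reg ^ 0xdeadbeef;		/* placeholder for readl() */
	if (u->forcewake_count == 0)
		force_wake_put(u);
	pthread_mutex_unlock(&u->lock);
	return val;
}

int main(void)
{
	struct uncore u = { .forcewake_count = 0 };

	pthread_mutex_init(&u.lock, NULL);
	printf("val = 0x%x\n", read_gt_reg(&u, 0x2358));
	pthread_mutex_destroy(&u.lock);
	return 0;
}
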
+ 158 - 103
drivers/gpu/drm/i915/i915_drv.h

@@ -391,11 +391,20 @@ struct drm_i915_display_funcs {
 	/* pll clock increase/decrease */
 };
 
-struct drm_i915_gt_funcs {
+struct intel_uncore_funcs {
 	void (*force_wake_get)(struct drm_i915_private *dev_priv);
 	void (*force_wake_put)(struct drm_i915_private *dev_priv);
 };
 
+struct intel_uncore {
+	spinlock_t lock; /** lock is also taken in irq contexts. */
+
+	struct intel_uncore_funcs funcs;
+
+	unsigned fifo_count;
+	unsigned forcewake_count;
+};
+
 #define DEV_INFO_FOR_EACH_FLAG(func, sep) \
 	func(is_mobile) sep \
 	func(is_i85x) sep \
@@ -446,6 +455,54 @@ enum i915_cache_level {
 
 typedef uint32_t gen6_gtt_pte_t;
 
+struct i915_address_space {
+	struct drm_mm mm;
+	struct drm_device *dev;
+	struct list_head global_link;
+	unsigned long start;		/* Start offset always 0 for dri2 */
+	size_t total;		/* size addr space maps (ex. 2GB for ggtt) */
+
+	struct {
+		dma_addr_t addr;
+		struct page *page;
+	} scratch;
+
+	/**
+	 * List of objects currently involved in rendering.
+	 *
+	 * Includes buffers having the contents of their GPU caches
+	 * flushed, not necessarily primitives.  last_rendering_seqno
+	 * represents when the rendering involved will be completed.
+	 *
+	 * A reference is held on the buffer while on this list.
+	 */
+	struct list_head active_list;
+
+	/**
+	 * LRU list of objects which are not in the ringbuffer and
+	 * are ready to unbind, but are still in the GTT.
+	 *
+	 * last_rendering_seqno is 0 while an object is in this list.
+	 *
+	 * A reference is not held on the buffer while on this list,
+	 * as merely being GTT-bound shouldn't prevent its being
+	 * freed, and we'll pull it off the list in the free path.
+	 */
+	struct list_head inactive_list;
+
+	/* FIXME: Need a more generic return type */
+	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
+				     enum i915_cache_level level);
+	void (*clear_range)(struct i915_address_space *vm,
+			    unsigned int first_entry,
+			    unsigned int num_entries);
+	void (*insert_entries)(struct i915_address_space *vm,
+			       struct sg_table *st,
+			       unsigned int first_entry,
+			       enum i915_cache_level cache_level);
+	void (*cleanup)(struct i915_address_space *vm);
+};
+
 /* The Graphics Translation Table is the way in which GEN hardware translates a
  * Graphics Virtual Address into a Physical Address. In addition to the normal
  * collateral associated with any va->pa translations GEN hardware also has a
@@ -454,8 +511,7 @@ typedef uint32_t gen6_gtt_pte_t;
  * the spec.
  */
 struct i915_gtt {
-	unsigned long start;		/* Start offset of used GTT */
-	size_t total;			/* Total size GTT can map */
+	struct i915_address_space base;
 	size_t stolen_size;		/* Total size of stolen memory */
 
 	unsigned long mappable_end;	/* End offset that we can CPU map */
@@ -466,10 +522,6 @@ struct i915_gtt {
 	void __iomem *gsm;
 
 	bool do_idle_maps;
-	struct {
-		dma_addr_t addr;
-		struct page *page;
-	} scratch;
 
 	int mtrr;
 
@@ -477,38 +529,28 @@ struct i915_gtt {
 	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
 			  size_t *stolen, phys_addr_t *mappable_base,
 			  unsigned long *mappable_end);
-	void (*gtt_remove)(struct drm_device *dev);
-	void (*gtt_clear_range)(struct drm_device *dev,
-				unsigned int first_entry,
-				unsigned int num_entries);
-	void (*gtt_insert_entries)(struct drm_device *dev,
-				   struct sg_table *st,
-				   unsigned int pg_start,
-				   enum i915_cache_level cache_level);
-	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
-				     enum i915_cache_level level);
 };
-#define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT)
+#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
 
 struct i915_hw_ppgtt {
-	struct drm_device *dev;
+	struct i915_address_space base;
 	unsigned num_pd_entries;
 	struct page **pt_pages;
 	uint32_t pd_offset;
 	dma_addr_t *pt_dma_addr;
 
-	/* pte functions, mirroring the interface of the global gtt. */
-	void (*clear_range)(struct i915_hw_ppgtt *ppgtt,
-			    unsigned int first_entry,
-			    unsigned int num_entries);
-	void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
-			       struct sg_table *st,
-			       unsigned int pg_start,
-			       enum i915_cache_level cache_level);
-	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
-				     enum i915_cache_level level);
 	int (*enable)(struct drm_device *dev);
-	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
+};
+
+/* To make things as simple as possible (ie. no refcounting), a VMA's lifetime
+ * will always be <= an objects lifetime. So object refcounting should cover us.
+ */
+struct i915_vma {
+	struct drm_mm_node node;
+	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
+
+	struct list_head vma_link; /* Link in the object's VMA list */
 };
 
 struct i915_ctx_hang_stats {
@@ -560,6 +602,18 @@ struct i915_fbc {
 	} no_fbc_reason;
 };
 
+enum no_psr_reason {
+	PSR_NO_SOURCE, /* Not supported on platform */
+	PSR_NO_SINK, /* Not supported by panel */
+	PSR_MODULE_PARAM,
+	PSR_CRTC_NOT_ACTIVE,
+	PSR_PWR_WELL_ENABLED,
+	PSR_NOT_TILED,
+	PSR_SPRITE_ENABLED,
+	PSR_S3D_ENABLED,
+	PSR_INTERLACED_ENABLED,
+	PSR_HSW_NOT_DDIA,
+};
 
 enum intel_pch {
 	PCH_NONE = 0,	/* No PCH present */
@@ -577,6 +631,7 @@ enum intel_sbi_destination {
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
+#define QUIRK_NO_PCH_PWM_ENABLE (1<<3)
 
 struct intel_fbdev;
 struct intel_fbc_work;
@@ -834,8 +889,6 @@ struct intel_l3_parity {
 struct i915_gem_mm {
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
-	/** Memory allocator for GTT */
-	struct drm_mm gtt_space;
 	/** List of all objects in gtt_space. Used to restore gtt
 	 * mappings on resume */
 	struct list_head bound_list;
@@ -855,29 +908,6 @@ struct i915_gem_mm {
 	struct shrinker inactive_shrinker;
 	bool shrinker_no_lock_stealing;
 
-	/**
-	 * List of objects currently involved in rendering.
-	 *
-	 * Includes buffers having the contents of their GPU caches
-	 * flushed, not necessarily primitives.  last_rendering_seqno
-	 * represents when the rendering involved will be completed.
-	 *
-	 * A reference is held on the buffer while on this list.
-	 */
-	struct list_head active_list;
-
-	/**
-	 * LRU list of objects which are not in the ringbuffer and
-	 * are ready to unbind, but are still in the GTT.
-	 *
-	 * last_rendering_seqno is 0 while an object is in this list.
-	 *
-	 * A reference is not held on the buffer while on this list,
-	 * as merely being GTT-bound shouldn't prevent its being
-	 * freed, and we'll pull it off the list in the free path.
-	 */
-	struct list_head inactive_list;
-
 	/** LRU list of objects with fence regs on them. */
 	struct list_head fence_list;
 
@@ -905,6 +935,7 @@ struct i915_gem_mm {
 	struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
 
 	/* accounting, useful for userland debugging */
+	spinlock_t object_stat_lock;
 	size_t object_memory;
 	u32 object_count;
 };
@@ -1024,14 +1055,7 @@ typedef struct drm_i915_private {
 
 	void __iomem *regs;
 
-	struct drm_i915_gt_funcs gt;
-	/** gt_fifo_count and the subsequent register write are synchronized
-	 * with dev->struct_mutex. */
-	unsigned gt_fifo_count;
-	/** forcewake_count is protected by gt_lock */
-	unsigned forcewake_count;
-	/** gt_lock is also taken in irq contexts. */
-	spinlock_t gt_lock;
+	struct intel_uncore uncore;
 
 	struct intel_gmbus gmbus[GMBUS_NUM_PORTS];
 
@@ -1124,7 +1148,8 @@ typedef struct drm_i915_private {
 	enum modeset_restore modeset_restore;
 	struct mutex modeset_restore_lock;
 
-	struct i915_gtt gtt;
+	struct list_head vm_list; /* Global list of all address spaces */
+	struct i915_gtt gtt; /* VMA representing the global address space */
 
 	struct i915_gem_mm mm;
 
@@ -1151,6 +1176,9 @@ typedef struct drm_i915_private {
 
 	struct intel_l3_parity l3_parity;
 
+	/* Cannot be determined by PCIID. You must always read a register. */
+	size_t ellc_size;
+
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
@@ -1161,6 +1189,8 @@ typedef struct drm_i915_private {
 	/* Haswell power well */
 	struct i915_power_well power_well;
 
+	enum no_psr_reason no_psr_reason;
+
 	struct i915_gpu_error gpu_error;
 
 	struct drm_i915_gem_object *vlv_pctx;
@@ -1228,8 +1258,9 @@ struct drm_i915_gem_object {
 
 	const struct drm_i915_gem_object_ops *ops;
 
-	/** Current space allocated to this object in the GTT, if any. */
-	struct drm_mm_node gtt_space;
+	/** List of VMAs backed by this object */
+	struct list_head vma_list;
+
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
@@ -1355,18 +1386,32 @@ struct drm_i915_gem_object {
 
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
-/* Offset of the first PTE pointing to this object */
-static inline unsigned long
-i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
+/* This is a temporary define to help transition us to real VMAs. If you see
+ * this, you're either reviewing code, or bisecting it. */
+static inline struct i915_vma *
+__i915_gem_obj_to_vma(struct drm_i915_gem_object *obj)
 {
-	return o->gtt_space.start;
+	if (list_empty(&obj->vma_list))
+		return NULL;
+	return list_first_entry(&obj->vma_list, struct i915_vma, vma_link);
 }
 
 /* Whether or not this object is currently mapped by the translation tables */
 static inline bool
 i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *o)
 {
-	return drm_mm_node_allocated(&o->gtt_space);
+	struct i915_vma *vma = __i915_gem_obj_to_vma(o);
+	if (vma == NULL)
+		return false;
+	return drm_mm_node_allocated(&vma->node);
+}
+
+/* Offset of the first PTE pointing to this object */
+static inline unsigned long
+i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
+{
+	BUG_ON(list_empty(&o->vma_list));
+	return __i915_gem_obj_to_vma(o)->node.start;
 }
 
 /* The size used in the translation tables may be larger than the actual size of
@@ -1376,14 +1421,15 @@ i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *o)
 static inline unsigned long
 i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
 {
-	return o->gtt_space.size;
+	BUG_ON(list_empty(&o->vma_list));
+	return __i915_gem_obj_to_vma(o)->node.size;
 }
 
 static inline void
 i915_gem_obj_ggtt_set_color(struct drm_i915_gem_object *o,
 			    enum i915_cache_level color)
 {
-	o->gtt_space.color = color;
+	__i915_gem_obj_to_vma(o)->node.color = color;
 }
 
 /**
@@ -1580,10 +1626,12 @@ extern int i915_enable_rc6 __read_mostly;
 extern int i915_enable_fbc __read_mostly;
 extern bool i915_enable_hangcheck __read_mostly;
 extern int i915_enable_ppgtt __read_mostly;
+extern int i915_enable_psr __read_mostly;
 extern unsigned int i915_preliminary_hw_support __read_mostly;
 extern int i915_disable_power_well __read_mostly;
 extern int i915_enable_ips __read_mostly;
 extern bool i915_fastboot __read_mostly;
+extern bool i915_prefault_disable __read_mostly;
 
 extern int i915_suspend(struct drm_device *dev, pm_message_t state);
 extern int i915_resume(struct drm_device *dev);
@@ -1619,13 +1667,20 @@ extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 extern void intel_console_resume(struct work_struct *work);
 
 /* i915_irq.c */
+void i915_queue_hangcheck(struct drm_device *dev);
 void i915_hangcheck_elapsed(unsigned long data);
 void i915_handle_error(struct drm_device *dev, bool wedged);
 
 extern void intel_irq_init(struct drm_device *dev);
 extern void intel_hpd_init(struct drm_device *dev);
-extern void intel_gt_init(struct drm_device *dev);
-extern void intel_gt_reset(struct drm_device *dev);
+extern void intel_pm_init(struct drm_device *dev);
+
+extern void intel_uncore_sanitize(struct drm_device *dev);
+extern void intel_uncore_early_sanitize(struct drm_device *dev);
+extern void intel_uncore_init(struct drm_device *dev);
+extern void intel_uncore_reset(struct drm_device *dev);
+extern void intel_uncore_clear_errors(struct drm_device *dev);
+extern void intel_uncore_check_errors(struct drm_device *dev);
 
 void
 i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
@@ -1689,6 +1744,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
+struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
+				     struct i915_address_space *vm);
+void i915_gem_vma_destroy(struct i915_vma *vma);
 
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     uint32_t alignment,
@@ -1857,7 +1915,7 @@ static inline void i915_gem_context_unreference(struct i915_hw_context *ctx)
 }
 
 struct i915_ctx_hang_stats * __must_check
-i915_gem_context_get_hang_stats(struct intel_ring_buffer *ring,
+i915_gem_context_get_hang_stats(struct drm_device *dev,
 				struct drm_file *file,
 				u32 id);
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
@@ -2056,7 +2114,6 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
  */
 void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
 void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
-int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val);
 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val);
@@ -2075,39 +2132,37 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
 int vlv_gpu_freq(int ddr_freq, int val);
 int vlv_freq_opcode(int ddr_freq, int val);
 
-#define __i915_read(x, y) \
-	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
-
-__i915_read(8, b)
-__i915_read(16, w)
-__i915_read(32, l)
-__i915_read(64, q)
+#define __i915_read(x) \
+	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg, bool trace);
+__i915_read(8)
+__i915_read(16)
+__i915_read(32)
+__i915_read(64)
 #undef __i915_read
 
-#define __i915_write(x, y) \
-	void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
-
-__i915_write(8, b)
-__i915_write(16, w)
-__i915_write(32, l)
-__i915_write(64, q)
+#define __i915_write(x) \
+	void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool trace);
+__i915_write(8)
+__i915_write(16)
+__i915_write(32)
+__i915_write(64)
 #undef __i915_write
 
-#define I915_READ8(reg)		i915_read8(dev_priv, (reg))
-#define I915_WRITE8(reg, val)	i915_write8(dev_priv, (reg), (val))
+#define I915_READ8(reg)		i915_read8(dev_priv, (reg), true)
+#define I915_WRITE8(reg, val)	i915_write8(dev_priv, (reg), (val), true)
 
-#define I915_READ16(reg)	i915_read16(dev_priv, (reg))
-#define I915_WRITE16(reg, val)	i915_write16(dev_priv, (reg), (val))
-#define I915_READ16_NOTRACE(reg)	readw(dev_priv->regs + (reg))
-#define I915_WRITE16_NOTRACE(reg, val)	writew(val, dev_priv->regs + (reg))
+#define I915_READ16(reg)	i915_read16(dev_priv, (reg), true)
+#define I915_WRITE16(reg, val)	i915_write16(dev_priv, (reg), (val), true)
+#define I915_READ16_NOTRACE(reg)	i915_read16(dev_priv, (reg), false)
+#define I915_WRITE16_NOTRACE(reg, val)	i915_write16(dev_priv, (reg), (val), false)
 
-#define I915_READ(reg)		i915_read32(dev_priv, (reg))
-#define I915_WRITE(reg, val)	i915_write32(dev_priv, (reg), (val))
-#define I915_READ_NOTRACE(reg)		readl(dev_priv->regs + (reg))
-#define I915_WRITE_NOTRACE(reg, val)	writel(val, dev_priv->regs + (reg))
+#define I915_READ(reg)		i915_read32(dev_priv, (reg), true)
+#define I915_WRITE(reg, val)	i915_write32(dev_priv, (reg), (val), true)
+#define I915_READ_NOTRACE(reg)		i915_read32(dev_priv, (reg), false)
+#define I915_WRITE_NOTRACE(reg, val)	i915_write32(dev_priv, (reg), (val), false)
 
-#define I915_WRITE64(reg, val)	i915_write64(dev_priv, (reg), (val))
-#define I915_READ64(reg)	i915_read64(dev_priv, (reg))
+#define I915_WRITE64(reg, val)	i915_write64(dev_priv, (reg), (val), true)
+#define I915_READ64(reg)	i915_read64(dev_priv, (reg), true)
 
 #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)

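The macro rework above is the visible half of the conditional register tracepoint: a single X-macro now stamps out one accessor per register width, taking an extra bool trace parameter, so the NOTRACE variants become the same function called with false rather than a bare readl()/writel(). A compilable userspace rendition of the token-pasting trick; the regs[] array and all demo_* names are invented for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t regs[4096];	/* pretend MMIO window (dev_priv->regs) */

#define __demo_read(x) \
static uint##x##_t demo_read##x(uint32_t reg, int trace)	\
{								\
	uint##x##_t val;					\
	memcpy(&val, &regs[reg], sizeof(val));			\
	if (trace)						\
		printf("read%d reg=0x%x val=0x%llx\n",		\
		       x, (unsigned)reg,			\
		       (unsigned long long)val);		\
	return val;						\
}
__demo_read(8)
__demo_read(16)
__demo_read(32)
__demo_read(64)
#undef __demo_read

#define DEMO_READ(reg)		demo_read32((reg), 1)
#define DEMO_READ_NOTRACE(reg)	demo_read32((reg), 0)

int main(void)
{
	regs[0x100] = 0xab;
	DEMO_READ(0x100);		/* traced */
	DEMO_READ_NOTRACE(0x100);	/* same code path, tracing skipped */
	return 0;
}
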
+ 117 - 56
drivers/gpu/drm/i915/i915_gem.c

@@ -76,15 +76,19 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
 				  size_t size)
 {
+	spin_lock(&dev_priv->mm.object_stat_lock);
 	dev_priv->mm.object_count++;
 	dev_priv->mm.object_memory += size;
+	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 				     size_t size)
 {
+	spin_lock(&dev_priv->mm.object_stat_lock);
 	dev_priv->mm.object_count--;
 	dev_priv->mm.object_memory -= size;
+	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
 static int
@@ -182,7 +186,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			pinned += i915_gem_obj_ggtt_size(obj);
 	mutex_unlock(&dev->struct_mutex);
 
-	args->aper_size = dev_priv->gtt.total;
+	args->aper_size = dev_priv->gtt.base.total;
 	args->aper_available_size = args->aper_size - pinned;
 
 	return 0;
@@ -220,16 +224,10 @@ i915_gem_create(struct drm_file *file,
 		return -ENOMEM;
 
 	ret = drm_gem_handle_create(file, &obj->base, &handle);
-	if (ret) {
-		drm_gem_object_release(&obj->base);
-		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
-		i915_gem_object_free(obj);
-		return ret;
-	}
-
 	/* drop reference from allocate - handle holds it now */
-	drm_gem_object_unreference(&obj->base);
-	trace_i915_gem_object_create(obj);
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
 
 	*handle_p = handle;
 	return 0;
@@ -459,7 +457,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 
 		mutex_unlock(&dev->struct_mutex);
 
-		if (!prefaulted) {
+		if (likely(!i915_prefault_disable) && !prefaulted) {
 			ret = fault_in_multipages_writeable(user_data, remain);
 			/* Userspace is tricking us, but we've already clobbered
 			 * its pages with the prefault and promised to write the
@@ -854,10 +852,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		       args->size))
 		return -EFAULT;
 
-	ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
-					   args->size);
-	if (ret)
-		return -EFAULT;
+	if (likely(!i915_prefault_disable)) {
+		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
+						   args->size);
+		if (ret)
+			return -EFAULT;
+	}
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -1679,6 +1679,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		  bool purgeable_only)
 {
 	struct drm_i915_gem_object *obj, *next;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	long count = 0;
 
 	list_for_each_entry_safe(obj, next,
@@ -1692,9 +1693,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		}
 	}
 
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.inactive_list,
-				 mm_list) {
+	list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list) {
 		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
 		    i915_gem_object_unbind(obj) == 0 &&
 		    i915_gem_object_put_pages(obj) == 0) {
@@ -1865,6 +1864,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	u32 seqno = intel_ring_get_seqno(ring);
 
 	BUG_ON(ring == NULL);
@@ -1881,7 +1881,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 	}
 
 	/* Move from whatever list we were on to the tail of execution. */
-	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
+	list_move_tail(&obj->mm_list, &vm->active_list);
 	list_move_tail(&obj->ring_list, &ring->active_list);
 
 	obj->last_read_seqno = seqno;
@@ -1905,11 +1905,12 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 
 	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
 	BUG_ON(!obj->active);
 
-	list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	list_move_tail(&obj->mm_list, &vm->inactive_list);
 
 	list_del_init(&obj->ring_list);
 	obj->ring = NULL;
@@ -2074,10 +2075,8 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	ring->outstanding_lazy_request = 0;
 
 	if (!dev_priv->ums.mm_suspended) {
-		if (i915_enable_hangcheck) {
-			mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-				  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
-		}
+		i915_queue_hangcheck(ring->dev);
+
		if (was_empty) {
 			queue_delayed_work(dev_priv->wq,
 					   &dev_priv->mm.retire_work,
@@ -2246,13 +2245,24 @@ void i915_gem_restore_fences(struct drm_device *dev)
 
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-		i915_gem_write_fence(dev, i, reg->obj);
+
+		/*
+		 * Commit delayed tiling changes if we have an object still
+		 * attached to the fence, otherwise just clear the fence.
+		 */
+		if (reg->obj) {
+			i915_gem_object_update_fence(reg->obj, reg,
+						     reg->obj->tiling_mode);
+		} else {
+			i915_gem_write_fence(dev, i, NULL);
+		}
+		}
 	}
 }
 
 void i915_gem_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	struct intel_ring_buffer *ring;
 	int i;
@@ -2263,12 +2273,8 @@ void i915_gem_reset(struct drm_device *dev)
 	/* Move everything out of the GPU domains to ensure we do any
 	 * necessary invalidation upon reuse.
 	 */
-	list_for_each_entry(obj,
-			    &dev_priv->mm.inactive_list,
-			    mm_list)
-	{
+	list_for_each_entry(obj, &vm->inactive_list, mm_list)
 		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
-	}
 
 	i915_gem_restore_fences(dev);
 }
@@ -2571,6 +2577,7 @@ int
 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
+	struct i915_vma *vma;
 	int ret;
 
 	if (!i915_gem_obj_ggtt_bound(obj))
@@ -2608,11 +2615,20 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 
 	list_del(&obj->mm_list);
-	list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
 
-	drm_mm_remove_node(&obj->gtt_space);
+	vma = __i915_gem_obj_to_vma(obj);
+	list_del(&vma->vma_link);
+	drm_mm_remove_node(&vma->node);
+	i915_gem_vma_destroy(vma);
+
+	/* Since the unbound list is global, only move to that list if
+	 * no more VMAs exist.
+	 * NB: Until we have real VMAs there will only ever be one */
+	WARN_ON(!list_empty(&obj->vma_list));
+	if (list_empty(&obj->vma_list))
+		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 
 	return 0;
 }
@@ -2781,6 +2797,10 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
 	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
 		mb();
 
+	WARN(obj && (!obj->stride || !obj->tiling_mode),
+	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+	     obj->stride, obj->tiling_mode);
+
 	switch (INTEL_INFO(dev)->gen) {
 	case 7:
 	case 6:
@@ -2822,6 +2842,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 		fence->obj = NULL;
 		list_del_init(&fence->lru_list);
 	}
+	obj->fence_dirty = false;
 }
 
 static int
@@ -2951,7 +2972,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 		return 0;
 
 	i915_gem_object_update_fence(obj, reg, enable);
-	obj->fence_dirty = false;
 
 	return 0;
 }
@@ -3037,12 +3057,17 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	u32 size, fence_size, fence_alignment, unfenced_alignment;
 	bool mappable, fenceable;
 	size_t gtt_max = map_and_fenceable ?
-		dev_priv->gtt.mappable_end : dev_priv->gtt.total;
+		dev_priv->gtt.mappable_end : dev_priv->gtt.base.total;
+	struct i915_vma *vma;
 	int ret;
 
+	if (WARN_ON(!list_empty(&obj->vma_list)))
+		return -EBUSY;
+
 	fence_size = i915_gem_get_gtt_size(dev,
 					   obj->base.size,
 					   obj->tiling_mode);
@@ -3081,9 +3106,15 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_pin_pages(obj);
 
+	vma = i915_gem_vma_create(obj, &dev_priv->gtt.base);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unpin;
+	}
+
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space,
-						  &obj->gtt_space,
+	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
+						  &vma->node,
 						  size, alignment,
 						  obj->cache_level, 0, gtt_max,
 						  DRM_MM_SEARCH_DEFAULT);
@@ -3095,25 +3126,21 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		if (ret == 0)
 			goto search_free;
 
-		i915_gem_object_unpin_pages(obj);
-		return ret;
+		goto err_free_vma;
 	}
-	if (WARN_ON(!i915_gem_valid_gtt_space(dev, &obj->gtt_space,
+	if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
 					      obj->cache_level))) {
-		i915_gem_object_unpin_pages(obj);
-		drm_mm_remove_node(&obj->gtt_space);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_remove_node;
 	}
 
 	ret = i915_gem_gtt_prepare_object(obj);
-	if (ret) {
-		i915_gem_object_unpin_pages(obj);
-		drm_mm_remove_node(&obj->gtt_space);
-		return ret;
-	}
+	if (ret)
+		goto err_remove_node;
 
 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	list_add_tail(&obj->mm_list, &vm->inactive_list);
+	list_add(&vma->vma_link, &obj->vma_list);
 
 	fenceable =
 		i915_gem_obj_ggtt_size(obj) == fence_size &&
@@ -3127,6 +3154,14 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	trace_i915_gem_object_bind(obj, map_and_fenceable);
 	i915_gem_verify_gtt(dev);
 	return 0;
+
+err_remove_node:
+	drm_mm_remove_node(&vma->node);
+err_free_vma:
+	i915_gem_vma_destroy(vma);
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
 }
 
 void
@@ -3261,7 +3296,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* And bump the LRU for this access */
 	if (i915_gem_object_is_inactive(obj))
-		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+		list_move_tail(&obj->mm_list,
+			       &dev_priv->gtt.base.inactive_list);
 
 	return 0;
 }
@@ -3271,6 +3307,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_vma *vma = __i915_gem_obj_to_vma(obj);
 	int ret;
 
 	if (obj->cache_level == cache_level)
@@ -3281,7 +3318,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		return -EBUSY;
 	}
 
-	if (!i915_gem_valid_gtt_space(dev, &obj->gtt_space, cache_level)) {
+	if (vma && !i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
 		ret = i915_gem_object_unbind(obj);
 		if (ret)
 			return ret;
@@ -3826,6 +3863,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->global_list);
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->exec_list);
+	INIT_LIST_HEAD(&obj->vma_list);
 
 	obj->ops = ops;
 
@@ -3890,6 +3928,8 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	} else
 		obj->cache_level = I915_CACHE_NONE;
 
+	trace_i915_gem_object_create(obj);
+
 	return obj;
 }
 
@@ -3946,6 +3986,26 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_object_free(obj);
 }
 
+struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
+				     struct i915_address_space *vm)
+{
+	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (vma == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&vma->vma_link);
+	vma->vm = vm;
+	vma->obj = obj;
+
+	return vma;
+}
+
+void i915_gem_vma_destroy(struct i915_vma *vma)
+{
+	WARN_ON(vma->node.allocated);
+	kfree(vma);
+}
+
 int
 i915_gem_idle(struct drm_device *dev)
 {
@@ -4105,8 +4165,8 @@ i915_gem_init_hw(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
 
-	if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
-		I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
+	if (dev_priv->ellc_size)
+		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
 
 	if (HAS_PCH_NOP(dev)) {
 		u32 temp = I915_READ(GEN7_MSG_CTL);
@@ -4202,7 +4262,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 		return ret;
 	}
 
-	BUG_ON(!list_empty(&dev_priv->mm.active_list));
+	BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
 	mutex_unlock(&dev->struct_mutex);
 
 	ret = drm_irq_install(dev);
@@ -4280,8 +4340,8 @@ i915_gem_load(struct drm_device *dev)
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
-	INIT_LIST_HEAD(&dev_priv->mm.active_list);
-	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
+	INIT_LIST_HEAD(&dev_priv->gtt.base.active_list);
+	INIT_LIST_HEAD(&dev_priv->gtt.base.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
@@ -4552,6 +4612,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 			     struct drm_i915_private,
 			     mm.inactive_shrinker);
 	struct drm_device *dev = dev_priv->dev;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	int nr_to_scan = sc->nr_to_scan;
 	bool unlock = true;
@@ -4580,7 +4641,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
 		if (obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list)
+	list_for_each_entry(obj, &vm->inactive_list, mm_list)
 		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
 

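Most of the i915_gem.c churn above is mechanical: the per-object drm_mm_node becomes a struct i915_vma tying one object to one address space, and the active/inactive LRU lists migrate onto the address space itself. A toy standalone model of the object/VMA/address-space triangle, with a plain singly linked list in place of list_head and drm_mm, and every name invented:

#include <stdlib.h>
#include <stdio.h>

struct address_space { const char *name; };

struct vma {
	struct address_space *vm;	/* which address space this mapping lives in */
	struct object *obj;		/* the backing object */
	struct vma *next;		/* link in the object's VMA list */
	unsigned long start, size;	/* stand-in for drm_mm_node */
};

struct object {
	unsigned long size;
	struct vma *vma_list;		/* one object can map into several spaces */
};

static struct vma *vma_create(struct object *obj, struct address_space *vm)
{
	struct vma *vma = calloc(1, sizeof(*vma));

	if (!vma)
		return NULL;
	vma->obj = obj;
	vma->vm = vm;
	vma->next = obj->vma_list;	/* link into the object's VMA list */
	obj->vma_list = vma;
	return vma;
}

static void vma_destroy(struct object *obj, struct vma *vma)
{
	struct vma **p = &obj->vma_list;

	/* Unlink, then free. There is no refcount: the VMA's lifetime is
	 * bounded by the object's, so the object's refcount covers it. */
	while (*p && *p != vma)
		p = &(*p)->next;
	if (*p)
		*p = vma->next;
	free(vma);
}

int main(void)
{
	struct address_space ggtt = { "ggtt" }, ppgtt = { "ppgtt" };
	struct object obj = { .size = 4096 };
	struct vma *a = vma_create(&obj, &ggtt);
	struct vma *b = vma_create(&obj, &ppgtt);

	printf("obj mapped in %s and %s\n", a->vm->name, b->vm->name);
	vma_destroy(&obj, b);
	vma_destroy(&obj, a);
	return 0;
}
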
+ 8 - 15
drivers/gpu/drm/i915/i915_gem_context.c

@@ -304,31 +304,24 @@ static int context_idr_cleanup(int id, void *p, void *data)
 }
 
 struct i915_ctx_hang_stats *
-i915_gem_context_get_hang_stats(struct intel_ring_buffer *ring,
+i915_gem_context_get_hang_stats(struct drm_device *dev,
 				struct drm_file *file,
 				u32 id)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_hw_context *to;
-
-	if (dev_priv->hw_contexts_disabled)
-		return ERR_PTR(-ENOENT);
-
-	if (ring->id != RCS)
-		return ERR_PTR(-EINVAL);
-
-	if (file == NULL)
-		return ERR_PTR(-EINVAL);
+	struct i915_hw_context *ctx;
 
 	if (id == DEFAULT_CONTEXT_ID)
 		return &file_priv->hang_stats;
 
-	to = i915_gem_context_get(file->driver_priv, id);
-	if (to == NULL)
+	ctx = NULL;
+	if (!dev_priv->hw_contexts_disabled)
+		ctx = i915_gem_context_get(file->driver_priv, id);
+	if (ctx == NULL)
 		return ERR_PTR(-ENOENT);
 
-	return &to->hang_stats;
+	return &ctx->hang_stats;
 }
 
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)

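Both i915_gem_vma_create() above and the reworked hang-stats lookup return errors through the kernel's ERR_PTR convention: the failure reason travels in the pointer value itself instead of collapsing to NULL. A userspace model of that convention; the lookup function and struct are invented, and the ERR_PTR/PTR_ERR/IS_ERR trio mirrors the kernel's semantics rather than its exact implementation:

#include <stdio.h>
#include <errno.h>

/* Small negative errnos are encoded as (invalid) pointer values in the
 * top page of the address space, so they can never collide with a real
 * allocation. */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct hang_stats { int batch_active; };

static struct hang_stats *lookup_stats(int id)
{
	static struct hang_stats def;

	if (id != 0)
		return ERR_PTR(-ENOENT);	/* unknown context: say why */
	return &def;
}

int main(void)
{
	struct hang_stats *hs = lookup_stats(42);

	if (IS_ERR(hs))
		printf("lookup failed: %ld\n", PTR_ERR(hs));
	return 0;
}
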
+ 1 - 1
drivers/gpu/drm/i915/i915_gem_debug.c

@@ -97,7 +97,7 @@ i915_verify_lists(struct drm_device *dev)
 		}
 	}
 
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, list) {
+	list_for_each_entry(obj, &i915_gtt_vm->inactive_list, list) {
 		if (obj->base.dev != dev ||
 		    !atomic_read(&obj->base.refcount.refcount)) {
 			DRM_ERROR("freed inactive %p\n", obj);

+ 19 - 15
drivers/gpu/drm/i915/i915_gem_evict.c

@@ -34,11 +34,13 @@
 static bool
 static bool
 mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
 mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
 {
 {
+	struct i915_vma *vma = __i915_gem_obj_to_vma(obj);
+
 	if (obj->pin_count)
 	if (obj->pin_count)
 		return false;
 		return false;
 
 
 	list_add(&obj->exec_list, unwind);
 	list_add(&obj->exec_list, unwind);
-	return drm_mm_scan_add_block(&obj->gtt_space);
+	return drm_mm_scan_add_block(&vma->node);
 }
 }
 
 
 int
 int
@@ -47,7 +49,9 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 			 bool mappable, bool nonblocking)
 			 bool mappable, bool nonblocking)
 {
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct list_head eviction_list, unwind_list;
 	struct list_head eviction_list, unwind_list;
+	struct i915_vma *vma;
 	struct drm_i915_gem_object *obj;
 	struct drm_i915_gem_object *obj;
 	int ret = 0;
 	int ret = 0;
 
 
@@ -78,15 +82,14 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 
 
 	INIT_LIST_HEAD(&unwind_list);
 	INIT_LIST_HEAD(&unwind_list);
 	if (mappable)
 	if (mappable)
-		drm_mm_init_scan_with_range(&dev_priv->mm.gtt_space,
-					    min_size, alignment, cache_level,
-					    0, dev_priv->gtt.mappable_end);
+		drm_mm_init_scan_with_range(&vm->mm, min_size,
+					    alignment, cache_level, 0,
+					    dev_priv->gtt.mappable_end);
 	else
 	else
-		drm_mm_init_scan(&dev_priv->mm.gtt_space,
-				 min_size, alignment, cache_level);
+		drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
 
 
 	/* First see if there is a large enough contiguous idle region... */
 	/* First see if there is a large enough contiguous idle region... */
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) {
+	list_for_each_entry(obj, &vm->inactive_list, mm_list) {
 		if (mark_free(obj, &unwind_list))
 		if (mark_free(obj, &unwind_list))
 			goto found;
 			goto found;
 	}
 	}
@@ -95,7 +98,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 		goto none;
 
 	/* Now merge in the soon-to-be-expired objects... */
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
+	list_for_each_entry(obj, &vm->active_list, mm_list) {
 		if (mark_free(obj, &unwind_list))
 			goto found;
 	}
@@ -106,8 +109,8 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 		obj = list_first_entry(&unwind_list,
 				       struct drm_i915_gem_object,
 				       exec_list);
-
-		ret = drm_mm_scan_remove_block(&obj->gtt_space);
+		vma = __i915_gem_obj_to_vma(obj);
+		ret = drm_mm_scan_remove_block(&vma->node);
 		BUG_ON(ret);
 
 		list_del_init(&obj->exec_list);
@@ -127,7 +130,8 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 		obj = list_first_entry(&unwind_list,
 				       struct drm_i915_gem_object,
 				       exec_list);
-		if (drm_mm_scan_remove_block(&obj->gtt_space)) {
+		vma = __i915_gem_obj_to_vma(obj);
+		if (drm_mm_scan_remove_block(&vma->node)) {
 			list_move(&obj->exec_list, &eviction_list);
 			drm_gem_object_reference(&obj->base);
 			continue;
@@ -154,12 +158,13 @@ int
 i915_gem_evict_everything(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj, *next;
 	bool lists_empty;
 	int ret;
 
-	lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
-		       list_empty(&dev_priv->mm.active_list));
+	lists_empty = (list_empty(&vm->inactive_list) &&
+		       list_empty(&vm->active_list));
 	if (lists_empty)
 		return -ENOSPC;
 
@@ -176,8 +181,7 @@ i915_gem_evict_everything(struct drm_device *dev)
 	i915_gem_retire_requests(dev);
 
 	/* Having flushed everything, unbind() should never raise an error */
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.inactive_list, mm_list)
+	list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list)
 		if (obj->pin_count == 0)
 			WARN_ON(i915_gem_object_unbind(obj));
 

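The rewrite above is a good excuse to spell out the drm_mm scan protocol this eviction code relies on, since it is easy to miss in diff form: blocks are speculatively added with drm_mm_scan_add_block() until it reports that a large-enough hole exists, and every added block must then be removed with drm_mm_scan_remove_block() in LIFO order, with only the blocks for which removal returns true actually needing eviction. A condensed sketch using the vma->node handles introduced by this patch (illustrative, not the driver's exact code):

	/* Pass 1: grow the scan until a hole appears. */
	drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
	list_for_each_entry(obj, &vm->inactive_list, mm_list) {
		list_add(&obj->exec_list, &unwind_list);	/* prepend: newest first */
		if (drm_mm_scan_add_block(&__i915_gem_obj_to_vma(obj)->node))
			break;		/* hole found */
	}

	/* Pass 2: unwind newest-first; evict only blocks inside the hole. */
	while (!list_empty(&unwind_list)) {
		obj = list_first_entry(&unwind_list,
				       struct drm_i915_gem_object, exec_list);
		if (drm_mm_scan_remove_block(&__i915_gem_obj_to_vma(obj)->node))
			;	/* overlaps the hole: this one must be unbound */
		list_del_init(&obj->exec_list);
	}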
+ 9 - 7
drivers/gpu/drm/i915/i915_gem_execbuffer.c

@@ -255,7 +255,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 
 	reloc->delta += target_offset;
 	if (use_cpu_reloc(obj)) {
-		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
+		uint32_t page_offset = offset_in_page(reloc->offset);
 		char *vaddr;
 
 		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
@@ -284,7 +284,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
 						      reloc->offset & PAGE_MASK);
 		reloc_entry = (uint32_t __iomem *)
-			(reloc_page + (reloc->offset & ~PAGE_MASK));
+			(reloc_page + offset_in_page(reloc->offset));
 		iowrite32(reloc->delta, reloc_entry);
 		io_mapping_unmap_atomic(reloc_page);
 	}
@@ -759,8 +759,10 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 		if (!access_ok(VERIFY_WRITE, ptr, length))
 			return -EFAULT;
 
-		if (fault_in_multipages_readable(ptr, length))
-			return -EFAULT;
+		if (likely(!i915_prefault_disable)) {
+			if (fault_in_multipages_readable(ptr, length))
+				return -EFAULT;
+		}
 	}
 
 	return 0;
@@ -873,7 +875,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		break;
 	case I915_EXEC_BSD:
 		ring = &dev_priv->ring[VCS];
-		if (ctx_id != 0) {
+		if (ctx_id != DEFAULT_CONTEXT_ID) {
 			DRM_DEBUG("Ring %s doesn't support contexts\n",
 				  ring->name);
 			return -EPERM;
@@ -881,7 +883,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		break;
 	case I915_EXEC_BLT:
 		ring = &dev_priv->ring[BCS];
-		if (ctx_id != 0) {
+		if (ctx_id != DEFAULT_CONTEXT_ID) {
 			DRM_DEBUG("Ring %s doesn't support contexts\n",
 				  ring->name);
 			return -EPERM;
@@ -889,7 +891,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		break;
 	case I915_EXEC_VEBOX:
 		ring = &dev_priv->ring[VECS];
-		if (ctx_id != 0) {
+		if (ctx_id != DEFAULT_CONTEXT_ID) {
 			DRM_DEBUG("Ring %s doesn't support contexts\n",
 				  ring->name);
 			return -EPERM;

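For reference, offset_in_page() in the relocation hunks above is not new driver logic; it is the kernel's canonical spelling (from include/linux/mm.h) of the mask the code used to open-code:

	#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)

so offset_in_page(reloc->offset) and (reloc->offset & ~PAGE_MASK) yield the same intra-page byte offset; the change is purely about readability.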
+ 129 - 84
drivers/gpu/drm/i915/i915_gem_gtt.c

@@ -33,6 +33,7 @@
 
 /* PPGTT stuff */
 #define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
+#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
 
 #define GEN6_PDE_VALID			(1 << 0)
 /* gen6+ has bit 11-4 for physical addr bit 39-32 */
@@ -44,6 +45,15 @@
 #define GEN6_PTE_CACHE_LLC		(2 << 1)
 #define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
+
+/* Cacheability Control is a 4-bit value. The low three bits are stored in *
+ * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
+ */
+#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
+					 (((bits) & 0x8) << (11 - 3)))
+#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
+#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
 
 static gen6_gtt_pte_t gen6_pte_encode(dma_addr_t addr,
 				      enum i915_cache_level level)
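Expanding the newly added macros by hand makes the PTE layout concrete: the low three bits of the 4-bit cacheability value land in PTE bits 3:1, the fourth bit in PTE bit 11.

	/* Worked expansion (values computed by hand):
	 * HSW_WB_LLC_AGE0      = ((0x3 & 0x7) << 1) | ((0x3 & 0x8) << 8)
	 *                      = 0x006
	 * HSW_WB_ELLC_LLC_AGE0 = ((0xb & 0x7) << 1) | ((0xb & 0x8) << 8)
	 *                      = 0x006 | 0x800 = 0x806
	 */

so the eLLC encoding differs from the LLC-only one in exactly PTE bit 11. Likewise HSW_GTT_ADDR_ENCODE folds high physical address bits into the PTE: (addr >> 28) & 0x7f0 places phys bit 32 at PTE bit 4 up through phys bit 38 at PTE bit 10, one bit fewer than the gen6 0xff0 mask, which reaches phys bit 39.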
@@ -92,17 +102,29 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level)
 {
 	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
-	pte |= GEN6_PTE_ADDR_ENCODE(addr);
+	pte |= HSW_PTE_ADDR_ENCODE(addr);
 
 	if (level != I915_CACHE_NONE)
-		pte |= GEN6_PTE_CACHE_LLC;
+		pte |= HSW_WB_LLC_AGE0;
+
+	return pte;
+}
+
+static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
+				      enum i915_cache_level level)
+{
+	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
+	pte |= HSW_PTE_ADDR_ENCODE(addr);
+
+	if (level != I915_CACHE_NONE)
+		pte |= HSW_WB_ELLC_LLC_AGE0;
 
 	return pte;
 }
 
 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_i915_private *dev_priv = ppgtt->dev->dev_private;
+	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
 	gen6_gtt_pte_t __iomem *pd_addr;
 	uint32_t pd_entry;
 	int i;
@@ -181,18 +203,18 @@ static int gen6_ppgtt_enable(struct drm_device *dev)
 }
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
-static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
+static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 				   unsigned first_entry,
 				   unsigned num_entries)
 {
-	struct drm_i915_private *dev_priv = ppgtt->dev->dev_private;
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
 
-	scratch_pte = ppgtt->pte_encode(dev_priv->gtt.scratch.addr,
-					I915_CACHE_LLC);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -212,11 +234,13 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
+static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      unsigned first_entry,
 				      enum i915_cache_level cache_level)
 {
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
@@ -227,7 +251,7 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 		dma_addr_t page_addr;
 
 		page_addr = sg_page_iter_dma_address(&sg_iter);
-		pt_vaddr[act_pte] = ppgtt->pte_encode(page_addr, cache_level);
+		pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level);
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			act_pt++;
@@ -239,13 +263,17 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 	kunmap_atomic(pt_vaddr);
 }
 
-static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
+static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	int i;
 
+	drm_mm_takedown(&ppgtt->base.mm);
+
 	if (ppgtt->pt_dma_addr) {
 		for (i = 0; i < ppgtt->num_pd_entries; i++)
-			pci_unmap_page(ppgtt->dev->pdev,
+			pci_unmap_page(ppgtt->base.dev->pdev,
 				       ppgtt->pt_dma_addr[i],
 				       4096, PCI_DMA_BIDIRECTIONAL);
 	}
@@ -259,7 +287,7 @@ static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
 
 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_device *dev = ppgtt->dev;
+	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned first_pd_entry_in_global_pt;
 	int i;
@@ -271,17 +299,18 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);
 
 	if (IS_HASWELL(dev)) {
-		ppgtt->pte_encode = hsw_pte_encode;
+		ppgtt->base.pte_encode = hsw_pte_encode;
 	} else if (IS_VALLEYVIEW(dev)) {
-		ppgtt->pte_encode = byt_pte_encode;
+		ppgtt->base.pte_encode = byt_pte_encode;
 	} else {
-		ppgtt->pte_encode = gen6_pte_encode;
+		ppgtt->base.pte_encode = gen6_pte_encode;
 	}
 	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
 	ppgtt->enable = gen6_ppgtt_enable;
-	ppgtt->clear_range = gen6_ppgtt_clear_range;
-	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
-	ppgtt->cleanup = gen6_ppgtt_cleanup;
+	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
+	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
+	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
@@ -312,8 +341,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		ppgtt->pt_dma_addr[i] = pt_addr;
 	}
 
-	ppgtt->clear_range(ppgtt, 0,
-			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
+	ppgtt->base.clear_range(&ppgtt->base, 0,
+				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES);
 
 	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
 
@@ -346,7 +375,7 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 	if (!ppgtt)
 		return -ENOMEM;
 
-	ppgtt->dev = dev;
+	ppgtt->base.dev = dev;
 
 	if (INTEL_INFO(dev)->gen < 8)
 		ret = gen6_ppgtt_init(ppgtt);
@@ -355,8 +384,11 @@
 
 	if (ret)
 		kfree(ppgtt);
-	else
+	else {
 		dev_priv->mm.aliasing_ppgtt = ppgtt;
+		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
+			    ppgtt->base.total);
+	}
 
 	return ret;
 }
@@ -369,7 +401,7 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 	if (!ppgtt)
 		return;
 
-	ppgtt->cleanup(ppgtt);
+	ppgtt->base.cleanup(&ppgtt->base);
 	dev_priv->mm.aliasing_ppgtt = NULL;
 }
 
@@ -377,17 +409,17 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
 			    enum i915_cache_level cache_level)
 {
-	ppgtt->insert_entries(ppgtt, obj->pages,
-			      i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-			      cache_level);
+	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
+				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
+				   cache_level);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
 			      struct drm_i915_gem_object *obj)
 {
-	ppgtt->clear_range(ppgtt,
-			   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-			   obj->base.size >> PAGE_SHIFT);
+	ppgtt->base.clear_range(&ppgtt->base,
+				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
+				obj->base.size >> PAGE_SHIFT);
 }
 
 extern int intel_iommu_gfx_mapped;
@@ -434,8 +466,9 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 	struct drm_i915_gem_object *obj;
 
 	/* First fill our portion of the GTT with scratch pages */
-	dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
-				      dev_priv->gtt.total / PAGE_SIZE);
+	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				       dev_priv->gtt.base.start / PAGE_SIZE,
+				       dev_priv->gtt.base.total / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		i915_gem_clflush_object(obj);
@@ -464,12 +497,12 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
  * within the global GTT as well as accessible by the GPU through the GMADR
  * mapped BAR (dev_priv->mm.gtt->gtt).
  */
-static void gen6_ggtt_insert_entries(struct drm_device *dev,
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int first_entry,
 				     enum i915_cache_level level)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen6_gtt_pte_t __iomem *gtt_entries =
 		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int i = 0;
@@ -478,8 +511,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(dev_priv->gtt.pte_encode(addr, level),
-			  &gtt_entries[i]);
+		iowrite32(vm->pte_encode(addr, level), &gtt_entries[i]);
 		i++;
 	}
 
@@ -490,8 +522,8 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1])
-			!= dev_priv->gtt.pte_encode(addr, level));
+		WARN_ON(readl(&gtt_entries[i-1]) !=
+			vm->pte_encode(addr, level));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -501,11 +533,11 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
 
-static void gen6_ggtt_clear_range(struct drm_device *dev,
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  unsigned int first_entry,
 				  unsigned int num_entries)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
 		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
@@ -516,15 +548,14 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = dev_priv->gtt.pte_encode(dev_priv->gtt.scratch.addr,
-					       I915_CACHE_LLC);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
 }
 
 
-static void i915_ggtt_insert_entries(struct drm_device *dev,
+static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int pg_start,
 				     enum i915_cache_level cache_level)
@@ -536,7 +567,7 @@ static void i915_ggtt_insert_entries(struct drm_device *dev,
 
 }
 
-static void i915_ggtt_clear_range(struct drm_device *dev,
+static void i915_ggtt_clear_range(struct i915_address_space *vm,
 				  unsigned int first_entry,
 				  unsigned int num_entries)
 {
@@ -549,10 +580,11 @@ void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
 
-	dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
-					 i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-					 cache_level);
+	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
+					  entry,
+					  cache_level);
 
 	obj->has_global_gtt_mapping = 1;
 }
@@ -561,10 +593,11 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
 
-	dev_priv->gtt.gtt_clear_range(obj->base.dev,
-				      i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-				      obj->base.size >> PAGE_SHIFT);
+	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				       entry,
+				       obj->base.size >> PAGE_SHIFT);
 
 	obj->has_global_gtt_mapping = 0;
 }
@@ -623,38 +656,42 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 	BUG_ON(mappable_end > end);
 
 	/* Subtract the guard page ... */
-	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
+	drm_mm_init(&dev_priv->gtt.base.mm, start, end - start - PAGE_SIZE);
 	if (!HAS_LLC(dev))
-		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
+		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
 
 	/* Mark any preallocated objects as occupied */
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		struct i915_vma *vma = __i915_gem_obj_to_vma(obj);
 		int ret;
 		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
 			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
 
 		WARN_ON(i915_gem_obj_ggtt_bound(obj));
-		ret = drm_mm_reserve_node(&dev_priv->mm.gtt_space,
-					  &obj->gtt_space);
+		ret = drm_mm_reserve_node(&dev_priv->gtt.base.mm, &vma->node);
 		if (ret)
 			DRM_DEBUG_KMS("Reservation failed\n");
 		obj->has_global_gtt_mapping = 1;
+		list_add(&vma->vma_link, &obj->vma_list);
 	}
 
-	dev_priv->gtt.start = start;
-	dev_priv->gtt.total = end - start;
+	dev_priv->gtt.base.start = start;
+	dev_priv->gtt.base.total = end - start;
 
 	/* Clear any non-preallocated blocks */
-	drm_mm_for_each_hole(entry, &dev_priv->mm.gtt_space,
+	drm_mm_for_each_hole(entry, &dev_priv->gtt.base.mm,
 			     hole_start, hole_end) {
+		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
 			      hole_start, hole_end);
-		dev_priv->gtt.gtt_clear_range(dev, hole_start / PAGE_SIZE,
-					      (hole_end-hole_start) / PAGE_SIZE);
+		dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+					       hole_start / PAGE_SIZE,
+					       count);
 	}
 
 	/* And finally clear the reserved guard page */
-	dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
+	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				       end / PAGE_SIZE - 1, 1);
 }
 
 static bool
@@ -677,7 +714,7 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long gtt_size, mappable_size;
 
-	gtt_size = dev_priv->gtt.total;
+	gtt_size = dev_priv->gtt.base.total;
 	mappable_size = dev_priv->gtt.mappable_end;
 
 	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
@@ -696,7 +733,7 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
 			return;
 
 		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
-		drm_mm_takedown(&dev_priv->mm.gtt_space);
+		drm_mm_takedown(&dev_priv->gtt.base.mm);
 		gtt_size += GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
 	}
 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
@@ -722,8 +759,8 @@ static int setup_scratch_page(struct drm_device *dev)
 #else
 	dma_addr = page_to_phys(page);
 #endif
-	dev_priv->gtt.scratch.page = page;
-	dev_priv->gtt.scratch.addr = dma_addr;
+	dev_priv->gtt.base.scratch.page = page;
+	dev_priv->gtt.base.scratch.addr = dma_addr;
 
 	return 0;
 }
@@ -731,11 +768,13 @@ static int setup_scratch_page(struct drm_device *dev)
 static void teardown_scratch_page(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	set_pages_wb(dev_priv->gtt.scratch.page, 1);
-	pci_unmap_page(dev->pdev, dev_priv->gtt.scratch.addr,
+	struct page *page = dev_priv->gtt.base.scratch.page;
+
+	set_pages_wb(page, 1);
+	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
 		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	put_page(dev_priv->gtt.scratch.page);
-	__free_page(dev_priv->gtt.scratch.page);
+	put_page(page);
+	__free_page(page);
 }
 
 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -798,17 +837,18 @@ static int gen6_gmch_probe(struct drm_device *dev,
 	if (ret)
 		DRM_ERROR("Scratch setup failed\n");
 
-	dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
-	dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;
+	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
+	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
 
 	return ret;
 }
 
-static void gen6_gmch_remove(struct drm_device *dev)
+static void gen6_gmch_remove(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	iounmap(dev_priv->gtt.gsm);
-	teardown_scratch_page(dev_priv->dev);
+
+	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
+	iounmap(gtt->gsm);
+	teardown_scratch_page(vm->dev);
 }
 
 static int i915_gmch_probe(struct drm_device *dev,
@@ -829,13 +869,13 @@ static int i915_gmch_probe(struct drm_device *dev,
 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
-	dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
-	dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
+	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
+	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
 
 	return 0;
 }
 
-static void i915_gmch_remove(struct drm_device *dev)
+static void i915_gmch_remove(struct i915_address_space *vm)
 {
 	intel_gmch_remove();
 }
@@ -848,25 +888,30 @@ int i915_gem_gtt_init(struct drm_device *dev)
 
 	if (INTEL_INFO(dev)->gen <= 5) {
 		gtt->gtt_probe = i915_gmch_probe;
-		gtt->gtt_remove = i915_gmch_remove;
+		gtt->base.cleanup = i915_gmch_remove;
 	} else {
 		gtt->gtt_probe = gen6_gmch_probe;
-		gtt->gtt_remove = gen6_gmch_remove;
-		if (IS_HASWELL(dev))
-			gtt->pte_encode = hsw_pte_encode;
+		gtt->base.cleanup = gen6_gmch_remove;
+		if (IS_HASWELL(dev) && dev_priv->ellc_size)
+			gtt->base.pte_encode = iris_pte_encode;
+		else if (IS_HASWELL(dev))
+			gtt->base.pte_encode = hsw_pte_encode;
 		else if (IS_VALLEYVIEW(dev))
-			gtt->pte_encode = byt_pte_encode;
+			gtt->base.pte_encode = byt_pte_encode;
 		else
-			gtt->pte_encode = gen6_pte_encode;
+			gtt->base.pte_encode = gen6_pte_encode;
 	}
 
-	ret = gtt->gtt_probe(dev, &gtt->total, &gtt->stolen_size,
+	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
 			     &gtt->mappable_base, &gtt->mappable_end);
 	if (ret)
 		return ret;
 
+	gtt->base.dev = dev;
+
 	/* GMADR is the PCI mmio aperture into the global GTT. */
-	DRM_INFO("Memory usable by graphics device = %zdM\n", gtt->total >> 20);
+	DRM_INFO("Memory usable by graphics device = %zdM\n",
+		 gtt->base.total >> 20);
 	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
 	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
 

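One pattern repeated throughout this file is worth calling out: the clear_range/insert_entries/cleanup callbacks now receive a struct i915_address_space *, and implementations recover their enclosing object with container_of(), which is valid because base is embedded by value, not pointed to. A self-contained illustration mirroring the types above (explanatory only, not driver code):

	struct i915_hw_ppgtt {
		struct i915_address_space base;	/* embedded, not a pointer */
		/* ... PPGTT-only members ... */
	};

	static void example_cleanup(struct i915_address_space *vm)
	{
		struct i915_hw_ppgtt *ppgtt =
			container_of(vm, struct i915_hw_ppgtt, base);
		/* equivalent to (struct i915_hw_ppgtt *)
		 * ((char *)vm - offsetof(struct i915_hw_ppgtt, base)) */
	}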
+ 30 - 38
drivers/gpu/drm/i915/i915_gem_stolen.c

@@ -45,45 +45,27 @@
 static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct pci_dev *pdev = dev_priv->bridge_dev;
 	struct resource *r;
 	u32 base;
 
-	/* On the machines I have tested the Graphics Base of Stolen Memory
-	 * is unreliable, so on those compute the base by subtracting the
-	 * stolen memory from the Top of Low Usable DRAM which is where the
-	 * BIOS places the graphics stolen memory.
+	/* Almost universally we can find the Graphics Base of Stolen Memory
+	 * at offset 0x5c in the igfx configuration space. On a few (desktop)
+	 * machines this is also mirrored in the bridge device at different
+	 * locations, or in the MCHBAR. On gen2, the layout is again slightly
+	 * different with the Graphics Segment immediately following Top of
+	 * Memory (or Top of Usable DRAM). Note it appears that TOUD is only
+	 * reported by 865g, so we just use the top of memory as determined
+	 * by the e820 probe.
 	 *
-	 * On gen2, the layout is slightly different with the Graphics Segment
-	 * immediately following Top of Memory (or Top of Usable DRAM). Note
-	 * it appears that TOUD is only reported by 865g, so we just use the
-	 * top of memory as determined by the e820 probe.
-	 *
-	 * XXX gen2 requires an unavailable symbol and 945gm fails with
-	 * its value of TOLUD.
+	 * XXX However gen2 requires an unavailable symbol.
 	 */
 	base = 0;
-	if (IS_VALLEYVIEW(dev)) {
+	if (INTEL_INFO(dev)->gen >= 3) {
+		/* Read Graphics Base of Stolen Memory directly */
 		pci_read_config_dword(dev->pdev, 0x5c, &base);
 		base &= ~((1<<20) - 1);
-	} else if (INTEL_INFO(dev)->gen >= 6) {
-		/* Read Base Data of Stolen Memory Register (BDSM) directly.
-		 * Note that there is also a MCHBAR miror at 0x1080c0 or
-		 * we could use device 2:0x5c instead.
-		*/
-		pci_read_config_dword(pdev, 0xB0, &base);
-		base &= ~4095; /* lower bits used for locking register */
-	} else if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
-		/* Read Graphics Base of Stolen Memory directly */
-		pci_read_config_dword(pdev, 0xA4, &base);
+	} else { /* GEN2 */
 #if 0
-	} else if (IS_GEN3(dev)) {
-		u8 val;
-		/* Stolen is immediately below Top of Low Usable DRAM */
-		pci_read_config_byte(pdev, 0x9c, &val);
-		base = val >> 3 << 27;
-		base -= dev_priv->mm.gtt->stolen_size;
-	} else {
 		/* Stolen is immediately above Top of Memory */
 		base = max_low_pfn_mapped << PAGE_SHIFT;
 #endif
@@ -367,8 +349,10 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 					       u32 size)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	struct drm_mm_node *stolen;
+	struct i915_vma *vma;
 	int ret;
 
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
@@ -409,30 +393,38 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 	if (gtt_offset == I915_GTT_OFFSET_NONE)
 		return obj;
 
+	vma = i915_gem_vma_create(obj, &dev_priv->gtt.base);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_out;
+	}
+
 	/* To simplify the initialisation sequence between KMS and GTT,
 	 * we allow construction of the stolen object prior to
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
 	 */
-	obj->gtt_space.start = gtt_offset;
-	obj->gtt_space.size = size;
-	if (drm_mm_initialized(&dev_priv->mm.gtt_space)) {
-		ret = drm_mm_reserve_node(&dev_priv->mm.gtt_space,
-					  &obj->gtt_space);
+	vma->node.start = gtt_offset;
+	vma->node.size = size;
+	if (drm_mm_initialized(&dev_priv->gtt.base.mm)) {
+		ret = drm_mm_reserve_node(&dev_priv->gtt.base.mm, &vma->node);
 		if (ret) {
 			DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
-			goto unref_out;
+			i915_gem_vma_destroy(vma);
+			goto err_out;
 		}
 	}
 
 	obj->has_global_gtt_mapping = 1;
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	list_add_tail(&obj->mm_list, &vm->inactive_list);
 
 	return obj;
 
-unref_out:
+err_out:
+	drm_mm_remove_node(stolen);
+	kfree(stolen);
 	drm_gem_object_unreference(&obj->base);
 	return NULL;
 }

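On the masking in the simplified gen3+ path above: the Graphics Base of Stolen Memory dword read from config offset 0x5c carries the base in its upper bits, so the low 20 bits (which are not part of the address) are cleared, leaving a 1 MiB-aligned base:

	/* 1 << 20        = 0x00100000  (1 MiB)
	 * (1 << 20) - 1  = 0x000fffff  (low 20 bits set)
	 * ~((1<<20) - 1) = 0xfff00000  (mask keeping the MiB-aligned base)
	 * e.g. a raw readout of 0x7f800001 becomes base = 0x7f800000.
	 */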
+ 5 - 3
drivers/gpu/drm/i915/i915_gpu_error.c

@@ -622,6 +622,7 @@ static struct drm_i915_error_object *
 i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 			     struct intel_ring_buffer *ring)
 {
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	u32 seqno;
 
@@ -641,7 +642,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 	}
 
 	seqno = ring->get_seqno(ring, false);
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
+	list_for_each_entry(obj, &vm->active_list, mm_list) {
 		if (obj->ring != ring)
 			continue;
 
@@ -773,11 +774,12 @@ static void i915_gem_record_rings(struct drm_device *dev,
 static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
 				     struct drm_i915_error_state *error)
 {
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	int i;
 
 	i = 0;
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list)
+	list_for_each_entry(obj, &vm->active_list, mm_list)
 		i++;
 	error->active_bo_count = i;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
@@ -797,7 +799,7 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
 		error->active_bo_count =
 			capture_active_bo(error->active_bo,
 					  error->active_bo_count,
-					  &dev_priv->mm.active_list);
+					  &vm->active_list);
 
 	if (error->pinned_bo)
 		error->pinned_bo_count =

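The capture code above follows the usual two-pass shape: one walk over the list to count, then (elsewhere) an allocation of that size and a second walk to fill it. A generic sketch of the idiom with hypothetical names:

	int count = 0;
	struct item *it;

	list_for_each_entry(it, &src_list, link)
		count++;				/* pass 1: size */

	snap = kcalloc(count, sizeof(*snap), GFP_ATOMIC);
	if (snap) {
		int i = 0;
		list_for_each_entry(it, &src_list, link)
			snap[i++] = snapshot_of(it);	/* pass 2: fill */
	}

The idiom relies on the list not changing between the two walks, which the error-capture context arranges for.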
+ 224 - 328
drivers/gpu/drm/i915/i915_irq.c

@@ -698,18 +698,13 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 static void notify_ring(struct drm_device *dev,
 			struct intel_ring_buffer *ring)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
 	if (ring->obj == NULL)
 		return;
 
 	trace_i915_gem_request_complete(ring, ring->get_seqno(ring, false));
 
 	wake_up_all(&ring->irq_queue);
-	if (i915_enable_hangcheck) {
-		mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-			  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
-	}
+	i915_queue_hangcheck(dev);
 }
 
 static void gen6_pm_rps_work(struct work_struct *work)
 static void gen6_pm_rps_work(struct work_struct *work)
 static void gen6_pm_rps_work(struct work_struct *work)
@@ -817,7 +812,7 @@ static void ivybridge_parity_work(struct work_struct *work)
 
 	mutex_unlock(&dev_priv->dev->struct_mutex);
 
-	parity_event[0] = "L3_PARITY_ERROR=1";
+	parity_event[0] = I915_L3_PARITY_UEVENT "=1";
 	parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
 	parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
 	parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
@@ -849,6 +844,17 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
 	queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work);
 }
 
+static void ilk_gt_irq_handler(struct drm_device *dev,
+			       struct drm_i915_private *dev_priv,
+			       u32 gt_iir)
+{
+	if (gt_iir &
+	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
+		notify_ring(dev, &dev_priv->ring[RCS]);
+	if (gt_iir & ILK_BSD_USER_INTERRUPT)
+		notify_ring(dev, &dev_priv->ring[VCS]);
+}
+
 static void snb_gt_irq_handler(struct drm_device *dev,
 			       struct drm_i915_private *dev_priv,
 			       u32 gt_iir)
@@ -913,6 +919,10 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev,
 	spin_lock(&dev_priv->irq_lock);
 	for (i = 1; i < HPD_NUM_PINS; i++) {
 
+		WARN(((hpd[i] & hotplug_trigger) &&
+		      dev_priv->hpd_stats[i].hpd_mark != HPD_ENABLED),
+		     "Received HPD interrupt although disabled\n");
+
 		if (!(hpd[i] & hotplug_trigger) ||
 		    dev_priv->hpd_stats[i].hpd_mark != HPD_ENABLED)
 			continue;
@@ -923,6 +933,7 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev,
 				   + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD))) {
 			dev_priv->hpd_stats[i].hpd_last_jiffies = jiffies;
 			dev_priv->hpd_stats[i].hpd_cnt = 0;
+			DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: 0\n", i);
 		} else if (dev_priv->hpd_stats[i].hpd_cnt > HPD_STORM_THRESHOLD) {
 			dev_priv->hpd_stats[i].hpd_mark = HPD_MARK_DISABLED;
 			dev_priv->hpd_event_bits &= ~(1 << i);
@@ -930,6 +941,8 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev,
 			storm_detected = true;
 		} else {
 			dev_priv->hpd_stats[i].hpd_cnt++;
+			DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: %d\n", i,
+				      dev_priv->hpd_stats[i].hpd_cnt);
 		}
 	}
 
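Stripped of driver state, the storm bookkeeping these debug messages instrument is a simple windowed counter per HPD pin: a quiet period resets the count, interrupts inside the HPD_STORM_DETECT_PERIOD window increment it, and exceeding HPD_STORM_THRESHOLD flags a storm. Roughly (an illustrative restatement, not the driver's exact code):

	if (time_is_before_jiffies(last_jiffies +
				   msecs_to_jiffies(HPD_STORM_DETECT_PERIOD))) {
		last_jiffies = jiffies;
		cnt = 0;			/* quiet period: new window */
	} else if (cnt > HPD_STORM_THRESHOLD) {
		mark = HPD_MARK_DISABLED;	/* storm: stop trusting the pin */
	} else {
		cnt++;				/* same window: count the IRQ */
	}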
@@ -1202,27 +1215,111 @@ static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir)
 		cpt_serr_int_handler(dev);
 }
 
-static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
+static void ilk_display_irq_handler(struct drm_device *dev, u32 de_iir)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (de_iir & DE_AUX_CHANNEL_A)
+		dp_aux_irq_handler(dev);
+
+	if (de_iir & DE_GSE)
+		intel_opregion_asle_intr(dev);
+
+	if (de_iir & DE_PIPEA_VBLANK)
+		drm_handle_vblank(dev, 0);
+
+	if (de_iir & DE_PIPEB_VBLANK)
+		drm_handle_vblank(dev, 1);
+
+	if (de_iir & DE_POISON)
+		DRM_ERROR("Poison interrupt\n");
+
+	if (de_iir & DE_PIPEA_FIFO_UNDERRUN)
+		if (intel_set_cpu_fifo_underrun_reporting(dev, PIPE_A, false))
+			DRM_DEBUG_DRIVER("Pipe A FIFO underrun\n");
+
+	if (de_iir & DE_PIPEB_FIFO_UNDERRUN)
+		if (intel_set_cpu_fifo_underrun_reporting(dev, PIPE_B, false))
+			DRM_DEBUG_DRIVER("Pipe B FIFO underrun\n");
+
+	if (de_iir & DE_PLANEA_FLIP_DONE) {
+		intel_prepare_page_flip(dev, 0);
+		intel_finish_page_flip_plane(dev, 0);
+	}
+
+	if (de_iir & DE_PLANEB_FLIP_DONE) {
+		intel_prepare_page_flip(dev, 1);
+		intel_finish_page_flip_plane(dev, 1);
+	}
+
+	/* check event from PCH */
+	if (de_iir & DE_PCH_EVENT) {
+		u32 pch_iir = I915_READ(SDEIIR);
+
+		if (HAS_PCH_CPT(dev))
+			cpt_irq_handler(dev, pch_iir);
+		else
+			ibx_irq_handler(dev, pch_iir);
+
+		/* should clear PCH hotplug event before clear CPU irq */
+		I915_WRITE(SDEIIR, pch_iir);
+	}
+
+	if (IS_GEN5(dev) && de_iir & DE_PCU_EVENT)
+		ironlake_rps_change_irq_handler(dev);
+}
+
+static void ivb_display_irq_handler(struct drm_device *dev, u32 de_iir)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int i;
+
+	if (de_iir & DE_ERR_INT_IVB)
+		ivb_err_int_handler(dev);
+
+	if (de_iir & DE_AUX_CHANNEL_A_IVB)
+		dp_aux_irq_handler(dev);
+
+	if (de_iir & DE_GSE_IVB)
+		intel_opregion_asle_intr(dev);
+
+	for (i = 0; i < 3; i++) {
+		if (de_iir & (DE_PIPEA_VBLANK_IVB << (5 * i)))
+			drm_handle_vblank(dev, i);
+		if (de_iir & (DE_PLANEA_FLIP_DONE_IVB << (5 * i))) {
+			intel_prepare_page_flip(dev, i);
+			intel_finish_page_flip_plane(dev, i);
+		}
+	}
+
+	/* check event from PCH */
+	if (!HAS_PCH_NOP(dev) && (de_iir & DE_PCH_EVENT_IVB)) {
+		u32 pch_iir = I915_READ(SDEIIR);
+
+		cpt_irq_handler(dev, pch_iir);
+
+		/* clear PCH hotplug event before clear CPU irq */
+		I915_WRITE(SDEIIR, pch_iir);
+	}
+}
+
+static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier = 0;
+	u32 de_iir, gt_iir, de_ier, sde_ier = 0;
 	irqreturn_t ret = IRQ_NONE;
-	int i;
 
 	atomic_inc(&dev_priv->irq_received);
 
 	/* We get interrupts on unclaimed registers, so check for this before we
 	 * do any I915_{READ,WRITE}. */
-	if (IS_HASWELL(dev) &&
-	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unclaimed register before interrupt\n");
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-	}
+	intel_uncore_check_errors(dev);
 
 	/* disable master interrupt before clearing iir  */
 	de_ier = I915_READ(DEIER);
 	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
+	POSTING_READ(DEIER);
 
 	/* Disable south interrupts. We'll only write to SDEIIR once, so further
 	 * interrupts will will be stored on its back queue, and then we'll be
@@ -1246,53 +1343,34 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
 
 	gt_iir = I915_READ(GTIIR);
 	if (gt_iir) {
-		snb_gt_irq_handler(dev, dev_priv, gt_iir);
+		if (INTEL_INFO(dev)->gen >= 6)
+			snb_gt_irq_handler(dev, dev_priv, gt_iir);
+		else
+			ilk_gt_irq_handler(dev, dev_priv, gt_iir);
 		I915_WRITE(GTIIR, gt_iir);
 		ret = IRQ_HANDLED;
 	}
 
 	de_iir = I915_READ(DEIIR);
 	if (de_iir) {
-		if (de_iir & DE_ERR_INT_IVB)
-			ivb_err_int_handler(dev);
-
-		if (de_iir & DE_AUX_CHANNEL_A_IVB)
-			dp_aux_irq_handler(dev);
-
-		if (de_iir & DE_GSE_IVB)
-			intel_opregion_asle_intr(dev);
-
-		for (i = 0; i < 3; i++) {
-			if (de_iir & (DE_PIPEA_VBLANK_IVB << (5 * i)))
-				drm_handle_vblank(dev, i);
-			if (de_iir & (DE_PLANEA_FLIP_DONE_IVB << (5 * i))) {
-				intel_prepare_page_flip(dev, i);
-				intel_finish_page_flip_plane(dev, i);
-			}
-		}
-
-		/* check event from PCH */
-		if (!HAS_PCH_NOP(dev) && (de_iir & DE_PCH_EVENT_IVB)) {
-			u32 pch_iir = I915_READ(SDEIIR);
-
-			cpt_irq_handler(dev, pch_iir);
-
-			/* clear PCH hotplug event before clear CPU irq */
-			I915_WRITE(SDEIIR, pch_iir);
-		}
-
+		if (INTEL_INFO(dev)->gen >= 7)
+			ivb_display_irq_handler(dev, de_iir);
+		else
+			ilk_display_irq_handler(dev, de_iir);
 		I915_WRITE(DEIIR, de_iir);
 		ret = IRQ_HANDLED;
 	}
 
-	pm_iir = I915_READ(GEN6_PMIIR);
-	if (pm_iir) {
-		if (IS_HASWELL(dev))
-			hsw_pm_irq_handler(dev_priv, pm_iir);
-		else if (pm_iir & GEN6_PM_RPS_EVENTS)
-			gen6_rps_irq_handler(dev_priv, pm_iir);
-		I915_WRITE(GEN6_PMIIR, pm_iir);
-		ret = IRQ_HANDLED;
+	if (INTEL_INFO(dev)->gen >= 6) {
+		u32 pm_iir = I915_READ(GEN6_PMIIR);
+		if (pm_iir) {
+			if (IS_HASWELL(dev))
+				hsw_pm_irq_handler(dev_priv, pm_iir);
+			else if (pm_iir & GEN6_PM_RPS_EVENTS)
+				gen6_rps_irq_handler(dev_priv, pm_iir);
+			I915_WRITE(GEN6_PMIIR, pm_iir);
+			ret = IRQ_HANDLED;
+		}
 	}
 
 	if (IS_HASWELL(dev)) {
 	return ret;
 	return ret;
 }
 
-			       struct drm_i915_private *dev_priv,
-			       u32 gt_iir)
-{
-	if (gt_iir &
-	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-		notify_ring(dev, &dev_priv->ring[RCS]);
-	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		notify_ring(dev, &dev_priv->ring[VCS]);
-}
-
-static irqreturn_t ironlake_irq_handler(int irq, void *arg)
-{
-	struct drm_device *dev = (struct drm_device *) arg;
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	int ret = IRQ_NONE;
-	u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier;
-
-	atomic_inc(&dev_priv->irq_received);
-
-	/* disable master interrupt before clearing iir  */
-	de_ier = I915_READ(DEIER);
-	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
-	POSTING_READ(DEIER);
-
-	/* Disable south interrupts. We'll only write to SDEIIR once, so further
-	 * interrupts will will be stored on its back queue, and then we'll be
-	 * able to process them after we restore SDEIER (as soon as we restore
-	 * it, we'll get an interrupt if SDEIIR still has something to process
-	 * due to its back queue). */
-	sde_ier = I915_READ(SDEIER);
-	I915_WRITE(SDEIER, 0);
-	POSTING_READ(SDEIER);
-
-	de_iir = I915_READ(DEIIR);
-	gt_iir = I915_READ(GTIIR);
-	pm_iir = I915_READ(GEN6_PMIIR);
-
-	if (de_iir == 0 && gt_iir == 0 && (!IS_GEN6(dev) || pm_iir == 0))
-		goto done;
-
-	ret = IRQ_HANDLED;
-
-	if (IS_GEN5(dev))
-		ilk_gt_irq_handler(dev, dev_priv, gt_iir);
-	else
-		snb_gt_irq_handler(dev, dev_priv, gt_iir);
-
-	if (de_iir & DE_AUX_CHANNEL_A)
-		dp_aux_irq_handler(dev);
-
-	if (de_iir & DE_GSE)
-		intel_opregion_asle_intr(dev);
-
-	if (de_iir & DE_PIPEA_VBLANK)
-		drm_handle_vblank(dev, 0);
-
-	if (de_iir & DE_PIPEB_VBLANK)
-		drm_handle_vblank(dev, 1);
-
-	if (de_iir & DE_POISON)
-		DRM_ERROR("Poison interrupt\n");
-
-	if (de_iir & DE_PIPEA_FIFO_UNDERRUN)
-		if (intel_set_cpu_fifo_underrun_reporting(dev, PIPE_A, false))
-			DRM_DEBUG_DRIVER("Pipe A FIFO underrun\n");
-
-	if (de_iir & DE_PIPEB_FIFO_UNDERRUN)
-		if (intel_set_cpu_fifo_underrun_reporting(dev, PIPE_B, false))
-			DRM_DEBUG_DRIVER("Pipe B FIFO underrun\n");
-
-	if (de_iir & DE_PLANEA_FLIP_DONE) {
-		intel_prepare_page_flip(dev, 0);
-		intel_finish_page_flip_plane(dev, 0);
-	}
-
-	if (de_iir & DE_PLANEB_FLIP_DONE) {
-		intel_prepare_page_flip(dev, 1);
-		intel_finish_page_flip_plane(dev, 1);
-	}
-
-	/* check event from PCH */
-	if (de_iir & DE_PCH_EVENT) {
-		u32 pch_iir = I915_READ(SDEIIR);
-
-		if (HAS_PCH_CPT(dev))
-			cpt_irq_handler(dev, pch_iir);
-		else
-			ibx_irq_handler(dev, pch_iir);
-
-		/* should clear PCH hotplug event before clear CPU irq */
-		I915_WRITE(SDEIIR, pch_iir);
-	}
-
-	if (IS_GEN5(dev) &&  de_iir & DE_PCU_EVENT)
-		ironlake_rps_change_irq_handler(dev);
-
-	if (IS_GEN6(dev) && pm_iir & GEN6_PM_RPS_EVENTS)
-		gen6_rps_irq_handler(dev_priv, pm_iir);
-
-	I915_WRITE(GTIIR, gt_iir);
-	I915_WRITE(DEIIR, de_iir);
-	I915_WRITE(GEN6_PMIIR, pm_iir);
-
-done:
-	I915_WRITE(DEIER, de_ier);
-	POSTING_READ(DEIER);
-	I915_WRITE(SDEIER, sde_ier);
-	POSTING_READ(SDEIER);
-
-	return ret;
-}
-
 /**
  * i915_error_work_func - do process context error handling work
  * @work: work struct
@@ -1440,9 +1405,9 @@ static void i915_error_work_func(struct work_struct *work)
 						    gpu_error);
 	struct drm_device *dev = dev_priv->dev;
 	struct intel_ring_buffer *ring;
-	char *error_event[] = { "ERROR=1", NULL };
-	char *reset_event[] = { "RESET=1", NULL };
-	char *reset_done_event[] = { "ERROR=0", NULL };
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
 	int i, ret;
 
 	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
@@ -1696,29 +1661,14 @@ static int ironlake_enable_vblank(struct drm_device *dev, int pipe)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
+	uint32_t bit = (INTEL_INFO(dev)->gen >= 7) ? DE_PIPE_VBLANK_IVB(pipe) :
+						     DE_PIPE_VBLANK_ILK(pipe);
 
 	if (!i915_pipe_enabled(dev, pipe))
 		return -EINVAL;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_enable_display_irq(dev_priv, (pipe == 0) ?
-				    DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
-
-	return 0;
-}
-
-static int ivybridge_enable_vblank(struct drm_device *dev, int pipe)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	unsigned long irqflags;
-
-	if (!i915_pipe_enabled(dev, pipe))
-		return -EINVAL;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_enable_display_irq(dev_priv,
-				    DE_PIPEA_VBLANK_IVB << (5 * pipe));
+	ironlake_enable_display_irq(dev_priv, bit);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 
 	return 0;
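DE_PIPE_VBLANK_IVB() and DE_PIPE_VBLANK_ILK() are defined in i915_reg.h, outside this excerpt. Judging from the open-coded expressions they replace, the IVB variant presumably reduces to the old per-pipe shift; something along these lines (assumed shapes, not the verbatim definitions):

	#define DE_PIPE_VBLANK_IVB(pipe)	(DE_PIPEA_VBLANK_IVB << (5 * (pipe)))
	#define DE_PIPE_VBLANK_ILK(pipe)	((pipe) == 0 ? DE_PIPEA_VBLANK : \
						 DE_PIPEB_VBLANK)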
@@ -1769,21 +1719,11 @@ static void ironlake_disable_vblank(struct drm_device *dev, int pipe)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
+	uint32_t bit = (INTEL_INFO(dev)->gen >= 7) ? DE_PIPE_VBLANK_IVB(pipe) :
+						     DE_PIPE_VBLANK_ILK(pipe);
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_disable_display_irq(dev_priv, (pipe == 0) ?
-				     DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
-}
-
-static void ivybridge_disable_vblank(struct drm_device *dev, int pipe)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	unsigned long irqflags;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_disable_display_irq(dev_priv,
-				     DE_PIPEA_VBLANK_IVB << (pipe * 5));
+	ironlake_disable_display_irq(dev_priv, bit);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
@@ -2030,9 +1970,17 @@ void i915_hangcheck_elapsed(unsigned long data)
 	if (busy_count)
 		/* Reset timer case chip hangs without another request
 		 * being added */
-		mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-			  round_jiffies_up(jiffies +
-					   DRM_I915_HANGCHECK_JIFFIES));
+		i915_queue_hangcheck(dev);
+}
+
+void i915_queue_hangcheck(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	if (!i915_enable_hangcheck)
+		return;
+
+	mod_timer(&dev_priv->gpu_error.hangcheck_timer,
+		  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
 }
 
 static void ibx_irq_preinstall(struct drm_device *dev)
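With i915_queue_hangcheck() in place, the i915_enable_hangcheck module parameter is consulted in exactly one spot and every re-arm site shrinks to a single call; notify_ring() earlier in this diff is the other caller. A hypothetical further user would look like:

	static void example_after_submission(struct drm_device *dev)
	{
		/* ... request queued to the ring ... */
		i915_queue_hangcheck(dev);	/* no-op if i915_enable_hangcheck=0 */
	}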
@@ -2054,31 +2002,26 @@ static void ibx_irq_preinstall(struct drm_device *dev)
 	POSTING_READ(SDEIER);
 }
 
-/* drm_dma.h hooks
-*/
-static void ironlake_irq_preinstall(struct drm_device *dev)
+static void gen5_gt_irq_preinstall(struct drm_device *dev)
 {
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-
-	atomic_set(&dev_priv->irq_received, 0);
-
-	I915_WRITE(HWSTAM, 0xeffe);
-
-	/* XXX hotplug from PCH */
-
-	I915_WRITE(DEIMR, 0xffffffff);
-	I915_WRITE(DEIER, 0x0);
-	POSTING_READ(DEIER);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* and GT */
 	I915_WRITE(GTIMR, 0xffffffff);
 	I915_WRITE(GTIER, 0x0);
 	POSTING_READ(GTIER);
 
-	ibx_irq_preinstall(dev);
+	if (INTEL_INFO(dev)->gen >= 6) {
+		/* and PM */
+		I915_WRITE(GEN6_PMIMR, 0xffffffff);
+		I915_WRITE(GEN6_PMIER, 0x0);
+		POSTING_READ(GEN6_PMIER);
+	}
 }
 
-static void ivybridge_irq_preinstall(struct drm_device *dev)
+/* drm_dma.h hooks
+*/
+static void ironlake_irq_preinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 
@@ -2086,21 +2029,11 @@ static void ivybridge_irq_preinstall(struct drm_device *dev)
 
 	I915_WRITE(HWSTAM, 0xeffe);
 
-	/* XXX hotplug from PCH */
-
 	I915_WRITE(DEIMR, 0xffffffff);
 	I915_WRITE(DEIER, 0x0);
 	POSTING_READ(DEIER);
 
-	/* and GT */
-	I915_WRITE(GTIMR, 0xffffffff);
-	I915_WRITE(GTIER, 0x0);
-	POSTING_READ(GTIER);
-
-	/* Power management */
-	I915_WRITE(GEN6_PMIMR, 0xffffffff);
-	I915_WRITE(GEN6_PMIER, 0x0);
-	POSTING_READ(GEN6_PMIER);
+	gen5_gt_irq_preinstall(dev);
 
 	ibx_irq_preinstall(dev);
 }
@@ -2121,9 +2054,8 @@ static void valleyview_irq_preinstall(struct drm_device *dev)
 	/* and GT */
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, 0xffffffff);
-	I915_WRITE(GTIER, 0x0);
-	POSTING_READ(GTIER);
+
+	gen5_gt_irq_preinstall(dev);
 
 	I915_WRITE(DPINVGTT, 0xff);
 
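
All of the preinstall paths now share gen5_gt_irq_preinstall(), which programs the classic mask/enable register pair: IMR all ones (everything masked), IER zero (nothing enabled), then a posting read to flush the writes. A toy model of how such an IMR/IER pair gates a source bit (conceptual only; the real hardware semantics have more detail):

#include <stdint.h>
#include <stdio.h>

struct irq_regs { uint32_t imr, ier; };

/* A source bit can raise an interrupt only if it is enabled in IER and
 * not masked in IMR. */
static int fires(const struct irq_regs *r, uint32_t source_bit)
{
	return (source_bit & r->ier & ~r->imr) != 0;
}

int main(void)
{
	struct irq_regs gt = { .imr = 0xffffffff, .ier = 0x0 }; /* preinstall */

	printf("after preinstall: %d\n", fires(&gt, 1u << 0));
	gt.imr = ~(1u << 0);	/* postinstall unmasks... */
	gt.ier = 1u << 0;	/* ...and enables the bit */
	printf("after postinstall: %d\n", fires(&gt, 1u << 0));
	return 0;
}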
@@ -2193,120 +2125,99 @@ static void ibx_irq_postinstall(struct drm_device *dev)
 	I915_WRITE(SDEIMR, ~mask);
 }
 
-static int ironlake_irq_postinstall(struct drm_device *dev)
+static void gen5_gt_irq_postinstall(struct drm_device *dev)
 {
-	unsigned long irqflags;
-
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	/* enable kind of interrupts always enabled */
-	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
-			   DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE |
-			   DE_AUX_CHANNEL_A | DE_PIPEB_FIFO_UNDERRUN |
-			   DE_PIPEA_FIFO_UNDERRUN | DE_POISON;
-	u32 gt_irqs;
-
-	dev_priv->irq_mask = ~display_mask;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 pm_irqs, gt_irqs;
 
-	/* should always can generate irq */
-	I915_WRITE(DEIIR, I915_READ(DEIIR));
-	I915_WRITE(DEIMR, dev_priv->irq_mask);
-	I915_WRITE(DEIER, display_mask |
-			  DE_PIPEA_VBLANK | DE_PIPEB_VBLANK | DE_PCU_EVENT);
-	POSTING_READ(DEIER);
+	pm_irqs = gt_irqs = 0;
 
 	dev_priv->gt_irq_mask = ~0;
+	if (HAS_L3_GPU_CACHE(dev)) {
+		/* L3 parity interrupt is always unmasked. */
+		dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+		gt_irqs |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+	}
 
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-
-	gt_irqs = GT_RENDER_USER_INTERRUPT;
-
-	if (IS_GEN6(dev))
-		gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
-	else
+	gt_irqs |= GT_RENDER_USER_INTERRUPT;
+	if (IS_GEN5(dev)) {
 		gt_irqs |= GT_RENDER_PIPECTL_NOTIFY_INTERRUPT |
 			   ILK_BSD_USER_INTERRUPT;
+	} else {
+		gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
+	}
 
+	I915_WRITE(GTIIR, I915_READ(GTIIR));
+	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 	I915_WRITE(GTIER, gt_irqs);
 	POSTING_READ(GTIER);
 
-	ibx_irq_postinstall(dev);
+	if (INTEL_INFO(dev)->gen >= 6) {
+		pm_irqs |= GEN6_PM_RPS_EVENTS;
 
-	if (IS_IRONLAKE_M(dev)) {
-		/* Enable PCU event interrupts
-		 *
-		 * spinlocking not required here for correctness since interrupt
-		 * setup is guaranteed to run in single-threaded context. But we
-		 * need it to make the assert_spin_locked happy. */
-		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-		ironlake_enable_display_irq(dev_priv, DE_PCU_EVENT);
-		spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
-	}
+		if (HAS_VEBOX(dev))
+			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
-	return 0;
+		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
+		I915_WRITE(GEN6_PMIMR, 0xffffffff);
+		I915_WRITE(GEN6_PMIER, pm_irqs);
+		POSTING_READ(GEN6_PMIER);
+	}
 }
 
-static int ivybridge_irq_postinstall(struct drm_device *dev)
+static int ironlake_irq_postinstall(struct drm_device *dev)
 {
+	unsigned long irqflags;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	/* enable kind of interrupts always enabled */
-	u32 display_mask =
-		DE_MASTER_IRQ_CONTROL | DE_GSE_IVB | DE_PCH_EVENT_IVB |
-		DE_PLANEC_FLIP_DONE_IVB |
-		DE_PLANEB_FLIP_DONE_IVB |
-		DE_PLANEA_FLIP_DONE_IVB |
-		DE_AUX_CHANNEL_A_IVB |
-		DE_ERR_INT_IVB;
-	u32 pm_irqs = GEN6_PM_RPS_EVENTS;
-	u32 gt_irqs;
+	u32 display_mask, extra_mask;
+
+	if (INTEL_INFO(dev)->gen >= 7) {
+		display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE_IVB |
+				DE_PCH_EVENT_IVB | DE_PLANEC_FLIP_DONE_IVB |
+				DE_PLANEB_FLIP_DONE_IVB |
+				DE_PLANEA_FLIP_DONE_IVB | DE_AUX_CHANNEL_A_IVB |
+				DE_ERR_INT_IVB);
+		extra_mask = (DE_PIPEC_VBLANK_IVB | DE_PIPEB_VBLANK_IVB |
+			      DE_PIPEA_VBLANK_IVB);
+
+		I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
+	} else {
+		display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
+				DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE |
+				DE_AUX_CHANNEL_A | DE_PIPEB_FIFO_UNDERRUN |
+				DE_PIPEA_FIFO_UNDERRUN | DE_POISON);
+		extra_mask = DE_PIPEA_VBLANK | DE_PIPEB_VBLANK | DE_PCU_EVENT;
+	}
 
 	dev_priv->irq_mask = ~display_mask;
 
 	/* should always can generate irq */
-	I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
 	I915_WRITE(DEIIR, I915_READ(DEIIR));
 	I915_WRITE(DEIMR, dev_priv->irq_mask);
-	I915_WRITE(DEIER,
-		   display_mask |
-		   DE_PIPEC_VBLANK_IVB |
-		   DE_PIPEB_VBLANK_IVB |
-		   DE_PIPEA_VBLANK_IVB);
+	I915_WRITE(DEIER, display_mask | extra_mask);
 	POSTING_READ(DEIER);
 
-	dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-
-	gt_irqs = GT_RENDER_USER_INTERRUPT | GT_BSD_USER_INTERRUPT |
-		  GT_BLT_USER_INTERRUPT | GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-	I915_WRITE(GTIER, gt_irqs);
-	POSTING_READ(GTIER);
-
-	I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
-	if (HAS_VEBOX(dev))
-		pm_irqs |= PM_VEBOX_USER_INTERRUPT;
-
-	/* Our enable/disable rps functions may touch these registers so
-	 * make sure to set a known state for only the non-RPS bits.
-	 * The RMW is extra paranoia since this should be called after being set
-	 * to a known state in preinstall.
-	 * */
-	I915_WRITE(GEN6_PMIMR,
-		   (I915_READ(GEN6_PMIMR) | ~GEN6_PM_RPS_EVENTS) & ~pm_irqs);
-	I915_WRITE(GEN6_PMIER,
-		   (I915_READ(GEN6_PMIER) & GEN6_PM_RPS_EVENTS) | pm_irqs);
-	POSTING_READ(GEN6_PMIER);
+	gen5_gt_irq_postinstall(dev);
 
 	ibx_irq_postinstall(dev);
 
+	if (IS_IRONLAKE_M(dev)) {
+		/* Enable PCU event interrupts
+		 *
+		 * spinlocking not required here for correctness since interrupt
+		 * setup is guaranteed to run in single-threaded context. But we
+		 * need it to make the assert_spin_locked happy. */
+		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+		ironlake_enable_display_irq(dev_priv, DE_PCU_EVENT);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+	}
+
 	return 0;
 }
 
 static int valleyview_irq_postinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u32 gt_irqs;
 	u32 enable_mask;
 	u32 pipestat_enable = PLANE_FLIP_DONE_INT_EN_VLV;
 	unsigned long irqflags;
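
gen5_gt_irq_postinstall() builds its enable masks incrementally from generation and feature checks instead of hardcoding a value per platform. A standalone sketch of the GT half of that assembly; the bit values below are stand-ins for illustration, not the real GTIER layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RENDER_USER	(1u << 0)	/* stand-in bit values */
#define PIPECTL_NOTIFY	(1u << 4)
#define ILK_BSD_USER	(1u << 5)
#define BSD_USER	(1u << 12)
#define BLT_USER	(1u << 22)
#define L3_PARITY	(1u << 27)

static uint32_t build_gt_irqs(int gen, bool has_l3_gpu_cache)
{
	uint32_t gt_irqs = 0;

	if (has_l3_gpu_cache)	/* L3 parity is also left unmasked */
		gt_irqs |= L3_PARITY;
	gt_irqs |= RENDER_USER;
	if (gen == 5)		/* ILK: pipectl notify + old BSD bit */
		gt_irqs |= PIPECTL_NOTIFY | ILK_BSD_USER;
	else			/* SNB+: BSD and BLT ring user interrupts */
		gt_irqs |= BSD_USER | BLT_USER;
	return gt_irqs;
}

int main(void)
{
	printf("gen5: 0x%08x\n", build_gt_irqs(5, false));
	printf("gen7: 0x%08x\n", build_gt_irqs(7, true));
	return 0;
}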
@@ -2346,13 +2257,7 @@ static int valleyview_irq_postinstall(struct drm_device *dev)
 	I915_WRITE(VLV_IIR, 0xffffffff);
 	I915_WRITE(VLV_IIR, 0xffffffff);
 
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-
-	gt_irqs = GT_RENDER_USER_INTERRUPT | GT_BSD_USER_INTERRUPT |
-		GT_BLT_USER_INTERRUPT;
-	I915_WRITE(GTIER, gt_irqs);
-	POSTING_READ(GTIER);
+	gen5_gt_irq_postinstall(dev);
 
 	/* ack & enable invalid PTE error interrupts */
 #if 0 /* FIXME: add support to irq handler for checking these bits */
@@ -3118,15 +3023,6 @@ void intel_irq_init(struct drm_device *dev)
 		dev->driver->enable_vblank = valleyview_enable_vblank;
 		dev->driver->disable_vblank = valleyview_disable_vblank;
 		dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
-	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
-		/* Share uninstall handlers with ILK/SNB */
-		dev->driver->irq_handler = ivybridge_irq_handler;
-		dev->driver->irq_preinstall = ivybridge_irq_preinstall;
-		dev->driver->irq_postinstall = ivybridge_irq_postinstall;
-		dev->driver->irq_uninstall = ironlake_irq_uninstall;
-		dev->driver->enable_vblank = ivybridge_enable_vblank;
-		dev->driver->disable_vblank = ivybridge_disable_vblank;
-		dev_priv->display.hpd_irq_setup = ibx_hpd_irq_setup;
 	} else if (HAS_PCH_SPLIT(dev)) {
 		dev->driver->irq_handler = ironlake_irq_handler;
 		dev->driver->irq_preinstall = ironlake_irq_preinstall;

drivers/gpu/drm/i915/i915_reg.h (+103 -1)

@@ -61,6 +61,12 @@
 #define   GC_LOW_FREQUENCY_ENABLE	(1 << 7)
 #define   GC_DISPLAY_CLOCK_190_200_MHZ	(0 << 4)
 #define   GC_DISPLAY_CLOCK_333_MHZ	(4 << 4)
+#define   GC_DISPLAY_CLOCK_267_MHZ_PNV	(0 << 4)
+#define   GC_DISPLAY_CLOCK_333_MHZ_PNV	(1 << 4)
+#define   GC_DISPLAY_CLOCK_444_MHZ_PNV	(2 << 4)
+#define   GC_DISPLAY_CLOCK_200_MHZ_PNV	(5 << 4)
+#define   GC_DISPLAY_CLOCK_133_MHZ_PNV	(6 << 4)
+#define   GC_DISPLAY_CLOCK_167_MHZ_PNV	(7 << 4)
 #define   GC_DISPLAY_CLOCK_MASK		(7 << 4)
 #define   GM45_GC_RENDER_CLOCK_MASK	(0xf << 0)
 #define   GM45_GC_RENDER_CLOCK_266_MHZ	(8 << 0)
@@ -1779,6 +1785,71 @@
 #define BCLRPAT(pipe) _PIPE(pipe, _BCLRPAT_A, _BCLRPAT_B)
 #define VSYNCSHIFT(trans) _TRANSCODER(trans, _VSYNCSHIFT_A, _VSYNCSHIFT_B)
 
+/* HSW eDP PSR registers */
+#define EDP_PSR_CTL				0x64800
+#define   EDP_PSR_ENABLE			(1<<31)
+#define   EDP_PSR_LINK_DISABLE			(0<<27)
+#define   EDP_PSR_LINK_STANDBY			(1<<27)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_MASK	(3<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES	(0<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_4_LINES	(1<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_2_LINES	(2<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_0_LINES	(3<<25)
+#define   EDP_PSR_MAX_SLEEP_TIME_SHIFT		20
+#define   EDP_PSR_SKIP_AUX_EXIT			(1<<12)
+#define   EDP_PSR_TP1_TP2_SEL			(0<<11)
+#define   EDP_PSR_TP1_TP3_SEL			(1<<11)
+#define   EDP_PSR_TP2_TP3_TIME_500us		(0<<8)
+#define   EDP_PSR_TP2_TP3_TIME_100us		(1<<8)
+#define   EDP_PSR_TP2_TP3_TIME_2500us		(2<<8)
+#define   EDP_PSR_TP2_TP3_TIME_0us		(3<<8)
+#define   EDP_PSR_TP1_TIME_500us		(0<<4)
+#define   EDP_PSR_TP1_TIME_100us		(1<<4)
+#define   EDP_PSR_TP1_TIME_2500us		(2<<4)
+#define   EDP_PSR_TP1_TIME_0us			(3<<4)
+#define   EDP_PSR_IDLE_FRAME_SHIFT		0
+
+#define EDP_PSR_AUX_CTL			0x64810
+#define EDP_PSR_AUX_DATA1		0x64814
+#define   EDP_PSR_DPCD_COMMAND		0x80060000
+#define EDP_PSR_AUX_DATA2		0x64818
+#define   EDP_PSR_DPCD_NORMAL_OPERATION	(1<<24)
+#define EDP_PSR_AUX_DATA3		0x6481c
+#define EDP_PSR_AUX_DATA4		0x64820
+#define EDP_PSR_AUX_DATA5		0x64824
+
+#define EDP_PSR_STATUS_CTL			0x64840
+#define   EDP_PSR_STATUS_STATE_MASK		(7<<29)
+#define   EDP_PSR_STATUS_STATE_IDLE		(0<<29)
+#define   EDP_PSR_STATUS_STATE_SRDONACK		(1<<29)
+#define   EDP_PSR_STATUS_STATE_SRDENT		(2<<29)
+#define   EDP_PSR_STATUS_STATE_BUFOFF		(3<<29)
+#define   EDP_PSR_STATUS_STATE_BUFON		(4<<29)
+#define   EDP_PSR_STATUS_STATE_AUXACK		(5<<29)
+#define   EDP_PSR_STATUS_STATE_SRDOFFACK	(6<<29)
+#define   EDP_PSR_STATUS_LINK_MASK		(3<<26)
+#define   EDP_PSR_STATUS_LINK_FULL_OFF		(0<<26)
+#define   EDP_PSR_STATUS_LINK_FULL_ON		(1<<26)
+#define   EDP_PSR_STATUS_LINK_STANDBY		(2<<26)
+#define   EDP_PSR_STATUS_MAX_SLEEP_TIMER_SHIFT	20
+#define   EDP_PSR_STATUS_MAX_SLEEP_TIMER_MASK	0x1f
+#define   EDP_PSR_STATUS_COUNT_SHIFT		16
+#define   EDP_PSR_STATUS_COUNT_MASK		0xf
+#define   EDP_PSR_STATUS_AUX_ERROR		(1<<15)
+#define   EDP_PSR_STATUS_AUX_SENDING		(1<<12)
+#define   EDP_PSR_STATUS_SENDING_IDLE		(1<<9)
+#define   EDP_PSR_STATUS_SENDING_TP2_TP3	(1<<8)
+#define   EDP_PSR_STATUS_SENDING_TP1		(1<<4)
+#define   EDP_PSR_STATUS_IDLE_MASK		0xf
+
+#define EDP_PSR_PERF_CNT		0x64844
+#define   EDP_PSR_PERF_CNT_MASK		0xffffff
+
+#define EDP_PSR_DEBUG_CTL		0x64860
+#define   EDP_PSR_DEBUG_MASK_LPSP	(1<<27)
+#define   EDP_PSR_DEBUG_MASK_MEMUP	(1<<26)
+#define   EDP_PSR_DEBUG_MASK_HPD	(1<<25)
+
 /* VGA port control */
 #define ADPA			0x61100
 #define PCH_ADPA                0xe1100
@@ -2048,6 +2119,7 @@
  * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte
  * of the infoframe structure specified by CEA-861. */
 #define   VIDEO_DIP_DATA_SIZE	32
+#define   VIDEO_DIP_VSC_DATA_SIZE	36
 #define VIDEO_DIP_CTL		0x61170
 /* Pre HSW: */
 #define   VIDEO_DIP_ENABLE		(1 << 31)
@@ -2195,6 +2267,8 @@
 #define BLC_PWM_CPU_CTL2	0x48250
 #define BLC_PWM_CPU_CTL		0x48254
 
+#define HSW_BLC_PWM2_CTL	0x48350
+
 /* PCH CTL1 is totally different, all but the below bits are reserved. CTL2 is
  * like the normal CTL from gen4 and earlier. Hooray for confusing naming. */
 #define BLC_PWM_PCH_CTL1	0xc8250
@@ -2203,6 +2277,12 @@
 #define   BLM_PCH_POLARITY			(1 << 29)
 #define BLC_PWM_PCH_CTL2	0xc8254
 
+#define UTIL_PIN_CTL		0x48400
+#define   UTIL_PIN_ENABLE	(1 << 31)
+
+#define PCH_GTC_CTL		0xe7000
+#define   PCH_GTC_ENABLE	(1 << 31)
+
 /* TV port control */
 #define TV_CTL			0x68000
 /** Enables the TV encoder */
@@ -3721,6 +3801,9 @@
 #define DE_PLANEA_FLIP_DONE_IVB		(1<<3)
 #define DE_PIPEA_VBLANK_IVB		(1<<0)
 
+#define DE_PIPE_VBLANK_ILK(pipe)	(1 << ((pipe * 8) + 7))
+#define DE_PIPE_VBLANK_IVB(pipe)	(1 << (pipe * 5))
+
 #define VLV_MASTER_IER			0x4400c /* Gunit master IER */
 #define   MASTER_INTERRUPT_ENABLE	(1<<31)
 
@@ -4084,6 +4167,13 @@
 #define HSW_TVIDEO_DIP_VSC_DATA(trans) \
 	 _TRANSCODER(trans, HSW_VIDEO_DIP_VSC_DATA_A, HSW_VIDEO_DIP_VSC_DATA_B)
 
+#define HSW_STEREO_3D_CTL_A	0x70020
+#define   S3D_ENABLE		(1<<31)
+#define HSW_STEREO_3D_CTL_B	0x71020
+
+#define HSW_STEREO_3D_CTL(trans) \
+	_TRANSCODER(trans, HSW_STEREO_3D_CTL_A, HSW_STEREO_3D_CTL_A)
+
 #define _PCH_TRANS_HTOTAL_B          0xe1000
 #define _PCH_TRANS_HBLANK_B          0xe1004
 #define _PCH_TRANS_HSYNC_B           0xe1008
@@ -4472,6 +4562,10 @@
 #define  GT_FIFO_FREE_ENTRIES			0x120008
 #define    GT_FIFO_NUM_RESERVED_ENTRIES		20
 
+#define  HSW_IDICR				0x9008
+#define    IDIHASHMSK(x)			(((x) & 0x3f) << 16)
+#define  HSW_EDRAM_PRESENT			0x120010
+
 #define GEN6_UCGCTL1				0x9400
 # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
@@ -4862,7 +4956,8 @@
 #define  SBI_SSCAUXDIV6				0x0610
 #define   SBI_SSCAUXDIV_FINALDIV2SEL(x)		((x)<<4)
 #define  SBI_DBUFF0				0x2a00
-#define   SBI_DBUFF0_ENABLE			(1<<0)
+#define  SBI_GEN0				0x1f00
+#define   SBI_GEN0_CFG_BUFFENABLE_DISABLE	(1<<0)
 
 /* LPT PIXCLK_GATE */
 #define PIXCLK_GATE			0xC6020
@@ -4928,7 +5023,14 @@
 #define  LCPLL_CLK_FREQ_450		(0<<26)
 #define  LCPLL_CD_CLOCK_DISABLE		(1<<25)
 #define  LCPLL_CD2X_CLOCK_DISABLE	(1<<23)
+#define  LCPLL_POWER_DOWN_ALLOW		(1<<22)
 #define  LCPLL_CD_SOURCE_FCLK		(1<<21)
+#define  LCPLL_CD_SOURCE_FCLK_DONE	(1<<19)
+
+#define D_COMP				(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
+#define  D_COMP_RCOMP_IN_PROGRESS	(1<<9)
+#define  D_COMP_COMP_FORCE		(1<<8)
+#define  D_COMP_COMP_DISABLE		(1<<0)
 
 /* Pipe WM_LINETIME - watermark line time */
 #define PIPE_WM_LINETIME_A		0x45270

drivers/gpu/drm/i915/i915_trace.h (+5 -3)

@@ -406,10 +406,12 @@ TRACE_EVENT(i915_flip_complete,
 	    TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj)
 	    TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj)
 );
 );
 
 
-TRACE_EVENT(i915_reg_rw,
-	TP_PROTO(bool write, u32 reg, u64 val, int len),
+TRACE_EVENT_CONDITION(i915_reg_rw,
+	TP_PROTO(bool write, u32 reg, u64 val, int len, bool trace),
 
-	TP_ARGS(write, reg, val, len),
+	TP_ARGS(write, reg, val, len, trace),
+
+	TP_CONDITION(trace),
 
 	TP_STRUCT__entry(
 		__field(u64, val)
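
TRACE_EVENT_CONDITION is the stock tracepoint variant that evaluates TP_CONDITION before doing any ring-buffer work, which is what lets the driver's NOTRACE register accessors pass trace=false and skip the event entirely. A userspace model of the effect (the real tracepoint machinery and call sites are assumed):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Model: the event body only runs when the condition is true, i.e.
 * TP_CONDITION(trace) turns the whole tracepoint into a no-op for
 * trace=false callers. */
static void trace_i915_reg_rw(bool write, uint32_t reg, uint64_t val,
			      int len, bool trace)
{
	if (!trace)
		return;
	printf("%s reg 0x%x val 0x%llx len %d\n",
	       write ? "write" : "read", reg,
	       (unsigned long long)val, len);
}

int main(void)
{
	trace_i915_reg_rw(true, 0x2030, 0xdeadbeef, 4, true);	/* logged */
	trace_i915_reg_rw(true, 0x2030, 0xdeadbeef, 4, false);	/* skipped */
	return 0;
}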

drivers/gpu/drm/i915/intel_crt.c (+4 -0)

@@ -613,6 +613,10 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 	enum drm_connector_status status;
 	struct intel_load_detect_pipe tmp;
 
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n",
+		      connector->base.id, drm_get_connector_name(connector),
+		      force);
+
 	if (I915_HAS_HOTPLUG(dev)) {
 		/* We can not rely on the HPD pin always being correctly wired
 		 * up, for example many KVM do not pass it through, and so

drivers/gpu/drm/i915/intel_ddi.c (+8 -4)

@@ -301,7 +301,7 @@ static void intel_ddi_mode_set(struct drm_encoder *encoder,
 		struct intel_digital_port *intel_dig_port =
 			enc_to_dig_port(encoder);
 
-		intel_dp->DP = intel_dig_port->port_reversal |
+		intel_dp->DP = intel_dig_port->saved_port_bits |
 			       DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW;
 		intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count);
 
@@ -1109,7 +1109,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder)
 		 * enabling the port.
 		 */
 		I915_WRITE(DDI_BUF_CTL(port),
-			   intel_dig_port->port_reversal | DDI_BUF_CTL_ENABLE);
+			   intel_dig_port->saved_port_bits |
+			   DDI_BUF_CTL_ENABLE);
 	} else if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
@@ -1117,6 +1118,7 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder)
 			intel_dp_stop_link_train(intel_dp);
 
 		ironlake_edp_backlight_on(intel_dp);
+		intel_edp_psr_enable(intel_dp);
 	}
 
 	if (intel_crtc->eld_vld && type != INTEL_OUTPUT_EDP) {
@@ -1147,6 +1149,7 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder)
 	if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
+		intel_edp_psr_disable(intel_dp);
 		ironlake_edp_backlight_off(intel_dp);
 	}
 }
@@ -1347,8 +1350,9 @@ void intel_ddi_init(struct drm_device *dev, enum port port)
 	intel_encoder->get_config = intel_ddi_get_config;
 
 	intel_dig_port->port = port;
-	intel_dig_port->port_reversal = I915_READ(DDI_BUF_CTL(port)) &
-					DDI_BUF_PORT_REVERSAL;
+	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
+					  (DDI_BUF_PORT_REVERSAL |
+					   DDI_A_4_LANES);
 	intel_dig_port->dp.output_reg = DDI_BUF_CTL(port);
 
 	intel_encoder->type = INTEL_OUTPUT_UNKNOWN;
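
The rename from port_reversal to saved_port_bits reflects that two BIOS-programmed bits must now survive every rewrite of DDI_BUF_CTL, not just the lane-reversal strap. A standalone model of the capture-and-restore pattern; the bit positions are taken from i915_reg.h definitions that are not part of this hunk, so treat them as assumptions:

#include <stdint.h>
#include <stdio.h>

#define DDI_BUF_CTL_ENABLE	(1u << 31)	/* assumed positions */
#define DDI_BUF_PORT_REVERSAL	(1u << 16)
#define DDI_A_4_LANES		(1u << 4)

int main(void)
{
	uint32_t bios_val = DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES;
	/* capture once at init time... */
	uint32_t saved_port_bits =
		bios_val & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES);
	/* ...and fold back in on every enable */
	uint32_t new_val = saved_port_bits | DDI_BUF_CTL_ENABLE;

	printf("DDI_BUF_CTL = 0x%08x (4-lane strap kept: %s)\n", new_val,
	       (new_val & DDI_A_4_LANES) ? "yes" : "no");
	return 0;
}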

drivers/gpu/drm/i915/intel_display.c (+348 -125)

@@ -2274,6 +2274,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	}
 
 	intel_update_fbc(dev);
+	intel_edp_psr_update(dev);
 	mutex_unlock(&dev->struct_mutex);
 
 	intel_crtc_update_sarea_pos(crtc, x, y);
@@ -4162,6 +4163,30 @@ static int i9xx_misc_get_display_clock_speed(struct drm_device *dev)
 	return 200000;
 }
 
+static int pnv_get_display_clock_speed(struct drm_device *dev)
+{
+	u16 gcfgc = 0;
+
+	pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+
+	switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
+	case GC_DISPLAY_CLOCK_267_MHZ_PNV:
+		return 267000;
+	case GC_DISPLAY_CLOCK_333_MHZ_PNV:
+		return 333000;
+	case GC_DISPLAY_CLOCK_444_MHZ_PNV:
+		return 444000;
+	case GC_DISPLAY_CLOCK_200_MHZ_PNV:
+		return 200000;
+	default:
+		DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc);
+	case GC_DISPLAY_CLOCK_133_MHZ_PNV:
+		return 133000;
+	case GC_DISPLAY_CLOCK_167_MHZ_PNV:
+		return 167000;
+	}
+}
+
 static int i915gm_get_display_clock_speed(struct drm_device *dev)
 {
 	u16 gcfgc = 0;
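
pnv_get_display_clock_speed() relies on a legal but easy-to-miss C construct: the default: label sits between case labels and falls through into the 133 MHz case, so an unknown GCFGC value both logs an error and returns a sane fallback. A minimal standalone illustration of the same construct:

#include <stdio.h>

static int decode(int sel)
{
	switch (sel) {
	case 0:
		return 267000;
	case 1:
		return 333000;
	default:
		fprintf(stderr, "unknown selector %d\n", sel);
		/* fall through to the fallback case below */
	case 6:
		return 133000;
	}
}

int main(void)
{
	printf("%d\n", decode(1));	/* 333000 */
	printf("%d\n", decode(42));	/* error message, then 133000 */
	return 0;
}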
@@ -4946,22 +4971,19 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc,
 	uint32_t tmp;
 
 	tmp = I915_READ(PFIT_CONTROL);
+	if (!(tmp & PFIT_ENABLE))
+		return;
 
+	/* Check whether the pfit is attached to our pipe. */
 	if (INTEL_INFO(dev)->gen < 4) {
 		if (crtc->pipe != PIPE_B)
 			return;
-
-		/* gen2/3 store dither state in pfit control, needs to match */
-		pipe_config->gmch_pfit.control = tmp & PANEL_8TO6_DITHER_ENABLE;
 	} else {
 		if ((tmp & PFIT_PIPE_MASK) != (crtc->pipe << PFIT_PIPE_SHIFT))
 			return;
 	}
 
-	if (!(tmp & PFIT_ENABLE))
-		return;
-
-	pipe_config->gmch_pfit.control = I915_READ(PFIT_CONTROL);
+	pipe_config->gmch_pfit.control = tmp;
 	pipe_config->gmch_pfit.pgm_ratios = I915_READ(PFIT_PGM_RATIOS);
 	if (INTEL_INFO(dev)->gen < 5)
 		pipe_config->gmch_pfit.lvds_border_bits =
@@ -5166,74 +5188,37 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
 	BUG_ON(val != final);
 }
 
-/* Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O. */
-static void lpt_init_pch_refclk(struct drm_device *dev)
+static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct intel_encoder *encoder;
-	bool has_vga = false;
-	bool is_sdv = false;
-	u32 tmp;
-
-	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
-		switch (encoder->type) {
-		case INTEL_OUTPUT_ANALOG:
-			has_vga = true;
-			break;
-		}
-	}
-
-	if (!has_vga)
-		return;
-
-	mutex_lock(&dev_priv->dpio_lock);
-
-	/* XXX: Rip out SDV support once Haswell ships for real. */
-	if (IS_HASWELL(dev) && (dev->pci_device & 0xFF00) == 0x0C00)
-		is_sdv = true;
+	uint32_t tmp;
 
-	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
-	tmp &= ~SBI_SSCCTL_DISABLE;
-	tmp |= SBI_SSCCTL_PATHALT;
-	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+	tmp = I915_READ(SOUTH_CHICKEN2);
+	tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
+	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	udelay(24);
+	if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
+			       FDI_MPHY_IOSFSB_RESET_STATUS, 100))
+		DRM_ERROR("FDI mPHY reset assert timeout\n");
 
-	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
-	tmp &= ~SBI_SSCCTL_PATHALT;
-	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+	tmp = I915_READ(SOUTH_CHICKEN2);
+	tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
+	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	if (!is_sdv) {
-		tmp = I915_READ(SOUTH_CHICKEN2);
-		tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
-		I915_WRITE(SOUTH_CHICKEN2, tmp);
-
-		if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
-				       FDI_MPHY_IOSFSB_RESET_STATUS, 100))
-			DRM_ERROR("FDI mPHY reset assert timeout\n");
-
-		tmp = I915_READ(SOUTH_CHICKEN2);
-		tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
-		I915_WRITE(SOUTH_CHICKEN2, tmp);
+	if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
+				FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100))
+		DRM_ERROR("FDI mPHY reset de-assert timeout\n");
+}
 
-		if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
-				        FDI_MPHY_IOSFSB_RESET_STATUS) == 0,
-				       100))
-			DRM_ERROR("FDI mPHY reset de-assert timeout\n");
-	}
+/* WaMPhyProgramming:hsw */
+static void lpt_program_fdi_mphy(struct drm_i915_private *dev_priv)
+{
+	uint32_t tmp;
 
 	tmp = intel_sbi_read(dev_priv, 0x8008, SBI_MPHY);
 	tmp &= ~(0xFF << 24);
 	tmp |= (0x12 << 24);
 	intel_sbi_write(dev_priv, 0x8008, tmp, SBI_MPHY);
 
-	if (is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x800C, SBI_MPHY);
-		tmp |= 0x7FFF;
-		intel_sbi_write(dev_priv, 0x800C, tmp, SBI_MPHY);
-	}
-
 	tmp = intel_sbi_read(dev_priv, 0x2008, SBI_MPHY);
 	tmp |= (1 << 11);
 	intel_sbi_write(dev_priv, 0x2008, tmp, SBI_MPHY);
@@ -5242,24 +5227,6 @@ static void lpt_init_pch_refclk(struct drm_device *dev)
 	tmp |= (1 << 11);
 	intel_sbi_write(dev_priv, 0x2108, tmp, SBI_MPHY);
 
-	if (is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x2038, SBI_MPHY);
-		tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
-		intel_sbi_write(dev_priv, 0x2038, tmp, SBI_MPHY);
-
-		tmp = intel_sbi_read(dev_priv, 0x2138, SBI_MPHY);
-		tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
-		intel_sbi_write(dev_priv, 0x2138, tmp, SBI_MPHY);
-
-		tmp = intel_sbi_read(dev_priv, 0x203C, SBI_MPHY);
-		tmp |= (0x3F << 8);
-		intel_sbi_write(dev_priv, 0x203C, tmp, SBI_MPHY);
-
-		tmp = intel_sbi_read(dev_priv, 0x213C, SBI_MPHY);
-		tmp |= (0x3F << 8);
-		intel_sbi_write(dev_priv, 0x213C, tmp, SBI_MPHY);
-	}
-
 	tmp = intel_sbi_read(dev_priv, 0x206C, SBI_MPHY);
 	tmp |= (1 << 24) | (1 << 21) | (1 << 18);
 	intel_sbi_write(dev_priv, 0x206C, tmp, SBI_MPHY);
@@ -5268,17 +5235,15 @@ static void lpt_init_pch_refclk(struct drm_device *dev)
 	tmp |= (1 << 24) | (1 << 21) | (1 << 18);
 	intel_sbi_write(dev_priv, 0x216C, tmp, SBI_MPHY);
 
-	if (!is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY);
-		tmp &= ~(7 << 13);
-		tmp |= (5 << 13);
-		intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY);
+	tmp &= ~(7 << 13);
+	tmp |= (5 << 13);
+	intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY);
 
-		tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY);
-		tmp &= ~(7 << 13);
-		tmp |= (5 << 13);
-		intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY);
-	}
+	tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY);
+	tmp &= ~(7 << 13);
+	tmp |= (5 << 13);
+	intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY);
 
 	tmp = intel_sbi_read(dev_priv, 0x208C, SBI_MPHY);
 	tmp &= ~0xFF;
@@ -5300,34 +5265,120 @@ static void lpt_init_pch_refclk(struct drm_device *dev)
 	tmp |= (0x1C << 16);
 	intel_sbi_write(dev_priv, 0x2198, tmp, SBI_MPHY);
 
-	if (!is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY);
-		tmp |= (1 << 27);
-		intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY);
+	tmp |= (1 << 27);
+	intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY);
+	tmp |= (1 << 27);
+	intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY);
+	tmp &= ~(0xF << 28);
+	tmp |= (4 << 28);
+	intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY);
+	tmp &= ~(0xF << 28);
+	tmp |= (4 << 28);
+	intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
+}
+
+/* Implements 3 different sequences from BSpec chapter "Display iCLK
+ * Programming" based on the parameters passed:
+ * - Sequence to enable CLKOUT_DP
+ * - Sequence to enable CLKOUT_DP without spread
+ * - Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O
+ */
+static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread,
+				 bool with_fdi)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t reg, tmp;
+
+	if (WARN(with_fdi && !with_spread, "FDI requires downspread\n"))
+		with_spread = true;
+	if (WARN(dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE &&
+		 with_fdi, "LP PCH doesn't have FDI\n"))
+		with_fdi = false;
 
-		tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY);
-		tmp |= (1 << 27);
-		intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY);
+	mutex_lock(&dev_priv->dpio_lock);
 
-		tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY);
-		tmp &= ~(0xF << 28);
-		tmp |= (4 << 28);
-		intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+	tmp &= ~SBI_SSCCTL_DISABLE;
+	tmp |= SBI_SSCCTL_PATHALT;
+	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+
+	udelay(24);
 
-		tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY);
-		tmp &= ~(0xF << 28);
-		tmp |= (4 << 28);
-		intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
+	if (with_spread) {
+		tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+		tmp &= ~SBI_SSCCTL_PATHALT;
+		intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+
+		if (with_fdi) {
+			lpt_reset_fdi_mphy(dev_priv);
+			lpt_program_fdi_mphy(dev_priv);
+		}
 	}
 
-	/* ULT uses SBI_GEN0, but ULT doesn't have VGA, so we don't care. */
-	tmp = intel_sbi_read(dev_priv, SBI_DBUFF0, SBI_ICLK);
-	tmp |= SBI_DBUFF0_ENABLE;
-	intel_sbi_write(dev_priv, SBI_DBUFF0, tmp, SBI_ICLK);
+	reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
+	       SBI_GEN0 : SBI_DBUFF0;
+	tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK);
+	tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE;
+	intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
+
+	mutex_unlock(&dev_priv->dpio_lock);
+}
+
+/* Sequence to disable CLKOUT_DP */
+static void lpt_disable_clkout_dp(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t reg, tmp;
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
+	       SBI_GEN0 : SBI_DBUFF0;
+	tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK);
+	tmp &= ~SBI_GEN0_CFG_BUFFENABLE_DISABLE;
+	intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
+
+	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+	if (!(tmp & SBI_SSCCTL_DISABLE)) {
+		if (!(tmp & SBI_SSCCTL_PATHALT)) {
+			tmp |= SBI_SSCCTL_PATHALT;
+			intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+			udelay(32);
+		}
+		tmp |= SBI_SSCCTL_DISABLE;
+		intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+	}
 
 	mutex_unlock(&dev_priv->dpio_lock);
 }
 
+static void lpt_init_pch_refclk(struct drm_device *dev)
+{
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct intel_encoder *encoder;
+	bool has_vga = false;
+
+	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
+		switch (encoder->type) {
+		case INTEL_OUTPUT_ANALOG:
+			has_vga = true;
+			break;
+		}
+	}
+
+	if (has_vga)
+		lpt_enable_clkout_dp(dev, true, true);
+	else
+		lpt_disable_clkout_dp(dev);
+}
+
 /*
  * Initialize reference clocks when the driver loads
  */
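
lpt_init_pch_refclk() is now a thin policy wrapper: with a VGA encoder present it asks for the full sequence, lpt_enable_clkout_dp(dev, true, true), and otherwise disables CLKOUT_DP. The interesting detail is the parameter sanitizing at the top of lpt_enable_clkout_dp(), modeled standalone below with WARN() replaced by a stub that keeps its return-the-condition semantics:

#include <stdbool.h>
#include <stdio.h>

static bool warn(bool cond, const char *msg)
{
	if (cond)
		fprintf(stderr, "WARN: %s\n", msg);
	return cond;	/* like the kernel's WARN(cond, ...) */
}

static void enable_clkout_dp(bool is_lp_pch, bool with_spread, bool with_fdi)
{
	/* invalid combinations are warned about and downgraded, not obeyed */
	if (warn(with_fdi && !with_spread, "FDI requires downspread"))
		with_spread = true;
	if (warn(is_lp_pch && with_fdi, "LP PCH doesn't have FDI"))
		with_fdi = false;
	printf("spread=%d fdi=%d\n", with_spread, with_fdi);
}

int main(void)
{
	enable_clkout_dp(false, true, true);	/* VGA present: full sequence */
	enable_clkout_dp(true, true, true);	/* LP PCH: FDI request dropped */
	return 0;
}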
@@ -5895,6 +5946,142 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	return true;
 }
 
+static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_ddi_plls *plls = &dev_priv->ddi_plls;
+	struct intel_crtc *crtc;
+	unsigned long irqflags;
+	uint32_t val, pch_hpd_mask;
+
+	pch_hpd_mask = SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT;
+	if (!(dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE))
+		pch_hpd_mask |= SDE_PORTD_HOTPLUG_CPT | SDE_CRT_HOTPLUG_CPT;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
+		WARN(crtc->base.enabled, "CRTC for pipe %c enabled\n",
+		     pipe_name(crtc->pipe));
+
+	WARN(I915_READ(HSW_PWR_WELL_DRIVER), "Power well on\n");
+	WARN(plls->spll_refcount, "SPLL enabled\n");
+	WARN(plls->wrpll1_refcount, "WRPLL1 enabled\n");
+	WARN(plls->wrpll2_refcount, "WRPLL2 enabled\n");
+	WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
+	WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
+	     "CPU PWM1 enabled\n");
+	WARN(I915_READ(HSW_BLC_PWM2_CTL) & BLM_PWM_ENABLE,
+	     "CPU PWM2 enabled\n");
+	WARN(I915_READ(BLC_PWM_PCH_CTL1) & BLM_PCH_PWM_ENABLE,
+	     "PCH PWM1 enabled\n");
+	WARN(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE,
+	     "Utility pin enabled\n");
+	WARN(I915_READ(PCH_GTC_CTL) & PCH_GTC_ENABLE, "PCH GTC enabled\n");
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	val = I915_READ(DEIMR);
+	WARN((val & ~DE_PCH_EVENT_IVB) != val,
+	     "Unexpected DEIMR bits enabled: 0x%x\n", val);
+	val = I915_READ(SDEIMR);
+	WARN((val & ~pch_hpd_mask) != val,
+	     "Unexpected SDEIMR bits enabled: 0x%x\n", val);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
+
+/*
+ * This function implements pieces of two sequences from BSpec:
+ * - Sequence for display software to disable LCPLL
+ * - Sequence for display software to allow package C8+
+ * The steps implemented here are just the steps that actually touch the LCPLL
+ * register. Callers should take care of disabling all the display engine
+ * functions, doing the mode unset, fixing interrupts, etc.
+ */
+void hsw_disable_lcpll(struct drm_i915_private *dev_priv,
+		       bool switch_to_fclk, bool allow_power_down)
+{
+	uint32_t val;
+
+	assert_can_disable_lcpll(dev_priv);
+
+	val = I915_READ(LCPLL_CTL);
+
+	if (switch_to_fclk) {
+		val |= LCPLL_CD_SOURCE_FCLK;
+		I915_WRITE(LCPLL_CTL, val);
+
+		if (wait_for_atomic_us(I915_READ(LCPLL_CTL) &
+				       LCPLL_CD_SOURCE_FCLK_DONE, 1))
+			DRM_ERROR("Switching to FCLK failed\n");
+
+		val = I915_READ(LCPLL_CTL);
+	}
+
+	val |= LCPLL_PLL_DISABLE;
+	I915_WRITE(LCPLL_CTL, val);
+	POSTING_READ(LCPLL_CTL);
+
+	if (wait_for((I915_READ(LCPLL_CTL) & LCPLL_PLL_LOCK) == 0, 1))
+		DRM_ERROR("LCPLL still locked\n");
+
+	val = I915_READ(D_COMP);
+	val |= D_COMP_COMP_DISABLE;
+	I915_WRITE(D_COMP, val);
+	POSTING_READ(D_COMP);
+	ndelay(100);
+
+	if (wait_for((I915_READ(D_COMP) & D_COMP_RCOMP_IN_PROGRESS) == 0, 1))
+		DRM_ERROR("D_COMP RCOMP still in progress\n");
+
+	if (allow_power_down) {
+		val = I915_READ(LCPLL_CTL);
+		val |= LCPLL_POWER_DOWN_ALLOW;
+		I915_WRITE(LCPLL_CTL, val);
+		POSTING_READ(LCPLL_CTL);
+	}
+}
+
+/*
+ * Fully restores LCPLL, disallowing power down and switching back to LCPLL
+ * source.
+ */
+void hsw_restore_lcpll(struct drm_i915_private *dev_priv)
+{
+	uint32_t val;
+
+	val = I915_READ(LCPLL_CTL);
+
+	if ((val & (LCPLL_PLL_LOCK | LCPLL_PLL_DISABLE | LCPLL_CD_SOURCE_FCLK |
+		    LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK)
+		return;
+
+	if (val & LCPLL_POWER_DOWN_ALLOW) {
+		val &= ~LCPLL_POWER_DOWN_ALLOW;
+		I915_WRITE(LCPLL_CTL, val);
+	}
+
+	val = I915_READ(D_COMP);
+	val |= D_COMP_COMP_FORCE;
+	val &= ~D_COMP_COMP_DISABLE;
+	I915_WRITE(D_COMP, val);
+	I915_READ(D_COMP);
+
+	val = I915_READ(LCPLL_CTL);
+	val &= ~LCPLL_PLL_DISABLE;
+	I915_WRITE(LCPLL_CTL, val);
+
+	if (wait_for(I915_READ(LCPLL_CTL) & LCPLL_PLL_LOCK, 5))
+		DRM_ERROR("LCPLL not locked yet\n");
+
+	if (val & LCPLL_CD_SOURCE_FCLK) {
+		val = I915_READ(LCPLL_CTL);
+		val &= ~LCPLL_CD_SOURCE_FCLK;
+		I915_WRITE(LCPLL_CTL, val);
+
+		if (wait_for_atomic_us((I915_READ(LCPLL_CTL) &
+					LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
+			DRM_ERROR("Switching back to LCPLL failed\n");
+	}
+}
+
 static void haswell_modeset_global_resources(struct drm_device *dev)
 {
 	bool enable = false;
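
hsw_disable_lcpll() and hsw_restore_lcpll() are written as an exact pair: restore reads the flag bits back out of LCPLL_CTL to undo only what disable actually did, and returns early if the PLL was never disturbed. A toy model of that flag-driven pairing, using the bit values from the LCPLL_CTL additions in this series (lock loss and re-lock are modeled directly instead of being polled):

#include <stdint.h>
#include <stdio.h>

#define LCPLL_PLL_DISABLE	(1u << 31)
#define LCPLL_PLL_LOCK		(1u << 30)
#define LCPLL_POWER_DOWN_ALLOW	(1u << 22)
#define LCPLL_CD_SOURCE_FCLK	(1u << 21)

static uint32_t lcpll_disable(uint32_t val, int to_fclk, int allow_pd)
{
	if (to_fclk)
		val |= LCPLL_CD_SOURCE_FCLK;
	val = (val | LCPLL_PLL_DISABLE) & ~LCPLL_PLL_LOCK;
	if (allow_pd)
		val |= LCPLL_POWER_DOWN_ALLOW;
	return val;
}

static uint32_t lcpll_restore(uint32_t val)
{
	/* nothing to undo if the PLL was left alone (the early return) */
	if ((val & (LCPLL_PLL_LOCK | LCPLL_PLL_DISABLE |
		    LCPLL_CD_SOURCE_FCLK | LCPLL_POWER_DOWN_ALLOW)) ==
	    LCPLL_PLL_LOCK)
		return val;
	val &= ~(LCPLL_POWER_DOWN_ALLOW | LCPLL_PLL_DISABLE |
		 LCPLL_CD_SOURCE_FCLK);
	return val | LCPLL_PLL_LOCK;
}

int main(void)
{
	uint32_t val = LCPLL_PLL_LOCK;

	val = lcpll_disable(val, 1, 1);
	printf("disabled: 0x%08x\n", val);
	val = lcpll_restore(val);
	printf("restored: 0x%08x\n", val);
	return 0;
}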
@@ -8434,6 +8621,8 @@ check_shared_dpll_state(struct drm_device *dev)
 		     pll->active, pll->refcount);
 		WARN(pll->active && !pll->on,
 		     "pll in active use but not on in sw tracking\n");
+		WARN(pll->on && !pll->active,
+		     "pll in on but not on in use in sw tracking\n");
 		WARN(pll->on != active,
 		     "pll on state mismatch (expected %i, found %i)\n",
 		     pll->on, active);
@@ -8658,15 +8847,20 @@ static void intel_set_config_restore_state(struct drm_device *dev,
 }
 
 static bool
-is_crtc_connector_off(struct drm_crtc *crtc, struct drm_connector *connectors,
-		      int num_connectors)
+is_crtc_connector_off(struct drm_mode_set *set)
 {
 	int i;
 
-	for (i = 0; i < num_connectors; i++)
-		if (connectors[i].encoder &&
-		    connectors[i].encoder->crtc == crtc &&
-		    connectors[i].dpms != DRM_MODE_DPMS_ON)
+	if (set->num_connectors == 0)
+		return false;
+
+	if (WARN_ON(set->connectors == NULL))
+		return false;
+
+	for (i = 0; i < set->num_connectors; i++)
+		if (set->connectors[i]->encoder &&
+		    set->connectors[i]->encoder->crtc == set->crtc &&
+		    set->connectors[i]->dpms != DRM_MODE_DPMS_ON)
 			return true;
 
 	return false;
@@ -8679,10 +8873,8 @@ intel_set_config_compute_mode_changes(struct drm_mode_set *set,
 
 	/* We should be able to check here if the fb has the same properties
 	 * and then just flip_or_move it */
-	if (set->connectors != NULL &&
-	    is_crtc_connector_off(set->crtc, *set->connectors,
-				  set->num_connectors)) {
-			config->mode_changed = true;
+	if (is_crtc_connector_off(set)) {
+		config->mode_changed = true;
 	} else if (set->crtc->fb != set->fb) {
 		/* If we have no fb then treat it as a full mode set */
 		if (set->crtc->fb == NULL) {
@@ -9437,9 +9629,12 @@ static void intel_init_display(struct drm_device *dev)
 	else if (IS_I915G(dev))
 		dev_priv->display.get_display_clock_speed =
 			i915_get_display_clock_speed;
-	else if (IS_I945GM(dev) || IS_845G(dev) || IS_PINEVIEW_M(dev))
+	else if (IS_I945GM(dev) || IS_845G(dev))
 		dev_priv->display.get_display_clock_speed =
 			i9xx_misc_get_display_clock_speed;
+	else if (IS_PINEVIEW(dev))
+		dev_priv->display.get_display_clock_speed =
+			pnv_get_display_clock_speed;
 	else if (IS_I915GM(dev))
 		dev_priv->display.get_display_clock_speed =
 			i915gm_get_display_clock_speed;
@@ -9536,6 +9731,17 @@ static void quirk_invert_brightness(struct drm_device *dev)
 	DRM_INFO("applying inverted panel brightness quirk\n");
 	DRM_INFO("applying inverted panel brightness quirk\n");
 }
 }
 
 
+/*
+ * Some machines (Dell XPS13) suffer broken backlight controls if
+ * BLM_PCH_PWM_ENABLE is set.
+ */
+static void quirk_no_pcm_pwm_enable(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	dev_priv->quirks |= QUIRK_NO_PCH_PWM_ENABLE;
+	DRM_INFO("applying no-PCH_PWM_ENABLE quirk\n");
+}
+
 struct intel_quirk {
 	int device;
 	int subsystem_vendor;
@@ -9605,6 +9811,11 @@ static struct intel_quirk intel_quirks[] = {
 
 	/* Acer Aspire 4736Z */
 	{ 0x2a42, 0x1025, 0x0260, quirk_invert_brightness },
+
+	/* Dell XPS13 HD Sandy Bridge */
+	{ 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable },
+	/* Dell XPS13 HD and XPS13 FHD Ivy Bridge */
+	{ 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable },
 };
 
 static void intel_init_quirks(struct drm_device *dev)
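
The new entries are matched the same way as the existing quirks: by PCI device id plus subsystem vendor/device id, after which the hook runs during init. A simplified standalone stand-in for that table walk, reusing the Dell XPS13 ids added above (the real intel_init_quirks() iterates intel_quirks[] with function pointers):

#include <stdio.h>

struct quirk {
	int device, subsystem_vendor, subsystem_device;
	const char *hook;	/* stands in for the function pointer */
};

static const struct quirk quirks[] = {
	{ 0x0116, 0x1028, 0x052e, "quirk_no_pcm_pwm_enable" },
	{ 0x0166, 0x1028, 0x058b, "quirk_no_pcm_pwm_enable" },
};

int main(void)
{
	int dev = 0x0166, sv = 0x1028, sd = 0x058b;	/* XPS13 FHD IVB */

	for (unsigned int i = 0; i < sizeof(quirks) / sizeof(quirks[0]); i++) {
		const struct quirk *q = &quirks[i];

		if (q->device == dev && q->subsystem_vendor == sv &&
		    q->subsystem_device == sd)
			printf("applying %s\n", q->hook);
	}
	return 0;
}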
@@ -9955,8 +10166,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 		}
 		pll->refcount = pll->active;
 
-		DRM_DEBUG_KMS("%s hw state readout: refcount %i\n",
-			      pll->name, pll->refcount);
+		DRM_DEBUG_KMS("%s hw state readout: refcount %i, on %i\n",
+			      pll->name, pll->refcount, pll->on);
 	}
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
@@ -10016,6 +10227,7 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
 	struct drm_plane *plane;
 	struct intel_crtc *crtc;
 	struct intel_encoder *encoder;
+	int i;
 
 	intel_modeset_readout_hw_state(dev);
 
@@ -10047,6 +10259,18 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
 		intel_dump_pipe_config(crtc, &crtc->config, "[setup_hw_state]");
 	}
 
+	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+		struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
+
+		if (!pll->on || pll->active)
+			continue;
+
+		DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name);
+
+		pll->disable(dev_priv, pll);
+		pll->on = false;
+	}
+
 	if (force_restore) {
 		/*
 		 * We need to use raw interfaces for restoring state to avoid
@@ -10256,8 +10480,7 @@ intel_display_capture_error_state(struct drm_device *dev)
 	 * well was on, so here we have to clear the FPGA_DBG_RM_NOCLAIM bit to
 	 * prevent the next I915_WRITE from detecting it and printing an error
 	 * message. */
-	if (HAS_POWER_WELL(dev))
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	intel_uncore_clear_errors(dev);
 
 	return error;
 }

Some files were not shown because too many files changed in this diff