
Merge tag 'drm-intel-next-2015-05-22' of git://anongit.freedesktop.org/drm-intel into drm-next

- cpt modeset sequence fixes from Ville
- more rps boosting tuning from Chris
- S3 support for skl (Damien)
- a pile of w/a for bxt from various people
- cleanup of primary plane pixel formats (Damien)
- a big pile of small patches with fixes and cleanups all over

* tag 'drm-intel-next-2015-05-22' of git://anongit.freedesktop.org/drm-intel: (90 commits)
  drm/i915: Update DRIVER_DATE to 20150522
  drm/i915: Introduce DRM_I915_THROTTLE_JIFFIES
  drm/i915: Use the correct destructor for freeing requests on error
  drm/i915/skl: don't fail colorkey + scaler request
  drm/i915: Enable GTT caching on gen8
  drm/i915: Move WaProgramL3SqcReg1Default:bdw to init_clock_gating()
  drm/i915: Use ilk_init_lp_watermarks() on BDW
  drm/i915: Disable FDI RX/TX before the ports
  drm/i915: Disable CRT port after pipe on PCH platforms
  drm/i915: Disable SDVO port after the pipe on PCH platforms
  drm/i915: Disable HDMI port after the pipe on PCH platforms
  drm/i915: Fix the IBX transcoder B workarounds
  drm/i915: Write the SDVO reg twice on IBX
  drm/i915: Fix DP enhanced framing for CPT
  drm/i915: Clean up the CPT DP .get_hw_state() port readout
  drm/i915: Clarfify the DP code platform checks
  drm/i915: Remove the double register write from intel_disable_hdmi()
  drm/i915: Remove a bogus 12bpc "toggle" from intel_disable_hdmi()
  drm/i915/skl: Deinit/init the display at suspend/resume
  drm/i915: Free RPS boosts for all laggards
  ...
Dave Airlie, 10 years ago
parent
commit
c99d153013
34 files changed, 1667 insertions(+), 978 deletions(-)
  1. Documentation/DocBook/drm.tmpl (+6, -1)
  2. drivers/gpu/drm/i915/Kconfig (+8, -0)
  3. drivers/gpu/drm/i915/Kconfig.debug (+5, -0)
  4. drivers/gpu/drm/i915/Makefile (+2, -0)
  5. drivers/gpu/drm/i915/i915_debugfs.c (+50, -14)
  6. drivers/gpu/drm/i915/i915_drv.c (+15, -9)
  7. drivers/gpu/drm/i915/i915_drv.h (+57, -28)
  8. drivers/gpu/drm/i915/i915_gem.c (+374, -264)
  9. drivers/gpu/drm/i915/i915_gem_context.c (+0, -2)
  10. drivers/gpu/drm/i915/i915_gem_debug.c (+22, -70)
  11. drivers/gpu/drm/i915/i915_gem_execbuffer.c (+7, -3)
  12. drivers/gpu/drm/i915/i915_gem_gtt.c (+1, -1)
  13. drivers/gpu/drm/i915/i915_gem_userptr.c (+7, -4)
  14. drivers/gpu/drm/i915/i915_gpu_error.c (+13, -6)
  15. drivers/gpu/drm/i915/i915_irq.c (+21, -7)
  16. drivers/gpu/drm/i915/i915_reg.h (+24, -0)
  17. drivers/gpu/drm/i915/intel_bios.c (+81, -69)
  18. drivers/gpu/drm/i915/intel_crt.c (+14, -1)
  19. drivers/gpu/drm/i915/intel_csr.c (+53, -0)
  20. drivers/gpu/drm/i915/intel_ddi.c (+83, -37)
  21. drivers/gpu/drm/i915/intel_display.c (+401, -128)
  22. drivers/gpu/drm/i915/intel_dp.c (+67, -68)
  23. drivers/gpu/drm/i915/intel_drv.h (+16, -7)
  24. drivers/gpu/drm/i915/intel_dsi_pll.c (+26, -40)
  25. drivers/gpu/drm/i915/intel_hdmi.c (+47, -42)
  26. drivers/gpu/drm/i915/intel_i2c.c (+21, -0)
  27. drivers/gpu/drm/i915/intel_lrc.c (+20, -14)
  28. drivers/gpu/drm/i915/intel_overlay.c (+0, -2)
  29. drivers/gpu/drm/i915/intel_pm.c (+130, -83)
  30. drivers/gpu/drm/i915/intel_ringbuffer.c (+25, -25)
  31. drivers/gpu/drm/i915/intel_runtime_pm.c (+2, -2)
  32. drivers/gpu/drm/i915/intel_sdvo.c (+41, -33)
  33. drivers/gpu/drm/i915/intel_sprite.c (+26, -14)
  34. include/drm/i915_pciids.h (+2, -4)

+ 6 - 1
Documentation/DocBook/drm.tmpl

@@ -4153,6 +4153,12 @@ int num_ioctls;</synopsis>
 	  </tgroup>
 	</table>
       </sect2>
+
+      <sect2>
+       <title>CSR firmware support for DMC</title>
+!Pdrivers/gpu/drm/i915/intel_csr.c csr support for dmc
+!Idrivers/gpu/drm/i915/intel_csr.c
+      </sect2>
     </sect1>
 
     <sect1>
@@ -4204,7 +4210,6 @@ int num_ioctls;</synopsis>
 !Idrivers/gpu/drm/i915/i915_gem_shrinker.c
       </sect2>
     </sect1>
-
     <sect1>
       <title> Tracing </title>
       <para>

+ 8 - 0
drivers/gpu/drm/i915/Kconfig

@@ -71,3 +71,11 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
 	  option changes the default for that module option.
 
 	  If in doubt, say "N".
+
+menu "DRM i915 Debugging"
+
+depends on DRM_I915
+
+source drivers/gpu/drm/i915/Kconfig.debug
+
+endmenu

+ 5 - 0
drivers/gpu/drm/i915/Kconfig.debug

@@ -0,0 +1,5 @@
+config DRM_I915_WERROR
+	bool "Force GCC to throw an error instead of a warning when compiling"
+	default n
+	---help---
+	  Add -Werror to the build flags for (and only for) i915.ko

+ 2 - 0
drivers/gpu/drm/i915/Makefile

@@ -2,6 +2,8 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
+subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
+
 # Please keep these build lists sorted!
 
 # core driver code

+ 50 - 14
drivers/gpu/drm/i915/i915_debugfs.c

@@ -120,10 +120,13 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct intel_engine_cs *ring;
 	struct i915_vma *vma;
 	int pin_count = 0;
+	int i;
 
-	seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
+	seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x [ ",
 		   &obj->base,
 		   obj->active ? "*" : " ",
 		   get_pin_flag(obj),
@@ -131,8 +134,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   get_global_flag(obj),
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
-		   obj->base.write_domain,
-		   i915_gem_request_get_seqno(obj->last_read_req),
+		   obj->base.write_domain);
+	for_each_ring(ring, dev_priv, i)
+		seq_printf(m, "%x ",
+				i915_gem_request_get_seqno(obj->last_read_req[i]));
+	seq_printf(m, "] %x %x%s%s%s",
 		   i915_gem_request_get_seqno(obj->last_write_req),
 		   i915_gem_request_get_seqno(obj->last_fenced_req),
 		   i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
@@ -169,9 +175,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		*t = '\0';
 		seq_printf(m, " (%s mappable)", s);
 	}
-	if (obj->last_read_req != NULL)
+	if (obj->last_write_req != NULL)
 		seq_printf(m, " (%s)",
-			   i915_gem_request_get_ring(obj->last_read_req)->name);
+			   i915_gem_request_get_ring(obj->last_write_req)->name);
 	if (obj->frontbuffer_bits)
 		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
@@ -665,7 +671,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	struct drm_i915_gem_request *rq;
+	struct drm_i915_gem_request *req;
 	int ret, any, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -677,22 +683,22 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 		int count;
 
 		count = 0;
-		list_for_each_entry(rq, &ring->request_list, list)
+		list_for_each_entry(req, &ring->request_list, list)
 			count++;
 		if (count == 0)
 			continue;
 
 		seq_printf(m, "%s requests: %d\n", ring->name, count);
-		list_for_each_entry(rq, &ring->request_list, list) {
+		list_for_each_entry(req, &ring->request_list, list) {
 			struct task_struct *task;
 
 			rcu_read_lock();
 			task = NULL;
-			if (rq->pid)
-				task = pid_task(rq->pid, PIDTYPE_PID);
+			if (req->pid)
+				task = pid_task(req->pid, PIDTYPE_PID);
 			seq_printf(m, "    %x @ %d: %s [%d]\n",
-				   rq->seqno,
-				   (int) (jiffies - rq->emitted_jiffies),
+				   req->seqno,
+				   (int) (jiffies - req->emitted_jiffies),
 				   task ? task->comm : "<unknown>",
 				   task ? task->pid : -1);
 			rcu_read_unlock();
@@ -2276,6 +2282,18 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int count_irq_waiters(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *ring;
+	int count = 0;
+	int i;
+
+	for_each_ring(ring, i915, i)
+		count += ring->irq_refcount;
+
+	return count;
+}
+
 static int i915_rps_boost_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = m->private;
@@ -2292,6 +2310,15 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	if (ret)
 		goto unlock;
 
+	seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
+	seq_printf(m, "GPU busy? %d\n", dev_priv->mm.busy);
+	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
+	seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n",
+		   intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
+		   intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
+		   intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit),
+		   intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit),
+		   intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
 	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
 		struct drm_i915_file_private *file_priv = file->driver_priv;
 		struct task_struct *task;
@@ -2301,10 +2328,16 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		seq_printf(m, "%s [%d]: %d boosts%s\n",
 			   task ? task->comm : "<unknown>",
 			   task ? task->pid : -1,
-			   file_priv->rps_boosts,
-			   list_empty(&file_priv->rps_boost) ? "" : ", active");
+			   file_priv->rps.boosts,
+			   list_empty(&file_priv->rps.link) ? "" : ", active");
 		rcu_read_unlock();
 	}
+	seq_printf(m, "Semaphore boosts: %d%s\n",
+		   dev_priv->rps.semaphores.boosts,
+		   list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
+	seq_printf(m, "MMIO flip boosts: %d%s\n",
+		   dev_priv->rps.mmioflips.boosts,
+		   list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
 	seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
 
 	mutex_unlock(&dev_priv->rps.hw_lock);
@@ -5154,6 +5187,9 @@ static int i915_dpcd_show(struct seq_file *m, void *data)
 	ssize_t err;
 	int i;
 
+	if (connector->status != connector_status_connected)
+		return -ENODEV;
+
 	for (i = 0; i < ARRAY_SIZE(i915_dpcd_debug); i++) {
 		const struct dpcd_block *b = &i915_dpcd_debug[i];
 		size_t size = b->end ? b->end - b->offset + 1 : (b->size ?: 1);

+ 15 - 9
drivers/gpu/drm/i915/i915_drv.c

@@ -595,6 +595,7 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv);
 static int vlv_resume_prepare(struct drm_i915_private *dev_priv,
 			      bool rpm_resume);
 static int skl_resume_prepare(struct drm_i915_private *dev_priv);
+static int bxt_resume_prepare(struct drm_i915_private *dev_priv);
 
 
 static int i915_drm_suspend(struct drm_device *dev)
@@ -811,14 +812,17 @@ static int i915_drm_resume_early(struct drm_device *dev)
 	if (IS_VALLEYVIEW(dev_priv))
 		ret = vlv_resume_prepare(dev_priv, false);
 	if (ret)
-		DRM_ERROR("Resume prepare failed: %d,Continuing resume\n", ret);
+		DRM_ERROR("Resume prepare failed: %d, continuing anyway\n",
+			  ret);
 
 	intel_uncore_early_sanitize(dev, true);
 
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		hsw_disable_pc8(dev_priv);
+	if (IS_BROXTON(dev))
+		ret = bxt_resume_prepare(dev_priv);
 	else if (IS_SKYLAKE(dev_priv))
 		ret = skl_resume_prepare(dev_priv);
+	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		hsw_disable_pc8(dev_priv);
 
 	intel_uncore_sanitize(dev);
 	intel_power_domains_init_hw(dev_priv);
@@ -989,7 +993,7 @@ static int i915_pm_suspend_late(struct device *dev)
 	struct drm_device *drm_dev = dev_to_i915(dev)->dev;
 
 	/*
-	 * We have a suspedn ordering issue with the snd-hda driver also
+	 * We have a suspend ordering issue with the snd-hda driver also
 	 * requiring our device to be power up. Due to the lack of a
 	 * parent/child relationship we currently solve this with an late
 	 * suspend hook.
@@ -1043,6 +1047,8 @@ static int skl_suspend_complete(struct drm_i915_private *dev_priv)
 	 */
 	intel_csr_load_status_set(dev_priv, FW_UNINITIALIZED);
 
+	skl_uninit_cdclk(dev_priv);
+
 	return 0;
 }
 
@@ -1089,6 +1095,7 @@ static int skl_resume_prepare(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
 
+	skl_init_cdclk(dev_priv);
 	intel_csr_load_program(dev);
 
 	return 0;
@@ -1586,16 +1593,15 @@ static int intel_runtime_resume(struct device *device)
  */
 static int intel_suspend_complete(struct drm_i915_private *dev_priv)
 {
-	struct drm_device *dev = dev_priv->dev;
 	int ret;
 
-	if (IS_BROXTON(dev))
+	if (IS_BROXTON(dev_priv))
 		ret = bxt_suspend_complete(dev_priv);
-	else if (IS_SKYLAKE(dev))
+	else if (IS_SKYLAKE(dev_priv))
 		ret = skl_suspend_complete(dev_priv);
-	else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		ret = hsw_suspend_complete(dev_priv);
-	else if (IS_VALLEYVIEW(dev))
+	else if (IS_VALLEYVIEW(dev_priv))
 		ret = vlv_suspend_complete(dev_priv);
 	else
 		ret = 0;

+ 57 - 28
drivers/gpu/drm/i915/i915_drv.h

@@ -56,7 +56,7 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20150508"
+#define DRIVER_DATE		"20150522"
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -272,6 +272,30 @@ struct drm_i915_private;
 struct i915_mm_struct;
 struct i915_mmu_object;
 
+struct drm_i915_file_private {
+	struct drm_i915_private *dev_priv;
+	struct drm_file *file;
+
+	struct {
+		spinlock_t lock;
+		struct list_head request_list;
+/* 20ms is a fairly arbitrary limit (greater than the average frame time)
+ * chosen to prevent the CPU getting more than a frame ahead of the GPU
+ * (when using lax throttling for the frontbuffer). We also use it to
+ * offer free GPU waitboosts for severely congested workloads.
+ */
+#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
+	} mm;
+	struct idr context_idr;
+
+	struct intel_rps_client {
+		struct list_head link;
+		unsigned boosts;
+	} rps;
+
+	struct intel_engine_cs *bsd_ring;
+};
+
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
 	/* real shared dpll ids must be >= 0 */
@@ -309,7 +333,7 @@ struct intel_dpll_hw_state {
 	uint32_t cfgcr1, cfgcr2;
 
 	/* bxt */
-	uint32_t ebb0, pll0, pll1, pll2, pll3, pll6, pll8, pcsdw12;
+	uint32_t ebb0, pll0, pll1, pll2, pll3, pll6, pll8, pll10, pcsdw12;
 };
 
 struct intel_shared_dpll_config {
@@ -508,7 +532,7 @@ struct drm_i915_error_state {
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
-		u32 rseqno, wseqno;
+		u32 rseqno[I915_NUM_RINGS], wseqno;
 		u32 gtt_offset;
 		u32 read_domains;
 		u32 write_domain;
@@ -1070,6 +1094,8 @@ struct intel_gen6_power_mgmt {
 	struct list_head clients;
 	unsigned boosts;
 
+	struct intel_rps_client semaphores, mmioflips;
+
 	/* manual wa residency calculations */
 	struct intel_rps_ei up_ei, down_ei;
 
@@ -1468,7 +1494,8 @@ static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
 
 struct skl_ddb_allocation {
 	struct skl_ddb_entry pipe[I915_MAX_PIPES];
-	struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES];
+	struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* packed/uv */
+	struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* y-plane */
 	struct skl_ddb_entry cursor[I915_MAX_PIPES];
 };
 
@@ -1684,6 +1711,7 @@ struct drm_i915_private {
 	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
 
 	unsigned int fsb_freq, mem_freq, is_ddr3;
+	unsigned int skl_boot_cdclk;
 	unsigned int cdclk_freq;
 	unsigned int hpll_freq;
 
@@ -1938,7 +1966,7 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
-	struct list_head ring_list;
+	struct list_head ring_list[I915_NUM_RINGS];
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
@@ -1949,7 +1977,7 @@ struct drm_i915_gem_object {
 	 * rendering and so a non-zero seqno), and is not set if it i s on
 	 * inactive (ready to be unbound) list.
 	 */
-	unsigned int active:1;
+	unsigned int active:I915_NUM_RINGS;
 
 	/**
 	 * This is set if the object has been written to since last bound
@@ -2020,8 +2048,17 @@ struct drm_i915_gem_object {
 	void *dma_buf_vmapping;
 	int vmapping_count;
 
-	/** Breadcrumb of last rendering to the buffer. */
-	struct drm_i915_gem_request *last_read_req;
+	/** Breadcrumb of last rendering to the buffer.
+	 * There can only be one writer, but we allow for multiple readers.
+	 * If there is a writer that necessarily implies that all other
+	 * read requests are complete - but we may only be lazily clearing
+	 * the read requests. A read request is naturally the most recent
+	 * request on a ring, so we may have two different write and read
+	 * requests on one ring where the write request is older than the
+	 * read request. This allows for the CPU to read from an active
+	 * buffer by only waiting for the write to complete.
+	 * */
+	struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS];
 	struct drm_i915_gem_request *last_write_req;
 	/** Breadcrumb of last fenced GPU access to the buffer. */
 	struct drm_i915_gem_request *last_fenced_req;
@@ -2160,10 +2197,12 @@ i915_gem_request_get_ring(struct drm_i915_gem_request *req)
 	return req ? req->ring : NULL;
 }
 
-static inline void
+static inline struct drm_i915_gem_request *
 i915_gem_request_reference(struct drm_i915_gem_request *req)
 {
-	kref_get(&req->ref);
+	if (req)
+		kref_get(&req->ref);
+	return req;
 }
 
 static inline void
@@ -2204,22 +2243,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
  * a later patch when the call to i915_seqno_passed() is obsoleted...
  */
 
-struct drm_i915_file_private {
-	struct drm_i915_private *dev_priv;
-	struct drm_file *file;
-
-	struct {
-		spinlock_t lock;
-		struct list_head request_list;
-	} mm;
-	struct idr context_idr;
-
-	struct list_head rps_boost;
-	struct intel_engine_cs *bsd_ring;
-
-	unsigned rps_boosts;
-};
-
 /*
  * A command that requires special handling by the command parser.
  */
@@ -2375,6 +2398,7 @@ struct drm_i915_cmd_table {
 #define SKL_REVID_C0		(0x2)
 #define SKL_REVID_D0		(0x3)
 #define SKL_REVID_E0		(0x4)
+#define SKL_REVID_F0		(0x5)
 
 #define BXT_REVID_A0		(0x0)
 #define BXT_REVID_B0		(0x3)
@@ -2445,6 +2469,9 @@ struct drm_i915_cmd_table {
 
 #define HAS_IPS(dev)		(IS_HSW_ULT(dev) || IS_BROADWELL(dev))
 
+#define HAS_DP_MST(dev)		(IS_HASWELL(dev) || IS_BROADWELL(dev) || \
+				 INTEL_INFO(dev)->gen >= 9)
+
 #define HAS_DDI(dev)		(INTEL_INFO(dev)->has_ddi)
 #define HAS_FPGA_DBG_UNCLAIMED(dev)	(INTEL_INFO(dev)->has_fpga_dbg)
 #define HAS_PSR(dev)		(IS_HASWELL(dev) || IS_BROADWELL(dev) || \
@@ -2820,7 +2847,6 @@ static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
 
 void i915_gem_reset(struct drm_device *dev);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
-int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -2838,10 +2864,13 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 			unsigned reset_counter,
 			bool interruptible,
 			s64 *timeout,
-			struct drm_i915_file_private *file_priv);
+			struct intel_rps_client *rps);
 int __must_check i915_wait_request(struct drm_i915_gem_request *req);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+			       bool readonly);
+int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
 				  bool write);
 int __must_check
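
The i915_drv.h hunk above is the core of this series: a buffer object now tracks one outstanding read request per ring (last_read_req[I915_NUM_RINGS]) plus a single write request, and obj->active widens from a one-bit flag to a per-ring bitmask. The standalone C toy below (not part of the patch; names, and the ring count of 5, are assumptions for illustration) sketches that bookkeeping: the object only becomes idle once every ring's read has been retired, and a write is retired when its ring's read is.

#include <stdio.h>

#define TOY_NUM_RINGS 5	/* assumption: mirrors I915_NUM_RINGS of this era */

struct toy_request { unsigned seqno; };

struct toy_object {
	unsigned active;			/* bit i set => busy on ring i */
	struct toy_request *last_read[TOY_NUM_RINGS];
	struct toy_request *last_write;
	int write_ring;				/* ring that issued last_write */
};

static void toy_move_to_active(struct toy_object *obj, int ring,
			       struct toy_request *req)
{
	obj->active |= 1u << ring;		/* newly (or still) busy on this ring */
	obj->last_read[ring] = req;		/* a read is always the newest request */
}

static void toy_retire_read(struct toy_object *obj, int ring)
{
	obj->last_read[ring] = NULL;
	if (obj->last_write && obj->write_ring == ring)
		obj->last_write = NULL;		/* write on this ring is done too */
	obj->active &= ~(1u << ring);		/* idle once every ring bit is clear */
}

int main(void)
{
	struct toy_request r0 = { 1 }, r2 = { 2 };
	struct toy_object obj = { 0 };

	toy_move_to_active(&obj, 0, &r0);	/* render ring reads */
	toy_move_to_active(&obj, 2, &r2);	/* blitter reads concurrently */
	printf("active mask 0x%x\n", obj.active);	/* prints 0x5 */

	toy_retire_read(&obj, 0);
	toy_retire_read(&obj, 2);
	printf("idle? %s\n", obj.active ? "no" : "yes");	/* prints yes */
	return 0;
}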

+ 374 - 264
drivers/gpu/drm/i915/i915_gem.c

@@ -38,14 +38,14 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
+#define RQ_BUG_ON(expr)
+
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
-static __must_check int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-			       bool readonly);
 static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj);
-
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
+static void
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
 				 struct drm_i915_gem_object *obj);
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
@@ -518,8 +518,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 		ret = i915_gem_object_wait_rendering(obj, true);
 		if (ret)
 			return ret;
-
-		i915_gem_object_retire(obj);
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -939,8 +937,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
-
-		i915_gem_object_retire(obj);
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
@@ -1181,16 +1177,16 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *rq)
+static int __i915_spin_request(struct drm_i915_gem_request *req)
 {
 	unsigned long timeout;
 
-	if (i915_gem_request_get_ring(rq)->irq_refcount)
+	if (i915_gem_request_get_ring(req)->irq_refcount)
 		return -EBUSY;
 
 	timeout = jiffies + 1;
 	while (!need_resched()) {
-		if (i915_gem_request_completed(rq, true))
+		if (i915_gem_request_completed(req, true))
 			return 0;
 
 		if (time_after_eq(jiffies, timeout))
@@ -1198,7 +1194,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *rq)
 
 		cpu_relax_lowlatency();
 	}
-	if (i915_gem_request_completed(rq, false))
+	if (i915_gem_request_completed(req, false))
 		return 0;
 
 	return -EAGAIN;
@@ -1225,7 +1221,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 			unsigned reset_counter,
 			bool interruptible,
 			s64 *timeout,
-			struct drm_i915_file_private *file_priv)
+			struct intel_rps_client *rps)
 {
 	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
 	struct drm_device *dev = ring->dev;
@@ -1239,14 +1235,17 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 
 	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
 
+	if (list_empty(&req->list))
+		return 0;
+
 	if (i915_gem_request_completed(req, true))
 		return 0;
 
 	timeout_expire = timeout ?
 		jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
 
-	if (INTEL_INFO(dev)->gen >= 6)
-		gen6_rps_boost(dev_priv, file_priv);
+	if (INTEL_INFO(dev_priv)->gen >= 6)
+		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
 
 	/* Record current time in case interrupted by signal, or wedged */
 	trace_i915_gem_request_wait_begin(req);
@@ -1338,6 +1337,63 @@ out:
 	return ret;
 }
 
+static inline void
+i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
+{
+	struct drm_i915_file_private *file_priv = request->file_priv;
+
+	if (!file_priv)
+		return;
+
+	spin_lock(&file_priv->mm.lock);
+	list_del(&request->client_list);
+	request->file_priv = NULL;
+	spin_unlock(&file_priv->mm.lock);
+}
+
+static void i915_gem_request_retire(struct drm_i915_gem_request *request)
+{
+	trace_i915_gem_request_retire(request);
+
+	/* We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	request->ringbuf->last_retired_head = request->postfix;
+
+	list_del_init(&request->list);
+	i915_gem_request_remove_from_client(request);
+
+	put_pid(request->pid);
+
+	i915_gem_request_unreference(request);
+}
+
+static void
+__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
+{
+	struct intel_engine_cs *engine = req->ring;
+	struct drm_i915_gem_request *tmp;
+
+	lockdep_assert_held(&engine->dev->struct_mutex);
+
+	if (list_empty(&req->list))
+		return;
+
+	do {
+		tmp = list_first_entry(&engine->request_list,
+				       typeof(*tmp), list);
+
+		i915_gem_request_retire(tmp);
+	} while (tmp != req);
+
+	WARN_ON(i915_verify_lists(engine->dev));
+}
+
 /**
  * Waits for a request to be signaled, and cleans up the
  * request and object lists appropriately for that event.
@@ -1348,7 +1404,6 @@ i915_wait_request(struct drm_i915_gem_request *req)
 	struct drm_device *dev;
 	struct drm_i915_private *dev_priv;
 	bool interruptible;
-	unsigned reset_counter;
 	int ret;
 
 	BUG_ON(req == NULL);
@@ -1367,29 +1422,13 @@ i915_wait_request(struct drm_i915_gem_request *req)
 	if (ret)
 		return ret;
 
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
-	i915_gem_request_reference(req);
-	ret = __i915_wait_request(req, reset_counter,
+	ret = __i915_wait_request(req,
+				  atomic_read(&dev_priv->gpu_error.reset_counter),
 				  interruptible, NULL, NULL);
-	i915_gem_request_unreference(req);
-	return ret;
-}
-
-static int
-i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj)
-{
-	if (!obj->active)
-		return 0;
-
-	/* Manually manage the write flush as we may have not yet
-	 * retired the buffer.
-	 *
-	 * Note that the last_write_req is always the earlier of
-	 * the two (read/write) requests, so if we haved successfully waited,
-	 * we know we have passed the last write.
-	 */
-	i915_gem_request_assign(&obj->last_write_req, NULL);
+	if (ret)
+		return ret;
 
+	__i915_gem_request_retire__upto(req);
 	return 0;
 }
 
@@ -1397,22 +1436,56 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj)
  * Ensures that all rendering to the object has completed and the object is
  * safe to unbind from the GTT or access from the CPU.
  */
-static __must_check int
+int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly)
 {
-	struct drm_i915_gem_request *req;
-	int ret;
+	int ret, i;
 
-	req = readonly ? obj->last_write_req : obj->last_read_req;
-	if (!req)
+	if (!obj->active)
 		return 0;
 
-	ret = i915_wait_request(req);
-	if (ret)
-		return ret;
+	if (readonly) {
+		if (obj->last_write_req != NULL) {
+			ret = i915_wait_request(obj->last_write_req);
+			if (ret)
+				return ret;
 
-	return i915_gem_object_wait_rendering__tail(obj);
+			i = obj->last_write_req->ring->id;
+			if (obj->last_read_req[i] == obj->last_write_req)
+				i915_gem_object_retire__read(obj, i);
+			else
+				i915_gem_object_retire__write(obj);
+		}
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			if (obj->last_read_req[i] == NULL)
+				continue;
+
+			ret = i915_wait_request(obj->last_read_req[i]);
+			if (ret)
+				return ret;
+
+			i915_gem_object_retire__read(obj, i);
+		}
+		RQ_BUG_ON(obj->active);
+	}
+
+	return 0;
+}
+
+static void
+i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
+			       struct drm_i915_gem_request *req)
+{
+	int ring = req->ring->id;
+
+	if (obj->last_read_req[ring] == req)
+		i915_gem_object_retire__read(obj, ring);
+	else if (obj->last_write_req == req)
+		i915_gem_object_retire__write(obj);
+
+	__i915_gem_request_retire__upto(req);
 }
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1420,40 +1493,75 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
  */
 static __must_check int
 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
-					    struct drm_i915_file_private *file_priv,
+					    struct intel_rps_client *rps,
 					    bool readonly)
 {
-	struct drm_i915_gem_request *req;
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_request *requests[I915_NUM_RINGS];
 	unsigned reset_counter;
-	int ret;
+	int ret, i, n = 0;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 	BUG_ON(!dev_priv->mm.interruptible);
 
-	req = readonly ? obj->last_write_req : obj->last_read_req;
-	if (!req)
+	if (!obj->active)
 		return 0;
 
 	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
 	if (ret)
 		return ret;
 
-	ret = i915_gem_check_olr(req);
-	if (ret)
-		return ret;
-
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
-	i915_gem_request_reference(req);
+
+	if (readonly) {
+		struct drm_i915_gem_request *req;
+
+		req = obj->last_write_req;
+		if (req == NULL)
+			return 0;
+
+		ret = i915_gem_check_olr(req);
+		if (ret)
+			goto err;
+
+		requests[n++] = i915_gem_request_reference(req);
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			struct drm_i915_gem_request *req;
+
+			req = obj->last_read_req[i];
+			if (req == NULL)
+				continue;
+
+			ret = i915_gem_check_olr(req);
+			if (ret)
+				goto err;
+
+			requests[n++] = i915_gem_request_reference(req);
+		}
+	}
+
 	mutex_unlock(&dev->struct_mutex);
-	ret = __i915_wait_request(req, reset_counter, true, NULL, file_priv);
+	for (i = 0; ret == 0 && i < n; i++)
+		ret = __i915_wait_request(requests[i], reset_counter, true,
+					  NULL, rps);
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_request_unreference(req);
-	if (ret)
-		return ret;
 
-	return i915_gem_object_wait_rendering__tail(obj);
+err:
+	for (i = 0; i < n; i++) {
+		if (ret == 0)
+			i915_gem_object_retire_request(obj, requests[i]);
+		i915_gem_request_unreference(requests[i]);
+	}
+
+	return ret;
+}
+
+static struct intel_rps_client *to_rps_client(struct drm_file *file)
+{
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	return &fpriv->rps;
 }
 
 /**
@@ -1498,7 +1606,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 * to catch cases where we are gazumped.
 	 */
 	ret = i915_gem_object_wait_rendering__nonblocking(obj,
-							  file->driver_priv,
+							  to_rps_client(file),
 							  !write_domain);
 	if (ret)
 		goto unref;
@@ -1919,7 +2027,6 @@ i915_gem_mmap_gtt(struct drm_file *file,
 		  uint32_t handle,
 		  uint64_t *offset)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 	int ret;
 
@@ -2235,78 +2342,58 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-static void
-i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_engine_cs *ring)
+void i915_vma_move_to_active(struct i915_vma *vma,
+			     struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *req;
-	struct intel_engine_cs *old_ring;
-
-	BUG_ON(ring == NULL);
-
-	req = intel_ring_get_request(ring);
-	old_ring = i915_gem_request_get_ring(obj->last_read_req);
-
-	if (old_ring != ring && obj->last_write_req) {
-		/* Keep the request relative to the current ring */
-		i915_gem_request_assign(&obj->last_write_req, req);
-	}
+	struct drm_i915_gem_object *obj = vma->obj;
 
 	/* Add a reference if we're newly entering the active list. */
-	if (!obj->active) {
+	if (obj->active == 0)
 		drm_gem_object_reference(&obj->base);
-		obj->active = 1;
-	}
+	obj->active |= intel_ring_flag(ring);
 
-	list_move_tail(&obj->ring_list, &ring->active_list);
+	list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
+	i915_gem_request_assign(&obj->last_read_req[ring->id],
+				intel_ring_get_request(ring));
 
-	i915_gem_request_assign(&obj->last_read_req, req);
+	list_move_tail(&vma->mm_list, &vma->vm->active_list);
 }
 
-void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring)
+static void
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
 {
-	list_move_tail(&vma->mm_list, &vma->vm->active_list);
-	return i915_gem_object_move_to_active(vma->obj, ring);
+	RQ_BUG_ON(obj->last_write_req == NULL);
+	RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
+
+	i915_gem_request_assign(&obj->last_write_req, NULL);
+	intel_fb_obj_flush(obj, true);
 }
 
 static void
-i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
 {
 	struct i915_vma *vma;
 
-	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
-	BUG_ON(!obj->active);
+	RQ_BUG_ON(obj->last_read_req[ring] == NULL);
+	RQ_BUG_ON(!(obj->active & (1 << ring)));
+
+	list_del_init(&obj->ring_list[ring]);
+	i915_gem_request_assign(&obj->last_read_req[ring], NULL);
+
+	if (obj->last_write_req && obj->last_write_req->ring->id == ring)
+		i915_gem_object_retire__write(obj);
+
+	obj->active &= ~(1 << ring);
+	if (obj->active)
+		return;
 
 	list_for_each_entry(vma, &obj->vma_list, vma_link) {
 		if (!list_empty(&vma->mm_list))
 			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
 	}
 
-	intel_fb_obj_flush(obj, true);
-
-	list_del_init(&obj->ring_list);
-
-	i915_gem_request_assign(&obj->last_read_req, NULL);
-	i915_gem_request_assign(&obj->last_write_req, NULL);
-	obj->base.write_domain = 0;
-
 	i915_gem_request_assign(&obj->last_fenced_req, NULL);
-
-	obj->active = 0;
 	drm_gem_object_unreference(&obj->base);
-
-	WARN_ON(i915_verify_lists(dev));
-}
-
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj)
-{
-	if (obj->last_read_req == NULL)
-		return;
-
-	if (i915_gem_request_completed(obj->last_read_req, true))
-		i915_gem_object_move_to_inactive(obj);
 }
 
 static int
@@ -2483,20 +2570,6 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	return 0;
 }
 
-static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
-{
-	struct drm_i915_file_private *file_priv = request->file_priv;
-
-	if (!file_priv)
-		return;
-
-	spin_lock(&file_priv->mm.lock);
-	list_del(&request->client_list);
-	request->file_priv = NULL;
-	spin_unlock(&file_priv->mm.lock);
-}
-
 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
 				   const struct intel_context *ctx)
 {
@@ -2542,16 +2615,6 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void i915_gem_free_request(struct drm_i915_gem_request *request)
-{
-	list_del(&request->list);
-	i915_gem_request_remove_from_client(request);
-
-	put_pid(request->pid);
-
-	i915_gem_request_unreference(request);
-}
-
 void i915_gem_request_free(struct kref *req_ref)
 {
 	struct drm_i915_gem_request *req = container_of(req_ref,
@@ -2576,38 +2639,38 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
 			   struct intel_context *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(ring->dev);
-	struct drm_i915_gem_request *rq;
+	struct drm_i915_gem_request *req;
 	int ret;
 
 	if (ring->outstanding_lazy_request)
 		return 0;
 
-	rq = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
-	if (rq == NULL)
+	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+	if (req == NULL)
 		return -ENOMEM;
 
-	kref_init(&rq->ref);
-	rq->i915 = dev_priv;
+	kref_init(&req->ref);
+	req->i915 = dev_priv;
 
-	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
-	if (ret) {
-		kfree(rq);
-		return ret;
-	}
+	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
+	if (ret)
+		goto err;
 
-	rq->ring = ring;
+	req->ring = ring;
 
 	if (i915.enable_execlists)
-		ret = intel_logical_ring_alloc_request_extras(rq, ctx);
+		ret = intel_logical_ring_alloc_request_extras(req, ctx);
 	else
-		ret = intel_ring_alloc_request_extras(rq);
-	if (ret) {
-		kfree(rq);
-		return ret;
-	}
+		ret = intel_ring_alloc_request_extras(req);
+	if (ret)
+		goto err;
 
-	ring->outstanding_lazy_request = rq;
+	ring->outstanding_lazy_request = req;
 	return 0;
+
+err:
+	kmem_cache_free(dev_priv->requests, req);
+	return ret;
 }
 
 struct drm_i915_gem_request *
@@ -2652,9 +2715,9 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 
 		obj = list_first_entry(&ring->active_list,
 				       struct drm_i915_gem_object,
-				       ring_list);
+				       ring_list[ring->id]);
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__read(obj, ring->id);
 	}
 
 	/*
@@ -2690,7 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 					   struct drm_i915_gem_request,
 					   list);
 
-		i915_gem_free_request(request);
+		i915_gem_request_retire(request);
 	}
 
 	/* This may not have been flushed before the reset, so clean it now */
@@ -2738,6 +2801,8 @@ void i915_gem_reset(struct drm_device *dev)
 	i915_gem_context_reset(dev);
 
 	i915_gem_restore_fences(dev);
+
+	WARN_ON(i915_verify_lists(dev));
 }
 
 /**
@@ -2746,11 +2811,11 @@ void i915_gem_reset(struct drm_device *dev)
 void
 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 {
-	if (list_empty(&ring->request_list))
-		return;
-
 	WARN_ON(i915_verify_lists(ring->dev));
 
+	if (list_empty(&ring->active_list))
+		return;
+
 	/* Retire requests first as we use it above for the early return.
 	 * If we retire requests last, we may use a later seqno and so clear
 	 * the requests lists without clearing the active list, leading to
@@ -2766,16 +2831,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 		if (!i915_gem_request_completed(request, true))
 			break;
 
-		trace_i915_gem_request_retire(request);
-
-		/* We know the GPU must have read the request to have
-		 * sent us the seqno + interrupt, so use the position
-		 * of tail of the request to update the last known position
-		 * of the GPU head.
-		 */
-		request->ringbuf->last_retired_head = request->postfix;
-
-		i915_gem_free_request(request);
+		i915_gem_request_retire(request);
 	}
 
 	/* Move any buffers on the active list that are no longer referenced
@@ -2787,12 +2843,12 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 
 		obj = list_first_entry(&ring->active_list,
 				      struct drm_i915_gem_object,
-				      ring_list);
+				      ring_list[ring->id]);
 
-		if (!i915_gem_request_completed(obj->last_read_req, true))
+		if (!list_empty(&obj->last_read_req[ring->id]->list))
 			break;
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__read(obj, ring->id);
 	}
 
 	if (unlikely(ring->trace_irq_req &&
@@ -2887,17 +2943,30 @@ i915_gem_idle_work_handler(struct work_struct *work)
 static int
 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 {
-	struct intel_engine_cs *ring;
-	int ret;
+	int ret, i;
+
+	if (!obj->active)
+		return 0;
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct drm_i915_gem_request *req;
+
+		req = obj->last_read_req[i];
+		if (req == NULL)
+			continue;
 
-	if (obj->active) {
-		ring = i915_gem_request_get_ring(obj->last_read_req);
+		if (list_empty(&req->list))
+			goto retire;
 
-		ret = i915_gem_check_olr(obj->last_read_req);
+		ret = i915_gem_check_olr(req);
 		if (ret)
 			return ret;
 
-		i915_gem_retire_requests_ring(ring);
+		if (i915_gem_request_completed(req, true)) {
+			__i915_gem_request_retire__upto(req);
+retire:
+			i915_gem_object_retire__read(obj, i);
+		}
 	}
 
 	return 0;
@@ -2931,9 +3000,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_wait *args = data;
 	struct drm_i915_gem_object *obj;
-	struct drm_i915_gem_request *req;
+	struct drm_i915_gem_request *req[I915_NUM_RINGS];
 	unsigned reset_counter;
-	int ret = 0;
+	int i, n = 0;
+	int ret;
 
 	if (args->flags != 0)
 		return -EINVAL;
@@ -2953,11 +3023,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (ret)
 		goto out;
 
-	if (!obj->active || !obj->last_read_req)
+	if (!obj->active)
 		goto out;
 
-	req = obj->last_read_req;
-
 	/* Do this after OLR check to make sure we make forward progress polling
 	 * on this IOCTL with a timeout == 0 (like busy ioctl)
 	 */
@@ -2968,13 +3036,23 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 	drm_gem_object_unreference(&obj->base);
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
-	i915_gem_request_reference(req);
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		if (obj->last_read_req[i] == NULL)
+			continue;
+
+		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
+	}
+
 	mutex_unlock(&dev->struct_mutex);
 
-	ret = __i915_wait_request(req, reset_counter, true,
-				  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
-				  file->driver_priv);
-	i915_gem_request_unreference__unlocked(req);
+	for (i = 0; i < n; i++) {
+		if (ret == 0)
+			ret = __i915_wait_request(req[i], reset_counter, true,
+						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
+						  file->driver_priv);
+		i915_gem_request_unreference__unlocked(req[i]);
+	}
 	return ret;
 
 out:
@@ -2983,6 +3061,59 @@ out:
 	return ret;
 }
 
+static int
+__i915_gem_object_sync(struct drm_i915_gem_object *obj,
+		       struct intel_engine_cs *to,
+		       struct drm_i915_gem_request *req)
+{
+	struct intel_engine_cs *from;
+	int ret;
+
+	from = i915_gem_request_get_ring(req);
+	if (to == from)
+		return 0;
+
+	if (i915_gem_request_completed(req, true))
+		return 0;
+
+	ret = i915_gem_check_olr(req);
+	if (ret)
+		return ret;
+
+	if (!i915_semaphore_is_enabled(obj->base.dev)) {
+		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+		ret = __i915_wait_request(req,
+					  atomic_read(&i915->gpu_error.reset_counter),
+					  i915->mm.interruptible,
+					  NULL,
+					  &i915->rps.semaphores);
+		if (ret)
+			return ret;
+
+		i915_gem_object_retire_request(obj, req);
+	} else {
+		int idx = intel_ring_sync_index(from, to);
+		u32 seqno = i915_gem_request_get_seqno(req);
+
+		if (seqno <= from->semaphore.sync_seqno[idx])
+			return 0;
+
+		trace_i915_gem_ring_sync_to(from, to, req);
+		ret = to->semaphore.sync_to(to, from, seqno);
+		if (ret)
+			return ret;
+
+		/* We use last_read_req because sync_to()
+		 * might have just caused seqno wrap under
+		 * the radar.
+		 */
+		from->semaphore.sync_seqno[idx] =
+			i915_gem_request_get_seqno(obj->last_read_req[from->id]);
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_object_sync - sync an object to a ring.
  *
@@ -2991,7 +3122,17 @@ out:
  *
  * This code is meant to abstract object synchronization with the GPU.
  * Calling with NULL implies synchronizing the object with the CPU
- * rather than a particular GPU ring.
+ * rather than a particular GPU ring. Conceptually we serialise writes
+ * between engines inside the GPU. We only allow on engine to write
+ * into a buffer at any time, but multiple readers. To ensure each has
+ * a coherent view of memory, we must:
+ *
+ * - If there is an outstanding write request to the object, the new
+ *   request must wait for it to complete (either CPU or in hw, requests
+ *   on the same ring will be naturally ordered).
+ *
+ * - If we are a write request (pending_write_domain is set), the new
+ *   request must wait for outstanding read requests to complete.
  *
  * Returns 0 if successful, else propagates up the lower layer error.
  */
@@ -2999,41 +3140,32 @@ int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		     struct intel_engine_cs *to)
 {
-	struct intel_engine_cs *from;
-	u32 seqno;
-	int ret, idx;
-
-	from = i915_gem_request_get_ring(obj->last_read_req);
-
-	if (from == NULL || to == from)
-		return 0;
+	const bool readonly = obj->base.pending_write_domain == 0;
+	struct drm_i915_gem_request *req[I915_NUM_RINGS];
+	int ret, i, n;
 
-	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-		return i915_gem_object_wait_rendering(obj, false);
-
-	idx = intel_ring_sync_index(from, to);
-
-	seqno = i915_gem_request_get_seqno(obj->last_read_req);
-	/* Optimization: Avoid semaphore sync when we are sure we already
-	 * waited for an object with higher seqno */
-	if (seqno <= from->semaphore.sync_seqno[idx])
+	if (!obj->active)
 		return 0;
 
-	ret = i915_gem_check_olr(obj->last_read_req);
-	if (ret)
-		return ret;
+	if (to == NULL)
+		return i915_gem_object_wait_rendering(obj, readonly);
 
-	trace_i915_gem_ring_sync_to(from, to, obj->last_read_req);
-	ret = to->semaphore.sync_to(to, from, seqno);
-	if (!ret)
-		/* We use last_read_req because sync_to()
-		 * might have just caused seqno wrap under
-		 * the radar.
-		 */
-		from->semaphore.sync_seqno[idx] =
-				i915_gem_request_get_seqno(obj->last_read_req);
+	n = 0;
+	if (readonly) {
+		if (obj->last_write_req)
+			req[n++] = obj->last_write_req;
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++)
+			if (obj->last_read_req[i])
+				req[n++] = obj->last_read_req[i];
+	}
+	for (i = 0; i < n; i++) {
+		ret = __i915_gem_object_sync(obj, to, req[i]);
+		if (ret)
+			return ret;
+	}
 
-	return ret;
+	return 0;
 }
 
 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
@@ -3079,7 +3211,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 
 	BUG_ON(obj->pages == NULL);
 
-	ret = i915_gem_object_finish_gpu(obj);
+	ret = i915_gem_object_wait_rendering(obj, false);
 	if (ret)
 		return ret;
 	/* Continue on if we fail due to EIO, the GPU is hung so we
@@ -3119,10 +3251,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 	/* Since the unbound list is global, only move to that list if
 	 * no more VMAs exist. */
 	if (list_empty(&obj->vma_list)) {
-		/* Throw away the active reference before
-		 * moving to the unbound list. */
-		i915_gem_object_retire(obj);
-
 		i915_gem_gtt_finish_object(obj);
 		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 	}
@@ -3155,6 +3283,7 @@ int i915_gpu_idle(struct drm_device *dev)
 			return ret;
 	}
 
+	WARN_ON(i915_verify_lists(dev));
 	return 0;
 }
 
@@ -3777,8 +3906,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	i915_gem_object_retire(obj);
-
 	/* Flush and acquire obj->pages so that we are coherent through
 	 * direct access in memory with previous cached writes through
 	 * shmemfs and that our cache domain tracking remains valid.
@@ -3854,7 +3981,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	}
 
 	if (i915_gem_obj_bound_any(obj)) {
-		ret = i915_gem_object_finish_gpu(obj);
+		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
 
@@ -3976,11 +4103,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	u32 old_read_domains, old_write_domain;
 	int ret;
 
-	if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
-		ret = i915_gem_object_sync(obj, pipelined);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_object_sync(obj, pipelined);
+	if (ret)
+		return ret;
 
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
@@ -4045,23 +4170,6 @@ i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
 	obj->pin_display--;
 }
 
-int
-i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
-{
-	int ret;
-
-	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
-		return 0;
-
-	ret = i915_gem_object_wait_rendering(obj, false);
-	if (ret)
-		return ret;
-
-	/* Ensure that we invalidate the GPU's caches and TLBs. */
-	obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
-	return 0;
-}
-
 /**
  * Moves a single object to the CPU read, and possibly write domain.
  *
@@ -4081,7 +4189,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	i915_gem_object_retire(obj);
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	old_write_domain = obj->base.write_domain;
@@ -4132,7 +4239,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
+	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
 	struct drm_i915_gem_request *request, *target = NULL;
 	unsigned reset_counter;
 	int ret;
@@ -4370,15 +4477,15 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * necessary flushes here.
 	 */
 	ret = i915_gem_object_flush_active(obj);
+	if (ret)
+		goto unref;
 
-	args->busy = obj->active;
-	if (obj->last_read_req) {
-		struct intel_engine_cs *ring;
-		BUILD_BUG_ON(I915_NUM_RINGS > 16);
-		ring = i915_gem_request_get_ring(obj->last_read_req);
-		args->busy |= intel_ring_flag(ring) << 16;
-	}
+	BUILD_BUG_ON(I915_NUM_RINGS > 16);
+	args->busy = obj->active << 16;
+	if (obj->last_write_req)
+		args->busy |= obj->last_write_req->ring->id;
 
+unref:
 	drm_gem_object_unreference(&obj->base);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
@@ -4452,8 +4559,11 @@ unlock:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
+	int i;
+
 	INIT_LIST_HEAD(&obj->global_list);
-	INIT_LIST_HEAD(&obj->ring_list);
+	for (i = 0; i < I915_NUM_RINGS; i++)
+		INIT_LIST_HEAD(&obj->ring_list[i]);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -5112,9 +5222,9 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	}
 	spin_unlock(&file_priv->mm.lock);
 
-	if (!list_empty(&file_priv->rps_boost)) {
+	if (!list_empty(&file_priv->rps.link)) {
 		mutex_lock(&to_i915(dev)->rps.hw_lock);
-		list_del(&file_priv->rps_boost);
+		list_del(&file_priv->rps.link);
 		mutex_unlock(&to_i915(dev)->rps.hw_lock);
 	}
 }
@@ -5133,7 +5243,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
 	file->driver_priv = file_priv;
 	file_priv->dev_priv = dev->dev_private;
 	file_priv->file = file;
-	INIT_LIST_HEAD(&file_priv->rps_boost);
+	INIT_LIST_HEAD(&file_priv->rps.link);
 
 	spin_lock_init(&file_priv->mm.lock);
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
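
One user-visible consequence of the per-ring tracking shows up in the i915_gem_busy_ioctl() hunk above: args->busy now packs the per-ring read-busy mask (obj->active) into the upper 16 bits and puts the id of the last writing ring in the low bits (left at 0 when no write is outstanding, which aliases with the render ring's id). A minimal userspace-style decode of that word, with made-up values, might look like this:

#include <stdio.h>
#include <stdint.h>

/* Illustrative sketch only, not i915 code: high 16 bits = per-ring read-busy
 * mask, low 16 bits = id of the ring that last wrote the object (if any).
 */
static void decode_busy(uint32_t busy)
{
	uint32_t read_mask  = busy >> 16;
	uint32_t write_ring = busy & 0xffff;

	printf("read-busy ring mask 0x%x, last write on ring %u\n",
	       read_mask, write_ring);
}

int main(void)
{
	decode_busy(0);			/* idle object */
	decode_busy((0x5u << 16) | 2);	/* reads on rings 0 and 2, last write on ring 2 */
	return 0;
}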

+ 0 - 2
drivers/gpu/drm/i915/i915_gem_context.c

@@ -753,8 +753,6 @@ static int do_switch(struct intel_engine_cs *ring,
 		 * swapped, but there is no way to do that yet.
 		 */
 		from->legacy_hw_ctx.rcs_state->dirty = 1;
-		BUG_ON(i915_gem_request_get_ring(
-			from->legacy_hw_ctx.rcs_state->last_read_req) != ring);
 
 		/* obj is kept alive until the next request by its active ref */
 		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);

+ 22 - 70
drivers/gpu/drm/i915/i915_gem_debug.c

@@ -34,82 +34,34 @@ int
 i915_verify_lists(struct drm_device *dev)
 {
 	static int warned;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj;
+	struct intel_engine_cs *ring;
 	int err = 0;
+	int i;
 
 	if (warned)
 		return 0;
 
-	list_for_each_entry(obj, &dev_priv->render_ring.active_list, list) {
-		if (obj->base.dev != dev ||
-		    !atomic_read(&obj->base.refcount.refcount)) {
-			DRM_ERROR("freed render active %p\n", obj);
-			err++;
-			break;
-		} else if (!obj->active ||
-			   (obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) {
-			DRM_ERROR("invalid render active %p (a %d r %x)\n",
-				  obj,
-				  obj->active,
-				  obj->base.read_domains);
-			err++;
-		} else if (obj->base.write_domain && list_empty(&obj->gpu_write_list)) {
-			DRM_ERROR("invalid render active %p (w %x, gwl %d)\n",
-				  obj,
-				  obj->base.write_domain,
-				  !list_empty(&obj->gpu_write_list));
-			err++;
-		}
-	}
-
-	list_for_each_entry(obj, &dev_priv->mm.flushing_list, list) {
-		if (obj->base.dev != dev ||
-		    !atomic_read(&obj->base.refcount.refcount)) {
-			DRM_ERROR("freed flushing %p\n", obj);
-			err++;
-			break;
-		} else if (!obj->active ||
-			   (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0 ||
-			   list_empty(&obj->gpu_write_list)) {
-			DRM_ERROR("invalid flushing %p (a %d w %x gwl %d)\n",
-				  obj,
-				  obj->active,
-				  obj->base.write_domain,
-				  !list_empty(&obj->gpu_write_list));
-			err++;
-		}
-	}
-
-	list_for_each_entry(obj, &dev_priv->mm.gpu_write_list, gpu_write_list) {
-		if (obj->base.dev != dev ||
-		    !atomic_read(&obj->base.refcount.refcount)) {
-			DRM_ERROR("freed gpu write %p\n", obj);
-			err++;
-			break;
-		} else if (!obj->active ||
-			   (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) {
-			DRM_ERROR("invalid gpu write %p (a %d w %x)\n",
-				  obj,
-				  obj->active,
-				  obj->base.write_domain);
-			err++;
-		}
-	}
-
-	list_for_each_entry(obj, &i915_gtt_vm->inactive_list, list) {
-		if (obj->base.dev != dev ||
-		    !atomic_read(&obj->base.refcount.refcount)) {
-			DRM_ERROR("freed inactive %p\n", obj);
-			err++;
-			break;
-		} else if (obj->pin_count || obj->active ||
-			   (obj->base.write_domain & I915_GEM_GPU_DOMAINS)) {
-			DRM_ERROR("invalid inactive %p (p %d a %d w %x)\n",
-				  obj,
-				  obj->pin_count, obj->active,
-				  obj->base.write_domain);
-			err++;
+	for_each_ring(ring, dev_priv, i) {
+		list_for_each_entry(obj, &ring->active_list, ring_list[ring->id]) {
+			if (obj->base.dev != dev ||
+			    !atomic_read(&obj->base.refcount.refcount)) {
+				DRM_ERROR("%s: freed active obj %p\n",
+					  ring->name, obj);
+				err++;
+				break;
+			} else if (!obj->active ||
+				   obj->last_read_req[ring->id] == NULL) {
+				DRM_ERROR("%s: invalid active obj %p\n",
+					  ring->name, obj);
+				err++;
+			} else if (obj->base.write_domain) {
+				DRM_ERROR("%s: invalid write obj %p (w %x)\n",
+					  ring->name,
+					  obj, obj->base.write_domain);
+				err++;
+			}
 		}
 	}
 

+ 7 - 3
drivers/gpu/drm/i915/i915_gem_execbuffer.c

@@ -889,6 +889,7 @@ static int
 i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 				struct list_head *vmas)
 {
+	const unsigned other_rings = ~intel_ring_flag(ring);
 	struct i915_vma *vma;
 	uint32_t flush_domains = 0;
 	bool flush_chipset = false;
@@ -896,9 +897,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		ret = i915_gem_object_sync(obj, ring);
-		if (ret)
-			return ret;
+
+		if (obj->active & other_rings) {
+			ret = i915_gem_object_sync(obj, ring);
+			if (ret)
+				return ret;
+		}
 
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			flush_chipset |= i915_gem_clflush_object(obj, false);
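
The execbuffer hunk above uses the new bitmask to skip i915_gem_object_sync() entirely when a batch object is only busy on the ring that is about to execute it. A standalone sketch of that check (illustrative names, not kernel code):

#include <stdio.h>

#define TOY_RING_FLAG(id)	(1u << (id))

/* Sync is only needed if the object is still busy on some *other* ring. */
static int toy_needs_sync(unsigned active_mask, int target_ring)
{
	unsigned other_rings = ~TOY_RING_FLAG(target_ring);

	return (active_mask & other_rings) != 0;
}

int main(void)
{
	printf("%d\n", toy_needs_sync(0x1, 0));	/* busy only on ring 0, target 0 -> 0 */
	printf("%d\n", toy_needs_sync(0x5, 0));	/* also busy on ring 2 -> 1 */
	return 0;
}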

+ 1 - 1
drivers/gpu/drm/i915/i915_gem_gtt.c

@@ -757,7 +757,7 @@ static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
 	WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
 
 	/* FIXME: upper bound must not overflow 32 bits  */
-	WARN_ON((start + length) >= (1ULL << 32));
+	WARN_ON((start + length) > (1ULL << 32));
 
 	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
 		if (pd)

+ 7 - 4
drivers/gpu/drm/i915/i915_gem_userptr.c

@@ -219,11 +219,14 @@ i915_mmu_notifier_add(struct drm_device *dev,
 		      struct i915_mmu_object *mo)
 {
 	struct interval_tree_node *it;
-	int ret;
+	int ret = 0;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
+	/* By this point we have already done a lot of expensive setup that
+	 * we do not want to repeat just because the caller (e.g. X) has a
+	 * signal pending (and partly because of that expensive setup, X
+	 * using an interrupt timer is likely to get stuck in an EINTR loop).
+	 */
+	mutex_lock(&dev->struct_mutex);
 
 	/* Make sure we drop the final active reference (and thereby
 	 * remove the objects from the interval tree) before we do

+ 13 - 6
drivers/gpu/drm/i915/i915_gpu_error.c

@@ -192,15 +192,20 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				struct drm_i915_error_buffer *err,
 				int count)
 {
+	int i;
+
 	err_printf(m, "  %s [%d]:\n", name, count);
 
 	while (count--) {
-		err_printf(m, "    %08x %8u %02x %02x %x %x",
+		err_printf(m, "    %08x %8u %02x %02x [ ",
 			   err->gtt_offset,
 			   err->size,
 			   err->read_domains,
-			   err->write_domain,
-			   err->rseqno, err->wseqno);
+			   err->write_domain);
+		for (i = 0; i < I915_NUM_RINGS; i++)
+			err_printf(m, "%02x ", err->rseqno[i]);
+
+		err_printf(m, "] %02x", err->wseqno);
 		err_puts(m, pin_flag(err->pinned));
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
@@ -681,10 +686,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
+	int i;
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
-	err->rseqno = i915_gem_request_get_seqno(obj->last_read_req);
+	for (i = 0; i < I915_NUM_RINGS; i++)
+		err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]);
 	err->wseqno = i915_gem_request_get_seqno(obj->last_write_req);
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
@@ -697,8 +704,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
 	err->userptr = obj->userptr.mm != NULL;
-	err->ring = obj->last_read_req ?
-			i915_gem_request_get_ring(obj->last_read_req)->id : -1;
+	err->ring = obj->last_write_req ?
+			i915_gem_request_get_ring(obj->last_write_req)->id : -1;
 	err->cache_level = obj->cache_level;
 }
 

+ 21 - 7
drivers/gpu/drm/i915/i915_irq.c

@@ -79,7 +79,7 @@ static const u32 hpd_status_g4x[HPD_NUM_PINS] = {
 	[HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS
 };
 
-static const u32 hpd_status_i915[HPD_NUM_PINS] = { /* i915 and valleyview are the same */
+static const u32 hpd_status_i915[HPD_NUM_PINS] = {
 	[HPD_CRT] = CRT_HOTPLUG_INT_STATUS,
 	[HPD_SDVO_B] = SDVOB_HOTPLUG_INT_STATUS_I915,
 	[HPD_SDVO_C] = SDVOC_HOTPLUG_INT_STATUS_I915,
@@ -1070,6 +1070,18 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 	return events;
 }
 
+static bool any_waiters(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *ring;
+	int i;
+
+	for_each_ring(ring, dev_priv, i)
+		if (ring->irq_refcount)
+			return true;
+
+	return false;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
@@ -1114,6 +1126,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 			new_delay = dev_priv->rps.efficient_freq;
 			adj = 0;
 		}
+	} else if (any_waiters(dev_priv)) {
+		adj = 0;
 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
 		if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
 			new_delay = dev_priv->rps.efficient_freq;
@@ -1386,7 +1400,7 @@ static int i915_port_to_hotplug_shift(enum port port)
 	}
 }
 
-static inline enum port get_port_from_pin(enum hpd_pin pin)
+static enum port get_port_from_pin(enum hpd_pin pin)
 {
 	switch (pin) {
 	case HPD_PORT_B:
@@ -1400,10 +1414,10 @@ static inline enum port get_port_from_pin(enum hpd_pin pin)
 	}
 }
 
-static inline void intel_hpd_irq_handler(struct drm_device *dev,
-					 u32 hotplug_trigger,
-					 u32 dig_hotplug_reg,
-					 const u32 hpd[HPD_NUM_PINS])
+static void intel_hpd_irq_handler(struct drm_device *dev,
+				  u32 hotplug_trigger,
+				  u32 dig_hotplug_reg,
+				  const u32 hpd[HPD_NUM_PINS])
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int i;
@@ -1743,7 +1757,7 @@ static void i9xx_hpd_irq_handler(struct drm_device *dev)
 		 */
 		POSTING_READ(PORT_HOTPLUG_STAT);
 
-		if (IS_G4X(dev)) {
+		if (IS_G4X(dev) || IS_VALLEYVIEW(dev)) {
 			u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X;
 
 			intel_hpd_irq_handler(dev, hotplug_trigger, 0, hpd_status_g4x);
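
any_waiters() reports whether any engine currently has an interrupt waiter; the RPS worker then forces adj = 0 so the frequency is held rather than stepped down while someone is still waiting on the GPU. A much-simplified model of that decision (the real code also weighs efficient_freq and the up/down thresholds):

#include <stdbool.h>
#include <stdio.h>

static int pick_adjustment(bool up_threshold, bool waiters, bool down_timeout)
{
	if (up_threshold)
		return +1;	/* load crossed the up threshold: step up */
	if (waiters)
		return 0;	/* somebody is waiting on the GPU: hold */
	if (down_timeout)
		return -1;	/* idle long enough: step down */
	return 0;
}

int main(void)
{
	printf("waiters present -> adj %d\n", pick_adjustment(false, true, true));
	printf("truly idle      -> adj %d\n", pick_adjustment(false, false, true));
	return 0;
}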

+ 24 - 0
drivers/gpu/drm/i915/i915_reg.h

@@ -1204,6 +1204,12 @@ enum skl_disp_power_wells {
 #define   PORT_PLL_GAIN_CTL(x)		((x)  << 16)
 /* PORT_PLL_8_A */
 #define   PORT_PLL_TARGET_CNT_MASK	0x3FF
+/* PORT_PLL_9_A */
+#define  PORT_PLL_LOCK_THRESHOLD_MASK	0xe
+/* PORT_PLL_10_A */
+#define  PORT_PLL_DCO_AMP_OVR_EN_H	(1<<27)
+#define  PORT_PLL_DCO_AMP_MASK		0x3c00
+#define  PORT_PLL_DCO_AMP(x)		(x<<10)
 #define _PORT_PLL_BASE(port)		_PORT3(port, _PORT_PLL_0_A,	\
 						_PORT_PLL_0_B,		\
 						_PORT_PLL_0_C)
@@ -1455,6 +1461,8 @@ enum skl_disp_power_wells {
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
 
+#define HSW_GTT_CACHE_EN	0x4024
+#define   GTT_CACHE_EN_ALL	0xF0007FFF
 #define GEN7_WR_WATERMARK	0x4028
 #define GEN7_GFX_PRIO_CTRL	0x402C
 #define ARB_MODE		0x4030
@@ -5167,6 +5175,8 @@ enum skl_disp_power_wells {
 #define _PLANE_KEYMAX_2_A			0x702a0
 #define _PLANE_BUF_CFG_1_A			0x7027c
 #define _PLANE_BUF_CFG_2_A			0x7037c
+#define _PLANE_NV12_BUF_CFG_1_A		0x70278
+#define _PLANE_NV12_BUF_CFG_2_A		0x70378
 
 #define _PLANE_CTL_1_B				0x71180
 #define _PLANE_CTL_2_B				0x71280
@@ -5253,6 +5263,15 @@ enum skl_disp_power_wells {
 #define PLANE_BUF_CFG(pipe, plane)	\
 	_PLANE(plane, _PLANE_BUF_CFG_1(pipe), _PLANE_BUF_CFG_2(pipe))
 
+#define _PLANE_NV12_BUF_CFG_1_B		0x71278
+#define _PLANE_NV12_BUF_CFG_2_B		0x71378
+#define _PLANE_NV12_BUF_CFG_1(pipe)	\
+	_PIPE(pipe, _PLANE_NV12_BUF_CFG_1_A, _PLANE_NV12_BUF_CFG_1_B)
+#define _PLANE_NV12_BUF_CFG_2(pipe)	\
+	_PIPE(pipe, _PLANE_NV12_BUF_CFG_2_A, _PLANE_NV12_BUF_CFG_2_B)
+#define PLANE_NV12_BUF_CFG(pipe, plane)	\
+	_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe))
+
 /* SKL new cursor registers */
 #define _CUR_BUF_CFG_A				0x7017c
 #define _CUR_BUF_CFG_B				0x7117c
@@ -5774,6 +5793,7 @@ enum skl_disp_power_wells {
 
 /* GEN8 chicken */
 #define HDC_CHICKEN0				0x7300
+#define  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE	(1<<15)
 #define  HDC_FENCE_DEST_SLM_DISABLE		(1<<14)
 #define  HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1<<11)
 #define  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT	(1<<5)
@@ -6422,6 +6442,7 @@ enum skl_disp_power_wells {
 #define  TRANS_DP_PORT_SEL_D	(2<<29)
 #define  TRANS_DP_PORT_SEL_NONE	(3<<29)
 #define  TRANS_DP_PORT_SEL_MASK	(3<<29)
+#define  TRANS_DP_PIPE_TO_PORT(val)	((((val) & TRANS_DP_PORT_SEL_MASK) >> 29) + PORT_B)
 #define  TRANS_DP_AUDIO_ONLY	(1<<26)
 #define  TRANS_DP_ENH_FRAMING	(1<<18)
 #define  TRANS_DP_8BPC		(0<<9)
@@ -6681,6 +6702,9 @@ enum skl_disp_power_wells {
 #define     GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT	8
 #define     GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT	16
 #define     GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT	24
+#define   SKL_PCODE_CDCLK_CONTROL		0x7
+#define     SKL_CDCLK_PREPARE_FOR_CHANGE	0x3
+#define     SKL_CDCLK_READY_FOR_CHANGE		0x1
 #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
 #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
 #define   GEN6_READ_OC_PARAMS			0xc
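
TRANS_DP_PIPE_TO_PORT() inverts the port-select field: the two select bits are shifted back down and offset by PORT_B, so select value 2 (TRANS_DP_PORT_SEL_D) decodes to PORT_D. A quick check of the decode using the definitions above:

#include <stdio.h>

enum port { PORT_A, PORT_B, PORT_C, PORT_D };

#define TRANS_DP_PORT_SEL_MASK		(3 << 29)
#define TRANS_DP_PIPE_TO_PORT(val)	((((val) & TRANS_DP_PORT_SEL_MASK) >> 29) + PORT_B)

int main(void)
{
	unsigned int trans_dp = 2 << 29;	/* TRANS_DP_PORT_SEL_D */

	printf("decoded port: %d (PORT_D is %d)\n",
	       TRANS_DP_PIPE_TO_PORT(trans_dp), PORT_D);
	return 0;
}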

+ 81 - 69
drivers/gpu/drm/i915/intel_bios.c

@@ -36,10 +36,11 @@
 
 static int panel_type;
 
-static void *
-find_section(struct bdb_header *bdb, int section_id)
+static const void *
+find_section(const void *_bdb, int section_id)
 {
-	u8 *base = (u8 *)bdb;
+	const struct bdb_header *bdb = _bdb;
+	const u8 *base = _bdb;
 	int index = 0;
 	u16 total, current_size;
 	u8 current_id;
@@ -53,7 +54,7 @@ find_section(struct bdb_header *bdb, int section_id)
 		current_id = *(base + index);
 		index++;
 
-		current_size = *((u16 *)(base + index));
+		current_size = *((const u16 *)(base + index));
 		index += 2;
 
 		if (index + current_size > total)
@@ -69,7 +70,7 @@ find_section(struct bdb_header *bdb, int section_id)
 }
 
 static u16
-get_blocksize(void *p)
+get_blocksize(const void *p)
 {
 	u16 *block_ptr, block_size;
 
@@ -204,7 +205,7 @@ get_lvds_fp_timing(const struct bdb_header *bdb,
 /* Try to find integrated panel data */
 static void
 parse_lfp_panel_data(struct drm_i915_private *dev_priv,
-			    struct bdb_header *bdb)
+		     const struct bdb_header *bdb)
 {
 	const struct bdb_lvds_options *lvds_options;
 	const struct bdb_lvds_lfp_data *lvds_lfp_data;
@@ -310,7 +311,8 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv,
 }
 
 static void
-parse_lfp_backlight(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_lfp_backlight(struct drm_i915_private *dev_priv,
+		    const struct bdb_header *bdb)
 {
 	const struct bdb_lfp_backlight_data *backlight_data;
 	const struct bdb_lfp_backlight_data_entry *entry;
@@ -348,9 +350,9 @@ parse_lfp_backlight(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
 /* Try to find sdvo panel data */
 static void
 parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
-		      struct bdb_header *bdb)
+		      const struct bdb_header *bdb)
 {
-	struct lvds_dvo_timing *dvo_timing;
+	const struct lvds_dvo_timing *dvo_timing;
 	struct drm_display_mode *panel_fixed_mode;
 	int index;
 
@@ -361,7 +363,7 @@ parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
 	}
 
 	if (index == -1) {
-		struct bdb_sdvo_lvds_options *sdvo_lvds_options;
+		const struct bdb_sdvo_lvds_options *sdvo_lvds_options;
 
 		sdvo_lvds_options = find_section(bdb, BDB_SDVO_LVDS_OPTIONS);
 		if (!sdvo_lvds_options)
@@ -402,10 +404,10 @@ static int intel_bios_ssc_frequency(struct drm_device *dev,
 
 static void
 parse_general_features(struct drm_i915_private *dev_priv,
-		       struct bdb_header *bdb)
+		       const struct bdb_header *bdb)
 {
 	struct drm_device *dev = dev_priv->dev;
-	struct bdb_general_features *general;
+	const struct bdb_general_features *general;
 
 	general = find_section(bdb, BDB_GENERAL_FEATURES);
 	if (general) {
@@ -428,9 +430,9 @@ parse_general_features(struct drm_i915_private *dev_priv,
 
 static void
 parse_general_definitions(struct drm_i915_private *dev_priv,
-			  struct bdb_header *bdb)
+			  const struct bdb_header *bdb)
 {
-	struct bdb_general_definitions *general;
+	const struct bdb_general_definitions *general;
 
 	general = find_section(bdb, BDB_GENERAL_DEFINITIONS);
 	if (general) {
@@ -447,19 +449,19 @@ parse_general_definitions(struct drm_i915_private *dev_priv,
 	}
 }
 
-static union child_device_config *
-child_device_ptr(struct bdb_general_definitions *p_defs, int i)
+static const union child_device_config *
+child_device_ptr(const struct bdb_general_definitions *p_defs, int i)
 {
-	return (void *) &p_defs->devices[i * p_defs->child_dev_size];
+	return (const void *) &p_defs->devices[i * p_defs->child_dev_size];
 }
 
 static void
 parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
-			  struct bdb_header *bdb)
+			  const struct bdb_header *bdb)
 {
 	struct sdvo_device_mapping *p_mapping;
-	struct bdb_general_definitions *p_defs;
-	union child_device_config *p_child;
+	const struct bdb_general_definitions *p_defs;
+	const union child_device_config *p_child;
 	int i, child_device_num, count;
 	u16	block_size;
 
@@ -545,9 +547,9 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
 
 static void
 parse_driver_features(struct drm_i915_private *dev_priv,
-		       struct bdb_header *bdb)
+		      const struct bdb_header *bdb)
 {
-	struct bdb_driver_features *driver;
+	const struct bdb_driver_features *driver;
 
 	driver = find_section(bdb, BDB_DRIVER_FEATURES);
 	if (!driver)
@@ -571,11 +573,11 @@ parse_driver_features(struct drm_i915_private *dev_priv,
 }
 
 static void
-parse_edp(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_edp(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
 {
-	struct bdb_edp *edp;
-	struct edp_power_seq *edp_pps;
-	struct edp_link_params *edp_link_params;
+	const struct bdb_edp *edp;
+	const struct edp_power_seq *edp_pps;
+	const struct edp_link_params *edp_link_params;
 
 	edp = find_section(bdb, BDB_EDP);
 	if (!edp) {
@@ -683,10 +685,10 @@ parse_edp(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
 }
 
 static void
-parse_psr(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
 {
-	struct bdb_psr *psr;
-	struct psr_table *psr_table;
+	const struct bdb_psr *psr;
+	const struct psr_table *psr_table;
 
 	psr = find_section(bdb, BDB_PSR);
 	if (!psr) {
@@ -794,13 +796,14 @@ static u8 *goto_next_sequence(u8 *data, int *size)
 }
 
 static void
-parse_mipi(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_mipi(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
 {
-	struct bdb_mipi_config *start;
-	struct bdb_mipi_sequence *sequence;
-	struct mipi_config *config;
-	struct mipi_pps_data *pps;
-	u8 *data, *seq_data;
+	const struct bdb_mipi_config *start;
+	const struct bdb_mipi_sequence *sequence;
+	const struct mipi_config *config;
+	const struct mipi_pps_data *pps;
+	u8 *data;
+	const u8 *seq_data;
 	int i, panel_id, seq_size;
 	u16 block_size;
 
@@ -944,7 +947,7 @@ err:
 }
 
 static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
-			   struct bdb_header *bdb)
+			   const struct bdb_header *bdb)
 {
 	union child_device_config *it, *child = NULL;
 	struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port];
@@ -1046,7 +1049,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
 }
 
 static void parse_ddi_ports(struct drm_i915_private *dev_priv,
-			    struct bdb_header *bdb)
+			    const struct bdb_header *bdb)
 {
 	struct drm_device *dev = dev_priv->dev;
 	enum port port;
@@ -1066,10 +1069,11 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv,
 
 static void
 parse_device_mapping(struct drm_i915_private *dev_priv,
-		       struct bdb_header *bdb)
+		     const struct bdb_header *bdb)
 {
-	struct bdb_general_definitions *p_defs;
-	union child_device_config *p_child, *child_dev_ptr;
+	const struct bdb_general_definitions *p_defs;
+	const union child_device_config *p_child;
+	union child_device_config *child_dev_ptr;
 	int i, child_device_num, count;
 	u16	block_size;
 
@@ -1126,8 +1130,7 @@ parse_device_mapping(struct drm_i915_private *dev_priv,
 
 		child_dev_ptr = dev_priv->vbt.child_dev + count;
 		count++;
-		memcpy((void *)child_dev_ptr, (void *)p_child,
-					sizeof(*p_child));
+		memcpy(child_dev_ptr, p_child, sizeof(*p_child));
 	}
 	return;
 }
@@ -1196,19 +1199,22 @@ static const struct dmi_system_id intel_no_opregion_vbt[] = {
 	{ }
 };
 
-static struct bdb_header *validate_vbt(char *base, size_t size,
-				       struct vbt_header *vbt,
-				       const char *source)
+static const struct bdb_header *validate_vbt(const void __iomem *_base,
+					     size_t size,
+					     const void __iomem *_vbt,
+					     const char *source)
 {
-	size_t offset;
-	struct bdb_header *bdb;
-
-	if (vbt == NULL) {
-		DRM_DEBUG_DRIVER("VBT signature missing\n");
-		return NULL;
-	}
+	/*
+	 * This is the one place where we explicitly discard the address space
+	 * (__iomem) of the BIOS/VBT. (And this will cause a sparse complaint.)
+	 * From now on everything is based on 'base', and treated as regular
+	 * memory.
+	 */
+	const void *base = (const void *) _base;
+	size_t offset = _vbt - _base;
+	const struct vbt_header *vbt = base + offset;
+	const struct bdb_header *bdb;
 
-	offset = (char *)vbt - base;
 	if (offset + sizeof(struct vbt_header) > size) {
 		DRM_DEBUG_DRIVER("VBT header incomplete\n");
 		return NULL;
@@ -1225,7 +1231,7 @@ static struct bdb_header *validate_vbt(char *base, size_t size,
 		return NULL;
 	}
 
-	bdb = (struct bdb_header *)(base + offset);
+	bdb = base + offset;
 	if (offset + bdb->bdb_size > size) {
 		DRM_DEBUG_DRIVER("BDB incomplete\n");
 		return NULL;
@@ -1236,6 +1242,22 @@ static struct bdb_header *validate_vbt(char *base, size_t size,
 	return bdb;
 }
 
+static const struct bdb_header *find_vbt(void __iomem *bios, size_t size)
+{
+	const struct bdb_header *bdb = NULL;
+	size_t i;
+
+	/* Scour memory looking for the VBT signature. */
+	for (i = 0; i + 4 < size; i++) {
+		if (ioread32(bios + i) == *((const u32 *) "$VBT")) {
+			bdb = validate_vbt(bios, size, bios + i, "PCI ROM");
+			break;
+		}
+	}
+
+	return bdb;
+}
+
 /**
  * intel_parse_bios - find VBT and initialize settings from the BIOS
  * @dev: DRM device
@@ -1250,7 +1272,7 @@ intel_parse_bios(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct pci_dev *pdev = dev->pdev;
-	struct bdb_header *bdb = NULL;
+	const struct bdb_header *bdb = NULL;
 	u8 __iomem *bios = NULL;
 
 	if (HAS_PCH_NOP(dev))
@@ -1260,27 +1282,17 @@ intel_parse_bios(struct drm_device *dev)
 
 	/* XXX Should this validation be moved to intel_opregion.c? */
 	if (!dmi_check_system(intel_no_opregion_vbt) && dev_priv->opregion.vbt)
-		bdb = validate_vbt((char *)dev_priv->opregion.header, OPREGION_SIZE,
-				   (struct vbt_header *)dev_priv->opregion.vbt,
-				   "OpRegion");
+		bdb = validate_vbt(dev_priv->opregion.header, OPREGION_SIZE,
+				   dev_priv->opregion.vbt, "OpRegion");
 
 	if (bdb == NULL) {
-		size_t i, size;
+		size_t size;
 
 		bios = pci_map_rom(pdev, &size);
 		if (!bios)
 			return -1;
 
-		/* Scour memory looking for the VBT signature */
-		for (i = 0; i + 4 < size; i++) {
-			if (memcmp(bios + i, "$VBT", 4) == 0) {
-				bdb = validate_vbt(bios, size,
-						   (struct vbt_header *)(bios + i),
-						   "PCI ROM");
-				break;
-			}
-		}
-
+		bdb = find_vbt(bios, size);
 		if (!bdb) {
 			pci_unmap_rom(pdev, bios);
 			return -1;
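
find_vbt() compares four ROM bytes at a time against the "$VBT" signature read via ioread32, rather than running memcmp() directly on an __iomem pointer; the constant it compares against is simply the signature reinterpreted as a host-endian 32-bit word (0x54425624 on little-endian). Illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t sig;

	/* Same trick as the patch: treat the 4-byte "$VBT" marker as a u32. */
	memcpy(&sig, "$VBT", sizeof(sig));
	printf("signature word: 0x%08x\n", sig);	/* 0x54425624 on little-endian */
	return 0;
}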

+ 14 - 1
drivers/gpu/drm/i915/intel_crt.c

@@ -207,6 +207,14 @@ static void intel_disable_crt(struct intel_encoder *encoder)
 	intel_crt_set_dpms(encoder, DRM_MODE_DPMS_OFF);
 }
 
+static void pch_disable_crt(struct intel_encoder *encoder)
+{
+}
+
+static void pch_post_disable_crt(struct intel_encoder *encoder)
+{
+	intel_disable_crt(encoder);
+}
 
 static void hsw_crt_post_disable(struct intel_encoder *encoder)
 {
@@ -888,7 +896,12 @@ void intel_crt_init(struct drm_device *dev)
 		crt->adpa_reg = ADPA;
 
 	crt->base.compute_config = intel_crt_compute_config;
-	crt->base.disable = intel_disable_crt;
+	if (HAS_PCH_SPLIT(dev) && !HAS_DDI(dev)) {
+		crt->base.disable = pch_disable_crt;
+		crt->base.post_disable = pch_post_disable_crt;
+	} else {
+		crt->base.disable = intel_disable_crt;
+	}
 	crt->base.enable = intel_enable_crt;
 	if (I915_HAS_HOTPLUG(dev))
 		crt->base.hpd_pin = HPD_CRT;
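
On PCH platforms the CRT port must now be shut off only after the pipe, so .disable becomes a no-op and the actual disable moves to .post_disable, which the modeset code runs once the pipe is already down. A minimal sketch of that hook ordering, with hypothetical types rather than the driver's:

#include <stdio.h>

struct encoder_hooks {
	void (*disable)(void);		/* called before the pipe is shut down */
	void (*post_disable)(void);	/* called after the pipe is shut down */
};

static void pch_disable_crt(void) { /* intentionally empty */ }
static void pch_post_disable_crt(void) { printf("CRT port off (pipe already off)\n"); }

int main(void)
{
	struct encoder_hooks crt = {
		.disable = pch_disable_crt,
		.post_disable = pch_post_disable_crt,
	};

	crt.disable();		/* nothing: keep the port alive for now */
	printf("pipe off\n");
	crt.post_disable();	/* now it is safe to disable the port */
	return 0;
}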

+ 53 - 0
drivers/gpu/drm/i915/intel_csr.c

@@ -25,6 +25,22 @@
 #include "i915_drv.h"
 #include "i915_reg.h"
 
+/**
+ * DOC: csr support for dmc
+ *
+ * Display Context Save and Restore (CSR) firmware support added from gen9
+ * onwards to drive newly added DMC (Display microcontroller) in display
+ * engine to save and restore the state of display engine when it enters a
+ * low-power state and comes back to normal.
+ *
+ * Firmware loading status will be one of the below states: FW_UNINITIALIZED,
+ * FW_LOADED, FW_FAILED.
+ *
+ * Once the firmware is written into the registers, the status is moved from
+ * FW_UNINITIALIZED to FW_LOADED; on any erroneous condition the status is
+ * moved to FW_FAILED.
+ */
+
 #define I915_CSR_SKL "i915/skl_dmc_ver4.bin"
 
 MODULE_FIRMWARE(I915_CSR_SKL);
@@ -183,6 +199,14 @@ static char intel_get_substepping(struct drm_device *dev)
 		return -ENODATA;
 }
 
+/**
+ * intel_csr_load_status_get() - to get firmware loading status.
+ * @dev_priv: i915 device.
+ *
+ * This function helps to get the firmware loading status.
+ *
+ * Return: Firmware loading status.
+ */
 enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv)
 {
 	enum csr_state state;
@@ -194,6 +218,13 @@ enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv)
 	return state;
 }
 
+/**
+ * intel_csr_load_status_set() - set the firmware loading status.
+ * @dev_priv: i915 device.
+ * @state: enumeration of firmware loading status.
+ *
+ * Set the firmware loading status.
+ */
 void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
 			enum csr_state state)
 {
@@ -202,6 +233,14 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
 	mutex_unlock(&dev_priv->csr_lock);
 }
 
+/**
+ * intel_csr_load_program() - write the firmware from memory to register.
+ * @dev: drm device.
+ *
+ * CSR firmware is read from a .bin file and kept in internal memory one time.
+ * Everytime display comes back from low power state this function is called to
+ * copy the firmware from internal memory to registers.
+ */
 void intel_csr_load_program(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -359,6 +398,13 @@ out:
 	release_firmware(fw);
 }
 
+/**
+ * intel_csr_ucode_init() - initialize the firmware loading.
+ * @dev: drm device.
+ *
+ * This function is called at the time of loading the display driver to read
+ * the firmware from a .bin file and copy it into internal memory.
+ */
 void intel_csr_ucode_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -393,6 +439,13 @@ void intel_csr_ucode_init(struct drm_device *dev)
 	}
 }
 
+/**
+ * intel_csr_ucode_fini() - unload the CSR firmware.
+ * @dev: drm device.
+ *
+ * Firmware unloading includes freeing the internal memory and resetting the
+ * firmware loading status.
+ */
 void intel_csr_ucode_fini(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
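
The kernel-doc above describes a three-state firmware life cycle; the sketch below spells it out, assuming the states are exactly FW_UNINITIALIZED, FW_LOADED and FW_FAILED as documented (illustrative only, not the driver's code):

#include <stdbool.h>
#include <stdio.h>

enum csr_state { FW_UNINITIALIZED, FW_LOADED, FW_FAILED };

static enum csr_state csr_load(bool parse_ok, bool write_ok)
{
	if (!parse_ok)
		return FW_FAILED;	/* bad .bin blob: give up */
	return write_ok ? FW_LOADED	/* programmed into the registers */
			: FW_FAILED;
}

int main(void)
{
	printf("happy path -> %d (FW_LOADED is %d)\n", csr_load(true, true), FW_LOADED);
	printf("bad blob   -> %d (FW_FAILED is %d)\n", csr_load(false, true), FW_FAILED);
	return 0;
}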

+ 83 - 37
drivers/gpu/drm/i915/intel_ddi.c

@@ -1087,6 +1087,9 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
 		      WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) |
 		      WRPLL_DIVIDER_POST(p);
 
+		memset(&crtc_state->dpll_hw_state, 0,
+		       sizeof(crtc_state->dpll_hw_state));
+
 		crtc_state->dpll_hw_state.wrpll = val;
 
 		pll = intel_get_shared_dpll(intel_crtc, crtc_state);
@@ -1309,6 +1312,9 @@ skl_ddi_pll_select(struct intel_crtc *intel_crtc,
 	} else /* eDP */
 		return true;
 
+	memset(&crtc_state->dpll_hw_state, 0,
+	       sizeof(crtc_state->dpll_hw_state));
+
 	crtc_state->dpll_hw_state.ctrl1 = ctrl1;
 	crtc_state->dpll_hw_state.cfgcr1 = cfgcr1;
 	crtc_state->dpll_hw_state.cfgcr2 = cfgcr2;
@@ -1334,22 +1340,17 @@ struct bxt_clk_div {
 	uint32_t m2_frac;
 	bool m2_frac_en;
 	uint32_t n;
-	uint32_t prop_coef;
-	uint32_t int_coef;
-	uint32_t gain_ctl;
-	uint32_t targ_cnt;
-	uint32_t lanestagger;
 };
 
 /* pre-calculated values for DP linkrates */
 static struct bxt_clk_div bxt_dp_clk_val[7] = {
-	/* 162 */ {4, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
-	/* 270 */ {4, 1, 27,       0, 0, 1, 3,  8, 1, 9, 0xd},
-	/* 540 */ {2, 1, 27,       0, 0, 1, 3,  8, 1, 9, 0x18},
-	/* 216 */ {3, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
-	/* 243 */ {4, 1, 24, 1258291, 1, 1, 5, 11, 2, 9, 0xd},
-	/* 324 */ {4, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
-	/* 432 */ {3, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0x18}
+	/* 162 */ {4, 2, 32, 1677722, 1, 1},
+	/* 270 */ {4, 1, 27,       0, 0, 1},
+	/* 540 */ {2, 1, 27,       0, 0, 1},
+	/* 216 */ {3, 2, 32, 1677722, 1, 1},
+	/* 243 */ {4, 1, 24, 1258291, 1, 1},
+	/* 324 */ {4, 1, 32, 1677722, 1, 1},
+	/* 432 */ {3, 1, 32, 1677722, 1, 1}
 };
 
 static bool
@@ -1360,6 +1361,9 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
 {
 	struct intel_shared_dpll *pll;
 	struct bxt_clk_div clk_div = {0};
+	int vco = 0;
+	uint32_t prop_coef, int_coef, gain_ctl, targ_cnt;
+	uint32_t dcoampovr_en_h, dco_amp, lanestagger;
 
 	if (intel_encoder->type == INTEL_OUTPUT_HDMI) {
 		intel_clock_t best_clock;
@@ -1383,21 +1387,7 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
 		clk_div.m2_frac = best_clock.m2 & ((1 << 22) - 1);
 		clk_div.m2_frac_en = clk_div.m2_frac != 0;
 
-		/* FIXME: set coef, gain, targcnt based on freq band */
-		clk_div.prop_coef = 5;
-		clk_div.int_coef = 11;
-		clk_div.gain_ctl = 2;
-		clk_div.targ_cnt = 9;
-		if (clock > 270000)
-			clk_div.lanestagger = 0x18;
-		else if (clock > 135000)
-			clk_div.lanestagger = 0x0d;
-		else if (clock > 67000)
-			clk_div.lanestagger = 0x07;
-		else if (clock > 33000)
-			clk_div.lanestagger = 0x04;
-		else
-			clk_div.lanestagger = 0x02;
+		vco = best_clock.vco;
 	} else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
 			intel_encoder->type == INTEL_OUTPUT_EDP) {
 		struct drm_encoder *encoder = &intel_encoder->base;
@@ -1417,8 +1407,48 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
 			clk_div = bxt_dp_clk_val[0];
 			DRM_ERROR("Unknown link rate\n");
 		}
+		vco = clock * 10 / 2 * clk_div.p1 * clk_div.p2;
+	}
+
+	dco_amp = 15;
+	dcoampovr_en_h = 0;
+	if (vco >= 6200000 && vco <= 6480000) {
+		prop_coef = 4;
+		int_coef = 9;
+		gain_ctl = 3;
+		targ_cnt = 8;
+	} else if ((vco > 5400000 && vco < 6200000) ||
+			(vco >= 4800000 && vco < 5400000)) {
+		prop_coef = 5;
+		int_coef = 11;
+		gain_ctl = 3;
+		targ_cnt = 9;
+		if (vco >= 4800000 && vco < 5400000)
+			dcoampovr_en_h = 1;
+	} else if (vco == 5400000) {
+		prop_coef = 3;
+		int_coef = 8;
+		gain_ctl = 1;
+		targ_cnt = 9;
+	} else {
+		DRM_ERROR("Invalid VCO\n");
+		return false;
 	}
 
+	memset(&crtc_state->dpll_hw_state, 0,
+	       sizeof(crtc_state->dpll_hw_state));
+
+	if (clock > 270000)
+		lanestagger = 0x18;
+	else if (clock > 135000)
+		lanestagger = 0x0d;
+	else if (clock > 67000)
+		lanestagger = 0x07;
+	else if (clock > 33000)
+		lanestagger = 0x04;
+	else
+		lanestagger = 0x02;
+
 	crtc_state->dpll_hw_state.ebb0 =
 		PORT_PLL_P1(clk_div.p1) | PORT_PLL_P2(clk_div.p2);
 	crtc_state->dpll_hw_state.pll0 = clk_div.m2_int;
@@ -1430,14 +1460,19 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
 			PORT_PLL_M2_FRAC_ENABLE;
 
 	crtc_state->dpll_hw_state.pll6 =
-		clk_div.prop_coef | PORT_PLL_INT_COEFF(clk_div.int_coef);
+		prop_coef | PORT_PLL_INT_COEFF(int_coef);
 	crtc_state->dpll_hw_state.pll6 |=
-		PORT_PLL_GAIN_CTL(clk_div.gain_ctl);
+		PORT_PLL_GAIN_CTL(gain_ctl);
+
+	crtc_state->dpll_hw_state.pll8 = targ_cnt;
 
-	crtc_state->dpll_hw_state.pll8 = clk_div.targ_cnt;
+	if (dcoampovr_en_h)
+		crtc_state->dpll_hw_state.pll10 = PORT_PLL_DCO_AMP_OVR_EN_H;
+
+	crtc_state->dpll_hw_state.pll10 |= PORT_PLL_DCO_AMP(dco_amp);
 
 	crtc_state->dpll_hw_state.pcsdw12 =
-		LANESTAGGER_STRAP_OVRD | clk_div.lanestagger;
+		LANESTAGGER_STRAP_OVRD | lanestagger;
 
 	pll = intel_get_shared_dpll(intel_crtc, crtc_state);
 	if (pll == NULL) {
@@ -2367,10 +2402,16 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv,
 	temp |= pll->config.hw_state.pll8;
 	I915_WRITE(BXT_PORT_PLL(port, 8), temp);
 
-	/*
-	 * FIXME: program PORT_PLL_9/i_lockthresh according to the latest
-	 * specification update.
-	 */
+	temp = I915_READ(BXT_PORT_PLL(port, 9));
+	temp &= ~PORT_PLL_LOCK_THRESHOLD_MASK;
+	temp |= (5 << 1);
+	I915_WRITE(BXT_PORT_PLL(port, 9), temp);
+
+	temp = I915_READ(BXT_PORT_PLL(port, 10));
+	temp &= ~PORT_PLL_DCO_AMP_OVR_EN_H;
+	temp &= ~PORT_PLL_DCO_AMP_MASK;
+	temp |= pll->config.hw_state.pll10;
+	I915_WRITE(BXT_PORT_PLL(port, 10), temp);
 
 	/* Recalibrate with new settings */
 	temp = I915_READ(BXT_PORT_PLL_EBB_4(port));
@@ -2434,6 +2475,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	hw_state->pll3 = I915_READ(BXT_PORT_PLL(port, 3));
 	hw_state->pll6 = I915_READ(BXT_PORT_PLL(port, 6));
 	hw_state->pll8 = I915_READ(BXT_PORT_PLL(port, 8));
+	hw_state->pll10 = I915_READ(BXT_PORT_PLL(port, 10));
 	/*
 	 * While we write to the group register to program all lanes at once we
 	 * can read only lane registers. We configure all lanes the same way, so
@@ -2468,6 +2510,7 @@ void intel_ddi_pll_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t val = I915_READ(LCPLL_CTL);
+	int cdclk_freq;
 
 	if (IS_SKYLAKE(dev))
 		skl_shared_dplls_init(dev_priv);
@@ -2476,12 +2519,15 @@ void intel_ddi_pll_init(struct drm_device *dev)
 	else
 		hsw_shared_dplls_init(dev_priv);
 
-	DRM_DEBUG_KMS("CDCLK running at %dKHz\n",
-		      dev_priv->display.get_display_clock_speed(dev));
+	cdclk_freq = dev_priv->display.get_display_clock_speed(dev);
+	DRM_DEBUG_KMS("CDCLK running at %dKHz\n", cdclk_freq);
 
 	if (IS_SKYLAKE(dev)) {
+		dev_priv->skl_boot_cdclk = cdclk_freq;
 		if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE))
 			DRM_ERROR("LCPLL1 is disabled\n");
+		else
+			intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
 	} else if (IS_BROXTON(dev)) {
 		broxton_init_cdclk(dev);
 		broxton_ddi_phy_init(dev);
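
For DP the VCO is now derived from the link rate and the pre-dividers, vco = clock * 10 / 2 * p1 * p2, and the PLL coefficients are picked per VCO band instead of being hard-coded per link rate. Worked example for the 270 MHz entry (p1 = 4, p2 = 1 from bxt_dp_clk_val):

#include <stdio.h>

int main(void)
{
	/* 270 MHz DP link rate entry from bxt_dp_clk_val: p1 = 4, p2 = 1 */
	unsigned int clock = 270000;	/* kHz */
	unsigned int p1 = 4, p2 = 1;
	unsigned int vco = clock * 10 / 2 * p1 * p2;

	/* 5400000 kHz lands in the vco == 5400000 band: prop 3, int 8, gain 1. */
	printf("vco = %u kHz\n", vco);
	return 0;
}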

+ 401 - 128
drivers/gpu/drm/i915/intel_display.c

@@ -45,29 +45,33 @@
 #include <drm/drm_rect.h>
 #include <linux/dma_remapping.h>
 
-/* Primary plane formats supported by all gen */
-#define COMMON_PRIMARY_FORMATS \
-	DRM_FORMAT_C8, \
-	DRM_FORMAT_RGB565, \
-	DRM_FORMAT_XRGB8888, \
-	DRM_FORMAT_ARGB8888
-
 /* Primary plane formats for gen <= 3 */
-static const uint32_t intel_primary_formats_gen2[] = {
-	COMMON_PRIMARY_FORMATS,
+static const uint32_t i8xx_primary_formats[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_RGB565,
 	DRM_FORMAT_XRGB1555,
-	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_XRGB8888,
 };
 
 /* Primary plane formats for gen >= 4 */
-static const uint32_t intel_primary_formats_gen4[] = {
-	COMMON_PRIMARY_FORMATS, \
+static const uint32_t i965_primary_formats[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB2101010,
+	DRM_FORMAT_XBGR2101010,
+};
+
+static const uint32_t skl_primary_formats[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ARGB8888,
 	DRM_FORMAT_ABGR8888,
 	DRM_FORMAT_XRGB2101010,
-	DRM_FORMAT_ARGB2101010,
 	DRM_FORMAT_XBGR2101010,
-	DRM_FORMAT_ABGR2101010,
 };
 
 /* Cursor formats */
@@ -2702,26 +2706,21 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc,
 		dspcntr |= DISPPLANE_8BPP;
 		break;
 	case DRM_FORMAT_XRGB1555:
-	case DRM_FORMAT_ARGB1555:
 		dspcntr |= DISPPLANE_BGRX555;
 		break;
 	case DRM_FORMAT_RGB565:
 		dspcntr |= DISPPLANE_BGRX565;
 		break;
 	case DRM_FORMAT_XRGB8888:
-	case DRM_FORMAT_ARGB8888:
 		dspcntr |= DISPPLANE_BGRX888;
 		break;
 	case DRM_FORMAT_XBGR8888:
-	case DRM_FORMAT_ABGR8888:
 		dspcntr |= DISPPLANE_RGBX888;
 		break;
 	case DRM_FORMAT_XRGB2101010:
-	case DRM_FORMAT_ARGB2101010:
 		dspcntr |= DISPPLANE_BGRX101010;
 		break;
 	case DRM_FORMAT_XBGR2101010:
-	case DRM_FORMAT_ABGR2101010:
 		dspcntr |= DISPPLANE_RGBX101010;
 		break;
 	default:
@@ -2817,19 +2816,15 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc,
 		dspcntr |= DISPPLANE_BGRX565;
 		break;
 	case DRM_FORMAT_XRGB8888:
-	case DRM_FORMAT_ARGB8888:
 		dspcntr |= DISPPLANE_BGRX888;
 		break;
 	case DRM_FORMAT_XBGR8888:
-	case DRM_FORMAT_ABGR8888:
 		dspcntr |= DISPPLANE_RGBX888;
 		break;
 	case DRM_FORMAT_XRGB2101010:
-	case DRM_FORMAT_ARGB2101010:
 		dspcntr |= DISPPLANE_BGRX101010;
 		break;
 	case DRM_FORMAT_XBGR2101010:
-	case DRM_FORMAT_ABGR2101010:
 		dspcntr |= DISPPLANE_RGBX101010;
 		break;
 	default:
@@ -2953,95 +2948,83 @@ void skl_detach_scalers(struct intel_crtc *intel_crtc)
 
 u32 skl_plane_ctl_format(uint32_t pixel_format)
 {
-	u32 plane_ctl_format = 0;
 	switch (pixel_format) {
+	case DRM_FORMAT_C8:
+		return PLANE_CTL_FORMAT_INDEXED;
 	case DRM_FORMAT_RGB565:
-		plane_ctl_format = PLANE_CTL_FORMAT_RGB_565;
-		break;
+		return PLANE_CTL_FORMAT_RGB_565;
 	case DRM_FORMAT_XBGR8888:
-		plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX;
-		break;
+		return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX;
 	case DRM_FORMAT_XRGB8888:
-		plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888;
-		break;
+		return PLANE_CTL_FORMAT_XRGB_8888;
 	/*
 	 * XXX: For ARBG/ABGR formats we default to expecting scanout buffers
 	 * to be already pre-multiplied. We need to add a knob (or a different
 	 * DRM_FORMAT) for user-space to configure that.
 	 */
 	case DRM_FORMAT_ABGR8888:
-		plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX |
+		return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX |
 			PLANE_CTL_ALPHA_SW_PREMULTIPLY;
-		break;
 	case DRM_FORMAT_ARGB8888:
-		plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 |
+		return PLANE_CTL_FORMAT_XRGB_8888 |
 			PLANE_CTL_ALPHA_SW_PREMULTIPLY;
-		break;
 	case DRM_FORMAT_XRGB2101010:
-		plane_ctl_format = PLANE_CTL_FORMAT_XRGB_2101010;
-		break;
+		return PLANE_CTL_FORMAT_XRGB_2101010;
 	case DRM_FORMAT_XBGR2101010:
-		plane_ctl_format = PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010;
-		break;
+		return PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010;
 	case DRM_FORMAT_YUYV:
-		plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV;
-		break;
+		return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV;
 	case DRM_FORMAT_YVYU:
-		plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YVYU;
-		break;
+		return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YVYU;
 	case DRM_FORMAT_UYVY:
-		plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY;
-		break;
+		return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY;
 	case DRM_FORMAT_VYUY:
-		plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY;
-		break;
+		return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY;
 	default:
-		BUG();
+		MISSING_CASE(pixel_format);
 	}
-	return plane_ctl_format;
+
+	return 0;
 }
 
 u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
 {
-	u32 plane_ctl_tiling = 0;
 	switch (fb_modifier) {
 	case DRM_FORMAT_MOD_NONE:
 		break;
 	case I915_FORMAT_MOD_X_TILED:
-		plane_ctl_tiling = PLANE_CTL_TILED_X;
-		break;
+		return PLANE_CTL_TILED_X;
 	case I915_FORMAT_MOD_Y_TILED:
-		plane_ctl_tiling = PLANE_CTL_TILED_Y;
-		break;
+		return PLANE_CTL_TILED_Y;
 	case I915_FORMAT_MOD_Yf_TILED:
-		plane_ctl_tiling = PLANE_CTL_TILED_YF;
-		break;
+		return PLANE_CTL_TILED_YF;
 	default:
 		MISSING_CASE(fb_modifier);
 	}
-	return plane_ctl_tiling;
+
+	return 0;
 }
 
 u32 skl_plane_ctl_rotation(unsigned int rotation)
 {
-	u32 plane_ctl_rotation = 0;
 	switch (rotation) {
 	case BIT(DRM_ROTATE_0):
 		break;
+	/*
+	 * DRM_ROTATE_ is counter clockwise to stay compatible with Xrandr
+	 * while i915 HW rotation is clockwise; that's why the values are swapped.
+	 */
 	case BIT(DRM_ROTATE_90):
-		plane_ctl_rotation = PLANE_CTL_ROTATE_90;
-		break;
+		return PLANE_CTL_ROTATE_270;
 	case BIT(DRM_ROTATE_180):
-		plane_ctl_rotation = PLANE_CTL_ROTATE_180;
-		break;
+		return PLANE_CTL_ROTATE_180;
 	case BIT(DRM_ROTATE_270):
-		plane_ctl_rotation = PLANE_CTL_ROTATE_270;
-		break;
+		return PLANE_CTL_ROTATE_90;
 	default:
 		MISSING_CASE(rotation);
 	}
 
-	return plane_ctl_rotation;
+	return 0;
 }
 
 static void skylake_update_primary_plane(struct drm_crtc *crtc,
@@ -3115,7 +3098,7 @@ static void skylake_update_primary_plane(struct drm_crtc *crtc,
 
 	if (intel_rotation_90_or_270(rotation)) {
 		/* stride = Surface height in tiles */
-		tile_height = intel_tile_height(dev, fb->bits_per_pixel,
+		tile_height = intel_tile_height(dev, fb->pixel_format,
 						fb->modifier[0]);
 		stride = DIV_ROUND_UP(fb->height, tile_height);
 		x_offset = stride * tile_height - y - src_h;
@@ -3295,27 +3278,30 @@ void intel_finish_reset(struct drm_device *dev)
 	drm_modeset_unlock_all(dev);
 }
 
-static int
+static void
 intel_finish_fb(struct drm_framebuffer *old_fb)
 {
 	struct drm_i915_gem_object *obj = intel_fb_obj(old_fb);
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	bool was_interruptible = dev_priv->mm.interruptible;
 	int ret;
 
 	/* Big Hammer, we also need to ensure that any pending
 	 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
 	 * current scanout is retired before unpinning the old
-	 * framebuffer.
+	 * framebuffer. Note that we rely on userspace rendering
+	 * into the buffer attached to the pipe they are waiting
+	 * on. If not, userspace generates a GPU hang with IPEHR
+	 * point to the MI_WAIT_FOR_EVENT.
 	 *
 	 * This should only fail upon a hung GPU, in which case we
 	 * can safely continue.
 	 */
 	dev_priv->mm.interruptible = false;
-	ret = i915_gem_object_finish_gpu(obj);
+	ret = i915_gem_object_wait_rendering(obj, true);
 	dev_priv->mm.interruptible = was_interruptible;
 
-	return ret;
+	WARN_ON(ret);
 }
 
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
@@ -4182,8 +4168,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc)
 		temp &= ~(TRANS_DP_PORT_SEL_MASK |
 			  TRANS_DP_SYNC_MASK |
 			  TRANS_DP_BPC_MASK);
-		temp |= (TRANS_DP_OUTPUT_ENABLE |
-			 TRANS_DP_ENH_FRAMING);
+		temp |= TRANS_DP_OUTPUT_ENABLE;
 		temp |= bpc << 9; /* same format but at 11:9 */
 
 		if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC)
@@ -4517,9 +4502,10 @@ skl_update_scaler_users(
 	}
 
 	/* check colorkey */
-	if (intel_plane && intel_plane->ckey.flags != I915_SET_COLORKEY_NONE) {
-		DRM_DEBUG_KMS("PLANE:%d scaling with color key not allowed",
-			intel_plane->base.base.id);
+	if (WARN_ON(intel_plane &&
+		intel_plane->ckey.flags != I915_SET_COLORKEY_NONE)) {
+		DRM_DEBUG_KMS("PLANE:%d scaling %ux%u->%ux%u not allowed with colorkey",
+			intel_plane->base.base.id, src_w, src_h, dst_w, dst_h);
 		return -EINVAL;
 	}
 
@@ -4532,9 +4518,7 @@ skl_update_scaler_users(
 		case DRM_FORMAT_ABGR8888:
 		case DRM_FORMAT_ARGB8888:
 		case DRM_FORMAT_XRGB2101010:
-		case DRM_FORMAT_ARGB2101010:
 		case DRM_FORMAT_XBGR2101010:
-		case DRM_FORMAT_ABGR2101010:
 		case DRM_FORMAT_YUYV:
 		case DRM_FORMAT_YVYU:
 		case DRM_FORMAT_UYVY:
@@ -5128,13 +5112,14 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc)
 
 	ironlake_pfit_disable(intel_crtc);
 
+	if (intel_crtc->config->has_pch_encoder)
+		ironlake_fdi_disable(crtc);
+
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->post_disable)
 			encoder->post_disable(encoder);
 
 	if (intel_crtc->config->has_pch_encoder) {
-		ironlake_fdi_disable(crtc);
-
 		ironlake_disable_pch_transcoder(dev_priv, pipe);
 
 		if (HAS_PCH_CPT(dev)) {
@@ -5543,6 +5528,214 @@ void broxton_uninit_cdclk(struct drm_device *dev)
 	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
 }
 
+static const struct skl_cdclk_entry {
+	unsigned int freq;
+	unsigned int vco;
+} skl_cdclk_frequencies[] = {
+	{ .freq = 308570, .vco = 8640 },
+	{ .freq = 337500, .vco = 8100 },
+	{ .freq = 432000, .vco = 8640 },
+	{ .freq = 450000, .vco = 8100 },
+	{ .freq = 540000, .vco = 8100 },
+	{ .freq = 617140, .vco = 8640 },
+	{ .freq = 675000, .vco = 8100 },
+};
+
+static unsigned int skl_cdclk_decimal(unsigned int freq)
+{
+	return (freq - 1000) / 500;
+}
+
+static unsigned int skl_cdclk_get_vco(unsigned int freq)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(skl_cdclk_frequencies); i++) {
+		const struct skl_cdclk_entry *e = &skl_cdclk_frequencies[i];
+
+		if (e->freq == freq)
+			return e->vco;
+	}
+
+	return 8100;
+}
+
+static void
+skl_dpll0_enable(struct drm_i915_private *dev_priv, unsigned int required_vco)
+{
+	unsigned int min_freq;
+	u32 val;
+
+	/* select the minimum CDCLK before enabling DPLL 0 */
+	val = I915_READ(CDCLK_CTL);
+	val &= ~CDCLK_FREQ_SEL_MASK | ~CDCLK_FREQ_DECIMAL_MASK;
+	val |= CDCLK_FREQ_337_308;
+
+	if (required_vco == 8640)
+		min_freq = 308570;
+	else
+		min_freq = 337500;
+
+	val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_freq);
+
+	I915_WRITE(CDCLK_CTL, val);
+	POSTING_READ(CDCLK_CTL);
+
+	/*
+	 * We always enable DPLL0 with the lowest link rate possible, but still
+	 * taking into account the VCO required to operate the eDP panel at the
+	 * desired frequency. The usual DP link rates operate with a VCO of
+	 * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640.
+	 * The modeset code is responsible for the selection of the exact link
+	 * rate later on, with the constraint of choosing a frequency that
+	 * works with required_vco.
+	 */
+	val = I915_READ(DPLL_CTRL1);
+
+	val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) |
+		 DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0));
+	val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0);
+	if (required_vco == 8640)
+		val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080,
+					    SKL_DPLL0);
+	else
+		val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810,
+					    SKL_DPLL0);
+
+	I915_WRITE(DPLL_CTRL1, val);
+	POSTING_READ(DPLL_CTRL1);
+
+	I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE);
+
+	if (wait_for(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK, 5))
+		DRM_ERROR("DPLL0 not locked\n");
+}
+
+static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv)
+{
+	int ret;
+	u32 val;
+
+	/* inform PCU we want to change CDCLK */
+	val = SKL_CDCLK_PREPARE_FOR_CHANGE;
+	mutex_lock(&dev_priv->rps.hw_lock);
+	ret = sandybridge_pcode_read(dev_priv, SKL_PCODE_CDCLK_CONTROL, &val);
+	mutex_unlock(&dev_priv->rps.hw_lock);
+
+	return ret == 0 && (val & SKL_CDCLK_READY_FOR_CHANGE);
+}
+
+static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv)
+{
+	unsigned int i;
+
+	for (i = 0; i < 15; i++) {
+		if (skl_cdclk_pcu_ready(dev_priv))
+			return true;
+		udelay(10);
+	}
+
+	return false;
+}
+
+static void skl_set_cdclk(struct drm_i915_private *dev_priv, unsigned int freq)
+{
+	u32 freq_select, pcu_ack;
+
+	DRM_DEBUG_DRIVER("Changing CDCLK to %dKHz\n", freq);
+
+	if (!skl_cdclk_wait_for_pcu_ready(dev_priv)) {
+		DRM_ERROR("failed to inform PCU about cdclk change\n");
+		return;
+	}
+
+	/* set CDCLK_CTL */
+	switch(freq) {
+	case 450000:
+	case 432000:
+		freq_select = CDCLK_FREQ_450_432;
+		pcu_ack = 1;
+		break;
+	case 540000:
+		freq_select = CDCLK_FREQ_540;
+		pcu_ack = 2;
+		break;
+	case 308570:
+	case 337500:
+	default:
+		freq_select = CDCLK_FREQ_337_308;
+		pcu_ack = 0;
+		break;
+	case 617140:
+	case 675000:
+		freq_select = CDCLK_FREQ_675_617;
+		pcu_ack = 3;
+		break;
+	}
+
+	I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(freq));
+	POSTING_READ(CDCLK_CTL);
+
+	/* inform PCU of the change */
+	mutex_lock(&dev_priv->rps.hw_lock);
+	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack);
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+void skl_uninit_cdclk(struct drm_i915_private *dev_priv)
+{
+	/* disable DBUF power */
+	I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST);
+	POSTING_READ(DBUF_CTL);
+
+	udelay(10);
+
+	if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
+		DRM_ERROR("DBuf power disable timeout\n");
+
+	/* disable DPLL0 */
+	I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE);
+	if (wait_for(!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK), 1))
+		DRM_ERROR("Couldn't disable DPLL0\n");
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+}
+
+void skl_init_cdclk(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+	unsigned int required_vco;
+
+	/* enable PCH reset handshake */
+	val = I915_READ(HSW_NDE_RSTWRN_OPT);
+	I915_WRITE(HSW_NDE_RSTWRN_OPT, val | RESET_PCH_HANDSHAKE_ENABLE);
+
+	/* enable PG1 and Misc I/O */
+	intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
+
+	/* DPLL0 already enabled!? */
+	if (I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE) {
+		DRM_DEBUG_DRIVER("DPLL0 already running\n");
+		return;
+	}
+
+	/* enable DPLL0 */
+	required_vco = skl_cdclk_get_vco(dev_priv->skl_boot_cdclk);
+	skl_dpll0_enable(dev_priv, required_vco);
+
+	/* set CDCLK to the frequency the BIOS chose */
+	skl_set_cdclk(dev_priv, dev_priv->skl_boot_cdclk);
+
+	/* enable DBUF power */
+	I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST);
+	POSTING_READ(DBUF_CTL);
+
+	udelay(10);
+
+	if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE))
+		DRM_ERROR("DBuf power enable timeout\n");
+}
+
 /* returns HPLL frequency in kHz */
 static int valleyview_get_vco(struct drm_i915_private *dev_priv)
 {
@@ -7477,6 +7670,9 @@ static int i9xx_crtc_compute_clock(struct intel_crtc *crtc,
 	struct drm_connector_state *connector_state;
 	int i;
 
+	memset(&crtc_state->dpll_hw_state, 0,
+	       sizeof(crtc_state->dpll_hw_state));
+
 	for_each_connector_in_state(state, connector, connector_state, i) {
 		if (connector_state->crtc != &crtc->base)
 			continue;
@@ -8518,6 +8714,9 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
 	bool is_lvds = false;
 	struct intel_shared_dpll *pll;
 
+	memset(&crtc_state->dpll_hw_state, 0,
+	       sizeof(crtc_state->dpll_hw_state));
+
 	is_lvds = intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS);
 
 	WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)),
@@ -9353,6 +9552,12 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 	}
 
 	pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
+
+	if (INTEL_INFO(dev)->gen >= 9) {
+		pipe_config->scaler_state.scaler_id = -1;
+		pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
+	}
+
 	if (intel_display_power_is_enabled(dev_priv, pfit_domain)) {
 		if (INTEL_INFO(dev)->gen == 9)
 			skylake_get_pfit_config(crtc, pipe_config);
@@ -9360,10 +9565,6 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 			ironlake_get_pfit_config(crtc, pipe_config);
 		else
 			MISSING_CASE(INTEL_INFO(dev)->gen);
-
-	} else {
-		pipe_config->scaler_state.scaler_id = -1;
-		pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
 	}
 
 	if (IS_HASWELL(dev))
@@ -9868,7 +10069,7 @@ retry:
 		goto fail;
 	}
 
-	crtc_state->base.enable = true;
+	crtc_state->base.active = crtc_state->base.enable = true;
 
 	if (!mode)
 		mode = &load_detect_mode;
@@ -9965,7 +10166,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 		connector_state->best_encoder = NULL;
 		connector_state->crtc = NULL;
 
-		crtc_state->base.enable = false;
+		crtc_state->base.enable = crtc_state->base.active = false;
 
 		ret = intel_modeset_setup_plane_state(state, crtc, NULL, NULL,
 						      0, 0);
@@ -10690,7 +10891,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
 	else if (i915.enable_execlists)
 		return true;
 	else
-		return ring != i915_gem_request_get_ring(obj->last_read_req);
+		return ring != i915_gem_request_get_ring(obj->last_write_req);
 }
 
 static void skl_do_mmio_flip(struct intel_crtc *intel_crtc)
@@ -10790,22 +10991,19 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
 
 static void intel_mmio_flip_work_func(struct work_struct *work)
 {
-	struct intel_crtc *crtc =
-		container_of(work, struct intel_crtc, mmio_flip.work);
-	struct intel_mmio_flip *mmio_flip;
+	struct intel_mmio_flip *mmio_flip =
+		container_of(work, struct intel_mmio_flip, work);
 
-	mmio_flip = &crtc->mmio_flip;
 	if (mmio_flip->req)
 		WARN_ON(__i915_wait_request(mmio_flip->req,
-					    crtc->reset_counter,
-					    false, NULL, NULL) != 0);
+					    mmio_flip->crtc->reset_counter,
+					    false, NULL,
+					    &mmio_flip->i915->rps.mmioflips));
 
-	intel_do_mmio_flip(crtc);
-	if (mmio_flip->req) {
-		mutex_lock(&crtc->base.dev->struct_mutex);
-		i915_gem_request_assign(&mmio_flip->req, NULL);
-		mutex_unlock(&crtc->base.dev->struct_mutex);
-	}
+	intel_do_mmio_flip(mmio_flip->crtc);
+
+	i915_gem_request_unreference__unlocked(mmio_flip->req);
+	kfree(mmio_flip);
 }
 
 static int intel_queue_mmio_flip(struct drm_device *dev,
@@ -10815,12 +11013,18 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
 				 struct intel_engine_cs *ring,
 				 uint32_t flags)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_mmio_flip *mmio_flip;
+
+	mmio_flip = kmalloc(sizeof(*mmio_flip), GFP_KERNEL);
+	if (mmio_flip == NULL)
+		return -ENOMEM;
 
-	i915_gem_request_assign(&intel_crtc->mmio_flip.req,
-				obj->last_write_req);
+	mmio_flip->i915 = to_i915(dev);
+	mmio_flip->req = i915_gem_request_reference(obj->last_write_req);
+	mmio_flip->crtc = to_intel_crtc(crtc);
 
-	schedule_work(&intel_crtc->mmio_flip.work);
+	INIT_WORK(&mmio_flip->work, intel_mmio_flip_work_func);
+	schedule_work(&mmio_flip->work);
 
 	return 0;
 }
@@ -11005,7 +11209,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
 		ring = &dev_priv->ring[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
-		ring = i915_gem_request_get_ring(obj->last_read_req);
+		ring = i915_gem_request_get_ring(obj->last_write_req);
 		if (ring == NULL || ring->id != RCS)
 			ring = &dev_priv->ring[BCS];
 	} else {
@@ -11021,7 +11225,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	 */
 	ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
 					 crtc->primary->state,
-					 mmio_flip ? i915_gem_request_get_ring(obj->last_read_req) : ring);
+					 mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring);
 	if (ret)
 		goto cleanup_pending;
 
@@ -11037,6 +11241,12 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		i915_gem_request_assign(&work->flip_queued_req,
 					obj->last_write_req);
 	} else {
+		if (obj->last_write_req) {
+			ret = i915_gem_check_olr(obj->last_write_req);
+			if (ret)
+				goto cleanup_unpin;
+		}
+
 		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
 						   page_flip_flags);
 		if (ret)
@@ -11303,9 +11513,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
 	DRM_DEBUG_KMS("port clock: %d\n", pipe_config->port_clock);
 	DRM_DEBUG_KMS("pipe src size: %dx%d\n",
 		      pipe_config->pipe_src_w, pipe_config->pipe_src_h);
-	DRM_DEBUG_KMS("num_scalers: %d\n", crtc->num_scalers);
-	DRM_DEBUG_KMS("scaler_users: 0x%x\n", pipe_config->scaler_state.scaler_users);
-	DRM_DEBUG_KMS("scaler id: %d\n", pipe_config->scaler_state.scaler_id);
+	DRM_DEBUG_KMS("num_scalers: %d, scaler_users: 0x%x, scaler_id: %d\n",
+		      crtc->num_scalers,
+		      pipe_config->scaler_state.scaler_users,
+		      pipe_config->scaler_state.scaler_id);
 	DRM_DEBUG_KMS("gmch pfit: control: 0x%08x, ratios: 0x%08x, lvds border: 0x%08x\n",
 		      pipe_config->gmch_pfit.control,
 		      pipe_config->gmch_pfit.pgm_ratios,
@@ -11317,6 +11528,39 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
 	DRM_DEBUG_KMS("ips: %i\n", pipe_config->ips_enabled);
 	DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide);
 
+	if (IS_BROXTON(dev)) {
+		DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: ebb0: 0x%x, "
+			      "pll0: 0x%x, pll1: 0x%x, pll2: 0x%x, pll3: 0x%x, "
+			      "pll6: 0x%x, pll8: 0x%x, pcsdw12: 0x%x\n",
+			      pipe_config->ddi_pll_sel,
+			      pipe_config->dpll_hw_state.ebb0,
+			      pipe_config->dpll_hw_state.pll0,
+			      pipe_config->dpll_hw_state.pll1,
+			      pipe_config->dpll_hw_state.pll2,
+			      pipe_config->dpll_hw_state.pll3,
+			      pipe_config->dpll_hw_state.pll6,
+			      pipe_config->dpll_hw_state.pll8,
+			      pipe_config->dpll_hw_state.pcsdw12);
+	} else if (IS_SKYLAKE(dev)) {
+		DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: "
+			      "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n",
+			      pipe_config->ddi_pll_sel,
+			      pipe_config->dpll_hw_state.ctrl1,
+			      pipe_config->dpll_hw_state.cfgcr1,
+			      pipe_config->dpll_hw_state.cfgcr2);
+	} else if (HAS_DDI(dev)) {
+		DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x\n",
+			      pipe_config->ddi_pll_sel,
+			      pipe_config->dpll_hw_state.wrpll);
+	} else {
+		DRM_DEBUG_KMS("dpll_hw_state: dpll: 0x%x, dpll_md: 0x%x, "
+			      "fp0: 0x%x, fp1: 0x%x\n",
+			      pipe_config->dpll_hw_state.dpll,
+			      pipe_config->dpll_hw_state.dpll_md,
+			      pipe_config->dpll_hw_state.fp0,
+			      pipe_config->dpll_hw_state.fp1);
+	}
+
 	DRM_DEBUG_KMS("planes on this crtc\n");
 	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
 		intel_plane = to_intel_plane(plane);
@@ -11454,12 +11698,18 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
 	struct intel_crtc_scaler_state scaler_state;
 	struct intel_dpll_hw_state dpll_hw_state;
 	enum intel_dpll_id shared_dpll;
+	uint32_t ddi_pll_sel;
+
+	/* FIXME: before the switch to atomic started, a new pipe_config was
+	 * kzalloc'd. Code that depends on any field being zero should be
+	 * fixed, so that the crtc_state can be safely duplicated. For now,
+	 * only fields that are known not to cause problems are preserved. */
 
-	/* Clear only the intel specific part of the crtc state excluding scalers */
 	tmp_state = crtc_state->base;
 	scaler_state = crtc_state->scaler_state;
 	shared_dpll = crtc_state->shared_dpll;
 	dpll_hw_state = crtc_state->dpll_hw_state;
+	ddi_pll_sel = crtc_state->ddi_pll_sel;
 
 	memset(crtc_state, 0, sizeof *crtc_state);
 
@@ -11467,6 +11717,7 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
 	crtc_state->scaler_state = scaler_state;
 	crtc_state->shared_dpll = shared_dpll;
 	crtc_state->dpll_hw_state = dpll_hw_state;
+	crtc_state->ddi_pll_sel = ddi_pll_sel;
 }
 
 static int
@@ -12264,8 +12515,6 @@ static int __intel_set_mode_setup_plls(struct drm_atomic_state *state)
 		if (needs_modeset(crtc_state)) {
 			clear_pipes |= 1 << intel_crtc->pipe;
 			intel_crtc_state->shared_dpll = DPLL_ID_PRIVATE;
-			memset(&intel_crtc_state->dpll_hw_state, 0,
-			       sizeof(intel_crtc_state->dpll_hw_state));
 		}
 	}
 
@@ -12342,7 +12591,6 @@ static int __intel_set_mode(struct drm_crtc *modeset_crtc,
 			continue;
 
 		if (!crtc_state->enable) {
-			crtc_state->active = false;
 			intel_crtc_disable(crtc);
 		} else if (crtc->state->enable) {
 			intel_crtc_disable_planes(crtc);
@@ -12492,7 +12740,8 @@ void intel_crtc_restore_mode(struct drm_crtc *crtc)
 			continue;
 		}
 
-		crtc_state->base.enable = intel_crtc->new_enabled;
+		crtc_state->base.active = crtc_state->base.enable =
+			intel_crtc->new_enabled;
 
 		if (&intel_crtc->base == crtc)
 			drm_mode_copy(&crtc_state->base.mode, &crtc->mode);
@@ -12617,11 +12866,16 @@ intel_modeset_stage_output_state(struct drm_device *dev,
 	}
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		bool has_connectors;
+
 		ret = drm_atomic_add_affected_connectors(state, crtc);
 		if (ret)
 			return ret;
 
-		crtc_state->enable = drm_atomic_connectors_for_crtc(state, crtc);
+		has_connectors = !!drm_atomic_connectors_for_crtc(state, crtc);
+		if (has_connectors != crtc_state->enable)
+			crtc_state->enable =
+			crtc_state->active = has_connectors;
 	}
 
 	ret = intel_modeset_setup_plane_state(state, set->crtc, set->mode,
@@ -13008,8 +13262,11 @@ intel_check_primary_plane(struct drm_plane *plane,
 		intel_atomic_get_crtc_state(state->base.state, intel_crtc) : NULL;
 
 	if (INTEL_INFO(dev)->gen >= 9) {
-		min_scale = 1;
-		max_scale = skl_max_scale(intel_crtc, crtc_state);
+		/* use scaler when colorkey is not required */
+		if (to_intel_plane(plane)->ckey.flags == I915_SET_COLORKEY_NONE) {
+			min_scale = 1;
+			max_scale = skl_max_scale(intel_crtc, crtc_state);
+		}
 		can_position = true;
 	}
 
@@ -13251,8 +13508,8 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
 	primary->max_downscale = 1;
 	if (INTEL_INFO(dev)->gen >= 9) {
 		primary->can_scale = true;
+		state->scaler_id = -1;
 	}
-	state->scaler_id = -1;
 	primary->pipe = pipe;
 	primary->plane = pipe;
 	primary->check_plane = intel_check_primary_plane;
@@ -13262,12 +13519,15 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
 	if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4)
 		primary->plane = !pipe;
 
-	if (INTEL_INFO(dev)->gen <= 3) {
-		intel_primary_formats = intel_primary_formats_gen2;
-		num_formats = ARRAY_SIZE(intel_primary_formats_gen2);
+	if (INTEL_INFO(dev)->gen >= 9) {
+		intel_primary_formats = skl_primary_formats;
+		num_formats = ARRAY_SIZE(skl_primary_formats);
+	} else if (INTEL_INFO(dev)->gen >= 4) {
+		intel_primary_formats = i965_primary_formats;
+		num_formats = ARRAY_SIZE(i965_primary_formats);
 	} else {
-		intel_primary_formats = intel_primary_formats_gen4;
-		num_formats = ARRAY_SIZE(intel_primary_formats_gen4);
+		intel_primary_formats = i8xx_primary_formats;
+		num_formats = ARRAY_SIZE(i8xx_primary_formats);
 	}
 
 	drm_universal_plane_init(dev, &primary->base, 0,
@@ -13434,7 +13694,6 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
 	cursor->max_downscale = 1;
 	cursor->pipe = pipe;
 	cursor->plane = pipe;
-	state->scaler_id = -1;
 	cursor->check_plane = intel_check_cursor_plane;
 	cursor->commit_plane = intel_commit_cursor_plane;
 	cursor->disable_plane = intel_disable_cursor_plane;
@@ -13457,6 +13716,9 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
 				state->base.rotation);
 	}
 
+	if (INTEL_INFO(dev)->gen >=9)
+		state->scaler_id = -1;
+
 	drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs);
 
 	return &cursor->base;
@@ -13550,8 +13812,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
 	dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base;
 	dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base;
 
-	INIT_WORK(&intel_crtc->mmio_flip.work, intel_mmio_flip_work_func);
-
 	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
 
 	WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe);
@@ -13948,25 +14208,35 @@ static int intel_framebuffer_init(struct drm_device *dev,
 	case DRM_FORMAT_ARGB8888:
 		break;
 	case DRM_FORMAT_XRGB1555:
-	case DRM_FORMAT_ARGB1555:
 		if (INTEL_INFO(dev)->gen > 3) {
 			DRM_DEBUG("unsupported pixel format: %s\n",
 				  drm_get_format_name(mode_cmd->pixel_format));
 			return -EINVAL;
 		}
 		break;
-	case DRM_FORMAT_XBGR8888:
 	case DRM_FORMAT_ABGR8888:
+		if (!IS_VALLEYVIEW(dev) && INTEL_INFO(dev)->gen < 9) {
+			DRM_DEBUG("unsupported pixel format: %s\n",
+				  drm_get_format_name(mode_cmd->pixel_format));
+			return -EINVAL;
+		}
+		break;
+	case DRM_FORMAT_XBGR8888:
 	case DRM_FORMAT_XRGB2101010:
-	case DRM_FORMAT_ARGB2101010:
 	case DRM_FORMAT_XBGR2101010:
-	case DRM_FORMAT_ABGR2101010:
 		if (INTEL_INFO(dev)->gen < 4) {
 			DRM_DEBUG("unsupported pixel format: %s\n",
 				  drm_get_format_name(mode_cmd->pixel_format));
 			return -EINVAL;
 		}
 		break;
+	case DRM_FORMAT_ABGR2101010:
+		if (!IS_VALLEYVIEW(dev)) {
+			DRM_DEBUG("unsupported pixel format: %s\n",
+				  drm_get_format_name(mode_cmd->pixel_format));
+			return -EINVAL;
+		}
+		break;
 	case DRM_FORMAT_YUYV:
 	case DRM_FORMAT_UYVY:
 	case DRM_FORMAT_YVYU:
@@ -14595,6 +14865,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 
 		WARN_ON(crtc->active);
 		crtc->base.state->enable = false;
+		crtc->base.state->active = false;
 		crtc->base.enabled = false;
 	}
 
@@ -14623,6 +14894,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 			      crtc->active ? "enabled" : "disabled");
 
 		crtc->base.state->enable = crtc->active;
+		crtc->base.state->active = crtc->active;
 		crtc->base.enabled = crtc->active;
 
 		/* Because we only establish the connector -> encoder ->
@@ -14761,6 +15033,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 								 crtc->config);
 
 		crtc->base.state->enable = crtc->active;
+		crtc->base.state->active = crtc->active;
 		crtc->base.enabled = crtc->active;
 
 		plane_state = to_intel_plane_state(primary->state);
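
In the CDCLK code added above, skl_cdclk_decimal() computes (freq - 1000) / 500, which appears to be the CDCLK_CTL decimal field encoding of (frequency in MHz - 1) in 10.1 fixed point. Two worked values, for illustration:

#include <stdio.h>

static unsigned int skl_cdclk_decimal(unsigned int freq)
{
	return (freq - 1000) / 500;	/* (MHz - 1) in 10.1 fixed point */
}

int main(void)
{
	/* 337500 kHz -> (337.5 - 1) * 2 = 673 = 0x2a1 */
	printf("337500 kHz -> 0x%03x\n", skl_cdclk_decimal(337500));
	/* 675000 kHz -> (675 - 1) * 2 = 1348 = 0x544 */
	printf("675000 kHz -> 0x%03x\n", skl_cdclk_decimal(675000));
	return 0;
}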

+ 67 - 68
drivers/gpu/drm/i915/intel_dp.c

@@ -1097,6 +1097,9 @@ skl_edp_set_pll_config(struct intel_crtc_state *pipe_config, int link_clock)
 {
 	u32 ctrl1;
 
+	memset(&pipe_config->dpll_hw_state, 0,
+	       sizeof(pipe_config->dpll_hw_state));
+
 	pipe_config->ddi_pll_sel = SKL_DPLL0;
 	pipe_config->dpll_hw_state.cfgcr1 = 0;
 	pipe_config->dpll_hw_state.cfgcr2 = 0;
@@ -1266,7 +1269,7 @@ static void snprintf_int_array(char *str, size_t len,
 	str[0] = '\0';
 
 	for (i = 0; i < nelem; i++) {
-		int r = snprintf(str, len, "%d,", array[i]);
+		int r = snprintf(str, len, "%s%d", i ? ", " : "", array[i]);
 		if (r >= len)
 			return;
 		str += r;
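The snprintf_int_array() change switches from appending "%d," (which leaves a trailing comma) to printing the separator before every element except the first. The same separator-first idiom in a standalone sketch:

#include <stdio.h>

/* Join integers as "1, 2, 3" with no trailing separator, stopping on truncation. */
static void join_ints(char *str, size_t len, const int *array, int nelem)
{
	int i;

	str[0] = '\0';
	for (i = 0; i < nelem; i++) {
		int r = snprintf(str, len, "%s%d", i ? ", " : "", array[i]);
		if (r >= (int)len)
			return;	/* output truncated, stop early */
		str += r;
		len -= r;
	}
}

int main(void)
{
	char buf[32];
	int rates[] = { 162000, 270000, 540000 };

	join_ints(buf, sizeof(buf), rates, 3);
	printf("%s\n", buf);	/* prints "162000, 270000, 540000" */
	return 0;
}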
@@ -1567,7 +1570,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder)
 
 	/* Split out the IBX/CPU vs CPT settings */
 
-	if (port == PORT_A && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) {
+	if (IS_GEN7(dev) && port == PORT_A) {
 		if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
 			intel_dp->DP |= DP_SYNC_HS_HIGH;
 		if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
@@ -1578,7 +1581,18 @@ static void intel_dp_prepare(struct intel_encoder *encoder)
 			intel_dp->DP |= DP_ENHANCED_FRAMING;
 
 		intel_dp->DP |= crtc->pipe << 29;
-	} else if (!HAS_PCH_CPT(dev) || port == PORT_A) {
+	} else if (HAS_PCH_CPT(dev) && port != PORT_A) {
+		u32 trans_dp;
+
+		intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT;
+
+		trans_dp = I915_READ(TRANS_DP_CTL(crtc->pipe));
+		if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
+			trans_dp |= TRANS_DP_ENH_FRAMING;
+		else
+			trans_dp &= ~TRANS_DP_ENH_FRAMING;
+		I915_WRITE(TRANS_DP_CTL(crtc->pipe), trans_dp);
+	} else {
 		if (!HAS_PCH_SPLIT(dev) && !IS_VALLEYVIEW(dev))
 			intel_dp->DP |= intel_dp->color_range;
 
@@ -1591,14 +1605,10 @@ static void intel_dp_prepare(struct intel_encoder *encoder)
 		if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
 			intel_dp->DP |= DP_ENHANCED_FRAMING;
 
-		if (!IS_CHERRYVIEW(dev)) {
-			if (crtc->pipe == 1)
-				intel_dp->DP |= DP_PIPEB_SELECT;
-		} else {
+		if (IS_CHERRYVIEW(dev))
 			intel_dp->DP |= DP_PIPE_SELECT_CHV(crtc->pipe);
-		}
-	} else {
-		intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT;
+		else if (crtc->pipe == PIPE_B)
+			intel_dp->DP |= DP_PIPEB_SELECT;
 	}
 }
 
@@ -2182,41 +2192,25 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
 	if (!(tmp & DP_PORT_EN))
 		return false;
 
-	if (port == PORT_A && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) {
+	if (IS_GEN7(dev) && port == PORT_A) {
 		*pipe = PORT_TO_PIPE_CPT(tmp);
-	} else if (IS_CHERRYVIEW(dev)) {
-		*pipe = DP_PORT_TO_PIPE_CHV(tmp);
-	} else if (!HAS_PCH_CPT(dev) || port == PORT_A) {
-		*pipe = PORT_TO_PIPE(tmp);
-	} else {
-		u32 trans_sel;
-		u32 trans_dp;
-		int i;
-
-		switch (intel_dp->output_reg) {
-		case PCH_DP_B:
-			trans_sel = TRANS_DP_PORT_SEL_B;
-			break;
-		case PCH_DP_C:
-			trans_sel = TRANS_DP_PORT_SEL_C;
-			break;
-		case PCH_DP_D:
-			trans_sel = TRANS_DP_PORT_SEL_D;
-			break;
-		default:
-			return true;
-		}
+	} else if (HAS_PCH_CPT(dev) && port != PORT_A) {
+		enum pipe p;
 
-		for_each_pipe(dev_priv, i) {
-			trans_dp = I915_READ(TRANS_DP_CTL(i));
-			if ((trans_dp & TRANS_DP_PORT_SEL_MASK) == trans_sel) {
-				*pipe = i;
+		for_each_pipe(dev_priv, p) {
+			u32 trans_dp = I915_READ(TRANS_DP_CTL(p));
+			if (TRANS_DP_PIPE_TO_PORT(trans_dp) == port) {
+				*pipe = p;
 				return true;
 			}
 		}
 
 		DRM_DEBUG_KMS("No pipe for dp port 0x%x found\n",
 			      intel_dp->output_reg);
+	} else if (IS_CHERRYVIEW(dev)) {
+		*pipe = DP_PORT_TO_PIPE_CHV(tmp);
+	} else {
+		*pipe = PORT_TO_PIPE(tmp);
 	}
 
 	return true;
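On CPT the pipe-to-port routing lives in the per-transcoder TRANS_DP_CTL registers, so the readout now walks every pipe and asks which port that transcoder selected instead of switching on the output register. A hedged userspace sketch of that reverse lookup, with a fake array standing in for the TRANS_DP_CTL reads:

#include <stdio.h>
#include <stdbool.h>

#define NUM_PIPES 3

/* Fake TRANS_DP_CTL contents: which DP port each transcoder drives (-1 = none). */
static const int trans_dp_port_sel[NUM_PIPES] = { -1, 2 /* port C */, -1 };

/* Find which pipe feeds the given DP port, as the CPT readout loop does. */
static bool dp_port_to_pipe(int port, int *pipe)
{
	int p;

	for (p = 0; p < NUM_PIPES; p++) {
		if (trans_dp_port_sel[p] == port) {
			*pipe = p;
			return true;
		}
	}
	return false;
}

int main(void)
{
	int pipe;

	if (dp_port_to_pipe(2, &pipe))
		printf("port C driven by pipe %c\n", 'A' + pipe);
	return 0;
}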
@@ -2237,24 +2231,24 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
 
 	pipe_config->has_audio = tmp & DP_AUDIO_OUTPUT_ENABLE && port != PORT_A;
 
-	if ((port == PORT_A) || !HAS_PCH_CPT(dev)) {
-		if (tmp & DP_SYNC_HS_HIGH)
+	if (HAS_PCH_CPT(dev) && port != PORT_A) {
+		tmp = I915_READ(TRANS_DP_CTL(crtc->pipe));
+		if (tmp & TRANS_DP_HSYNC_ACTIVE_HIGH)
 			flags |= DRM_MODE_FLAG_PHSYNC;
 		else
 			flags |= DRM_MODE_FLAG_NHSYNC;
 
-		if (tmp & DP_SYNC_VS_HIGH)
+		if (tmp & TRANS_DP_VSYNC_ACTIVE_HIGH)
 			flags |= DRM_MODE_FLAG_PVSYNC;
 		else
 			flags |= DRM_MODE_FLAG_NVSYNC;
 	} else {
-		tmp = I915_READ(TRANS_DP_CTL(crtc->pipe));
-		if (tmp & TRANS_DP_HSYNC_ACTIVE_HIGH)
+		if (tmp & DP_SYNC_HS_HIGH)
 			flags |= DRM_MODE_FLAG_PHSYNC;
 		else
 			flags |= DRM_MODE_FLAG_NHSYNC;
 
-		if (tmp & TRANS_DP_VSYNC_ACTIVE_HIGH)
+		if (tmp & DP_SYNC_VS_HIGH)
 			flags |= DRM_MODE_FLAG_PVSYNC;
 		else
 			flags |= DRM_MODE_FLAG_NVSYNC;
@@ -2419,7 +2413,8 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp,
 		}
 		I915_WRITE(DP_TP_CTL(port), temp);
 
-	} else if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || port != PORT_A)) {
+	} else if ((IS_GEN7(dev) && port == PORT_A) ||
+		   (HAS_PCH_CPT(dev) && port != PORT_A)) {
 		*DP &= ~DP_LINK_TRAIN_MASK_CPT;
 
 		switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
@@ -3848,6 +3843,7 @@ static void
 intel_dp_link_down(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
 	enum port port = intel_dig_port->port;
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3861,36 +3857,41 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 
 	DRM_DEBUG_KMS("\n");
 
-	if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || port != PORT_A)) {
+	if ((IS_GEN7(dev) && port == PORT_A) ||
+	    (HAS_PCH_CPT(dev) && port != PORT_A)) {
 		DP &= ~DP_LINK_TRAIN_MASK_CPT;
-		I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT);
+		DP |= DP_LINK_TRAIN_PAT_IDLE_CPT;
 	} else {
 		if (IS_CHERRYVIEW(dev))
 			DP &= ~DP_LINK_TRAIN_MASK_CHV;
 		else
 			DP &= ~DP_LINK_TRAIN_MASK;
-		I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE);
+		DP |= DP_LINK_TRAIN_PAT_IDLE;
 	}
+	I915_WRITE(intel_dp->output_reg, DP);
 	POSTING_READ(intel_dp->output_reg);
 
-	if (HAS_PCH_IBX(dev) &&
-	    I915_READ(intel_dp->output_reg) & DP_PIPEB_SELECT) {
-		/* Hardware workaround: leaving our transcoder select
-		 * set to transcoder B while it's off will prevent the
-		 * corresponding HDMI output on transcoder A.
-		 *
-		 * Combine this with another hardware workaround:
-		 * transcoder select bit can only be cleared while the
-		 * port is enabled.
-		 */
-		DP &= ~DP_PIPEB_SELECT;
+	DP &= ~(DP_PORT_EN | DP_AUDIO_OUTPUT_ENABLE);
+	I915_WRITE(intel_dp->output_reg, DP);
+	POSTING_READ(intel_dp->output_reg);
+
+	/*
+	 * HW workaround for IBX, we need to move the port
+	 * to transcoder A after disabling it to allow the
+	 * matching HDMI port to be enabled on transcoder A.
+	 */
+	if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B && port != PORT_A) {
+		/* always enable with pattern 1 (as per spec) */
+		DP &= ~(DP_PIPEB_SELECT | DP_LINK_TRAIN_MASK);
+		DP |= DP_PORT_EN | DP_LINK_TRAIN_PAT_1;
+		I915_WRITE(intel_dp->output_reg, DP);
+		POSTING_READ(intel_dp->output_reg);
+
+		DP &= ~DP_PORT_EN;
 		I915_WRITE(intel_dp->output_reg, DP);
 		POSTING_READ(intel_dp->output_reg);
 	}
 
-	DP &= ~DP_AUDIO_OUTPUT_ENABLE;
-	I915_WRITE(intel_dp->output_reg, DP & ~DP_PORT_EN);
-	POSTING_READ(intel_dp->output_reg);
 	msleep(intel_dp->panel_power_down_delay);
 }
 
@@ -4142,7 +4143,7 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp)
 		if (!drm_dp_dpcd_write(&intel_dp->aux,
 					DP_TEST_EDID_CHECKSUM,
 					&intel_connector->detect_edid->checksum,
-					1));
+					1))
 			DRM_DEBUG_KMS("Failed to write EDID checksum\n");
 
 		test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE;
@@ -5814,12 +5815,10 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 	intel_dp_aux_init(intel_dp, intel_connector);
 
 	/* init MST on ports that can support it */
-	if (IS_HASWELL(dev) || IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9) {
-		if (port == PORT_B || port == PORT_C || port == PORT_D) {
-			intel_dp_mst_encoder_init(intel_dig_port,
-						  intel_connector->base.base.id);
-		}
-	}
+	if (HAS_DP_MST(dev) &&
+	    (port == PORT_B || port == PORT_C || port == PORT_D))
+		intel_dp_mst_encoder_init(intel_dig_port,
+					  intel_connector->base.base.id);
 
 	if (!intel_edp_init_connector(intel_dp, intel_connector)) {
 		drm_dp_aux_unregister(&intel_dp->aux);

+ 16 - 7
drivers/gpu/drm/i915/intel_drv.h

@@ -459,8 +459,10 @@ struct intel_pipe_wm {
 };
 
 struct intel_mmio_flip {
-	struct drm_i915_gem_request *req;
 	struct work_struct work;
+	struct drm_i915_private *i915;
+	struct drm_i915_gem_request *req;
+	struct intel_crtc *crtc;
 };
 
 struct skl_pipe_wm {
@@ -544,7 +546,6 @@ struct intel_crtc {
 	} wm;
 
 	int scanline_offset;
-	struct intel_mmio_flip mmio_flip;
 
 	struct intel_crtc_atomic_commit atomic;
 
@@ -555,7 +556,15 @@ struct intel_crtc {
 struct intel_plane_wm_parameters {
 	uint32_t horiz_pixels;
 	uint32_t vert_pixels;
+	/*
+	 *   For packed pixel formats:
+	 *     bytes_per_pixel - holds bytes per pixel
+	 *   For planar pixel formats:
+	 *     bytes_per_pixel - holds bytes per pixel for uv-plane
+	 *     y_bytes_per_pixel - holds bytes per pixel for y-plane
+	 */
 	uint8_t bytes_per_pixel;
+	uint8_t y_bytes_per_pixel;
 	bool enabled;
 	bool scaled;
 	u64 tiling;
@@ -1059,9 +1068,6 @@ intel_rotation_90_or_270(unsigned int rotation)
 	return rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270));
 }
 
-unsigned int
-intel_tile_height(struct drm_device *dev, uint32_t bits_per_pixel,
-		  uint64_t fb_modifier);
 void intel_create_rotation_property(struct drm_device *dev,
 					struct intel_plane *plane);
 
@@ -1112,6 +1118,8 @@ void broxton_ddi_phy_init(struct drm_device *dev);
 void broxton_ddi_phy_uninit(struct drm_device *dev);
 void bxt_enable_dc9(struct drm_i915_private *dev_priv);
 void bxt_disable_dc9(struct drm_i915_private *dev_priv);
+void skl_init_cdclk(struct drm_i915_private *dev_priv);
+void skl_uninit_cdclk(struct drm_i915_private *dev_priv);
 void intel_dp_get_m_n(struct intel_crtc *crtc,
 		      struct intel_crtc_state *pipe_config);
 void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n);
@@ -1359,9 +1367,10 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct drm_i915_private *dev_priv,
-		    struct drm_i915_file_private *file_priv);
+		    struct intel_rps_client *rps,
+		    unsigned long submitted);
 void intel_queue_rps_boost_for_request(struct drm_device *dev,
-				       struct drm_i915_gem_request *rq);
+				       struct drm_i915_gem_request *req);
 void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,

+ 26 - 40
drivers/gpu/drm/i915/intel_dsi_pll.c

@@ -162,59 +162,41 @@ static u32 dsi_clk_from_pclk(u32 pclk, int pixel_format, int lane_count)
 
 #endif
 
-static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp)
+static int dsi_calc_mnp(int target_dsi_clk, struct dsi_mnp *dsi_mnp)
 {
-	u32 m, n, p;
-	u32 ref_clk;
-	u32 error;
-	u32 tmp_error;
-	int target_dsi_clk;
-	int calc_dsi_clk;
-	u32 calc_m;
-	u32 calc_p;
+	unsigned int calc_m = 0, calc_p = 0;
+	unsigned int m, n = 1, p;
+	int ref_clk = 25000;
+	int delta = target_dsi_clk;
 	u32 m_seed;
 
-	/* dsi_clk is expected in KHZ */
-	if (dsi_clk < 300000 || dsi_clk > 1150000) {
+	/* target_dsi_clk is expected in kHz */
+	if (target_dsi_clk < 300000 || target_dsi_clk > 1150000) {
 		DRM_ERROR("DSI CLK Out of Range\n");
 		return -ECHRNG;
 	}
 
-	ref_clk = 25000;
-	target_dsi_clk = dsi_clk;
-	error = 0xFFFFFFFF;
-	tmp_error = 0xFFFFFFFF;
-	calc_m = 0;
-	calc_p = 0;
-
-	for (m = 62; m <= 92; m++) {
-		for (p = 2; p <= 6; p++) {
-			/* Find the optimal m and p divisors
-			   with minimal error +/- the required clock */
-			calc_dsi_clk = (m * ref_clk) / p;
-			if (calc_dsi_clk == target_dsi_clk) {
-				calc_m = m;
-				calc_p = p;
-				error = 0;
-				break;
-			} else
-				tmp_error = abs(target_dsi_clk - calc_dsi_clk);
-
-			if (tmp_error < error) {
-				error = tmp_error;
+	for (m = 62; m <= 92 && delta; m++) {
+		for (p = 2; p <= 6 && delta; p++) {
+			/*
+			 * Find the optimal m and p divisors with minimal delta
+			 * +/- the required clock
+			 */
+			int calc_dsi_clk = (m * ref_clk) / (p * n);
+			int d = abs(target_dsi_clk - calc_dsi_clk);
+			if (d < delta) {
+				delta = d;
 				calc_m = m;
 				calc_p = p;
 			}
 		}
-
-		if (error == 0)
-			break;
 	}
 
+	/* register has log2(N1), this works fine for powers of two */
+	n = ffs(n) - 1;
 	m_seed = lfsr_converts[calc_m - 62];
-	n = 1;
 	dsi_mnp->dsi_pll_ctrl = 1 << (DSI_PLL_P1_POST_DIV_SHIFT + calc_p - 2);
-	dsi_mnp->dsi_pll_div = (n - 1) << DSI_PLL_N1_DIV_SHIFT |
+	dsi_mnp->dsi_pll_div = n << DSI_PLL_N1_DIV_SHIFT |
 		m_seed << DSI_PLL_M1_DIV_SHIFT;
 
 	return 0;
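dsi_calc_mnp() now keeps a running delta and simply remembers the (m, p) pair that minimizes |target - m*ref/(p*n)|, leaving both loops as soon as the delta reaches zero. The same search as a self-contained program; the 25 MHz reference and the 62..92 / 2..6 divisor ranges come from the code above, the target clock is just an example value:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const int ref_clk = 25000;		/* kHz */
	const int target_dsi_clk = 500000;	/* kHz, example value */
	const int n = 1;
	int delta = target_dsi_clk;
	int calc_m = 0, calc_p = 0;
	int m, p;

	for (m = 62; m <= 92 && delta; m++) {
		for (p = 2; p <= 6 && delta; p++) {
			/* candidate clock for this divisor pair */
			int calc_dsi_clk = (m * ref_clk) / (p * n);
			int d = abs(target_dsi_clk - calc_dsi_clk);

			if (d < delta) {
				delta = d;
				calc_m = m;
				calc_p = p;
			}
		}
	}

	printf("m=%d p=%d -> %d kHz (delta %d kHz)\n",
	       calc_m, calc_p, (calc_m * ref_clk) / (calc_p * n), delta);
	return 0;
}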
@@ -331,7 +313,7 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	u32 dsi_clock, pclk;
 	u32 pll_ctl, pll_div;
-	u32 m = 0, p = 0;
+	u32 m = 0, p = 0, n;
 	int refclk = 25000;
 	int i;
 
@@ -346,6 +328,10 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
 	pll_ctl &= DSI_PLL_P1_POST_DIV_MASK;
 	pll_ctl = pll_ctl >> (DSI_PLL_P1_POST_DIV_SHIFT - 2);
 
+	/* N1 divisor */
+	n = (pll_div & DSI_PLL_N1_DIV_MASK) >> DSI_PLL_N1_DIV_SHIFT;
+	n = 1 << n; /* register has log2(N1) */
+
 	/* mask out the other bits and extract the M1 divisor */
 	pll_div &= DSI_PLL_M1_DIV_MASK;
 	pll_div = pll_div >> DSI_PLL_M1_DIV_SHIFT;
@@ -373,7 +359,7 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
 
 	m = i + 62;
 
-	dsi_clock = (m * refclk) / p;
+	dsi_clock = (m * refclk) / (p * n);
 
 	/* pixel_format and pipe_bpp should agree */
 	assert_bpp_mismatch(intel_dsi->pixel_format, pipe_bpp);

+ 47 - 42
drivers/gpu/drm/i915/intel_hdmi.c

@@ -873,59 +873,59 @@ static void intel_disable_hdmi(struct intel_encoder *encoder)
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
 	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
 	u32 temp;
-	u32 enable_bits = SDVO_ENABLE | SDVO_AUDIO_ENABLE;
-
-	if (crtc->config->has_audio)
-		intel_audio_codec_disable(encoder);
 
 	temp = I915_READ(intel_hdmi->hdmi_reg);
 
-	/* HW workaround for IBX, we need to move the port to transcoder A
-	 * before disabling it. */
-	if (HAS_PCH_IBX(dev)) {
-		struct drm_crtc *crtc = encoder->base.crtc;
-		int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1;
-
-		if (temp & SDVO_PIPE_B_SELECT) {
-			temp &= ~SDVO_PIPE_B_SELECT;
-			I915_WRITE(intel_hdmi->hdmi_reg, temp);
-			POSTING_READ(intel_hdmi->hdmi_reg);
-
-			/* Again we need to write this twice. */
-			I915_WRITE(intel_hdmi->hdmi_reg, temp);
-			POSTING_READ(intel_hdmi->hdmi_reg);
-
-			/* Transcoder selection bits only update
-			 * effectively on vblank. */
-			if (crtc)
-				intel_wait_for_vblank(dev, pipe);
-			else
-				msleep(50);
-		}
-	}
-
-	/* HW workaround, need to toggle enable bit off and on for 12bpc, but
-	 * we do this anyway which shows more stable in testing.
-	 */
-	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(intel_hdmi->hdmi_reg, temp & ~SDVO_ENABLE);
-		POSTING_READ(intel_hdmi->hdmi_reg);
-	}
-
-	temp &= ~enable_bits;
-
+	temp &= ~(SDVO_ENABLE | SDVO_AUDIO_ENABLE);
 	I915_WRITE(intel_hdmi->hdmi_reg, temp);
 	POSTING_READ(intel_hdmi->hdmi_reg);
 
-	/* HW workaround, need to write this twice for issue that may result
-	 * in first write getting masked.
+	/*
+	 * HW workaround for IBX, we need to move the port
+	 * to transcoder A after disabling it to allow the
+	 * matching DP port to be enabled on transcoder A.
 	 */
-	if (HAS_PCH_SPLIT(dev)) {
+	if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B) {
+		temp &= ~SDVO_PIPE_B_SELECT;
+		temp |= SDVO_ENABLE;
+		/*
+		 * HW workaround, need to write this twice for issue
+		 * that may result in first write getting masked.
+		 */
+		I915_WRITE(intel_hdmi->hdmi_reg, temp);
+		POSTING_READ(intel_hdmi->hdmi_reg);
+		I915_WRITE(intel_hdmi->hdmi_reg, temp);
+		POSTING_READ(intel_hdmi->hdmi_reg);
+
+		temp &= ~SDVO_ENABLE;
 		I915_WRITE(intel_hdmi->hdmi_reg, temp);
 		POSTING_READ(intel_hdmi->hdmi_reg);
 	}
 }
 
+static void g4x_disable_hdmi(struct intel_encoder *encoder)
+{
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+
+	if (crtc->config->has_audio)
+		intel_audio_codec_disable(encoder);
+
+	intel_disable_hdmi(encoder);
+}
+
+static void pch_disable_hdmi(struct intel_encoder *encoder)
+{
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+
+	if (crtc->config->has_audio)
+		intel_audio_codec_disable(encoder);
+}
+
+static void pch_post_disable_hdmi(struct intel_encoder *encoder)
+{
+	intel_disable_hdmi(encoder);
+}
+
 static int hdmi_portclock_limit(struct intel_hdmi *hdmi, bool respect_dvi_limit)
 {
 	struct drm_device *dev = intel_hdmi_to_dev(hdmi);
@@ -1806,7 +1806,12 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port)
 			 DRM_MODE_ENCODER_TMDS);
 
 	intel_encoder->compute_config = intel_hdmi_compute_config;
-	intel_encoder->disable = intel_disable_hdmi;
+	if (HAS_PCH_SPLIT(dev)) {
+		intel_encoder->disable = pch_disable_hdmi;
+		intel_encoder->post_disable = pch_post_disable_hdmi;
+	} else {
+		intel_encoder->disable = g4x_disable_hdmi;
+	}
 	intel_encoder->get_hw_state = intel_hdmi_get_hw_state;
 	intel_encoder->get_config = intel_hdmi_get_config;
 	if (IS_CHERRYVIEW(dev)) {
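On PCH platforms the port now has to be turned off after the pipe, so the HDMI code splits the old single .disable hook into a pch_disable/pch_post_disable pair while g4x keeps everything in .disable. A small sketch of that hook-selection pattern, with hypothetical function names standing in for the encoder callbacks:

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical encoder vtable: disable runs before the pipe is shut down,
 * post_disable (if set) runs after it. */
struct encoder_hooks {
	void (*disable)(void);
	void (*post_disable)(void);
};

static void audio_off(void)	{ puts("audio codec off"); }
static void port_off(void)	{ puts("HDMI port off"); }

static void g4x_disable(void)	{ audio_off(); port_off(); }
static void pch_disable(void)	{ audio_off(); }	/* port stays on for now */
static void pch_post_disable(void) { port_off(); }	/* after the pipe is down */

static void init_hooks(struct encoder_hooks *h, bool has_pch_split)
{
	if (has_pch_split) {
		h->disable = pch_disable;
		h->post_disable = pch_post_disable;
	} else {
		h->disable = g4x_disable;
		h->post_disable = NULL;
	}
}

int main(void)
{
	struct encoder_hooks h;

	init_hooks(&h, true);
	h.disable();			/* called before the pipe is disabled */
	puts("pipe off");
	if (h.post_disable)
		h.post_disable();	/* port disabled only afterwards */
	return 0;
}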

+ 21 - 0
drivers/gpu/drm/i915/intel_i2c.c

@@ -49,6 +49,19 @@ static const struct gmbus_pin gmbus_pins[] = {
 	[GMBUS_PIN_DPD] = { "dpd", GPIOF },
 };
 
+static const struct gmbus_pin gmbus_pins_bdw[] = {
+	[GMBUS_PIN_VGADDC] = { "vga", GPIOA },
+	[GMBUS_PIN_DPC] = { "dpc", GPIOD },
+	[GMBUS_PIN_DPB] = { "dpb", GPIOE },
+	[GMBUS_PIN_DPD] = { "dpd", GPIOF },
+};
+
+static const struct gmbus_pin gmbus_pins_skl[] = {
+	[GMBUS_PIN_DPC] = { "dpc", GPIOD },
+	[GMBUS_PIN_DPB] = { "dpb", GPIOE },
+	[GMBUS_PIN_DPD] = { "dpd", GPIOF },
+};
+
 static const struct gmbus_pin gmbus_pins_bxt[] = {
 	[GMBUS_PIN_1_BXT] = { "dpb", PCH_GPIOB },
 	[GMBUS_PIN_2_BXT] = { "dpc", PCH_GPIOC },
@@ -61,6 +74,10 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv,
 {
 	if (IS_BROXTON(dev_priv))
 		return &gmbus_pins_bxt[pin];
+	else if (IS_SKYLAKE(dev_priv))
+		return &gmbus_pins_skl[pin];
+	else if (IS_BROADWELL(dev_priv))
+		return &gmbus_pins_bdw[pin];
 	else
 		return &gmbus_pins[pin];
 }
@@ -72,6 +89,10 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
 
 	if (IS_BROXTON(dev_priv))
 		size = ARRAY_SIZE(gmbus_pins_bxt);
+	else if (IS_SKYLAKE(dev_priv))
+		size = ARRAY_SIZE(gmbus_pins_skl);
+	else if (IS_BROADWELL(dev_priv))
+		size = ARRAY_SIZE(gmbus_pins_bdw);
 	else
 		size = ARRAY_SIZE(gmbus_pins);
 

+ 20 - 14
drivers/gpu/drm/i915/intel_lrc.c

@@ -394,6 +394,12 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 
 	assert_spin_locked(&ring->execlist_lock);
 
+	/*
+	 * If irqs are not active generate a warning as batches that finish
+	 * without the irqs may get lost and a GPU Hang may occur.
+	 */
+	WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
+
 	if (list_empty(&ring->execlist_queue))
 		return;
 
@@ -622,6 +628,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
 				 struct list_head *vmas)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
+	const unsigned other_rings = ~intel_ring_flag(ring);
 	struct i915_vma *vma;
 	uint32_t flush_domains = 0;
 	bool flush_chipset = false;
@@ -630,9 +637,11 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		ret = i915_gem_object_sync(obj, ring);
-		if (ret)
-			return ret;
+		if (obj->active & other_rings) {
+			ret = i915_gem_object_sync(obj, ring);
+			if (ret)
+				return ret;
+		}
 
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			flush_chipset |= i915_gem_clflush_object(obj, false);
@@ -673,7 +682,8 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
 	struct drm_i915_gem_request *request;
-	int ret, new_space;
+	unsigned space;
+	int ret;
 
 	if (intel_ring_space(ringbuf) >= bytes)
 		return 0;
@@ -684,14 +694,13 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
 		 * from multiple ringbuffers. Here, we must ignore any that
 		 * aren't from the ringbuffer we're considering.
 		 */
-		struct intel_context *ctx = request->ctx;
-		if (ctx->engine[ring->id].ringbuf != ringbuf)
+		if (request->ringbuf != ringbuf)
 			continue;
 
 		/* Would completion of this request free enough space? */
-		new_space = __intel_ring_space(request->postfix, ringbuf->tail,
-				       ringbuf->size);
-		if (new_space >= bytes)
+		space = __intel_ring_space(request->postfix, ringbuf->tail,
+					   ringbuf->size);
+		if (space >= bytes)
 			break;
 	}
 
@@ -702,11 +711,8 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
 	if (ret)
 		return ret;
 
-	i915_gem_retire_requests_ring(ring);
-
-	WARN_ON(intel_ring_space(ringbuf) < new_space);
-
-	return intel_ring_space(ringbuf) >= bytes ? 0 : -ENOSPC;
+	ringbuf->space = space;
+	return 0;
 }
 
 /*
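logical_ring_wait_for_space() (like ring_wait_for_space() in intel_ringbuffer.c further down) now remembers the space value computed for the request it waits on and writes it straight into ringbuf->space instead of retiring requests and re-deriving it. The underlying circular-buffer arithmetic, how much room is free between tail and a future head position, can be sketched like this (the reserved gap constant is an assumption for the example):

#include <stdio.h>

#define RING_FREE_SPACE 64	/* assumed safety gap kept between head and tail */

/* Free bytes once 'head' has advanced to a request's postfix position. */
static int ring_space(int head, int tail, int size)
{
	int space = head - tail;

	if (space <= 0)
		space += size;	/* head wrapped around the buffer */
	return space - RING_FREE_SPACE;
}

int main(void)
{
	/* 4 KiB ring, tail near the end, request postfix near the start. */
	printf("space = %d\n", ring_space(256, 3840, 4096));
	return 0;
}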

+ 0 - 2
drivers/gpu/drm/i915/intel_overlay.c

@@ -228,7 +228,6 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
 	ret = i915_wait_request(overlay->last_flip_req);
 	if (ret)
 		return ret;
-	i915_gem_retire_requests(dev);
 
 	i915_gem_request_assign(&overlay->last_flip_req, NULL);
 	return 0;
@@ -376,7 +375,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 	ret = i915_wait_request(overlay->last_flip_req);
 	if (ret)
 		return ret;
-	i915_gem_retire_requests(overlay->dev);
 
 	if (overlay->flip_tail)
 		overlay->flip_tail(overlay);

+ 130 - 83
drivers/gpu/drm/i915/intel_pm.c

@@ -1946,7 +1946,7 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
 int ilk_wm_max_level(const struct drm_device *dev)
 {
 	/* how many WM levels are we expecting */
-	if (IS_GEN9(dev))
+	if (INTEL_INFO(dev)->gen >= 9)
 		return 7;
 	else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		return 4;
@@ -2639,8 +2639,18 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 }
 
 static unsigned int
-skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p)
+skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
 {
+
+	/* for planar format */
+	if (p->y_bytes_per_pixel) {
+		if (y)  /* y-plane data rate */
+			return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
+		else    /* uv-plane data rate */
+			return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
+	}
+
+	/* for packed formats */
 	return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
 }
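For NV12 the data rate is now computed per plane: the Y plane at full resolution with its own bytes-per-pixel, and the UV plane at half resolution in both dimensions. A small worked sketch for a 1920x1080 surface with the usual NV12 layout (1 byte/pixel Y, 2 bytes/pixel UV):

#include <stdio.h>

struct plane_params {
	unsigned int horiz_pixels, vert_pixels;
	unsigned char bytes_per_pixel;		/* UV plane for planar formats */
	unsigned char y_bytes_per_pixel;	/* Y plane; 0 for packed formats */
};

static unsigned int relative_data_rate(const struct plane_params *p, int y)
{
	if (p->y_bytes_per_pixel) {	/* planar (e.g. NV12) */
		if (y)
			return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
		return (p->horiz_pixels / 2) * (p->vert_pixels / 2) * p->bytes_per_pixel;
	}
	return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
}

int main(void)
{
	struct plane_params nv12 = { 1920, 1080, 2, 1 };

	printf("Y  plane: %u bytes/frame\n", relative_data_rate(&nv12, 1));
	printf("UV plane: %u bytes/frame\n", relative_data_rate(&nv12, 0));
	return 0;
}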
 
@@ -2663,7 +2673,10 @@ skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
 		if (!p->enabled)
 			continue;
 
-		total_data_rate += skl_plane_relative_data_rate(p);
+		total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
+		if (p->y_bytes_per_pixel) {
+			total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
+		}
 	}
 
 	return total_data_rate;
@@ -2682,6 +2695,7 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
 	struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
 	uint16_t alloc_size, start, cursor_blocks;
 	uint16_t minimum[I915_MAX_PLANES];
+	uint16_t y_minimum[I915_MAX_PLANES];
 	unsigned int total_data_rate;
 	int plane;
 
@@ -2710,6 +2724,8 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
 
 		minimum[plane] = 8;
 		alloc_size -= minimum[plane];
+		y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
+		alloc_size -= y_minimum[plane];
 	}
 
 	/*
@@ -2723,16 +2739,17 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
 	start = alloc->start;
 	for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
 		const struct intel_plane_wm_parameters *p;
-		unsigned int data_rate;
-		uint16_t plane_blocks;
+		unsigned int data_rate, y_data_rate;
+		uint16_t plane_blocks, y_plane_blocks = 0;
 
 		p = &params->plane[plane];
 		if (!p->enabled)
 			continue;
 
-		data_rate = skl_plane_relative_data_rate(p);
+		data_rate = skl_plane_relative_data_rate(p, 0);
 
 		/*
+		 * allocation for (packed formats) or (uv-plane part of planar format):
 		 * promote the expression to 64 bits to avoid overflowing, the
 		 * result is < available as data_rate / total_data_rate < 1
 		 */
@@ -2744,6 +2761,22 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
 		ddb->plane[pipe][plane].end = start + plane_blocks;
 
 		start += plane_blocks;
+
+		/*
+		 * allocation for y_plane part of planar format:
+		 */
+		if (p->y_bytes_per_pixel) {
+			y_data_rate = skl_plane_relative_data_rate(p, 1);
+			y_plane_blocks = y_minimum[plane];
+			y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
+						total_data_rate);
+
+			ddb->y_plane[pipe][plane].start = start;
+			ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
+
+			start += y_plane_blocks;
+		}
+
 	}
 
 }
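Each plane's DDB share is its data rate divided by the pipe total, with the multiplication promoted to 64 bits so blocks * rate cannot overflow before the divide; planar formats simply run the split twice, once for the UV rate and once for the Y rate. A compact sketch of that proportional allocation (the minimum of 8 blocks and the sample rates are taken from the code above, the free block count is an example):

#include <stdio.h>
#include <stdint.h>

/* Blocks a plane gets out of 'alloc_size', proportional to its data rate.
 * The 64-bit promotion mirrors the div_u64() use in the hunk above. */
static uint16_t plane_blocks(uint16_t minimum, uint16_t alloc_size,
			     uint64_t data_rate, uint64_t total_data_rate)
{
	return minimum + (uint16_t)((uint64_t)alloc_size * data_rate /
				    total_data_rate);
}

int main(void)
{
	uint16_t alloc_size = 480;		/* blocks left after minimums */
	uint64_t total = 2073600 + 1036800;	/* Y + UV rates of one NV12 plane */

	printf("Y  blocks: %u\n", plane_blocks(8, alloc_size, 2073600, total));
	printf("UV blocks: %u\n", plane_blocks(8, alloc_size, 1036800, total));
	return 0;
}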
@@ -2856,13 +2889,18 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
 		p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
 
 		fb = crtc->primary->state->fb;
+		/* For planar: Bpp is for uv plane, y_Bpp is for y plane */
 		if (fb) {
 			p->plane[0].enabled = true;
-			p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8;
+			p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+				drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8;
+			p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+				drm_format_plane_cpp(fb->pixel_format, 0) : 0;
 			p->plane[0].tiling = fb->modifier[0];
 		} else {
 			p->plane[0].enabled = false;
 			p->plane[0].bytes_per_pixel = 0;
+			p->plane[0].y_bytes_per_pixel = 0;
 			p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
 		}
 		p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
@@ -2870,6 +2908,7 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
 		p->plane[0].rotation = crtc->primary->state->rotation;
 
 		fb = crtc->cursor->state->fb;
+		p->cursor.y_bytes_per_pixel = 0;
 		if (fb) {
 			p->cursor.enabled = true;
 			p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8;
@@ -2905,22 +2944,25 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	uint32_t plane_bytes_per_line, plane_blocks_per_line;
 	uint32_t res_blocks, res_lines;
 	uint32_t selected_result;
+	uint8_t bytes_per_pixel;
 
 	if (latency == 0 || !p->active || !p_params->enabled)
 		return false;
 
+	bytes_per_pixel = p_params->y_bytes_per_pixel ?
+		p_params->y_bytes_per_pixel :
+		p_params->bytes_per_pixel;
 	method1 = skl_wm_method1(p->pixel_rate,
-				 p_params->bytes_per_pixel,
+				 bytes_per_pixel,
 				 latency);
 	method2 = skl_wm_method2(p->pixel_rate,
 				 p->pipe_htotal,
 				 p_params->horiz_pixels,
-				 p_params->bytes_per_pixel,
+				 bytes_per_pixel,
 				 p_params->tiling,
 				 latency);
 
-	plane_bytes_per_line = p_params->horiz_pixels *
-					p_params->bytes_per_pixel;
+	plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
 	plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
 
 	if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
@@ -3137,10 +3179,14 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv,
 				   new->plane_trans[pipe][i]);
 		I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
 
-		for (i = 0; i < intel_num_planes(crtc); i++)
+		for (i = 0; i < intel_num_planes(crtc); i++) {
 			skl_ddb_entry_write(dev_priv,
 					    PLANE_BUF_CFG(pipe, i),
 					    &new->ddb.plane[pipe][i]);
+			skl_ddb_entry_write(dev_priv,
+					    PLANE_NV12_BUF_CFG(pipe, i),
+					    &new->ddb.y_plane[pipe][i]);
+		}
 
 		skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
 				    &new->ddb.cursor[pipe]);
@@ -3298,6 +3344,7 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc,
 		return false;
 
 	intel_crtc->wm.skl_active = *pipe_wm;
+
 	return true;
 }
 
@@ -3391,8 +3438,16 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
 	intel_plane->wm.scaled = scaled;
 	intel_plane->wm.horiz_pixels = sprite_width;
 	intel_plane->wm.vert_pixels = sprite_height;
-	intel_plane->wm.bytes_per_pixel = pixel_size;
 	intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
+
+	/* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
+	intel_plane->wm.bytes_per_pixel =
+		(fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+		drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
+	intel_plane->wm.y_bytes_per_pixel =
+		(fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+		drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
+
 	/*
 	 * Framebuffer can be NULL on plane disable, but it does not
 	 * matter for watermarks if we assume no tiling in that case.
@@ -4042,51 +4097,25 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
 	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 }
 
-/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
+/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
  *
  * * If Gfx is Idle, then
- * 1. Mask Turbo interrupts
- * 2. Bring up Gfx clock
- * 3. Change the freq to Rpn and wait till P-Unit updates freq
- * 4. Clear the Force GFX CLK ON bit so that Gfx can down
- * 5. Unmask Turbo interrupts
+ * 1. Forcewake Media well.
+ * 2. Request idle freq.
+ * 3. Release Forcewake of Media well.
 */
 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 {
-	struct drm_device *dev = dev_priv->dev;
 	u32 val = dev_priv->rps.idle_freq;
 
-	/* CHV and latest VLV don't need to force the gfx clock */
-	if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
-		valleyview_set_rps(dev_priv->dev, val);
-		return;
-	}
-
-	/*
-	 * When we are idle.  Drop to min voltage state.
-	 */
-
 	if (dev_priv->rps.cur_freq <= val)
 		return;
 
-	/* Mask turbo interrupt so that they will not come in between */
-	I915_WRITE(GEN6_PMINTRMSK,
-		   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
-
-	vlv_force_gfx_clock(dev_priv, true);
-
-	dev_priv->rps.cur_freq = val;
-
-	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-
-	if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
-				& GENFREQSTATUS) == 0, 100))
-		DRM_ERROR("timed out waiting for Punit\n");
-
-	gen6_set_rps_thresholds(dev_priv, val);
-	vlv_force_gfx_clock(dev_priv, false);
-
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+	/* Wake up the media well, as that takes a lot less
+	 * power than the Render well. */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
+	valleyview_set_rps(dev_priv->dev, val);
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
 }
 
 void gen6_rps_busy(struct drm_i915_private *dev_priv)
@@ -4121,22 +4150,29 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 }
 
 void gen6_rps_boost(struct drm_i915_private *dev_priv,
-		    struct drm_i915_file_private *file_priv)
+		    struct intel_rps_client *rps,
+		    unsigned long submitted)
 {
 	u32 val;
 
+	/* Force a RPS boost (and don't count it against the client) if
+	 * the GPU is severely congested.
+	 */
+	if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
+		rps = NULL;
+
 	mutex_lock(&dev_priv->rps.hw_lock);
 	val = dev_priv->rps.max_freq_softlimit;
 	if (dev_priv->rps.enabled &&
 	    dev_priv->mm.busy &&
 	    dev_priv->rps.cur_freq < val &&
-	    (file_priv == NULL || list_empty(&file_priv->rps_boost))) {
+	    (rps == NULL || list_empty(&rps->link))) {
 		intel_set_rps(dev_priv->dev, val);
 		dev_priv->rps.last_adj = 0;
 
-		if (file_priv != NULL) {
-			list_add(&file_priv->rps_boost, &dev_priv->rps.clients);
-			file_priv->rps_boosts++;
+		if (rps != NULL) {
+			list_add(&rps->link, &dev_priv->rps.clients);
+			rps->boosts++;
 		} else
 			dev_priv->rps.boosts++;
 	}
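gen6_rps_boost() now takes the request's submission time and, when the request has been queued longer than DRM_I915_THROTTLE_JIFFIES, drops the client pointer so the boost is counted against the device rather than the client; a client also gets at most one outstanding boost because it is skipped while still on the clients list. A simplified sketch of that accounting, using millisecond timestamps in place of jiffies and a boolean in place of the list-membership test:

#include <stdio.h>
#include <stdbool.h>

#define THROTTLE_MS 20	/* stand-in for DRM_I915_THROTTLE_JIFFIES */

struct rps_client { bool on_list; unsigned int boosts; };

static unsigned int anonymous_boosts;

static void rps_boost(struct rps_client *rps, long now_ms, long submitted_ms)
{
	/* GPU severely congested: still boost, but don't charge the client. */
	if (rps && now_ms - submitted_ms > THROTTLE_MS)
		rps = NULL;

	/* A client gets no new boost while its previous one is still pending. */
	if (rps && rps->on_list)
		return;

	if (rps) {
		rps->on_list = true;
		rps->boosts++;
	} else {
		anonymous_boosts++;
	}
	puts("frequency boosted to softlimit max");
}

int main(void)
{
	struct rps_client c = { false, 0 };

	rps_boost(&c, 100, 95);		/* fresh request: charged to the client */
	rps_boost(&c, 130, 100);	/* stale request: free boost */
	printf("client boosts=%u anonymous=%u\n", c.boosts, anonymous_boosts);
	return 0;
}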
@@ -4714,24 +4750,6 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
 	return rp1;
 }
 
-static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-	u32 val, rpn;
-
-	if (dev->pdev->revision >= 0x20) {
-		val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
-		rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
-		       FB_GFX_FREQ_FUSE_MASK);
-	} else { /* For pre-production hardware */
-		val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
-		rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) &
-		       PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK);
-	}
-
-	return rpn;
-}
-
 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
 {
 	u32 val, rp1;
@@ -4983,7 +5001,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
 			 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
 			 dev_priv->rps.rp1_freq);
 
-	dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
+	/* PUnit validated range is only [RPe, RP0] */
+	dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
 			 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
 			 dev_priv->rps.min_freq);
@@ -6155,10 +6174,9 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum pipe pipe;
+	uint32_t misccpctl;
 
-	I915_WRITE(WM3_LP_ILK, 0);
-	I915_WRITE(WM2_LP_ILK, 0);
-	I915_WRITE(WM1_LP_ILK, 0);
+	ilk_init_lp_watermarks(dev);
 
 	/* WaSwitchSolVfFArbitrationPriority:bdw */
 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -6187,6 +6205,22 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
+	/*
+	 * WaProgramL3SqcReg1Default:bdw
+	 * WaTempDisableDOPClkGating:bdw
+	 */
+	misccpctl = I915_READ(GEN7_MISCCPCTL);
+	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+
+	/*
+	 * WaGttCachingOffByDefault:bdw
+	 * GTT cache may not work with big pages, so if those
+	 * are ever enabled GTT cache may need to be disabled.
+	 */
+	I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
+
 	lpt_init_clock_gating(dev);
 }
 
@@ -6462,6 +6496,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
 	/* WaDisableSDEUnitClockGating:chv */
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+	/*
+	 * GTT cache may not work with big pages, so if those
+	 * are ever enabled GTT cache may need to be disabled.
+	 */
+	I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
@@ -6830,34 +6870,39 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 
 struct request_boost {
 	struct work_struct work;
-	struct drm_i915_gem_request *rq;
+	struct drm_i915_gem_request *req;
 };
 
 static void __intel_rps_boost_work(struct work_struct *work)
 {
 	struct request_boost *boost = container_of(work, struct request_boost, work);
+	struct drm_i915_gem_request *req = boost->req;
 
-	if (!i915_gem_request_completed(boost->rq, true))
-		gen6_rps_boost(to_i915(boost->rq->ring->dev), NULL);
+	if (!i915_gem_request_completed(req, true))
+		gen6_rps_boost(to_i915(req->ring->dev), NULL,
+			       req->emitted_jiffies);
 
-	i915_gem_request_unreference__unlocked(boost->rq);
+	i915_gem_request_unreference__unlocked(req);
 	kfree(boost);
 }
 
 void intel_queue_rps_boost_for_request(struct drm_device *dev,
-				       struct drm_i915_gem_request *rq)
+				       struct drm_i915_gem_request *req)
 {
 	struct request_boost *boost;
 
-	if (rq == NULL || INTEL_INFO(dev)->gen < 6)
+	if (req == NULL || INTEL_INFO(dev)->gen < 6)
+		return;
+
+	if (i915_gem_request_completed(req, true))
 		return;
 
 	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
 	if (boost == NULL)
 		return;
 
-	i915_gem_request_reference(rq);
-	boost->rq = rq;
+	i915_gem_request_reference(req);
+	boost->req = req;
 
 	INIT_WORK(&boost->work, __intel_rps_boost_work);
 	queue_work(to_i915(dev)->wq, &boost->work);
@@ -6872,6 +6917,8 @@ void intel_pm_setup(struct drm_device *dev)
 	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
 			  intel_gen6_powersave_work);
 	INIT_LIST_HEAD(&dev_priv->rps.clients);
+	INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
+	INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
 
 	dev_priv->pm.suspended = false;
 }

+ 25 - 25
drivers/gpu/drm/i915/intel_ringbuffer.c

@@ -853,9 +853,6 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
 			    GEN6_WIZ_HASHING_MASK,
 			    GEN6_WIZ_HASHING_16x4);
 
-	/* WaProgramL3SqcReg1Default:bdw */
-	WA_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
-
 	return 0;
 }
 
@@ -918,6 +915,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t tmp;
 
 	/* WaDisablePartialInstShootdown:skl,bxt */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
@@ -961,15 +959,19 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 
-	/*
-	 * FIXME: don't apply the following on BXT for stepping C. On BXT A0
-	 * the flag reads back as 0.
-	 */
-	/* WaDisableMaskBasedCammingInRCC:sklC,bxtA */
-	if (INTEL_REVID(dev) == SKL_REVID_C0 || IS_BROXTON(dev))
+	/* WaDisableMaskBasedCammingInRCC:skl,bxt */
+	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
+	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
 		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
 				  PIXEL_MASK_CAMMING_DISABLE);
 
+	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
+	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
+	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
+	    (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
+		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
+	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
+
 	return 0;
 }
 
@@ -1060,10 +1062,6 @@ static int bxt_init_workarounds(struct intel_engine_cs *ring)
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 	}
 
-	/* WaForceContextSaveRestoreNonCoherent:bxt */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
-
 	return 0;
 }
 
@@ -2102,15 +2100,16 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
 	struct drm_i915_gem_request *request;
-	int ret, new_space;
+	unsigned space;
+	int ret;
 
 	if (intel_ring_space(ringbuf) >= n)
 		return 0;
 
 	list_for_each_entry(request, &ring->request_list, list) {
-		new_space = __intel_ring_space(request->postfix, ringbuf->tail,
-				       ringbuf->size);
-		if (new_space >= n)
+		space = __intel_ring_space(request->postfix, ringbuf->tail,
+					   ringbuf->size);
+		if (space >= n)
 			break;
 	}
 
@@ -2121,10 +2120,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	if (ret)
 		return ret;
 
-	i915_gem_retire_requests_ring(ring);
-
-	WARN_ON(intel_ring_space(ringbuf) < new_space);
-
+	ringbuf->space = space;
 	return 0;
 }
 
@@ -2168,10 +2164,14 @@ int intel_ring_idle(struct intel_engine_cs *ring)
 		return 0;
 
 	req = list_entry(ring->request_list.prev,
-			   struct drm_i915_gem_request,
-			   list);
-
-	return i915_wait_request(req);
+			struct drm_i915_gem_request,
+			list);
+
+	/* Make sure we do not trigger any retires */
+	return __i915_wait_request(req,
+				   atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
+				   to_i915(ring->dev)->mm.interruptible,
+				   NULL, NULL);
 }
 
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)

+ 2 - 2
drivers/gpu/drm/i915/intel_runtime_pm.c

@@ -771,7 +771,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
 	vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl);
 
 	if (wait_for(COND, 100))
-		DRM_ERROR("timout setting power well state %08x (%08x)\n",
+		DRM_ERROR("timeout setting power well state %08x (%08x)\n",
 			  state,
 			  vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL));
 
@@ -1029,7 +1029,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
 	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl);
 
 	if (wait_for(COND, 100))
-		DRM_ERROR("timout setting power well state %08x (%08x)\n",
+		DRM_ERROR("timeout setting power well state %08x (%08x)\n",
 			  state,
 			  vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ));
 

+ 41 - 33
drivers/gpu/drm/i915/intel_sdvo.c

@@ -243,6 +243,14 @@ static void intel_sdvo_write_sdvox(struct intel_sdvo *intel_sdvo, u32 val)
 	if (intel_sdvo->sdvo_reg == PCH_SDVOB) {
 		I915_WRITE(intel_sdvo->sdvo_reg, val);
 		POSTING_READ(intel_sdvo->sdvo_reg);
+		/*
+		 * HW workaround, need to write this twice for issue
+		 * that may result in first write getting masked.
+		 */
+		if (HAS_PCH_IBX(dev)) {
+			I915_WRITE(intel_sdvo->sdvo_reg, val);
+			POSTING_READ(intel_sdvo->sdvo_reg);
+		}
 		return;
 	}
 
@@ -1429,6 +1437,7 @@ static void intel_disable_sdvo(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
 	u32 temp;
 
 	intel_sdvo_set_active_outputs(intel_sdvo, 0);
@@ -1437,35 +1446,34 @@ static void intel_disable_sdvo(struct intel_encoder *encoder)
 						   DRM_MODE_DPMS_OFF);
 
 	temp = I915_READ(intel_sdvo->sdvo_reg);
-	if ((temp & SDVO_ENABLE) != 0) {
-		/* HW workaround for IBX, we need to move the port to
-		 * transcoder A before disabling it. */
-		if (HAS_PCH_IBX(encoder->base.dev)) {
-			struct drm_crtc *crtc = encoder->base.crtc;
-			int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1;
-
-			if (temp & SDVO_PIPE_B_SELECT) {
-				temp &= ~SDVO_PIPE_B_SELECT;
-				I915_WRITE(intel_sdvo->sdvo_reg, temp);
-				POSTING_READ(intel_sdvo->sdvo_reg);
-
-				/* Again we need to write this twice. */
-				I915_WRITE(intel_sdvo->sdvo_reg, temp);
-				POSTING_READ(intel_sdvo->sdvo_reg);
-
-				/* Transcoder selection bits only update
-				 * effectively on vblank. */
-				if (crtc)
-					intel_wait_for_vblank(encoder->base.dev, pipe);
-				else
-					msleep(50);
-			}
-		}
 
-		intel_sdvo_write_sdvox(intel_sdvo, temp & ~SDVO_ENABLE);
+	temp &= ~SDVO_ENABLE;
+	intel_sdvo_write_sdvox(intel_sdvo, temp);
+
+	/*
+	 * HW workaround for IBX, we need to move the port
+	 * to transcoder A after disabling it to allow the
+	 * matching DP port to be enabled on transcoder A.
+	 */
+	if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) {
+		temp &= ~SDVO_PIPE_B_SELECT;
+		temp |= SDVO_ENABLE;
+		intel_sdvo_write_sdvox(intel_sdvo, temp);
+
+		temp &= ~SDVO_ENABLE;
+		intel_sdvo_write_sdvox(intel_sdvo, temp);
 	}
 }
 
+static void pch_disable_sdvo(struct intel_encoder *encoder)
+{
+}
+
+static void pch_post_disable_sdvo(struct intel_encoder *encoder)
+{
+	intel_disable_sdvo(encoder);
+}
+
 static void intel_enable_sdvo(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
@@ -1478,14 +1486,9 @@ static void intel_enable_sdvo(struct intel_encoder *encoder)
 	bool success;
 
 	temp = I915_READ(intel_sdvo->sdvo_reg);
-	if ((temp & SDVO_ENABLE) == 0) {
-		/* HW workaround for IBX, we need to move the port
-		 * to transcoder A before disabling it, so restore it here. */
-		if (HAS_PCH_IBX(dev))
-			temp |= SDVO_PIPE_SEL(intel_crtc->pipe);
+	temp |= SDVO_ENABLE;
+	intel_sdvo_write_sdvox(intel_sdvo, temp);
 
-		intel_sdvo_write_sdvox(intel_sdvo, temp | SDVO_ENABLE);
-	}
 	for (i = 0; i < 2; i++)
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
 
@@ -2988,7 +2991,12 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob)
 	}
 
 	intel_encoder->compute_config = intel_sdvo_compute_config;
-	intel_encoder->disable = intel_disable_sdvo;
+	if (HAS_PCH_SPLIT(dev)) {
+		intel_encoder->disable = pch_disable_sdvo;
+		intel_encoder->post_disable = pch_post_disable_sdvo;
+	} else {
+		intel_encoder->disable = intel_disable_sdvo;
+	}
 	intel_encoder->pre_enable = intel_sdvo_pre_enable;
 	intel_encoder->enable = intel_enable_sdvo;
 	intel_encoder->get_hw_state = intel_sdvo_get_hw_state;

+ 26 - 14
drivers/gpu/drm/i915/intel_sprite.c

@@ -229,8 +229,8 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc,
 
 	if (intel_rotation_90_or_270(rotation)) {
 		/* stride: Surface height in tiles */
-		tile_height = intel_tile_height(dev, fb->bits_per_pixel,
-							fb->modifier[0]);
+		tile_height = intel_tile_height(dev, fb->pixel_format,
+						fb->modifier[0]);
 		stride = DIV_ROUND_UP(fb->height, tile_height);
 		plane_size = (src_w << 16) | src_h;
 		x_offset = stride * tile_height - y - (src_h + 1);
@@ -770,6 +770,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
 	const struct drm_rect *clip = &state->clip;
 	int hscale, vscale;
 	int max_scale, min_scale;
+	bool can_scale;
 	int pixel_size;
 	int ret;
 
@@ -794,18 +795,29 @@ intel_check_sprite_plane(struct drm_plane *plane,
 		return -EINVAL;
 	}
 
+	/* setup can_scale, min_scale, max_scale */
+	if (INTEL_INFO(dev)->gen >= 9) {
+		/* use scaler when colorkey is not required */
+		if (intel_plane->ckey.flags == I915_SET_COLORKEY_NONE) {
+			can_scale = 1;
+			min_scale = 1;
+			max_scale = skl_max_scale(intel_crtc, crtc_state);
+		} else {
+			can_scale = 0;
+			min_scale = DRM_PLANE_HELPER_NO_SCALING;
+			max_scale = DRM_PLANE_HELPER_NO_SCALING;
+		}
+	} else {
+		can_scale = intel_plane->can_scale;
+		max_scale = intel_plane->max_downscale << 16;
+		min_scale = intel_plane->can_scale ? 1 : (1 << 16);
+	}
+
 	/*
 	 * FIXME the following code does a bunch of fuzzy adjustments to the
 	 * coordinates and sizes. We probably need some way to decide whether
 	 * more strict checking should be done instead.
 	 */
-	max_scale = intel_plane->max_downscale << 16;
-	min_scale = intel_plane->can_scale ? 1 : (1 << 16);
-
-	if (INTEL_INFO(dev)->gen >= 9) {
-		min_scale = 1;
-		max_scale = skl_max_scale(intel_crtc, crtc_state);
-	}
 
 	drm_rect_rotate(src, fb->width << 16, fb->height << 16,
 			state->base.rotation);
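intel_check_sprite_plane() now derives can_scale, min_scale and max_scale per platform: gen9 can scale through the pipe scaler but only when no colorkey is in use, while older hardware keeps the sprite's fixed downscale limit. The bounds are 16.16 fixed-point ratios of source to destination size; a small sketch of picking and applying them, with an assumed constant standing in for skl_max_scale():

#include <stdio.h>
#include <stdbool.h>

#define NO_SCALING	  (1 << 16)	/* 1.0 in 16.16 fixed point */
#define SKL_MAX_DOWNSCALE (3 << 16)	/* assumption in place of skl_max_scale() */

static void scale_limits(int gen, bool colorkey_enabled, bool sprite_can_scale,
			 int max_downscale, int *min_scale, int *max_scale)
{
	if (gen >= 9) {
		if (!colorkey_enabled) {	/* pipe scaler usable */
			*min_scale = 1;
			*max_scale = SKL_MAX_DOWNSCALE;
		} else {			/* colorkey: no scaling allowed */
			*min_scale = NO_SCALING;
			*max_scale = NO_SCALING;
		}
	} else {
		*max_scale = max_downscale << 16;
		*min_scale = sprite_can_scale ? 1 : NO_SCALING;
	}
}

int main(void)
{
	int min, max;
	int src_w = 1920, dst_w = 960;
	int hscale = (src_w << 16) / dst_w;	/* 2.0x downscale in 16.16 */

	scale_limits(9, false, true, 2, &min, &max);
	printf("hscale=0x%x allowed=%d\n", hscale,
	       hscale >= min && hscale <= max);
	return 0;
}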
@@ -876,7 +888,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
 			 * Must keep src and dst the
 			 * same if we can't scale.
 			 */
-			if (!intel_plane->can_scale)
+			if (!can_scale)
 				crtc_w &= ~1;
 
 			if (crtc_w == 0)
@@ -888,7 +900,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
 	if (state->visible && (src_w != crtc_w || src_h != crtc_h)) {
 		unsigned int width_bytes;
 
-		WARN_ON(!intel_plane->can_scale);
+		WARN_ON(!can_scale);
 
 		/* FIXME interlacing min height is 6 */
 
@@ -1052,7 +1064,7 @@ int intel_plane_restore(struct drm_plane *plane)
 				       plane->state->src_w, plane->state->src_h);
 }
 
-static uint32_t ilk_plane_formats[] = {
+static const uint32_t ilk_plane_formats[] = {
 	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YVYU,
@@ -1060,7 +1072,7 @@ static uint32_t ilk_plane_formats[] = {
 	DRM_FORMAT_VYUY,
 };
 
-static uint32_t snb_plane_formats[] = {
+static const uint32_t snb_plane_formats[] = {
 	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_YUYV,
@@ -1069,7 +1081,7 @@ static uint32_t snb_plane_formats[] = {
 	DRM_FORMAT_VYUY,
 };
 
-static uint32_t vlv_plane_formats[] = {
+static const uint32_t vlv_plane_formats[] = {
 	DRM_FORMAT_RGB565,
 	DRM_FORMAT_ABGR8888,
 	DRM_FORMAT_ARGB8888,

+ 2 - 4
include/drm/i915_pciids.h

@@ -286,11 +286,9 @@
 	INTEL_SKL_GT2_IDS(info), \
 	INTEL_SKL_GT3_IDS(info)
 
-
 #define INTEL_BXT_IDS(info) \
 	INTEL_VGA_DEVICE(0x0A84, info), \
-	INTEL_VGA_DEVICE(0x0A85, info), \
-	INTEL_VGA_DEVICE(0x0A86, info), \
-	INTEL_VGA_DEVICE(0x0A87, info)
+	INTEL_VGA_DEVICE(0x1A84, info), \
+	INTEL_VGA_DEVICE(0x5A84, info)
 
 #endif /* _I915_PCIIDS_H */