intel_workarounds.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the
 *    rest of the code, as needed.
 *
 * .. [2] Technically, some registers are power-context saved & restored, so
 *    they survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ''''''
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */
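/*
 * The list in dev_priv->workarounds is kept sorted by mmio offset: wa_add()
 * below looks up the register with a binary search, merges writes to an
 * already-listed register into the existing entry (warning when a later
 * write lands entirely within an already-programmed mask), and bubbles a
 * freshly appended entry back into place to preserve the ordering.
 */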
static void wa_add(struct drm_i915_private *i915,
                   i915_reg_t reg, const u32 mask, const u32 val)
{
        struct i915_workarounds *wa = &i915->workarounds;
        unsigned int start = 0, end = wa->count;
        unsigned int addr = i915_mmio_reg_offset(reg);
        struct i915_wa_reg *r;

        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (wa->reg[mid].addr < addr) {
                        start = mid + 1;
                } else if (wa->reg[mid].addr > addr) {
                        end = mid;
                } else {
                        r = &wa->reg[mid];

                        if ((mask & ~r->mask) == 0) {
                                DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
                                          addr, r->mask, r->value);

                                r->value &= ~mask;
                        }

                        r->value |= val;
                        r->mask |= mask;
                        return;
                }
        }

        if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
                DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
                          addr, mask, val);
                return;
        }

        r = &wa->reg[wa->count++];
        r->addr = addr;
        r->value = val;
        r->mask = mask;

        while (r-- > wa->reg) {
                GEM_BUG_ON(r[0].addr == r[1].addr);
                if (r[1].addr > r[0].addr)
                        break;

                swap(r[1], r[0]);
        }
}
#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))

#define WA_SET_BIT_MASKED(addr, mask) \
        WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
        WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
        WA_REG(addr, (mask), _MASKED_FIELD(mask, value))
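/*
 * A worked example of the masked-register convention these macros rely on
 * (the _MASKED_* helpers come from i915_reg.h): the upper 16 bits of the
 * written value act as a per-bit write enable, so
 *
 *      WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE)
 *
 * writes (ASYNC_FLIP_PERF_DISABLE << 16) | ASYNC_FLIP_PERF_DISABLE, updating
 * only that one bit and leaving the rest of the register untouched. This is
 * what makes it safe to replay these writes blindly from the LRI list on
 * every context creation.
 */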
static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                          HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         * polygons in the same 8x4 pixel/sample area to be processed without
         * stalling waiting for the earlier ones to write to Hierarchical Z
         * buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);

        return 0;
}
static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen8_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
         * to disable EUTC clock gating.
         */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                          DOP_CLOCK_GATING_DISABLE);

        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                          GEN8_SAMPLER_POWER_BYPASS_DIS);

        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          /* WaForceContextSaveRestoreNonCoherent:bdw */
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                          (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

        return 0;
}
static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen8_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableThreadStallDopClockGating:chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

        return 0;
}
static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        if (HAS_LLC(dev_priv)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          FLOW_CONTROL_ENABLE |
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
        if (!IS_COFFEELAKE(dev_priv))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                          GEN9_ENABLE_YV12_BUGFIX |
                          GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(CACHE_MODE_1,
                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                          GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on
         * kbl/bxt. This area has been a source of system hangs, so we play it
         * safe and mimic skl regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(dev_priv) ||
            IS_KABYLAKE(dev_priv) ||
            IS_COFFEELAKE(dev_priv))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */
        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(dev_priv))
                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);

        return 0;
}
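/*
 * A quick arithmetic sketch of skl_tune_iz_hashing() below: for a slice
 * whose only 7-EU subslice is subslice 2 (subslice_7eu[i] == BIT(2)),
 * ffs() - 1 yields ss == 2, so vals[i] == 3 - ss == 1, which is then
 * programmed into the GEN9_IZ_HASHING field for that slice.
 */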
static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
{
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * ->    0 <= ss <= 3;
                 */
                ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return 0;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN9_IZ_HASHING_MASK(2) |
                            GEN9_IZ_HASHING_MASK(1) |
                            GEN9_IZ_HASHING_MASK(0),
                            GEN9_IZ_HASHING(2, vals[2]) |
                            GEN9_IZ_HASHING(1, vals[1]) |
                            GEN9_IZ_HASHING(0, vals[0]));

        return 0;
}
static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        return skl_tune_iz_hashing(dev_priv);
}

static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableThreadStallDopClockGating:bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        return 0;
}

static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
        if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
                WA_SET_BIT_MASKED(HDC_CHICKEN0,
                                  HDC_FENCE_DEST_SLM_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

        return 0;
}

static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        return 0;
}

static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

        return 0;
}
static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        /* WaForceContextSaveRestoreNonCoherent:cnl */
        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl (pre-prod) */
        if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
        if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);

        return 0;
}
static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        /* Wa_2006611047:icl (pre-prod)
         * Formerly known as WaDisableImprovedTdlClkGating
         */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

        /* WaEnableStateCacheRedirectToCS:icl */
        WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
                          GEN11_STATE_CACHE_REDIRECT_TO_CS);

        /* Wa_2006665173:icl (pre-prod) */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
                                  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

        /* WaEnableFloatBlendOptimization:icl */
        WA_SET_BIT_MASKED(GEN10_CACHE_MODE_SS, FLOAT_BLEND_OPTIMIZATION_ENABLE);

        return 0;
}
int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int err = 0;

        dev_priv->workarounds.count = 0;

        if (INTEL_GEN(dev_priv) < 8)
                err = 0;
        else if (IS_BROADWELL(dev_priv))
                err = bdw_ctx_workarounds_init(dev_priv);
        else if (IS_CHERRYVIEW(dev_priv))
                err = chv_ctx_workarounds_init(dev_priv);
        else if (IS_SKYLAKE(dev_priv))
                err = skl_ctx_workarounds_init(dev_priv);
        else if (IS_BROXTON(dev_priv))
                err = bxt_ctx_workarounds_init(dev_priv);
        else if (IS_KABYLAKE(dev_priv))
                err = kbl_ctx_workarounds_init(dev_priv);
        else if (IS_GEMINILAKE(dev_priv))
                err = glk_ctx_workarounds_init(dev_priv);
        else if (IS_COFFEELAKE(dev_priv))
                err = cfl_ctx_workarounds_init(dev_priv);
        else if (IS_CANNONLAKE(dev_priv))
                err = cnl_ctx_workarounds_init(dev_priv);
        else if (IS_ICELAKE(dev_priv))
                err = icl_ctx_workarounds_init(dev_priv);
        else
                MISSING_CASE(INTEL_GEN(dev_priv));
        if (err)
                return err;

        DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n",
                         dev_priv->workarounds.count);
        return 0;
}
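/*
 * intel_ctx_workarounds_emit() below writes the accumulated list into the
 * request's ring as a single MI_LOAD_REGISTER_IMM packet, padded to an even
 * number of dwords:
 *
 *      MI_LOAD_REGISTER_IMM(count)
 *      (reg[0].addr, reg[0].value)
 *      ...
 *      (reg[count-1].addr, reg[count-1].value)
 *      MI_NOOP
 *
 * hence the (count * 2 + 2) dwords reserved, with an EMIT_BARRIER flush on
 * either side of the packet.
 */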
int intel_ctx_workarounds_emit(struct i915_request *rq)
{
        struct i915_workarounds *w = &rq->i915->workarounds;
        u32 *cs;
        int ret, i;

        if (w->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        cs = intel_ring_begin(rq, (w->count * 2 + 2));
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(w->count);
        for (i = 0; i < w->count; i++) {
                *cs++ = w->reg[i].addr;
                *cs++ = w->reg[i].value;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}
static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
}

static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
}

static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
        I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
                   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

        /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
        I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
                   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(dev_priv))
                I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
                           ECOCHK_DIS_TLB);

        if (HAS_LLC(dev_priv)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                I915_WRITE(MMCD_MISC_CTRL,
                           I915_READ(MMCD_MISC_CTRL) |
                           MMCD_PCLA |
                           MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
                   BDW_DISABLE_HDC_INVALIDATION);

        /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
        if (IS_GEN9_LP(dev_priv)) {
                u32 val = I915_READ(GEN8_L3SQCREG1);

                val &= ~L3_PRIO_CREDITS_MASK;
                val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
                I915_WRITE(GEN8_L3SQCREG1, val);
        }

        /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
        I915_WRITE(GEN8_L3SQCREG4,
                   I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
                   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}
static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);

        /* WaEnableGapsTsvCreditFix:skl */
        I915_WRITE(GEN8_GARBCNTL,
                   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

        /* WaDisableGafsUnitClkGating:skl */
        I915_WRITE(GEN7_UCGCTL4,
                   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
                I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                           I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                           GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);

        /* WaDisablePooledEuLoadBalancingFix:bxt */
        I915_WRITE(FF_SLICE_CS_CHICKEN2,
                   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));

        /* WaInPlaceDecompressionHang:bxt */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);

        /* WaEnableGapsTsvCreditFix:kbl */
        I915_WRITE(GEN8_GARBCNTL,
                   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
                I915_WRITE(GAMT_CHKN_BIT_REG,
                           I915_READ(GAMT_CHKN_BIT_REG) |
                           GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        I915_WRITE(GEN7_UCGCTL4,
                   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaKBLVECSSemaphoreWaitPoll:kbl */
        if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {
                struct intel_engine_cs *engine;
                unsigned int tmp;

                for_each_engine(engine, dev_priv, tmp) {
                        if (engine->id == RCS)
                                continue;

                        I915_WRITE(RING_SEMA_WAIT_POLL(engine->mmio_base), 1);
                }
        }
}

static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);
}

static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);

        /* WaEnableGapsTsvCreditFix:cfl */
        I915_WRITE(GEN8_GARBCNTL,
                   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

        /* WaDisableGafsUnitClkGating:cfl */
        I915_WRITE(GEN7_UCGCTL4,
                   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
static void wa_init_mcr(struct drm_i915_private *dev_priv)
{
        const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
        u32 mcr;
        u32 mcr_slice_subslice_mask;

        /*
         * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
         * L3Banks could be fused off in single slice scenario. If that is
         * the case, we might need to program MCR select to a valid L3Bank
         * by default, to make sure we correctly read certain registers
         * later on (in the range 0xB100 - 0xB3FF).
         * This might be incompatible with
         * WaProgramMgsrForCorrectSliceSpecificMmioReads.
         * Fortunately, this should not happen in production hardware, so
         * we only assert that this is the case (instead of implementing
         * something more complex that requires checking the range of every
         * MMIO read).
         */
        if (INTEL_GEN(dev_priv) >= 10 &&
            is_power_of_2(sseu->slice_mask)) {
                /*
                 * read FUSE3 for enabled L3 Bank IDs; if an L3 Bank matches
                 * an enabled subslice, there is no need to redirect the MCR
                 * packet
                 */
                u32 slice = fls(sseu->slice_mask);
                u32 fuse3 = I915_READ(GEN10_MIRROR_FUSE3);
                u8 ss_mask = sseu->subslice_mask[slice];

                u8 enabled_mask = (ss_mask | ss_mask >>
                                   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
                u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

                /*
                 * Production silicon should have matched L3Bank and
                 * subslice enabled
                 */
                WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
        }

        mcr = I915_READ(GEN8_MCR_SELECTOR);

        if (INTEL_GEN(dev_priv) >= 11)
                mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
                                          GEN11_MCR_SUBSLICE_MASK;
        else
                mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
                                          GEN8_MCR_SUBSLICE_MASK;

        /*
         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
         * Before any MMIO read into slice/subslice specific registers, the
         * MCR packet control register needs to be programmed to point to any
         * enabled s/ss pair. Otherwise, incorrect values will be returned.
         * This means each subsequent MMIO read will be forwarded to a
         * specific s/ss combination, but this is OK since these registers
         * are consistent across s/ss in almost all cases. On the rare
         * occasions, such as INSTDONE, where this value is dependent
         * on the s/ss combo, the read should be done with read_subslice_reg.
         */
        mcr &= ~mcr_slice_subslice_mask;
        mcr |= intel_calculate_mcr_s_ss_select(dev_priv);
        I915_WRITE(GEN8_MCR_SELECTOR, mcr);
}
static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        wa_init_mcr(dev_priv);

        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
        if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
                I915_WRITE(GAMT_CHKN_BIT_REG,
                           I915_READ(GAMT_CHKN_BIT_REG) |
                           GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

        /* WaInPlaceDecompressionHang:cnl */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
                   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}
static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        wa_init_mcr(dev_priv);

        /* This is not a Wa. Enable for better image quality */
        I915_WRITE(_3D_CHICKEN3,
                   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));

        /* WaInPlaceDecompressionHang:icl */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaPipelineFlushCoherentLines:icl */
        I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
                   GEN8_LQSC_FLUSH_COHERENT_LINES);

        /* Wa_1405543622:icl
         * Formerly known as WaGAPZPriorityScheme
         */
        I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) |
                   GEN11_ARBITRATION_PRIO_ORDER_MASK);

        /* Wa_1604223664:icl
         * Formerly known as WaL3BankAddressHashing
         */
        I915_WRITE(GEN8_GARBCNTL,
                   (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) |
                   GEN11_HASH_CTRL_EXCL_BIT0);
        I915_WRITE(GEN11_GLBLINVL,
                   (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) |
                   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

        /* WaModifyGamTlbPartitioning:icl */
        I915_WRITE(GEN11_GACB_PERF_CTRL,
                   (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) |
                   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405733216:icl
         * Formerly known as WaDisableCleanEvicts
         */
        I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
                   GEN11_LQSC_CLEAN_EVICT_DISABLE);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) |
                   GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                   GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) |
                   GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
                I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
                           I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
                           MSCUNIT_CLKGATE_DIS);

        /* Wa_1406680159:icl */
        I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE,
                   I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) |
                   GWUNIT_CLKGATE_DIS);

        /* Wa_1604302699:icl */
        I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER,
                   I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) |
                   GEN11_I2M_WRITE_DISABLE);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
                I915_WRITE(INF_UNIT_LEVEL_CLKGATE,
                           I915_READ(INF_UNIT_LEVEL_CLKGATE) |
                           CGPSF_CLKGATE_DIS);

        /* WaForwardProgressSoftReset:icl */
        I915_WRITE(GEN10_SCRATCH_LNCF2,
                   I915_READ(GEN10_SCRATCH_LNCF2) |
                   PMFLUSHDONE_LNICRSDROP |
                   PMFLUSH_GAPL3UNBLOCK |
                   PMFLUSHDONE_LNEBLK);

        /* Wa_1406463099:icl
         * Formerly known as WaGamTlbPendError
         */
        I915_WRITE(GAMT_CHKN_BIT_REG,
                   I915_READ(GAMT_CHKN_BIT_REG) |
                   GAMT_CHKN_DISABLE_L3_COH_PIPE);
}
void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        if (INTEL_GEN(dev_priv) < 8)
                return;
        else if (IS_BROADWELL(dev_priv))
                bdw_gt_workarounds_apply(dev_priv);
        else if (IS_CHERRYVIEW(dev_priv))
                chv_gt_workarounds_apply(dev_priv);
        else if (IS_SKYLAKE(dev_priv))
                skl_gt_workarounds_apply(dev_priv);
        else if (IS_BROXTON(dev_priv))
                bxt_gt_workarounds_apply(dev_priv);
        else if (IS_KABYLAKE(dev_priv))
                kbl_gt_workarounds_apply(dev_priv);
        else if (IS_GEMINILAKE(dev_priv))
                glk_gt_workarounds_apply(dev_priv);
        else if (IS_COFFEELAKE(dev_priv))
                cfl_gt_workarounds_apply(dev_priv);
        else if (IS_CANNONLAKE(dev_priv))
                cnl_gt_workarounds_apply(dev_priv);
        else if (IS_ICELAKE(dev_priv))
                icl_gt_workarounds_apply(dev_priv);
        else
                MISSING_CASE(INTEL_GEN(dev_priv));
}
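/*
 * Register whitelisting works by programming the engine's
 * RING_FORCE_TO_NONPRIV slots with the mmio offsets that unprivileged
 * (userspace) batches are allowed to touch. Any slot left over after the
 * per-platform list is built is pointed at the engine's NOPID register, so
 * stale or garbage entries at worst whitelist an innocuous register.
 */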
struct whitelist {
        i915_reg_t reg[RING_MAX_NONPRIV_SLOTS];
        unsigned int count;
        u32 nopid;
};

static void whitelist_reg(struct whitelist *w, i915_reg_t reg)
{
        if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS))
                return;

        w->reg[w->count++] = reg;
}

static void bdw_whitelist_build(struct whitelist *w)
{
}

static void chv_whitelist_build(struct whitelist *w)
{
}

static void gen9_whitelist_build(struct whitelist *w)
{
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        whitelist_reg(w, GEN8_CS_CHICKEN1);

        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:skl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);
}

static void kbl_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:kbl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);

        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);
}

static void cnl_whitelist_build(struct whitelist *w)
{
        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct whitelist *w)
{
}
static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
                                         struct whitelist *w)
{
        struct drm_i915_private *i915 = engine->i915;

        GEM_BUG_ON(engine->id != RCS);

        w->count = 0;
        w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base));

        if (INTEL_GEN(i915) < 8)
                return NULL;
        else if (IS_BROADWELL(i915))
                bdw_whitelist_build(w);
        else if (IS_CHERRYVIEW(i915))
                chv_whitelist_build(w);
        else if (IS_SKYLAKE(i915))
                skl_whitelist_build(w);
        else if (IS_BROXTON(i915))
                bxt_whitelist_build(w);
        else if (IS_KABYLAKE(i915))
                kbl_whitelist_build(w);
        else if (IS_GEMINILAKE(i915))
                glk_whitelist_build(w);
        else if (IS_COFFEELAKE(i915))
                cfl_whitelist_build(w);
        else if (IS_CANNONLAKE(i915))
                cnl_whitelist_build(w);
        else if (IS_ICELAKE(i915))
                icl_whitelist_build(w);
        else
                MISSING_CASE(INTEL_GEN(i915));

        return w;
}
static void whitelist_apply(struct intel_engine_cs *engine,
                            const struct whitelist *w)
{
        struct drm_i915_private *dev_priv = engine->i915;
        const u32 base = engine->mmio_base;
        unsigned int i;

        if (!w)
                return;

        intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);

        for (i = 0; i < w->count; i++)
                I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i),
                              i915_mmio_reg_offset(w->reg[i]));

        /* And clear the rest just in case of garbage */
        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
                I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid);

        intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
}

void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine)
{
        struct whitelist w;

        whitelist_apply(engine, whitelist_build(engine, &w));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_workarounds.c"
#endif