intel_workarounds.c

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ''''''
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static void wa_init_start(struct i915_wa_list *wal, const char *name)
{
        wal->name = name;
}

static void wa_init_finish(struct i915_wa_list *wal)
{
        if (!wal->count)
                return;

        DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
                wal->count, wal->name);
}

static void wa_add(struct drm_i915_private *i915,
        i915_reg_t reg, const u32 mask, const u32 val)
{
        struct i915_workarounds *wa = &i915->workarounds;
        unsigned int start = 0, end = wa->count;
        unsigned int addr = i915_mmio_reg_offset(reg);
        struct i915_wa_reg *r;
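
        /*
         * The list is kept sorted by mmio offset: binary-search for an
         * existing entry first, and if the register is already present
         * merge the new mask/value into it rather than adding a duplicate.
         */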
        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (wa->reg[mid].addr < addr) {
                        start = mid + 1;
                } else if (wa->reg[mid].addr > addr) {
                        end = mid;
                } else {
                        r = &wa->reg[mid];

                        if ((mask & ~r->mask) == 0) {
                                DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
                                        addr, r->mask, r->value);
                                r->value &= ~mask;
                        }

                        r->value |= val;
                        r->mask |= mask;
                        return;
                }
        }

        if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
                DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
                        addr, mask, val);
                return;
        }

        r = &wa->reg[wa->count++];
        r->addr = addr;
        r->value = val;
        r->mask = mask;

        while (r-- > wa->reg) {
                GEM_BUG_ON(r[0].addr == r[1].addr);
                if (r[1].addr > r[0].addr)
                        break;

                swap(r[1], r[0]);
        }
}

#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))

#define WA_SET_BIT_MASKED(addr, mask) \
        WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
        WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
        WA_REG(addr, (mask), _MASKED_FIELD(mask, value))
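
/*
 * Note: these helpers target "masked" registers, where the upper 16 bits of
 * the written value act as a per-write mask selecting which of the lower 16
 * bits the hardware actually updates; that is what the _MASKED_BIT_ENABLE,
 * _MASKED_BIT_DISABLE and _MASKED_FIELD encodings provide.
 */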

static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         *  polygons in the same 8x4 pixel/sample area to be processed without
         *  stalling waiting for the earlier ones to write to Hierarchical Z
         *  buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                GEN6_WIZ_HASHING_MASK,
                GEN6_WIZ_HASHING_16x4);

        return 0;
}

static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen8_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
         * to disable EUTC clock gating.
         */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                DOP_CLOCK_GATING_DISABLE);

        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                GEN8_SAMPLER_POWER_BYPASS_DIS);

        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                /* WaForceContextSaveRestoreNonCoherent:bdw */
                HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

        return 0;
}

static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen8_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableThreadStallDopClockGating:chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

        return 0;
}

static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        if (HAS_LLC(dev_priv)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                        GEN9_PBE_COMPRESSED_HASH_SELECTION);
                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                        GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                FLOW_CONTROL_ENABLE |
                PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
        if (!IS_COFFEELAKE(dev_priv))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                        GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                GEN9_ENABLE_YV12_BUGFIX |
                GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(CACHE_MODE_1,
                GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on kbl/bxt.
         * This area has been a source of system hangs so we play it safe
         * and mimic the skl regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(dev_priv) ||
            IS_KABYLAKE(dev_priv) ||
            IS_COFFEELAKE(dev_priv))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                        GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */

        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(dev_priv))
                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);

        return 0;
}

static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
{
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * -> 0 <= ss <= 3;
                 */
                ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return 0;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                GEN9_IZ_HASHING_MASK(2) |
                GEN9_IZ_HASHING_MASK(1) |
                GEN9_IZ_HASHING_MASK(0),
                GEN9_IZ_HASHING(2, vals[2]) |
                GEN9_IZ_HASHING(1, vals[1]) |
                GEN9_IZ_HASHING(0, vals[0]));

        return 0;
}

static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        return skl_tune_iz_hashing(dev_priv);
}

static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableThreadStallDopClockGating:bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        return 0;
}

static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
        if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
                WA_SET_BIT_MASKED(HDC_CHICKEN0,
                        HDC_FENCE_DEST_SLM_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                        GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

        return 0;
}

static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        return 0;
}

static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

        return 0;
}

static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        /* WaForceContextSaveRestoreNonCoherent:cnl */
        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
                HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
        if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
        if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                        GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);

        return 0;
}

static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                        PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        /* Wa_2006611047:icl (pre-prod)
         * Formerly known as WaDisableImprovedTdlClkGating
         */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                        GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

        /* WaEnableStateCacheRedirectToCS:icl */
        WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
                GEN11_STATE_CACHE_REDIRECT_TO_CS);

        /* Wa_2006665173:icl (pre-prod) */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
                        GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

        return 0;
}

int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int err = 0;

        dev_priv->workarounds.count = 0;

        if (INTEL_GEN(dev_priv) < 8)
                err = 0;
        else if (IS_BROADWELL(dev_priv))
                err = bdw_ctx_workarounds_init(dev_priv);
        else if (IS_CHERRYVIEW(dev_priv))
                err = chv_ctx_workarounds_init(dev_priv);
        else if (IS_SKYLAKE(dev_priv))
                err = skl_ctx_workarounds_init(dev_priv);
        else if (IS_BROXTON(dev_priv))
                err = bxt_ctx_workarounds_init(dev_priv);
        else if (IS_KABYLAKE(dev_priv))
                err = kbl_ctx_workarounds_init(dev_priv);
        else if (IS_GEMINILAKE(dev_priv))
                err = glk_ctx_workarounds_init(dev_priv);
        else if (IS_COFFEELAKE(dev_priv))
                err = cfl_ctx_workarounds_init(dev_priv);
        else if (IS_CANNONLAKE(dev_priv))
                err = cnl_ctx_workarounds_init(dev_priv);
        else if (IS_ICELAKE(dev_priv))
                err = icl_ctx_workarounds_init(dev_priv);
        else
                MISSING_CASE(INTEL_GEN(dev_priv));
        if (err)
                return err;

        DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n",
                dev_priv->workarounds.count);
        return 0;
}

int intel_ctx_workarounds_emit(struct i915_request *rq)
{
        struct i915_workarounds *w = &rq->i915->workarounds;
        u32 *cs;
        int ret, i;

        if (w->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;
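
        /*
         * One dword for the MI_LOAD_REGISTER_IMM header, two per workaround
         * (register offset + value), and a trailing MI_NOOP to keep the
         * emitted sequence an even number of dwords.
         */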
        cs = intel_ring_begin(rq, (w->count * 2 + 2));
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(w->count);
        for (i = 0; i < w->count; i++) {
                *cs++ = w->reg[i].addr;
                *cs++ = w->reg[i].value;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}

static void
wal_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
        const unsigned int grow = 1 << 4;

        GEM_BUG_ON(!is_power_of_2(grow));
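
        /*
         * Grow the backing array in chunks of 'grow' entries: whenever the
         * count reaches a multiple of it, allocate a larger array and copy
         * the existing entries across before appending the new one.
         */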
        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
                struct i915_wa *list;

                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
                        GFP_KERNEL);
                if (!list) {
                        DRM_ERROR("No space for workaround init!\n");
                        return;
                }

                if (wal->list)
                        memcpy(list, wal->list, sizeof(*wa) * wal->count);

                wal->list = list;
        }

        wal->list[wal->count++] = *wa;
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        struct i915_wa wa = {
                .reg = reg,
                .mask = val,
                .val = _MASKED_BIT_ENABLE(val)
        };

        wal_add(wal, &wa);
}

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
        u32 val)
{
        struct i915_wa wa = {
                .reg = reg,
                .mask = mask,
                .val = val
        };

        wal_add(wal, &wa);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, val);
}

static void gen9_gt_workarounds_init(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(i915))
                wa_write_or(wal,
                        GAM_ECOCHK,
                        ECOCHK_DIS_TLB);

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_write_or(wal,
                        MMCD_MISC_CTRL,
                        MMCD_PCLA | MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        wa_write_or(wal,
                GAM_ECOCHK,
                BDW_DISABLE_HDC_INVALIDATION);
}

static void skl_gt_workarounds_init(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        gen9_gt_workarounds_init(i915);

        /* WaDisableGafsUnitClkGating:skl */
        wa_write_or(wal,
                GEN7_UCGCTL4,
                GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
                wa_write_or(wal,
                        GEN9_GAMT_ECO_REG_RW_IA,
                        GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void bxt_gt_workarounds_init(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        gen9_gt_workarounds_init(i915);

        /* WaInPlaceDecompressionHang:bxt */
        wa_write_or(wal,
                GEN9_GAMT_ECO_REG_RW_IA,
                GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void kbl_gt_workarounds_init(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        gen9_gt_workarounds_init(i915);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
                wa_write_or(wal,
                        GAMT_CHKN_BIT_REG,
                        GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        wa_write_or(wal,
                GEN7_UCGCTL4,
                GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        wa_write_or(wal,
                GEN9_GAMT_ECO_REG_RW_IA,
                GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void glk_gt_workarounds_init(struct drm_i915_private *i915)
{
        gen9_gt_workarounds_init(i915);
}

static void cfl_gt_workarounds_init(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        gen9_gt_workarounds_init(i915);

        /* WaDisableGafsUnitClkGating:cfl */
        wa_write_or(wal,
                GEN7_UCGCTL4,
                GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        wa_write_or(wal,
                GEN9_GAMT_ECO_REG_RW_IA,
                GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void wa_init_mcr(struct drm_i915_private *dev_priv)
{
        const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
        struct i915_wa_list *wal = &dev_priv->gt_wa_list;
        u32 mcr_slice_subslice_mask;

        /*
         * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
         * L3Banks could be fused off in a single-slice scenario. If that is
         * the case, we might need to program MCR select to a valid L3Bank
         * by default, to make sure we correctly read certain registers
         * later on (in the range 0xB100 - 0xB3FF).
         * This might be incompatible with
         * WaProgramMgsrForCorrectSliceSpecificMmioReads.
         * Fortunately, this should not happen in production hardware, so
         * we only assert that this is the case (instead of implementing
         * something more complex that requires checking the range of every
         * MMIO read).
         */
        if (INTEL_GEN(dev_priv) >= 10 &&
            is_power_of_2(sseu->slice_mask)) {
                /*
                 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
                 * enabled subslice, no need to redirect MCR packet
                 */
                u32 slice = fls(sseu->slice_mask);
                u32 fuse3 = I915_READ(GEN10_MIRROR_FUSE3);
                u8 ss_mask = sseu->subslice_mask[slice];

                u8 enabled_mask = (ss_mask | ss_mask >>
                        GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
                u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

                /*
                 * Production silicon should have matched L3Bank and
                 * subslice enabled
                 */
                WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
        }

        if (INTEL_GEN(dev_priv) >= 11)
                mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
                        GEN11_MCR_SUBSLICE_MASK;
        else
                mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
                        GEN8_MCR_SUBSLICE_MASK;

        /*
         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
         * Before any MMIO read into slice/subslice specific registers, the
         * MCR packet control register needs to be programmed to point to any
         * enabled s/ss pair. Otherwise, incorrect values will be returned.
         * This means each subsequent MMIO read will be forwarded to a
         * specific s/ss combination, but this is OK since these registers
         * are consistent across s/ss in almost all cases. On the rare
         * occasions, such as INSTDONE, where this value is dependent
         * on s/ss combo, the read should be done with read_subslice_reg.
         */
        wa_write_masked_or(wal,
                GEN8_MCR_SELECTOR,
                mcr_slice_subslice_mask,
                intel_calculate_mcr_s_ss_select(dev_priv));
}

static void cnl_gt_workarounds_init(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_mcr(i915);

        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                wa_write_or(wal,
                        GAMT_CHKN_BIT_REG,
                        GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

        /* WaInPlaceDecompressionHang:cnl */
        wa_write_or(wal,
                GEN9_GAMT_ECO_REG_RW_IA,
                GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void icl_gt_workarounds_init(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_mcr(i915);

        /* WaInPlaceDecompressionHang:icl */
        wa_write_or(wal,
                GEN9_GAMT_ECO_REG_RW_IA,
                GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaModifyGamTlbPartitioning:icl */
        wa_write_masked_or(wal,
                GEN11_GACB_PERF_CTRL,
                GEN11_HASH_CTRL_MASK,
                GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        wa_write_or(wal,
                GEN11_LSN_UNSLCVC,
                GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        wa_write_or(wal,
                GEN8_GAMW_ECO_DEV_RW_IA,
                GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_write_or(wal,
                        SLICE_UNIT_LEVEL_CLKGATE,
                        MSCUNIT_CLKGATE_DIS);

        /* Wa_1406680159:icl */
        wa_write_or(wal,
                SUBSLICE_UNIT_LEVEL_CLKGATE,
                GWUNIT_CLKGATE_DIS);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_write_or(wal,
                        INF_UNIT_LEVEL_CLKGATE,
                        CGPSF_CLKGATE_DIS);

        /* Wa_1406463099:icl
         * Formerly known as WaGamTlbPendError
         */
        wa_write_or(wal,
                GAMT_CHKN_BIT_REG,
                GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_start(wal, "GT");

        if (INTEL_GEN(i915) < 8)
                return;
        else if (IS_BROADWELL(i915))
                return;
        else if (IS_CHERRYVIEW(i915))
                return;
        else if (IS_SKYLAKE(i915))
                skl_gt_workarounds_init(i915);
        else if (IS_BROXTON(i915))
                bxt_gt_workarounds_init(i915);
        else if (IS_KABYLAKE(i915))
                kbl_gt_workarounds_init(i915);
        else if (IS_GEMINILAKE(i915))
                glk_gt_workarounds_init(i915);
        else if (IS_COFFEELAKE(i915))
                cfl_gt_workarounds_init(i915);
        else if (IS_CANNONLAKE(i915))
                cnl_gt_workarounds_init(i915);
        else if (IS_ICELAKE(i915))
                icl_gt_workarounds_init(i915);
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(wal);
}
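
/*
 * Collect the union of forcewake domains required to read-modify-write every
 * register on the list, so the caller can grab them all in one forcewake
 * transaction instead of toggling them per register.
 */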
static enum forcewake_domains
wal_get_fw_for_rmw(struct drm_i915_private *dev_priv,
        const struct i915_wa_list *wal)
{
        enum forcewake_domains fw = 0;
        struct i915_wa *wa;
        unsigned int i;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                fw |= intel_uncore_forcewake_for_reg(dev_priv,
                        wa->reg,
                        FW_REG_READ |
                        FW_REG_WRITE);

        return fw;
}

static void
wa_list_apply(struct drm_i915_private *dev_priv, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw;
        unsigned long flags;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        fw = wal_get_fw_for_rmw(dev_priv, wal);

        spin_lock_irqsave(&dev_priv->uncore.lock, flags);
        intel_uncore_forcewake_get__locked(dev_priv, fw);
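
        /*
         * Read-modify-write each register: clear the workaround's mask bits,
         * then OR in its value.
         */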
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                u32 val = I915_READ_FW(wa->reg);

                val &= ~wa->mask;
                val |= wa->val;

                I915_WRITE_FW(wa->reg, val);
        }

        intel_uncore_forcewake_put__locked(dev_priv, fw);
        spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);

        DRM_DEBUG_DRIVER("Applied %u %s workarounds\n", wal->count, wal->name);
}

void intel_gt_apply_workarounds(struct drm_i915_private *dev_priv)
{
        wa_list_apply(dev_priv, &dev_priv->gt_wa_list);
}

struct whitelist {
        i915_reg_t reg[RING_MAX_NONPRIV_SLOTS];
        unsigned int count;
        u32 nopid;
};

static void whitelist_reg(struct whitelist *w, i915_reg_t reg)
{
        if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS))
                return;

        w->reg[w->count++] = reg;
}

static void bdw_whitelist_build(struct whitelist *w)
{
}

static void chv_whitelist_build(struct whitelist *w)
{
}

static void gen9_whitelist_build(struct whitelist *w)
{
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        whitelist_reg(w, GEN8_CS_CHICKEN1);

        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:skl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);
}

static void kbl_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:kbl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);

        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);
}

static void cnl_whitelist_build(struct whitelist *w)
{
        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct whitelist *w)
{
}

static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
        struct whitelist *w)
{
        struct drm_i915_private *i915 = engine->i915;

        GEM_BUG_ON(engine->id != RCS);

        w->count = 0;
        w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base));

        if (INTEL_GEN(i915) < 8)
                return NULL;
        else if (IS_BROADWELL(i915))
                bdw_whitelist_build(w);
        else if (IS_CHERRYVIEW(i915))
                chv_whitelist_build(w);
        else if (IS_SKYLAKE(i915))
                skl_whitelist_build(w);
        else if (IS_BROXTON(i915))
                bxt_whitelist_build(w);
        else if (IS_KABYLAKE(i915))
                kbl_whitelist_build(w);
        else if (IS_GEMINILAKE(i915))
                glk_whitelist_build(w);
        else if (IS_COFFEELAKE(i915))
                cfl_whitelist_build(w);
        else if (IS_CANNONLAKE(i915))
                cnl_whitelist_build(w);
        else if (IS_ICELAKE(i915))
                icl_whitelist_build(w);
        else
                MISSING_CASE(INTEL_GEN(i915));

        return w;
}

static void whitelist_apply(struct intel_engine_cs *engine,
        const struct whitelist *w)
{
        struct drm_i915_private *dev_priv = engine->i915;
        const u32 base = engine->mmio_base;
        unsigned int i;

        if (!w)
                return;

        intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
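
        /* Point each RING_FORCE_TO_NONPRIV slot at a whitelisted register offset. */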
        for (i = 0; i < w->count; i++)
                I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i),
                        i915_mmio_reg_offset(w->reg[i]));

        /* And clear the rest just in case of garbage */
        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
                I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid);

        intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
}

void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine)
{
        struct whitelist w;

        whitelist_apply(engine, whitelist_build(engine, &w));
}

static void rcs_engine_wa_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->wa_list;

        if (IS_ICELAKE(i915)) {
                /* This is not a Wa. Enable for better image quality */
                wa_masked_en(wal,
                        _3D_CHICKEN3,
                        _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

                /* WaPipelineFlushCoherentLines:icl */
                wa_write_or(wal,
                        GEN8_L3SQCREG4,
                        GEN8_LQSC_FLUSH_COHERENT_LINES);

                /*
                 * Wa_1405543622:icl
                 * Formerly known as WaGAPZPriorityScheme
                 */
                wa_write_or(wal,
                        GEN8_GARBCNTL,
                        GEN11_ARBITRATION_PRIO_ORDER_MASK);

                /*
                 * Wa_1604223664:icl
                 * Formerly known as WaL3BankAddressHashing
                 */
                wa_write_masked_or(wal,
                        GEN8_GARBCNTL,
                        GEN11_HASH_CTRL_EXCL_MASK,
                        GEN11_HASH_CTRL_EXCL_BIT0);
                wa_write_masked_or(wal,
                        GEN11_GLBLINVL,
                        GEN11_BANK_HASH_ADDR_EXCL_MASK,
                        GEN11_BANK_HASH_ADDR_EXCL_BIT0);

                /*
                 * Wa_1405733216:icl
                 * Formerly known as WaDisableCleanEvicts
                 */
                wa_write_or(wal,
                        GEN8_L3SQCREG4,
                        GEN11_LQSC_CLEAN_EVICT_DISABLE);

                /* Wa_1604302699:icl */
                wa_write_or(wal,
                        GEN10_L3_CHICKEN_MODE_REGISTER,
                        GEN11_I2M_WRITE_DISABLE);

                /* WaForwardProgressSoftReset:icl */
                wa_write_or(wal,
                        GEN10_SCRATCH_LNCF2,
                        PMFLUSHDONE_LNICRSDROP |
                        PMFLUSH_GAPL3UNBLOCK |
                        PMFLUSHDONE_LNEBLK);
        }

        if (IS_GEN9(i915) || IS_CANNONLAKE(i915)) {
                /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,cnl */
                wa_masked_en(wal,
                        GEN7_FF_SLICE_CS_CHICKEN1,
                        GEN9_FFSC_PERCTX_PREEMPT_CTRL);
        }

        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
                /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
                wa_write_or(wal,
                        GEN8_GARBCNTL,
                        GEN9_GAPS_TSV_CREDIT_DISABLE);
        }

        if (IS_BROXTON(i915)) {
                /* WaDisablePooledEuLoadBalancingFix:bxt */
                wa_masked_en(wal,
                        FF_SLICE_CS_CHICKEN2,
                        GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
        }

        if (IS_GEN9(i915)) {
                /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
                wa_masked_en(wal,
                        GEN9_CSFE_CHICKEN1_RCS,
                        GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
                wa_write_or(wal,
                        BDW_SCRATCH1,
                        GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
                if (IS_GEN9_LP(i915))
                        wa_write_masked_or(wal,
                                GEN8_L3SQCREG1,
                                L3_PRIO_CREDITS_MASK,
                                L3_GENERAL_PRIO_CREDITS(62) |
                                L3_HIGH_PRIO_CREDITS(2));

                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
                wa_write_or(wal,
                        GEN8_L3SQCREG4,
                        GEN8_LQSC_FLUSH_COHERENT_LINES);
        }
}

static void xcs_engine_wa_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->wa_list;

        /* WaKBLVECSSemaphoreWaitPoll:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
                wa_write(wal,
                        RING_SEMA_WAIT_POLL(engine->mmio_base),
                        1);
        }
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->wa_list;

        if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
                return;

        wa_init_start(wal, engine->name);

        if (engine->id == RCS)
                rcs_engine_wa_init(engine);
        else
                xcs_engine_wa_init(engine);

        wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
        wa_list_apply(engine->i915, &engine->wa_list);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_workarounds.c"
#endif