/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please note that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the
 *    rest of the code, as needed.
 *
 * .. [2] Technically, some registers are saved/restored in the power context,
 *    so they survive a suspend/resume. In practice, writing them again is not
 *    too costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ''''''
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static int wa_add(struct drm_i915_private *dev_priv,
                  i915_reg_t addr,
                  const u32 mask, const u32 val)
{
        const unsigned int idx = dev_priv->workarounds.count;

        if (WARN_ON(idx >= I915_MAX_WA_REGS))
                return -ENOSPC;

        dev_priv->workarounds.reg[idx].addr = addr;
        dev_priv->workarounds.reg[idx].value = val;
        dev_priv->workarounds.reg[idx].mask = mask;

        dev_priv->workarounds.count++;

        return 0;
}

#define WA_REG(addr, mask, val) do { \
                const int r = wa_add(dev_priv, (addr), (mask), (val)); \
                if (r) \
                        return r; \
        } while (0)

#define WA_SET_BIT_MASKED(addr, mask) \
        WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
        WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
        WA_REG(addr, (mask), _MASKED_FIELD(mask, value))

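/*
 * A short illustration, assuming the usual definitions of the _MASKED_*
 * helpers in i915_reg.h (where _MASKED_FIELD(mask, value) expands to
 * "(mask) << 16 | (value)"): masked registers carry a write-enable mask in
 * their upper 16 bits, so a single write is self-describing and no
 * read-modify-write cycle is needed. For example,
 *
 *   WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
 *
 * records value == (INSTPM_FORCE_ORDERING << 16) | INSTPM_FORCE_ORDERING,
 * enabling exactly that bit and leaving the rest of the register untouched.
 *
 * Note also that WA_REG() returns from the *enclosing* function on error,
 * which is why each *_ctx_workarounds_init() below must return int.
 */
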
static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                          HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         *  polygons in the same 8x4 pixel/sample area to be processed without
         *  stalling waiting for the earlier ones to write to Hierarchical Z
         *  buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);

        return 0;
}

static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen8_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
         * to disable EUTC clock gating.
         */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                          DOP_CLOCK_GATING_DISABLE);

        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                          GEN8_SAMPLER_POWER_BYPASS_DIS);

        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          /* WaForceContextSaveRestoreNonCoherent:bdw */
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                          (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

        return 0;
}

static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen8_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableThreadStallDopClockGating:chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

        return 0;
}

static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        if (HAS_LLC(dev_priv)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          FLOW_CONTROL_ENABLE |
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
        if (!IS_COFFEELAKE(dev_priv))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                          GEN9_ENABLE_YV12_BUGFIX |
                          GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(CACHE_MODE_1,
                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                          GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on kbl/bxt.
         * This area has been a source of system hangs so we play it safe
         * and mimic the skl regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(dev_priv) ||
            IS_KABYLAKE(dev_priv) ||
            IS_COFFEELAKE(dev_priv))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */

        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(dev_priv))
                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);

        return 0;
}

static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
{
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * ->    0 <= ss <= 3;
                 */
                ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return 0;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN9_IZ_HASHING_MASK(2) |
                            GEN9_IZ_HASHING_MASK(1) |
                            GEN9_IZ_HASHING_MASK(0),
                            GEN9_IZ_HASHING(2, vals[2]) |
                            GEN9_IZ_HASHING(1, vals[1]) |
                            GEN9_IZ_HASHING(0, vals[0]));

        return 0;
}

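/*
 * A worked example with a hypothetical fused part: if slice 1 has 7 EUs in
 * subslice 2 and nowhere else, then subslice_7eu[1] == BIT(2), the
 * is_power_of_2() check passes, ffs() returns 3, so ss == 2 and
 * vals[1] == 3 - 2 == 1, which is the hashing value programmed for slice 1
 * via GEN9_IZ_HASHING(1, vals[1]) above.
 */
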
static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        return skl_tune_iz_hashing(dev_priv);
}

static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableThreadStallDopClockGating:bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        return 0;
}

static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
        if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
                WA_SET_BIT_MASKED(HDC_CHICKEN0,
                                  HDC_FENCE_DEST_SLM_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

        return 0;
}

static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        return 0;
}

static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int ret;

        ret = gen9_ctx_workarounds_init(dev_priv);
        if (ret)
                return ret;

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

        return 0;
}

static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        /* WaForceContextSaveRestoreNonCoherent:cnl */
        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl (pre-prod) */
        if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
        if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);

        return 0;
}

static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        return 0;
}

int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
        int err = 0;

        dev_priv->workarounds.count = 0;

        if (INTEL_GEN(dev_priv) < 8)
                err = 0;
        else if (IS_BROADWELL(dev_priv))
                err = bdw_ctx_workarounds_init(dev_priv);
        else if (IS_CHERRYVIEW(dev_priv))
                err = chv_ctx_workarounds_init(dev_priv);
        else if (IS_SKYLAKE(dev_priv))
                err = skl_ctx_workarounds_init(dev_priv);
        else if (IS_BROXTON(dev_priv))
                err = bxt_ctx_workarounds_init(dev_priv);
        else if (IS_KABYLAKE(dev_priv))
                err = kbl_ctx_workarounds_init(dev_priv);
        else if (IS_GEMINILAKE(dev_priv))
                err = glk_ctx_workarounds_init(dev_priv);
        else if (IS_COFFEELAKE(dev_priv))
                err = cfl_ctx_workarounds_init(dev_priv);
        else if (IS_CANNONLAKE(dev_priv))
                err = cnl_ctx_workarounds_init(dev_priv);
        else if (IS_ICELAKE(dev_priv))
                err = icl_ctx_workarounds_init(dev_priv);
        else
                MISSING_CASE(INTEL_GEN(dev_priv));
        if (err)
                return err;

        DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n",
                         dev_priv->workarounds.count);
        return 0;
}

int intel_ctx_workarounds_emit(struct i915_request *rq)
{
        struct i915_workarounds *w = &rq->i915->workarounds;
        u32 *cs;
        int ret, i;

        if (w->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        cs = intel_ring_begin(rq, (w->count * 2 + 2));
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(w->count);
        for (i = 0; i < w->count; i++) {
                *cs++ = i915_mmio_reg_offset(w->reg[i].addr);
                *cs++ = w->reg[i].value;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}

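/*
 * The resulting ring contents, sketched for a hypothetical two-entry
 * workaround list (w->count == 2):
 *
 *   MI_LOAD_REGISTER_IMM(2)
 *   <reg[0] offset> <reg[0] value>
 *   <reg[1] offset> <reg[1] value>
 *   MI_NOOP
 *
 * i.e. one header dword, an (offset, value) pair per workaround, and a
 * trailing MI_NOOP that brings the total to w->count * 2 + 2 dwords,
 * keeping the packet qword-aligned as the ring expects.
 */
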
static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
}

static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
}

static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
        I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
                   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

        /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
        I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
                   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(dev_priv))
                I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
                           ECOCHK_DIS_TLB);

        if (HAS_LLC(dev_priv)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                I915_WRITE(MMCD_MISC_CTRL,
                           I915_READ(MMCD_MISC_CTRL) |
                           MMCD_PCLA |
                           MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
                   BDW_DISABLE_HDC_INVALIDATION);

        /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
        if (IS_GEN9_LP(dev_priv)) {
                u32 val = I915_READ(GEN8_L3SQCREG1);

                val &= ~L3_PRIO_CREDITS_MASK;
                val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
                I915_WRITE(GEN8_L3SQCREG1, val);
        }

        /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
        I915_WRITE(GEN8_L3SQCREG4,
                   I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
                   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}

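/*
 * Note the two write styles above: masked registers such as
 * GEN9_CSFE_CHICKEN1_RCS take a single self-describing
 * _MASKED_BIT_ENABLE() write, while conventional registers such as
 * GAM_ECOCHK need an explicit I915_READ()/I915_WRITE() read-modify-write
 * to preserve their other bits.
 */
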
static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);

        /* WaEnableGapsTsvCreditFix:skl */
        I915_WRITE(GEN8_GARBCNTL,
                   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

        /* WaDisableGafsUnitClkGating:skl */
        I915_WRITE(GEN7_UCGCTL4,
                   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
                I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                           I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                           GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);

        /* WaDisablePooledEuLoadBalancingFix:bxt */
        I915_WRITE(FF_SLICE_CS_CHICKEN2,
                   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));

        /* WaInPlaceDecompressionHang:bxt */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);

        /* WaEnableGapsTsvCreditFix:kbl */
        I915_WRITE(GEN8_GARBCNTL,
                   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
                I915_WRITE(GAMT_CHKN_BIT_REG,
                           I915_READ(GAMT_CHKN_BIT_REG) |
                           GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        I915_WRITE(GEN7_UCGCTL4,
                   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);
}

static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        gen9_gt_workarounds_apply(dev_priv);

        /* WaEnableGapsTsvCreditFix:cfl */
        I915_WRITE(GEN8_GARBCNTL,
                   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

        /* WaDisableGafsUnitClkGating:cfl */
        I915_WRITE(GEN7_UCGCTL4,
                   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
        if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
                I915_WRITE(GAMT_CHKN_BIT_REG,
                           I915_READ(GAMT_CHKN_BIT_REG) |
                           GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

        /* WaInPlaceDecompressionHang:cnl */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
                   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}

static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        /* This is not a Wa. Enable for better image quality */
        I915_WRITE(_3D_CHICKEN3,
                   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));

        /* WaInPlaceDecompressionHang:icl */
        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaPipelineFlushCoherentLines:icl */
        I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
                   GEN8_LQSC_FLUSH_COHERENT_LINES);

        /* Wa_1405543622:icl
         * Formerly known as WaGAPZPriorityScheme
         */
        I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) |
                   GEN11_ARBITRATION_PRIO_ORDER_MASK);

        /* Wa_1604223664:icl
         * Formerly known as WaL3BankAddressHashing
         */
        I915_WRITE(GEN8_GARBCNTL,
                   (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) |
                   GEN11_HASH_CTRL_EXCL_BIT0);
        I915_WRITE(GEN11_GLBLINVL,
                   (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) |
                   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

        /* WaModifyGamTlbPartitioning:icl */
        I915_WRITE(GEN11_GACB_PERF_CTRL,
                   (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) |
                   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405733216:icl
         * Formerly known as WaDisableCleanEvicts
         */
        I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
                   GEN11_LQSC_CLEAN_EVICT_DISABLE);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) |
                   GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                   GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) |
                   GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
                I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
                           I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
                           MSCUNIT_CLKGATE_DIS);

        /* Wa_1406680159:icl */
        I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE,
                   I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) |
                   GWUNIT_CLKGATE_DIS);

        /* Wa_1604302699:icl */
        I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER,
                   I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) |
                   GEN11_I2M_WRITE_DISABLE);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
                I915_WRITE(INF_UNIT_LEVEL_CLKGATE,
                           I915_READ(INF_UNIT_LEVEL_CLKGATE) |
                           CGPSF_CLKGATE_DIS);

        /* WaForwardProgressSoftReset:icl */
        I915_WRITE(GEN10_SCRATCH_LNCF2,
                   I915_READ(GEN10_SCRATCH_LNCF2) |
                   PMFLUSHDONE_LNICRSDROP |
                   PMFLUSH_GAPL3UNBLOCK |
                   PMFLUSHDONE_LNEBLK);
}

void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
        if (INTEL_GEN(dev_priv) < 8)
                return;
        else if (IS_BROADWELL(dev_priv))
                bdw_gt_workarounds_apply(dev_priv);
        else if (IS_CHERRYVIEW(dev_priv))
                chv_gt_workarounds_apply(dev_priv);
        else if (IS_SKYLAKE(dev_priv))
                skl_gt_workarounds_apply(dev_priv);
        else if (IS_BROXTON(dev_priv))
                bxt_gt_workarounds_apply(dev_priv);
        else if (IS_KABYLAKE(dev_priv))
                kbl_gt_workarounds_apply(dev_priv);
        else if (IS_GEMINILAKE(dev_priv))
                glk_gt_workarounds_apply(dev_priv);
        else if (IS_COFFEELAKE(dev_priv))
                cfl_gt_workarounds_apply(dev_priv);
        else if (IS_CANNONLAKE(dev_priv))
                cnl_gt_workarounds_apply(dev_priv);
        else if (IS_ICELAKE(dev_priv))
                icl_gt_workarounds_apply(dev_priv);
        else
                MISSING_CASE(INTEL_GEN(dev_priv));
}

struct whitelist {
        i915_reg_t reg[RING_MAX_NONPRIV_SLOTS];
        unsigned int count;
        u32 nopid;
};

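/*
 * nopid caches the engine's RING_NOPID register offset. As used by
 * whitelist_apply() below, it provides a harmless default target for any
 * RING_FORCE_TO_NONPRIV slots the whitelist does not claim, so a stale
 * entry never leaves a privileged register exposed.
 */
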
static void whitelist_reg(struct whitelist *w, i915_reg_t reg)
{
        if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS))
                return;

        w->reg[w->count++] = reg;
}

static void bdw_whitelist_build(struct whitelist *w)
{
}

static void chv_whitelist_build(struct whitelist *w)
{
}

static void gen9_whitelist_build(struct whitelist *w)
{
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        whitelist_reg(w, GEN8_CS_CHICKEN1);

        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:skl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);
}

static void kbl_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:kbl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);

        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct whitelist *w)
{
        gen9_whitelist_build(w);
}

static void cnl_whitelist_build(struct whitelist *w)
{
        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct whitelist *w)
{
}

static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
                                         struct whitelist *w)
{
        struct drm_i915_private *i915 = engine->i915;

        GEM_BUG_ON(engine->id != RCS);

        w->count = 0;
        w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base));

        if (INTEL_GEN(i915) < 8)
                return NULL;
        else if (IS_BROADWELL(i915))
                bdw_whitelist_build(w);
        else if (IS_CHERRYVIEW(i915))
                chv_whitelist_build(w);
        else if (IS_SKYLAKE(i915))
                skl_whitelist_build(w);
        else if (IS_BROXTON(i915))
                bxt_whitelist_build(w);
        else if (IS_KABYLAKE(i915))
                kbl_whitelist_build(w);
        else if (IS_GEMINILAKE(i915))
                glk_whitelist_build(w);
        else if (IS_COFFEELAKE(i915))
                cfl_whitelist_build(w);
        else if (IS_CANNONLAKE(i915))
                cnl_whitelist_build(w);
        else if (IS_ICELAKE(i915))
                icl_whitelist_build(w);
        else
                MISSING_CASE(INTEL_GEN(i915));

        return w;
}

static void whitelist_apply(struct intel_engine_cs *engine,
                            const struct whitelist *w)
{
        struct drm_i915_private *dev_priv = engine->i915;
        const u32 base = engine->mmio_base;
        unsigned int i;

        if (!w)
                return;

        intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);

        for (i = 0; i < w->count; i++)
                I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i),
                              i915_mmio_reg_offset(w->reg[i]));

        /* And clear the rest just in case of garbage */
        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
                I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid);

        intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
}

void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine)
{
        struct whitelist w;

        whitelist_apply(engine, whitelist_build(engine, &w));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_workarounds.c"
#endif