/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"

#include "sdma0/sdma0_4_2_offset.h"
#include "sdma0/sdma0_4_2_sh_mask.h"
#include "sdma1/sdma1_4_2_offset.h"
#include "sdma1/sdma1_4_2_sh_mask.h"
#include "hdp/hdp_4_0_offset.h"
#include "sdma0/sdma0_4_1_default.h"

#include "soc15_common.h"
#include "soc15.h"
#include "vega10_sdma_pkt_open.h"

#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"

MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");

#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L

static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
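
/*
 * Per-ASIC "golden" register settings applied at hardware init: each
 * SOC15_REG_GOLDEN_VALUE() entry names an SDMA register together with an
 * and-mask and or-value that soc15_program_register_sequence() uses to
 * patch the power-on default.
 */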
static const struct soc15_reg_golden golden_settings_sdma_4[] = {
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xff000ff0, 0x3f000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0100, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003ff006, 0x0003c000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_POWER_CNTL, 0x003ff000, 0x0003c000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};

static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
};

static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
};

static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0xfc3fffff, 0x40000051),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};

static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = {
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
};

static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
{
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RD_BURST_CNTL, 0x0000000f, 0x00000003),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xFE000000, 0x00000000),
};

static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RD_BURST_CNTL, 0x0000000f, 0x00000003),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xFE000000, 0x00000000),
};

static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
};

static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00003001),
	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
};
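
/*
 * The mmSDMA0_* offsets used throughout this file are relative to the SDMA0
 * register aperture; this helper rebases them onto the aperture of the
 * requested instance (SDMA0 or SDMA1) recorded in adev->reg_offset.
 */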
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
		u32 instance, u32 offset)
{
	return (0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) :
		(adev->reg_offset[SDMA1_HWIP][0][0] + offset));
}

static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_sdma_4,
						ARRAY_SIZE(golden_settings_sdma_4));
		soc15_program_register_sequence(adev,
						golden_settings_sdma_vg10,
						ARRAY_SIZE(golden_settings_sdma_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_sdma_4,
						ARRAY_SIZE(golden_settings_sdma_4));
		soc15_program_register_sequence(adev,
						golden_settings_sdma_vg12,
						ARRAY_SIZE(golden_settings_sdma_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_sdma0_4_2_init,
						ARRAY_SIZE(golden_settings_sdma0_4_2_init));
		soc15_program_register_sequence(adev,
						golden_settings_sdma0_4_2,
						ARRAY_SIZE(golden_settings_sdma0_4_2));
		soc15_program_register_sequence(adev,
						golden_settings_sdma1_4_2,
						ARRAY_SIZE(golden_settings_sdma1_4_2));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev,
						golden_settings_sdma_4_1,
						ARRAY_SIZE(golden_settings_sdma_4_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_sdma_rv2,
							ARRAY_SIZE(golden_settings_sdma_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_sdma_rv1,
							ARRAY_SIZE(golden_settings_sdma_rv1));
		break;
	default:
		break;
	}
}

/**
 * sdma_v4_0_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */

// emulation only, won't work on a real chip
// a real vega10 chip needs to use the PSP to load firmware
static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err = 0, i;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct sdma_firmware_header_v1_0 *hdr;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	default:
		BUG();
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (i == 0)
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
		if (err)
			goto out;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
		adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
		if (adev->sdma.instance[i].feature_version >= 20)
			adev->sdma.instance[i].burst_nop = true;
		DRM_DEBUG("psp_load == '%s'\n",
				adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
			info->fw = adev->sdma.instance[i].fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}
out:
	if (err) {
		DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
		for (i = 0; i < adev->sdma.num_instances; i++) {
			release_firmware(adev->sdma.instance[i].fw);
			adev->sdma.instance[i].fw = NULL;
		}
	}
	return err;
}

/**
 * sdma_v4_0_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware (VEGA10+).
 */
static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	u64 *rptr;

	/* XXX check if swapping is necessary on BE */
	rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);

	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
	return ((*rptr) >> 2);
}

/**
 * sdma_v4_0_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware (VEGA10+).
 */
static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
	} else {
		u32 lowbit, highbit;

		lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
		highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;

		DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
				ring->me, highbit, lowbit);
		wptr = highbit;
		wptr = wptr << 32;
		wptr |= lowbit;
	}

	return wptr >> 2;
}

/**
 * sdma_v4_0_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (VEGA10+).
 */
static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	DRM_DEBUG("Setting write pointer\n");
	if (ring->use_doorbell) {
		u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];

		DRM_DEBUG("Using doorbell -- "
				"wptr_offs == 0x%08x "
				"lower_32_bits(ring->wptr) << 2 == 0x%08x "
				"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
				ring->wptr_offs,
				lower_32_bits(ring->wptr << 2),
				upper_32_bits(ring->wptr << 2));
		/* XXX check if swapping is necessary on BE */
		WRITE_ONCE(*wb, (ring->wptr << 2));
		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
				ring->doorbell_index, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		DRM_DEBUG("Not using doorbell -- "
				"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
				ring->me,
				lower_32_bits(ring->wptr << 2),
				ring->me,
				upper_32_bits(ring->wptr << 2));
		WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
		WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
	}
}
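
/*
 * Emit @count NOP dwords on the ring.  When the firmware supports burst
 * NOPs (burst_nop is set for feature_version >= 20), the first NOP header
 * carries a count so the remaining dwords are consumed as its payload.
 */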
static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->funcs->nop |
				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->funcs->nop);
}

/**
 * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (VEGA10).
 */
static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib,
				   unsigned vmid, bool ctx_switch)
{
	/* IB packet must end on an 8 DW boundary */
	sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
}
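
/*
 * Emit an SDMA POLL_REGMEM packet that polls either a memory location
 * (@mem_space != 0) or a register until (value & @mask) == @ref; @hdp marks
 * the packet as an HDP flush and @inv sets the poll interval.
 */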
static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
				   int mem_space, int hdp,
				   uint32_t addr0, uint32_t addr1,
				   uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	if (mem_space) {
		/* memory */
		amdgpu_ring_write(ring, addr0);
		amdgpu_ring_write(ring, addr1);
	} else {
		/* registers */
		amdgpu_ring_write(ring, addr0 << 2);
		amdgpu_ring_write(ring, addr1 << 2);
	}
	amdgpu_ring_write(ring, ref); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
}

/**
 * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 ref_and_mask = 0;
	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;

	if (ring->me == 0)
		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
	else
		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;

	sdma_v4_0_wait_reg_mem(ring, 0, 1,
			       adev->nbio_funcs->get_hdp_flush_done_offset(adev),
			       adev->nbio_funcs->get_hdp_flush_req_offset(adev),
			       ref_and_mask, ref_and_mask, 10);
}

/**
 * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @fence: amdgpu fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (VEGA10).
 */
static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;

	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}

/**
 * sdma_v4_0_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VEGA10).
 */
static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
	u32 rb_cntl, ib_cntl;
	int i;

	if ((adev->mman.buffer_funcs_ring == sdma0) ||
	    (adev->mman.buffer_funcs_ring == sdma1))
		amdgpu_ttm_set_buffer_funcs_status(adev, false);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
	}

	sdma0->ready = false;
	sdma1->ready = false;
}

/**
 * sdma_v4_0_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues (VEGA10).
 */
static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo */
}

/**
 * sdma_v4_0_ctx_switch_enable - enable/disable the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 *
 * Halt or unhalt the async dma engines context switch (VEGA10).
 */
static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl, phase_quantum = 0;
	int i;
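
	/*
	 * Encode the requested quantum as value * 2^unit so it fits the
	 * VALUE/UNIT fields of the SDMA0_PHASE*_QUANTUM registers, clamping
	 * to the largest representable quantum if it overflows.
	 */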
	if (amdgpu_sdma_phase_quantum) {
		unsigned value = amdgpu_sdma_phase_quantum;
		unsigned unit = 0;

		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
			value = (value + 1) >> 1;
			unit++;
		}
		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
			WARN_ONCE(1,
			"clamping sdma_phase_quantum to %uK clock cycles\n",
				  value << unit);
		}
		phase_quantum =
			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
			unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
		if (enable && amdgpu_sdma_phase_quantum) {
			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
			       phase_quantum);
			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
			       phase_quantum);
			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
			       phase_quantum);
		}
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
	}
}

/**
 * sdma_v4_0_enable - enable/disable the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (VEGA10).
 */
static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl;
	int i;

	if (!enable) {
		sdma_v4_0_gfx_stop(adev);
		sdma_v4_0_rlc_stop(adev);
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
	}
}

/**
 * sdma_v4_0_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (VEGA10).
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
	u32 rb_bufsz;
	u32 wb_offset;
	u32 doorbell;
	u32 doorbell_offset;
	u32 temp;
	u64 wptr_gpu_addr;
	int i, r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		wb_offset = (ring->rptr_offs * 4);

		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
					RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);

		/* set the wb address whether it's enabled or not */
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);

		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);

		ring->wptr = 0;

		/* before programming wptr to a smaller value, need to set minor_ptr_update first */
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);

		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal uses register writes for wptr */
			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
		}

		doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
		doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));

		if (ring->use_doorbell) {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
			doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
					OFFSET, ring->doorbell_index);
		} else {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
		}
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
		adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
						      ring->doorbell_index);

		if (amdgpu_sriov_vf(adev))
			sdma_v4_0_ring_set_wptr(ring);

		/* set minor_ptr_update to 0 after wptr is programmed */
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);

		/* set utc l1 enable flag always to 1 */
		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);

		if (!amdgpu_sriov_vf(adev)) {
			/* unhalt engine */
			temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
		}

		/* setup the wptr shadow polling */
		wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
		       lower_32_bits(wptr_gpu_addr));
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
		       upper_32_bits(wptr_gpu_addr));
		wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
		if (amdgpu_sriov_vf(adev))
			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
		else
			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);

		/* enable DMA RB */
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);

		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
		/* enable DMA IBs */
		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);

		ring->ready = true;

		if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
			sdma_v4_0_ctx_switch_enable(adev, true);
			sdma_v4_0_enable(adev, true);
		}

		r = amdgpu_ring_test_ring(ring);
		if (r) {
			ring->ready = false;
			return r;
		}

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}

	return 0;
}
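
/*
 * SDMA 4.1 (Raven) power gating: the context-empty idle interrupt is
 * enabled only while SDMA power gating is supported and being turned on,
 * and disabled otherwise.
 */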
static void
sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
{
	uint32_t def, data;

	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) {
		/* enable idle interrupt */
		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
		data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;

		if (data != def)
			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
	} else {
		/* disable idle interrupt */
		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
		data &= ~SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
		if (data != def)
			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
	}
}

static void sdma_v4_1_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t def, data;

	/* Enable HW based PG. */
	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
	data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK;
	if (data != def)
		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);

	/* enable interrupt */
	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
	data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
	if (data != def)
		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);

	/* Configure hold time to filter invalid power on/off requests. Use default right now */
	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
	data &= ~SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK;
	data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK);
	/* Configure switch time for hysteresis purpose. Use default right now */
	data &= ~SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK;
	data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK);
	if (data != def)
		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
}

static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
{
	if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA))
		return;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		sdma_v4_1_init_power_gating(adev);
		sdma_v4_1_update_power_gating(adev, true);
		break;
	default:
		break;
	}
}

/**
 * sdma_v4_0_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them (VEGA10).
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev)
{
	sdma_v4_0_init_pg(adev);

	return 0;
}

/**
 * sdma_v4_0_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
{
	const struct sdma_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	int i, j;

	/* halt the MEs */
	sdma_v4_0_enable(adev, false);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (!adev->sdma.instance[i].fw)
			return -EINVAL;

		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		amdgpu_ucode_print_sdma_hdr(&hdr->header);
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

		fw_data = (const __le32 *)
			(adev->sdma.instance[i].fw->data +
				le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);

		for (j = 0; j < fw_size; j++)
			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));

		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
	}

	return 0;
}

/**
 * sdma_v4_0_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them (VEGA10).
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_0_start(struct amdgpu_device *adev)
{
	int r = 0;

	if (amdgpu_sriov_vf(adev)) {
		sdma_v4_0_ctx_switch_enable(adev, false);
		sdma_v4_0_enable(adev, false);

		/* set RB registers */
		r = sdma_v4_0_gfx_resume(adev);
		return r;
	}

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		r = sdma_v4_0_load_microcode(adev);
		if (r)
			return r;
	}

	/* unhalt the MEs */
	sdma_v4_0_enable(adev, true);
	/* enable sdma ring preemption */
	sdma_v4_0_ctx_switch_enable(adev, true);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v4_0_gfx_resume(adev);
	if (r)
		return r;
	r = sdma_v4_0_rlc_resume(adev);

	return r;
}

/**
 * sdma_v4_0_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (VEGA10).
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_device_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	amdgpu_device_wb_free(adev, index);

	return r;
}

/**
 * sdma_v4_0_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (VEGA10).
 * Returns 0 on success, error on failure.
 */
static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err0;
	}

	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}

/**
 * sdma_v4_0_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA (VEGA10).
 */
static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = bytes - 1;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(src);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}

/**
 * sdma_v4_0_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using sDMA (VEGA10).
 */
static void sdma_v4_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				   uint64_t value, unsigned count,
				   uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = ndw - 1;
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}

/**
 * sdma_v4_0_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (VEGA10).
 */
static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib,
				     uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint64_t flags)
{
	/* for physically contiguous pages (vram) */
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = incr; /* increment size */
	ib->ptr[ib->length_dw++] = 0;
	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
}

/**
 * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a multiple of 8 dwords.
 */
static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	u32 pad_count;
	int i;

	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
	for (i = 0; i < pad_count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
		else
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}

/**
 * sdma_v4_0_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (CIK).
 */
static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	sdma_v4_0_wait_reg_mem(ring, 1, 0,
			       addr & 0xfffffffc,
			       upper_32_bits(addr) & 0xffffffff,
			       seq, 0xffffffff, 4);
}

/**
 * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vm: amdgpu_vm pointer
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (VEGA10).
 */
static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}
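
/* Write @val to register @reg from the ring, using an SRBM_WRITE packet. */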

static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, val);
}

static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
}
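
/*
 * Illustrative sketch (editorial, not part of the original driver):
 * emit_wreg and emit_reg_wait are the primitives the generic GMC
 * helper used in sdma_v4_0_ring_emit_vm_flush() builds on; a TLB
 * flush is expressed roughly as
 *
 *	sdma_v4_0_ring_emit_wreg(ring, req_reg, flush_request);
 *	sdma_v4_0_ring_emit_reg_wait(ring, ack_reg, 1 << vmid, 1 << vmid);
 *
 * where req_reg and ack_reg are placeholder names standing in for the
 * hub's invalidation request and acknowledge registers, not the real
 * register identifiers.
 */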

static int sdma_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_RAVEN)
		adev->sdma.num_instances = 1;
	else
		adev->sdma.num_instances = 2;

	sdma_v4_0_set_ring_funcs(adev);
	sdma_v4_0_set_buffer_funcs(adev);
	sdma_v4_0_set_vm_pte_funcs(adev);
	sdma_v4_0_set_irq_funcs(adev);

	return 0;
}

static int sdma_v4_0_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* SDMA trap event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	/* SDMA trap event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	r = sdma_v4_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load sdma firmware!\n");
		return r;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = true;

		DRM_INFO("use_doorbell being set to: [%s]\n",
				ring->use_doorbell ? "true" : "false");
		if (adev->asic_type == CHIP_VEGA10)
			ring->doorbell_index = (i == 0) ?
				(AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 << 1) /* get DWORD offset */
				: (AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 << 1); /* get DWORD offset */
		else
			ring->doorbell_index = (i == 0) ?
				(AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) /* get DWORD offset */
				: (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); /* get DWORD offset */

		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     (i == 0) ?
				     AMDGPU_SDMA_IRQ_TRAP0 :
				     AMDGPU_SDMA_IRQ_TRAP1);
		if (r)
			return r;
	}

	return r;
}
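
/*
 * Illustrative note (editorial, not part of the original driver): the
 * doorbell indices above are shifted left by one because the
 * AMDGPU_*_DOORBELL64_* enums count 64-bit doorbell slots while the
 * doorbell aperture is indexed in 32-bit dwords, hence the
 * "get DWORD offset" conversion.  Assuming engine 0's 64-bit slot is N,
 * the ring ends up using dword index 2 * N.
 */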

static int sdma_v4_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		release_firmware(adev->sdma.instance[i].fw);
		adev->sdma.instance[i].fw = NULL;
	}

	return 0;
}

static int sdma_v4_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
			adev->powerplay.pp_funcs->set_powergating_by_smu)
		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);

	sdma_v4_0_init_golden_registers(adev);

	r = sdma_v4_0_start(adev);

	return r;
}

static int sdma_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	sdma_v4_0_ctx_switch_enable(adev, false);
	sdma_v4_0_enable(adev, false);

	if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
			&& adev->powerplay.pp_funcs->set_powergating_by_smu)
		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);

	return 0;
}
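
/*
 * Illustrative note (editorial, not part of the original driver): on
 * Raven the SDMA block is power-gated through the SMU, so hw_init asks
 * the SMU to ungate (false) before programming the engine and hw_fini
 * re-gates it (true) once the engine has been halted.  SR-IOV guests
 * skip the teardown, presumably because the host retains ownership of
 * the engine state.
 */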

static int sdma_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v4_0_hw_fini(adev);
}

static int sdma_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v4_0_hw_init(adev);
}

static bool sdma_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));

		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
			return false;
	}

	return true;
}

static int sdma_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 sdma0, sdma1;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
		sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));

		if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
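
/*
 * Illustrative note (editorial, not part of the original driver):
 * `sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK` is only non-zero when
 * the IDLE bit is set in both status registers, so the poll returns 0
 * as soon as both engines report idle and -ETIMEDOUT after roughly
 * adev->usec_timeout microseconds otherwise.  Unlike
 * sdma_v4_0_is_idle() above, it always samples both instances rather
 * than looping over num_instances.
 */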

static int sdma_v4_0_soft_reset(void *handle)
{
	/* todo */

	return 0;
}

static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
		sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
		sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);

	sdma_cntl = RREG32(reg_offset);
	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
				  state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
	WREG32(reg_offset, sdma_cntl);

	return 0;
}

static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: SDMA trap\n");
	switch (entry->client_id) {
	case SOC15_IH_CLIENTID_SDMA0:
		switch (entry->ring_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[0].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		case 3:
			/* XXX page queue */
			break;
		}
		break;
	case SOC15_IH_CLIENTID_SDMA1:
		switch (entry->ring_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[1].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		case 3:
			/* XXX page queue */
			break;
		}
		break;
	}
	return 0;
}

static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in SDMA command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static void sdma_v4_0_update_medium_grain_clock_gating(
		struct amdgpu_device *adev,
		bool enable)
{
	uint32_t data, def;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
		/* enable sdma0 clock gating */
		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
		data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
		if (def != data)
			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);

		if (adev->sdma.num_instances > 1) {
			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
			data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
				  SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
				  SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
				  SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
				  SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
				  SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
				  SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
				  SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
			if (def != data)
				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
		}
	} else {
		/* disable sdma0 clock gating */
		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
		data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
			 SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
			 SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
			 SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
			 SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
			 SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
			 SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
			 SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
		if (def != data)
			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);

		if (adev->sdma.num_instances > 1) {
			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
			data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
				 SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
				 SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
				 SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
				 SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
				 SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
				 SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
				 SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
			if (def != data)
				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
		}
	}
}
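
/*
 * Illustrative note (editorial, not part of the original driver):
 * medium-grain clock gating is controlled indirectly here; clearing
 * the SOFT_OVERRIDE0-7 bits presumably lets the hardware gate its
 * clocks automatically ("enable sdma clock gating"), while setting
 * them forces the clocks on and effectively disables MGCG.  The
 * read-modify-write with the `def != data` check avoids a register
 * write when nothing would change.
 */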

static void sdma_v4_0_update_medium_grain_light_sleep(
		struct amdgpu_device *adev,
		bool enable)
{
	uint32_t data, def;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
		/* 1-not override: enable sdma0 mem light sleep */
		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
		data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
		if (def != data)
			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);

		/* 1-not override: enable sdma1 mem light sleep */
		if (adev->sdma.num_instances > 1) {
			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
			data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
		}
	} else {
		/* 0-override: disable sdma0 mem light sleep */
		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
		data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
		if (def != data)
			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);

		/* 0-override: disable sdma1 mem light sleep */
		if (adev->sdma.num_instances > 1) {
			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
			data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
		}
	}
}
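
/*
 * Illustrative note (editorial, not part of the original driver): note
 * the polarity relative to the clock-gating path above; per the inline
 * comments, MEM_POWER_OVERRIDE = 1 means "do not override" (memory
 * light sleep is allowed), while 0 applies the override and keeps the
 * SDMA memories out of light sleep.
 */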

static int sdma_v4_0_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		sdma_v4_0_update_medium_grain_clock_gating(adev,
				state == AMD_CG_STATE_GATE ? true : false);
		sdma_v4_0_update_medium_grain_light_sleep(adev,
				state == AMD_CG_STATE_GATE ? true : false);
		break;
	default:
		break;
	}
	return 0;
}

static int sdma_v4_0_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		sdma_v4_1_update_power_gating(adev,
				state == AMD_PG_STATE_GATE ? true : false);
		break;
	default:
		break;
	}

	return 0;
}

static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_SDMA_MGCG */
	data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
	if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
		*flags |= AMD_CG_SUPPORT_SDMA_MGCG;

	/* AMD_CG_SUPPORT_SDMA_LS */
	data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
	if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
		*flags |= AMD_CG_SUPPORT_SDMA_LS;
}

const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
	.name = "sdma_v4_0",
	.early_init = sdma_v4_0_early_init,
	.late_init = NULL,
	.sw_init = sdma_v4_0_sw_init,
	.sw_fini = sdma_v4_0_sw_fini,
	.hw_init = sdma_v4_0_hw_init,
	.hw_fini = sdma_v4_0_hw_fini,
	.suspend = sdma_v4_0_suspend,
	.resume = sdma_v4_0_resume,
	.is_idle = sdma_v4_0_is_idle,
	.wait_for_idle = sdma_v4_0_wait_for_idle,
	.soft_reset = sdma_v4_0_soft_reset,
	.set_clockgating_state = sdma_v4_0_set_clockgating_state,
	.set_powergating_state = sdma_v4_0_set_powergating_state,
	.get_clockgating_state = sdma_v4_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = sdma_v4_0_ring_get_rptr,
	.get_wptr = sdma_v4_0_ring_get_wptr,
	.set_wptr = sdma_v4_0_ring_set_wptr,
	.emit_frame_size =
		6 + /* sdma_v4_0_ring_emit_hdp_flush */
		3 + /* hdp invalidate */
		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
		/* sdma_v4_0_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
	.emit_ib = sdma_v4_0_ring_emit_ib,
	.emit_fence = sdma_v4_0_ring_emit_fence,
	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
	.test_ring = sdma_v4_0_ring_test_ring,
	.test_ib = sdma_v4_0_ring_test_ib,
	.insert_nop = sdma_v4_0_ring_insert_nop,
	.pad_ib = sdma_v4_0_ring_pad_ib,
	.emit_wreg = sdma_v4_0_ring_emit_wreg,
	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
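
/*
 * Illustrative note (editorial, not part of the original driver):
 * emit_frame_size is a worst-case dword budget per submission rather
 * than a packet itself.  Per the inline comments it sums the HDP flush
 * and invalidate, the pipeline sync, the per-register cost of the TLB
 * flush (3 dwords per write and 6 per register wait, multiplied by the
 * SOC15_FLUSH_GPU_TLB_* counts) and three 10-dword fence emissions, so
 * the ring core can reserve enough space before invoking the callbacks
 * above.
 */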

static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
		adev->sdma.instance[i].ring.me = i;
	}
}

static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = {
	.set = sdma_v4_0_set_trap_irq_state,
	.process = sdma_v4_0_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
	.process = sdma_v4_0_process_illegal_inst_irq,
};

static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
}

/**
 * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to copy to
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Copy GPU buffers using the DMA engine (VEGA10/12).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = byte_count - 1;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
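
/*
 * Illustrative note (editorial, not part of the original driver): the
 * COPY_LINEAR packet above is exactly seven dwords, matching the
 * .copy_num_dw advertised in sdma_v4_0_buffer_funcs below, and
 * byte_count is expected to stay within .copy_max_bytes (0x400000,
 * i.e. 4 MiB) per packet; larger moves are presumably split into
 * multiple packets by the generic buffer-move code.
 */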

/**
 * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to fill
 *
 * Fill GPU buffers using the DMA engine (VEGA10/12).
 */
static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib,
				       uint32_t src_data,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = byte_count - 1;
}
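
/*
 * Illustrative note (editorial, not part of the original driver): the
 * CONST_FILL packet is five dwords, matching .fill_num_dw below, and
 * writes the 32-bit src_data pattern across byte_count bytes starting
 * at dst_offset, again capped at .fill_max_bytes per packet.
 */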

static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
	.copy_max_bytes = 0x400000,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v4_0_emit_copy_buffer,

	.fill_max_bytes = 0x400000,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v4_0_emit_fill_buffer,
};

static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
{
	adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}

static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
	.copy_pte_num_dw = 7,
	.copy_pte = sdma_v4_0_vm_copy_pte,

	.write_pte = sdma_v4_0_vm_write_pte,
	.set_pte_pde = sdma_v4_0_vm_set_pte_pde,
};

static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	struct drm_gpu_scheduler *sched;
	unsigned i;

	adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
	for (i = 0; i < adev->sdma.num_instances; i++) {
		sched = &adev->sdma.instance[i].ring.sched;
		adev->vm_manager.vm_pte_rqs[i] =
			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
	}
	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}

const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &sdma_v4_0_ip_funcs,
};