/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
#include "vi.h"
#include "vid.h"

#include "oss/oss_2_4_d.h"
#include "oss/oss_2_4_sh_mask.h"

#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "iceland_sdma_pkt_open.h"

#include "ivsrcid/ivsrcid_vislands30.h"

static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);

MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin");

static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	SDMA0_REGISTER_OFFSET,
	SDMA1_REGISTER_OFFSET
};

static const u32 golden_settings_iceland_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines. These engines are used for compute
 * and gfx. There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things. It also has support for tiling/detiling of
 * buffers.
 */
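
/**
 * sdma_v2_4_init_golden_registers - program golden register settings
 *
 * @adev: amdgpu_device pointer
 *
 * Apply the Iceland (Topaz) golden register overrides for clock gating
 * and the SDMA chicken bits before the engines are started.
 */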
static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		break;
	default:
		break;
	}
}
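
/**
 * sdma_v2_4_free_microcode - release the SDMA firmware images
 *
 * @adev: amdgpu_device pointer
 *
 * Drop the firmware references taken by sdma_v2_4_init_microcode().
 */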
static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		release_firmware(adev->sdma.instance[i].fw);
		adev->sdma.instance[i].fw = NULL;
	}
}

/**
 * sdma_v2_4_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err = 0, i;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct sdma_firmware_header_v1_0 *hdr;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	default:
		BUG();
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (i == 0)
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
		if (err)
			goto out;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
		adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
		if (adev->sdma.instance[i].feature_version >= 20)
			adev->sdma.instance[i].burst_nop = true;

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
			info->fw = adev->sdma.instance[i].fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name);
		for (i = 0; i < adev->sdma.num_instances; i++) {
			release_firmware(adev->sdma.instance[i].fw);
			adev->sdma.instance[i].fw = NULL;
		}
	}
	return err;
}

/**
 * sdma_v2_4_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware (VI+).
 */
static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	return ring->adev->wb.wb[ring->rptr_offs] >> 2;
}

/**
 * sdma_v2_4_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware (VI+).
 */
static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;

	return wptr;
}

/**
 * sdma_v2_4_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (VI+).
 */
static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
}
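
/**
 * sdma_v2_4_ring_insert_nop - insert NOP packets on the ring
 *
 * @ring: amdgpu ring pointer
 * @count: number of NOP dwords to insert
 *
 * Pad the ring with @count NOP dwords; when the firmware supports burst
 * NOPs the first dword encodes the burst length so the remaining dwords
 * are consumed as a single packet.
 */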
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->funcs->nop |
				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->funcs->nop);
}

/**
 * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (VI).
 */
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib,
				   unsigned vmid, bool ctx_switch)
{
	/* IB packet must end on an 8 DW boundary */
	sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
}

/**
 * sdma_v2_4_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask = 0;

	if (ring->me == 0)
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
	else
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
	amdgpu_ring_write(ring, ref_and_mask); /* reference */
	amdgpu_ring_write(ring, ref_and_mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

/**
 * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address the fence value is written to
 * @seq: fence sequence number to write
 * @flags: fence flags (e.g. AMDGPU_FENCE_FLAG_64BIT)
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (VI).
 */
static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}

/**
 * sdma_v2_4_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VI).
 */
static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
	u32 rb_cntl, ib_cntl;
	int i;

	if ((adev->mman.buffer_funcs_ring == sdma0) ||
	    (adev->mman.buffer_funcs_ring == sdma1))
		amdgpu_ttm_set_buffer_funcs_status(adev, false);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
	}
	sdma0->ready = false;
	sdma1->ready = false;
}

/**
 * sdma_v2_4_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues (VI).
 */
static void sdma_v2_4_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo */
}

/**
 * sdma_v2_4_enable - halt or unhalt the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (VI).
 */
static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl;
	int i;

	if (!enable) {
		sdma_v2_4_gfx_stop(adev);
		sdma_v2_4_rlc_stop(adev);
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
		if (enable)
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
		else
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
		WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
	}
}

/**
 * sdma_v2_4_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 wb_offset;
	int i, j, r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		wb_offset = (ring->rptr_offs * 4);

		mutex_lock(&adev->srbm_mutex);
		for (j = 0; j < 16; j++) {
			vi_srbm_select(adev, 0, 0, 0, j);
			/* SDMA GFX */
			WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
			WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
		}
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
		       adev->gfx.config.gb_addr_config & 0x70);

		WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
					RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);

		/* set the wb address whether it's enabled or not */
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);

		WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
		WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);

		/* enable DMA RB */
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
		/* enable DMA IBs */
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);

		ring->ready = true;
	}

	sdma_v2_4_enable(adev, true);
	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		r = amdgpu_ring_test_ring(ring);
		if (r) {
			ring->ready = false;
			return r;
		}

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}

	return 0;
}

/**
 * sdma_v2_4_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_rlc_resume(struct amdgpu_device *adev)
{
	/* XXX todo */
	return 0;
}

/**
 * sdma_v2_4_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v2_4_load_microcode(struct amdgpu_device *adev)
{
	const struct sdma_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	int i, j;

	/* halt the MEs */
	sdma_v2_4_enable(adev, false);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (!adev->sdma.instance[i].fw)
			return -EINVAL;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		amdgpu_ucode_print_sdma_hdr(&hdr->header);
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		fw_data = (const __le32 *)
			(adev->sdma.instance[i].fw->data +
			 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
		for (j = 0; j < fw_size; j++)
			WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
	}

	return 0;
}

/**
 * sdma_v2_4_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_start(struct amdgpu_device *adev)
{
	int r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = sdma_v2_4_load_microcode(adev);
		if (r)
			return r;
	}

	/* halt the engine before programming */
	sdma_v2_4_enable(adev, false);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v2_4_gfx_resume(adev);
	if (r)
		return r;
	r = sdma_v2_4_rlc_resume(adev);
	if (r)
		return r;

	return 0;
}

/**
 * sdma_v2_4_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_device_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	amdgpu_device_wb_free(adev, index);

	return r;
}

/**
 * sdma_v2_4_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (VI).
 * Returns 0 on success, error on failure.
 */
static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	u32 tmp = 0;
	u64 gpu_addr;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err0;
	}

	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}

/**
 * sdma_v2_4_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA (VI).
 */
static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = bytes;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(src);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}

/**
 * sdma_v2_4_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: value to write into the page table entries
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using sDMA (VI).
 */
static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				   uint64_t value, unsigned count,
				   uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib->ptr[ib->length_dw++] = pe;
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = ndw;
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}

/**
 * sdma_v2_4_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: destination address to write into the PTEs
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (VI).
 */
static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint64_t flags)
{
	/* for physically contiguous pages (vram) */
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = incr; /* increment size */
	ib->ptr[ib->length_dw++] = 0;
	ib->ptr[ib->length_dw++] = count; /* number of entries */
}

/**
 * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a multiple of 8 dwords.
 */
static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	u32 pad_count;
	int i;

	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
	for (i = 0; i < pad_count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
		else
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}

/**
 * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (VI).
 */
static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}

/**
 * sdma_v2_4_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM ID to flush
 * @pd_addr: page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (VI).
 */
static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for flush */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* reference */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
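
/**
 * sdma_v2_4_ring_emit_wreg - write a register via the ring
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset to write
 * @val: value to write
 *
 * Emit an SRBM write packet so the engine writes the value for us.
 */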
static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, val);
}
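
/*
 * IP block callbacks: early_init fixes the number of SDMA instances and
 * wires up the ring, buffer, VM PTE and IRQ function tables.
 */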
static int sdma_v2_4_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->sdma.num_instances = SDMA_MAX_INSTANCE;

	sdma_v2_4_set_ring_funcs(adev);
	sdma_v2_4_set_buffer_funcs(adev);
	sdma_v2_4_set_vm_pte_funcs(adev);
	sdma_v2_4_set_irq_funcs(adev);

	return 0;
}
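
/*
 * sw_init registers the SDMA interrupt sources, loads the microcode and
 * creates one gfx ring per SDMA instance.
 */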
static int sdma_v2_4_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* SDMA trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	/* SDMA Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
			      &adev->sdma.illegal_inst_irq);
	if (r)
		return r;

	/* SDMA SRBM write */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
			      &adev->sdma.illegal_inst_irq);
	if (r)
		return r;

	r = sdma_v2_4_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load sdma firmware!\n");
		return r;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = false;
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     (i == 0) ?
				     AMDGPU_SDMA_IRQ_TRAP0 :
				     AMDGPU_SDMA_IRQ_TRAP1);
		if (r)
			return r;
	}

	return r;
}

static int sdma_v2_4_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	sdma_v2_4_free_microcode(adev);
	return 0;
}

static int sdma_v2_4_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v2_4_init_golden_registers(adev);

	r = sdma_v2_4_start(adev);
	if (r)
		return r;

	return r;
}

static int sdma_v2_4_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v2_4_enable(adev, false);

	return 0;
}

static int sdma_v2_4_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v2_4_hw_fini(adev);
}

static int sdma_v2_4_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v2_4_hw_init(adev);
}

static bool sdma_v2_4_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
		   SRBM_STATUS2__SDMA1_BUSY_MASK))
		return false;

	return true;
}

static int sdma_v2_4_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
						SRBM_STATUS2__SDMA1_BUSY_MASK);

		if (!tmp)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
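
/*
 * soft_reset: if an SDMA engine is still reported busy, unhalt it and pulse
 * the corresponding SRBM soft-reset bit to bring it back to a known state.
 */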
static int sdma_v2_4_soft_reset(void *handle)
{
	u32 srbm_soft_reset = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
		/* sdma0 */
		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
		WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
	}
	if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
		/* sdma1 */
		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
		WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
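
/*
 * Trap interrupt control: toggle the TRAP_ENABLE bit in SDMA0/1_CNTL for
 * the requested instance and interrupt state.
 */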
static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *src,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_TRAP0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_TRAP1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}
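
/*
 * Trap handler: decode the instance and queue from the IV ring_id and
 * signal fence completion on the matching gfx ring.
 */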
static int sdma_v2_4_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	u8 instance_id, queue_id;

	instance_id = (entry->ring_id & 0x3) >> 0;
	queue_id = (entry->ring_id & 0xc) >> 2;
	DRM_DEBUG("IH: SDMA trap\n");
	switch (instance_id) {
	case 0:
		switch (queue_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[0].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		}
		break;
	case 1:
		switch (queue_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[1].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		}
		break;
	}
	return 0;
}

static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in SDMA command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int sdma_v2_4_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	/* XXX handled via the smc on VI */
	return 0;
}

static int sdma_v2_4_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}

static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
	.name = "sdma_v2_4",
	.early_init = sdma_v2_4_early_init,
	.late_init = NULL,
	.sw_init = sdma_v2_4_sw_init,
	.sw_fini = sdma_v2_4_sw_fini,
	.hw_init = sdma_v2_4_hw_init,
	.hw_fini = sdma_v2_4_hw_fini,
	.suspend = sdma_v2_4_suspend,
	.resume = sdma_v2_4_resume,
	.is_idle = sdma_v2_4_is_idle,
	.wait_for_idle = sdma_v2_4_wait_for_idle,
	.soft_reset = sdma_v2_4_soft_reset,
	.set_clockgating_state = sdma_v2_4_set_clockgating_state,
	.set_powergating_state = sdma_v2_4_set_powergating_state,
};

static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = false,
	.get_rptr = sdma_v2_4_ring_get_rptr,
	.get_wptr = sdma_v2_4_ring_get_wptr,
	.set_wptr = sdma_v2_4_ring_set_wptr,
	.emit_frame_size =
		6 + /* sdma_v2_4_ring_emit_hdp_flush */
		3 + /* hdp invalidate */
		6 + /* sdma_v2_4_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */
		10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */
	.emit_ib = sdma_v2_4_ring_emit_ib,
	.emit_fence = sdma_v2_4_ring_emit_fence,
	.emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
	.test_ring = sdma_v2_4_ring_test_ring,
	.test_ib = sdma_v2_4_ring_test_ib,
	.insert_nop = sdma_v2_4_ring_insert_nop,
	.pad_ib = sdma_v2_4_ring_pad_ib,
	.emit_wreg = sdma_v2_4_ring_emit_wreg,
};

static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs;
		adev->sdma.instance[i].ring.me = i;
	}
}

static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = {
	.set = sdma_v2_4_set_trap_irq_state,
	.process = sdma_v2_4_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v2_4_illegal_inst_irq_funcs = {
	.process = sdma_v2_4_process_illegal_inst_irq,
};

static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &sdma_v2_4_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v2_4_illegal_inst_irq_funcs;
}

/**
 * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Copy GPU buffers using the DMA engine (VI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = byte_count;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}

/**
 * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (VI).
 */
static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib,
				       uint32_t src_data,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = byte_count;
}

static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
	.copy_max_bytes = 0x1fffff,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v2_4_emit_copy_buffer,

	.fill_max_bytes = 0x1fffff,
	.fill_num_dw = 7,
	.emit_fill_buffer = sdma_v2_4_emit_fill_buffer,
};

static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs == NULL) {
		adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
	}
}

static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
	.copy_pte_num_dw = 7,
	.copy_pte = sdma_v2_4_vm_copy_pte,

	.write_pte = sdma_v2_4_vm_write_pte,
	.set_pte_pde = sdma_v2_4_vm_set_pte_pde,
};

static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vm_manager.vm_pte_funcs == NULL) {
		adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
		for (i = 0; i < adev->sdma.num_instances; i++)
			adev->vm_manager.vm_pte_rings[i] =
				&adev->sdma.instance[i].ring;

		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
	}
}

const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 2,
	.minor = 4,
	.rev = 0,
	.funcs = &sdma_v2_4_ip_funcs,
};