sdma_v2_4.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
#include "vi.h"
#include "vid.h"

#include "oss/oss_2_4_d.h"
#include "oss/oss_2_4_sh_mask.h"

#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "iceland_sdma_pkt_open.h"

#include "ivsrcid/ivsrcid_vislands30.h"

static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);

MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin");

static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	SDMA0_REGISTER_OFFSET,
	SDMA1_REGISTER_OFFSET
};
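/*
 * Note: each entry in the tables below is a (register, AND mask, OR value)
 * triplet consumed by amdgpu_device_program_register_sequence(): roughly,
 * the bits selected by the AND mask are cleared in the current register
 * value and the OR value is applied on top (an AND mask of 0xffffffff
 * writes the value directly).
 */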
static const u32 golden_settings_iceland_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines. These engines are used for compute
 * and gfx. There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things. It also has support for tiling/detiling of
 * buffers.
 */
static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		break;
	default:
		break;
	}
}

static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		release_firmware(adev->sdma.instance[i].fw);
		adev->sdma.instance[i].fw = NULL;
	}
}

/**
 * sdma_v2_4_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err = 0, i;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct sdma_firmware_header_v1_0 *hdr;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	default: BUG();
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (i == 0)
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
		if (err)
			goto out;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
		adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
		if (adev->sdma.instance[i].feature_version >= 20)
			adev->sdma.instance[i].burst_nop = true;

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
			info->fw = adev->sdma.instance[i].fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name);
		for (i = 0; i < adev->sdma.num_instances; i++) {
			release_firmware(adev->sdma.instance[i].fw);
			adev->sdma.instance[i].fw = NULL;
		}
	}
	return err;
}
/**
 * sdma_v2_4_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware (VI+).
 */
static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	return ring->adev->wb.wb[ring->rptr_offs] >> 2;
}
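/*
 * Note: the rptr writeback slot and the SDMA0_GFX_RB_WPTR register hold byte
 * offsets into the ring, while amdgpu tracks ring->rptr/ring->wptr in dwords;
 * hence the >> 2 above and the << 2 in sdma_v2_4_ring_set_wptr() below.
 */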
/**
 * sdma_v2_4_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware (VI+).
 */
static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;

	return wptr;
}

/**
 * sdma_v2_4_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (VI+).
 */
static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
}
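/*
 * Pad the ring with NOP packets.  With newer SDMA firmware (feature_version
 * >= 20, see sdma_v2_4_init_microcode()) a single NOP header can carry a
 * COUNT field, so one "burst" NOP packet covers the remaining count - 1
 * padding dwords; otherwise each padding dword is an individual NOP packet.
 */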
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->funcs->nop |
				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->funcs->nop);
}
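/*
 * The INDIRECT_BUFFER packet emitted below is 6 dwords long and must end on
 * an 8-dword boundary, so the required padding works out to
 * (8 - 6 - wptr) mod 8 == (10 - (wptr & 7)) % 8 NOP dwords.
 */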
/**
 * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (VI).
 */
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib,
				   unsigned vmid, bool ctx_switch)
{
	/* IB packet must end on a 8 DW boundary */
	sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
}
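/*
 * Rough sketch of the POLL_REGMEM HDP flush below: with the HDP_FLUSH bit
 * set, the packet writes ref_and_mask (this engine's bit) to
 * GPU_HDP_FLUSH_REQ and then polls GPU_HDP_FLUSH_DONE, masked with the same
 * bit, until the two match (FUNC 3 == "equal"), retrying up to 0xfff times.
 */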
/**
 * sdma_v2_4_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask = 0;

	if (ring->me == 0)
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
	else
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
	amdgpu_ring_write(ring, ref_and_mask); /* reference */
	amdgpu_ring_write(ring, ref_and_mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
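/*
 * Fence emission below writes the sequence number to the fence address with
 * a FENCE packet (two packets for a 64-bit fence, low then high 32 bits) and
 * then issues a TRAP packet, which raises the SDMA trap interrupt serviced
 * by sdma_v2_4_process_trap_irq().
 */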
/**
 * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address to write the fence sequence number to
 * @seq: fence sequence number
 * @flags: fence flags (e.g. AMDGPU_FENCE_FLAG_64BIT)
 *
 * Add a DMA fence packet to the ring to write the fence seq number,
 * and a DMA trap packet to generate an interrupt if needed (VI).
 */
static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
/**
 * sdma_v2_4_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VI).
 */
static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
	u32 rb_cntl, ib_cntl;
	int i;

	if ((adev->mman.buffer_funcs_ring == sdma0) ||
	    (adev->mman.buffer_funcs_ring == sdma1))
		amdgpu_ttm_set_buffer_funcs_status(adev, false);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
	}
	sdma0->ready = false;
	sdma1->ready = false;
}

/**
 * sdma_v2_4_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues (VI).
 */
static void sdma_v2_4_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo */
}

/**
 * sdma_v2_4_enable - enable/disable the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (VI).
 */
static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl;
	int i;

	if (!enable) {
		sdma_v2_4_gfx_stop(adev);
		sdma_v2_4_rlc_stop(adev);
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
		if (enable)
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
		else
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
		WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
	}
}
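/*
 * Ring buffer programming notes for sdma_v2_4_gfx_resume() below: RB_SIZE is
 * encoded as log2 of the ring size in dwords (order_base_2(ring_size / 4)),
 * the rptr writeback address points at the ring's slot in adev->wb, and
 * RB_BASE/RB_BASE_HI take the ring's GPU address shifted right by 8, i.e. in
 * 256-byte units.
 */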
/**
 * sdma_v2_4_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 wb_offset;
	int i, j, r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		wb_offset = (ring->rptr_offs * 4);

		mutex_lock(&adev->srbm_mutex);
		for (j = 0; j < 16; j++) {
			vi_srbm_select(adev, 0, 0, 0, j);
			/* SDMA GFX */
			WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
			WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
		}
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
		       adev->gfx.config.gb_addr_config & 0x70);

		WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
					RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);

		/* set the wb address whether it's enabled or not */
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);

		WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
		WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);

		/* enable DMA RB */
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
		/* enable DMA IBs */
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);

		ring->ready = true;
	}

	sdma_v2_4_enable(adev, true);
	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		r = amdgpu_ring_test_ring(ring);
		if (r) {
			ring->ready = false;
			return r;
		}

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}

	return 0;
}
/**
 * sdma_v2_4_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_rlc_resume(struct amdgpu_device *adev)
{
	/* XXX todo */
	return 0;
}

/**
 * sdma_v2_4_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_start(struct amdgpu_device *adev)
{
	int r;

	/* halt the engine before programming */
	sdma_v2_4_enable(adev, false);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v2_4_gfx_resume(adev);
	if (r)
		return r;
	r = sdma_v2_4_rlc_resume(adev);
	if (r)
		return r;

	return 0;
}
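/*
 * The ring and IB tests below share the same scheme: allocate a writeback
 * dword, seed it with 0xCAFEDEAD, have the SDMA engine write 0xDEADBEEF to
 * it with a WRITE_LINEAR packet, and then poll (or wait on a fence) until
 * the new value shows up in memory.
 */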
/**
 * sdma_v2_4_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_device_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	amdgpu_device_wb_free(adev, index);

	return r;
}
/**
 * sdma_v2_4_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (VI).
 * Returns 0 on success, error on failure.
 */
static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	u32 tmp = 0;
	u64 gpu_addr;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err0;
	}

	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}
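/*
 * Three ways for the amdgpu VM code to update GPU page tables with SDMA:
 * copy_pte (COPY_LINEAR) copies ready-made entries out of the GART table,
 * write_pte (WRITE_LINEAR) writes explicit 64-bit values inline from the IB,
 * and set_pte_pde (GEN_PTEPDE) has the engine generate entries for
 * physically contiguous ranges from a base address, increment and flags.
 */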
/**
 * sdma_v2_4_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA (CIK).
 */
static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = bytes;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(src);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}

/**
 * sdma_v2_4_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using sDMA (CIK).
 */
static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				   uint64_t value, unsigned count,
				   uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib->ptr[ib->length_dw++] = pe;
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = ndw;
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}

/**
 * sdma_v2_4_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (CIK).
 */
static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint64_t flags)
{
	/* for physically contiguous pages (vram) */
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = incr; /* increment size */
	ib->ptr[ib->length_dw++] = 0;
	ib->ptr[ib->length_dw++] = count; /* number of entries */
}

/**
 * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 */
static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	u32 pad_count;
	int i;

	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
	for (i = 0; i < pad_count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
		else
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}
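/*
 * Pipeline sync below uses a POLL_REGMEM packet in memory-poll mode
 * (MEM_POLL(1)): the engine polls the ring's fence location until it reaches
 * the most recently emitted sync_seq, i.e. until all prior fences on this
 * ring have signalled.
 */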
/**
 * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (CIK).
 */
static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}

/**
 * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid to flush
 * @pd_addr: address of the page directory
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (VI).
 */
static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for flush */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* reference */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, val);
}

static int sdma_v2_4_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->sdma.num_instances = SDMA_MAX_INSTANCE;

	sdma_v2_4_set_ring_funcs(adev);
	sdma_v2_4_set_buffer_funcs(adev);
	sdma_v2_4_set_vm_pte_funcs(adev);
	sdma_v2_4_set_irq_funcs(adev);

	return 0;
}

static int sdma_v2_4_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* SDMA trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	/* SDMA Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
			      &adev->sdma.illegal_inst_irq);
	if (r)
		return r;

	/* SDMA Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
			      &adev->sdma.illegal_inst_irq);
	if (r)
		return r;

	r = sdma_v2_4_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load sdma firmware!\n");
		return r;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = false;
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     (i == 0) ?
				     AMDGPU_SDMA_IRQ_TRAP0 :
				     AMDGPU_SDMA_IRQ_TRAP1);
		if (r)
			return r;
	}

	return r;
}

static int sdma_v2_4_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	sdma_v2_4_free_microcode(adev);
	return 0;
}

static int sdma_v2_4_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v2_4_init_golden_registers(adev);

	r = sdma_v2_4_start(adev);
	if (r)
		return r;

	return r;
}

static int sdma_v2_4_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v2_4_enable(adev, false);

	return 0;
}

static int sdma_v2_4_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v2_4_hw_fini(adev);
}

static int sdma_v2_4_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v2_4_hw_init(adev);
}

static bool sdma_v2_4_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
		   SRBM_STATUS2__SDMA1_BUSY_MASK))
		return false;

	return true;
}

static int sdma_v2_4_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
				SRBM_STATUS2__SDMA1_BUSY_MASK);

		if (!tmp)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
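/*
 * Soft reset: for each SDMA instance that SRBM still reports busy, clear the
 * F32 HALT bit and then pulse the corresponding bit in SRBM_SOFT_RESET
 * (set, wait ~50us, clear, wait again) to bring the engine back to a known
 * state.
 */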
static int sdma_v2_4_soft_reset(void *handle)
{
	u32 srbm_soft_reset = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
		/* sdma0 */
		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
		WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
	}
	if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
		/* sdma1 */
		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
		WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *src,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_TRAP0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_TRAP1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}
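/*
 * Trap handling: the low bits of the IV entry's ring_id encode which engine
 * and queue raised the interrupt (bits [1:0] = SDMA instance, bits [3:2] =
 * queue, where queue 0 is the gfx ring and queues 1/2 would be compute).
 */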
static int sdma_v2_4_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	u8 instance_id, queue_id;

	instance_id = (entry->ring_id & 0x3) >> 0;
	queue_id = (entry->ring_id & 0xc) >> 2;
	DRM_DEBUG("IH: SDMA trap\n");
	switch (instance_id) {
	case 0:
		switch (queue_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[0].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		}
		break;
	case 1:
		switch (queue_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[1].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		}
		break;
	}
	return 0;
}

static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in SDMA command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int sdma_v2_4_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	/* XXX handled via the smc on VI */
	return 0;
}

static int sdma_v2_4_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}

static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
	.name = "sdma_v2_4",
	.early_init = sdma_v2_4_early_init,
	.late_init = NULL,
	.sw_init = sdma_v2_4_sw_init,
	.sw_fini = sdma_v2_4_sw_fini,
	.hw_init = sdma_v2_4_hw_init,
	.hw_fini = sdma_v2_4_hw_fini,
	.suspend = sdma_v2_4_suspend,
	.resume = sdma_v2_4_resume,
	.is_idle = sdma_v2_4_is_idle,
	.wait_for_idle = sdma_v2_4_wait_for_idle,
	.soft_reset = sdma_v2_4_soft_reset,
	.set_clockgating_state = sdma_v2_4_set_clockgating_state,
	.set_powergating_state = sdma_v2_4_set_powergating_state,
};

static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = false,
	.get_rptr = sdma_v2_4_ring_get_rptr,
	.get_wptr = sdma_v2_4_ring_get_wptr,
	.set_wptr = sdma_v2_4_ring_set_wptr,
	.emit_frame_size =
		6 + /* sdma_v2_4_ring_emit_hdp_flush */
		3 + /* hdp invalidate */
		6 + /* sdma_v2_4_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */
		10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */
	.emit_ib = sdma_v2_4_ring_emit_ib,
	.emit_fence = sdma_v2_4_ring_emit_fence,
	.emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
	.test_ring = sdma_v2_4_ring_test_ring,
	.test_ib = sdma_v2_4_ring_test_ib,
	.insert_nop = sdma_v2_4_ring_insert_nop,
	.pad_ib = sdma_v2_4_ring_pad_ib,
	.emit_wreg = sdma_v2_4_ring_emit_wreg,
};

static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs;
		adev->sdma.instance[i].ring.me = i;
	}
}

static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = {
	.set = sdma_v2_4_set_trap_irq_state,
	.process = sdma_v2_4_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v2_4_illegal_inst_irq_funcs = {
	.process = sdma_v2_4_process_illegal_inst_irq,
};

static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &sdma_v2_4_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v2_4_illegal_inst_irq_funcs;
}
/**
 * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Copy GPU buffers using the DMA engine (VI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = byte_count;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}

/**
 * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (VI).
 */
static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib,
				       uint32_t src_data,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = byte_count;
}
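/*
 * TTM buffer-function limits below: copy_num_dw matches the 7 dwords emitted
 * by sdma_v2_4_emit_copy_buffer(); fill_num_dw also reserves 7 dwords even
 * though the CONST_FILL packet above is only 5; a single copy or fill packet
 * covers at most 0x1fffff bytes.
 */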
static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
	.copy_max_bytes = 0x1fffff,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v2_4_emit_copy_buffer,

	.fill_max_bytes = 0x1fffff,
	.fill_num_dw = 7,
	.emit_fill_buffer = sdma_v2_4_emit_fill_buffer,
};

static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
{
	adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}

static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
	.copy_pte_num_dw = 7,
	.copy_pte = sdma_v2_4_vm_copy_pte,

	.write_pte = sdma_v2_4_vm_write_pte,
	.set_pte_pde = sdma_v2_4_vm_set_pte_pde,
};

static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	struct drm_gpu_scheduler *sched;
	unsigned i;

	adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
	for (i = 0; i < adev->sdma.num_instances; i++) {
		sched = &adev->sdma.instance[i].ring.sched;
		adev->vm_manager.vm_pte_rqs[i] =
			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
	}
	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}

const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 2,
	.minor = 4,
	.rev = 0,
	.funcs = &sdma_v2_4_ip_funcs,
};