mxgpu_ai.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. /*
  2. * Copyright 2014 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. */
  23. #include "amdgpu.h"
  24. #include "nbio/nbio_6_1_offset.h"
  25. #include "nbio/nbio_6_1_sh_mask.h"
  26. #include "gc/gc_9_0_offset.h"
  27. #include "gc/gc_9_0_sh_mask.h"
  28. #include "soc15.h"
  29. #include "vega10_ih.h"
  30. #include "soc15_common.h"
  31. #include "mxgpu_ai.h"
  32. static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
  33. {
  34. u32 reg;
  35. int timeout = AI_MAILBOX_TIMEDOUT;
  36. u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
  37. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  38. mmBIF_BX_PF0_MAILBOX_CONTROL));
  39. reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1);
  40. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  41. mmBIF_BX_PF0_MAILBOX_CONTROL), reg);
  42. /*Wait for RCV_MSG_VALID to be 0*/
  43. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  44. mmBIF_BX_PF0_MAILBOX_CONTROL));
  45. while (reg & mask) {
  46. if (timeout <= 0) {
  47. pr_err("RCV_MSG_VALID is not cleared\n");
  48. break;
  49. }
  50. mdelay(1);
  51. timeout -=1;
  52. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  53. mmBIF_BX_PF0_MAILBOX_CONTROL));
  54. }
  55. }
  56. static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
  57. {
  58. u32 reg;
  59. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  60. mmBIF_BX_PF0_MAILBOX_CONTROL));
  61. reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL,
  62. TRN_MSG_VALID, val ? 1 : 0);
  63. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL),
  64. reg);
  65. }
  66. static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
  67. enum idh_event event)
  68. {
  69. u32 reg;
  70. u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
  71. if (event != IDH_FLR_NOTIFICATION_CMPL) {
  72. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  73. mmBIF_BX_PF0_MAILBOX_CONTROL));
  74. if (!(reg & mask))
  75. return -ENOENT;
  76. }
  77. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  78. mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
  79. if (reg != event)
  80. return -ENOENT;
  81. xgpu_ai_mailbox_send_ack(adev);
  82. return 0;
  83. }
  84. static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
  85. {
  86. int r = 0, timeout = AI_MAILBOX_TIMEDOUT;
  87. u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, TRN_MSG_ACK);
  88. u32 reg;
  89. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  90. mmBIF_BX_PF0_MAILBOX_CONTROL));
  91. while (!(reg & mask)) {
  92. if (timeout <= 0) {
  93. pr_err("Doesn't get ack from pf.\n");
  94. r = -ETIME;
  95. break;
  96. }
  97. mdelay(5);
  98. timeout -= 5;
  99. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  100. mmBIF_BX_PF0_MAILBOX_CONTROL));
  101. }
  102. return r;
  103. }
  104. static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
  105. {
  106. int r = 0, timeout = AI_MAILBOX_TIMEDOUT;
  107. r = xgpu_ai_mailbox_rcv_msg(adev, event);
  108. while (r) {
  109. if (timeout <= 0) {
  110. pr_err("Doesn't get msg:%d from pf.\n", event);
  111. r = -ETIME;
  112. break;
  113. }
  114. mdelay(5);
  115. timeout -= 5;
  116. r = xgpu_ai_mailbox_rcv_msg(adev, event);
  117. }
  118. return r;
  119. }
  120. static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
  121. enum idh_request req, u32 data1, u32 data2, u32 data3) {
  122. u32 reg;
  123. int r;
  124. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  125. mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
  126. reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0,
  127. MSGBUF_DATA, req);
  128. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0),
  129. reg);
  130. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW1),
  131. data1);
  132. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW2),
  133. data2);
  134. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW3),
  135. data3);
  136. xgpu_ai_mailbox_set_valid(adev, true);
  137. /* start to poll ack */
  138. r = xgpu_ai_poll_ack(adev);
  139. if (r)
  140. pr_err("Doesn't get ack from pf, continue\n");
  141. xgpu_ai_mailbox_set_valid(adev, false);
  142. }
  143. static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
  144. enum idh_request req)
  145. {
  146. int r;
  147. xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0);
  148. /* start to check msg if request is idh_req_gpu_init_access */
  149. if (req == IDH_REQ_GPU_INIT_ACCESS ||
  150. req == IDH_REQ_GPU_FINI_ACCESS ||
  151. req == IDH_REQ_GPU_RESET_ACCESS) {
  152. r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
  153. if (r) {
  154. pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
  155. return r;
  156. }
  157. /* Retrieve checksum from mailbox2 */
  158. if (req == IDH_REQ_GPU_INIT_ACCESS) {
  159. adev->virt.fw_reserve.checksum_key =
  160. RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  161. mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
  162. }
  163. }
  164. return 0;
  165. }
  166. static int xgpu_ai_request_reset(struct amdgpu_device *adev)
  167. {
  168. return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
  169. }
  170. static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,
  171. bool init)
  172. {
  173. enum idh_request req;
  174. req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
  175. return xgpu_ai_send_access_requests(adev, req);
  176. }
  177. static int xgpu_ai_release_full_gpu_access(struct amdgpu_device *adev,
  178. bool init)
  179. {
  180. enum idh_request req;
  181. int r = 0;
  182. req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
  183. r = xgpu_ai_send_access_requests(adev, req);
  184. return r;
  185. }
  /*
   * .process callback of the mailbox ack interrupt source.
   *
   * Emits a debug message and takes no other action.  Always returns 0.
   */
  static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
  				   struct amdgpu_irq_src *source,
  				   struct amdgpu_iv_entry *entry)
  {
  	DRM_DEBUG("get ack intr and do nothing.\n");

  	return 0;
  }
  193. static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
  194. struct amdgpu_irq_src *source,
  195. unsigned type,
  196. enum amdgpu_interrupt_state state)
  197. {
  198. u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));
  199. tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, ACK_INT_EN,
  200. (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
  201. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);
  202. return 0;
  203. }
  204. static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
  205. {
  206. struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
  207. struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
  208. /* wait until RCV_MSG become 3 */
  209. if (xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
  210. pr_err("failed to recieve FLR_CMPL\n");
  211. return;
  212. }
  213. /* Trigger recovery due to world switch failure */
  214. amdgpu_device_gpu_recover(adev, NULL, false);
  215. }
  216. static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
  217. struct amdgpu_irq_src *src,
  218. unsigned type,
  219. enum amdgpu_interrupt_state state)
  220. {
  221. u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));
  222. tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, VALID_INT_EN,
  223. (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
  224. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);
  225. return 0;
  226. }
  227. static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
  228. struct amdgpu_irq_src *source,
  229. struct amdgpu_iv_entry *entry)
  230. {
  231. int r;
  232. /* trigger gpu-reset by hypervisor only if TDR disbaled */
  233. if (!amdgpu_gpu_recovery) {
  234. /* see what event we get */
  235. r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
  236. /* sometimes the interrupt is delayed to inject to VM, so under such case
  237. * the IDH_FLR_NOTIFICATION is overwritten by VF FLR from GIM side, thus
  238. * above recieve message could be failed, we should schedule the flr_work
  239. * anyway
  240. */
  241. if (r) {
  242. DRM_ERROR("FLR_NOTIFICATION is missed\n");
  243. xgpu_ai_mailbox_send_ack(adev);
  244. }
  245. schedule_work(&adev->virt.flr_work);
  246. }
  247. return 0;
  248. }
  249. static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_ack_irq_funcs = {
  250. .set = xgpu_ai_set_mailbox_ack_irq,
  251. .process = xgpu_ai_mailbox_ack_irq,
  252. };
  253. static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_rcv_irq_funcs = {
  254. .set = xgpu_ai_set_mailbox_rcv_irq,
  255. .process = xgpu_ai_mailbox_rcv_irq,
  256. };
  257. void xgpu_ai_mailbox_set_irq_funcs(struct amdgpu_device *adev)
  258. {
  259. adev->virt.ack_irq.num_types = 1;
  260. adev->virt.ack_irq.funcs = &xgpu_ai_mailbox_ack_irq_funcs;
  261. adev->virt.rcv_irq.num_types = 1;
  262. adev->virt.rcv_irq.funcs = &xgpu_ai_mailbox_rcv_irq_funcs;
  263. }
  264. int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
  265. {
  266. int r;
  267. r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
  268. if (r)
  269. return r;
  270. r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
  271. if (r) {
  272. amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
  273. return r;
  274. }
  275. return 0;
  276. }
  277. int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
  278. {
  279. int r;
  280. r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
  281. if (r)
  282. return r;
  283. r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
  284. if (r) {
  285. amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
  286. return r;
  287. }
  288. INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
  289. return 0;
  290. }
  291. void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
  292. {
  293. amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
  294. amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
  295. }
  296. const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
  297. .req_full_gpu = xgpu_ai_request_full_gpu_access,
  298. .rel_full_gpu = xgpu_ai_release_full_gpu_access,
  299. .reset_gpu = xgpu_ai_request_reset,
  300. .wait_reset = NULL,
  301. .trans_msg = xgpu_ai_mailbox_trans_msg,
  302. };