kfd_dbgdev.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/device.h>

#include "kfd_pm4_headers.h"
#include "kfd_pm4_headers_diq.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_opcodes.h"
#include "cik_regs.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
#include "kfd_device_queue_manager.h"
#include "../../radeon/cik_reg.h"

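/*
 * A kfd_dbgdev is the back end of the KFD debug manager. It comes in two
 * flavors, selected in kfd_dbgdev_init() at the bottom of this file:
 * "nodiq" programs the address-watch and wave-control registers directly
 * through the kfd2kgd interface, while "diq" packages the register writes
 * into PM4 indirect buffers and submits them through a Debug Interface
 * Queue (DIQ).
 */
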
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}

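/*
 * Submit an indirect buffer to the DIQ. The PQ carries an
 * INDIRECT_BUFFER_PASID packet pointing at the caller's IB, followed by a
 * RELEASE_MEM packet that writes a fence value to a GART-allocated sync
 * variable once the IB has been consumed; we wait on that fence (with a
 * timeout) before returning.
 */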
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ.
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	largep = (union ULARGE_INTEGER *) &vmid0_address;

	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / 4) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * For now we use RELEASE_MEM for GPU-CPU synchronization.
	 * Consider WaitRegMem + WriteData as a better alternative.
	 * We get a GART allocation (gpu/cpu mapping) for the sync
	 * variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);
	if (status) {
		pr_err("Failed to allocate GART memory\n");
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
	/*
	 * no action is needed in this case,
	 * just make sure diq will not be used
	 */
	dbgdev->kq = NULL;
	return 0;
}

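/*
 * Register in DIQ mode: create a Debug Interface Queue through the process
 * queue manager and cache its backing kernel queue, which
 * dbgdev_diq_submit_ib() uses for all subsequent submissions.
 */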
static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
{
	struct queue_properties properties;
	unsigned int qid;
	struct kernel_queue *kq = NULL;
	int status;

	/* zero-init so pqm_create_queue() does not see stale stack data */
	memset(&properties, 0, sizeof(properties));
	properties.type = KFD_QUEUE_TYPE_DIQ;

	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
				&properties, &qid);
	if (status) {
		pr_err("Failed to create DIQ\n");
		return status;
	}

	pr_debug("DIQ Created with queue id: %d\n", qid);

	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
	if (!kq) {
		pr_err("Error getting DIQ\n");
		pqm_destroy_queue(dbgdev->pqm, qid);
		return -EFAULT;
	}

	dbgdev->kq = kq;

	return status;
}

static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
	/* disable watch address */
	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
	return 0;
}

static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
{
	/* todo - disable address watch */
	int status;

	status = pqm_destroy_queue(dbgdev->pqm,
			dbgdev->kq->queue->properties.queue_id);
	dbgdev->kq = NULL;

	return status;
}

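/*
 * Translate one watch point of adw_info into the three TCP watch
 * registers: the control word (mask, mode, vmid and the ATC bit) and the
 * high/low halves of the watch address. How the values reach the hardware
 * is up to the caller: direct MMIO for nodiq, PM4 packets for DIQ.
 */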
static void dbgdev_address_watch_set_registers(
			const struct dbg_address_watch_info *adw_info,
			union TCP_WATCH_ADDR_H_BITS *addrHi,
			union TCP_WATCH_ADDR_L_BITS *addrLo,
			union TCP_WATCH_CNTL_BITS *cntl,
			unsigned int index, unsigned int vmid)
{
	union ULARGE_INTEGER addr;

	addr.quad_part = 0;
	addrHi->u32All = 0;
	addrLo->u32All = 0;
	cntl->u32All = 0;

	if (adw_info->watch_mask)
		cntl->bitfields.mask =
			(uint32_t) (adw_info->watch_mask[index] &
					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
	else
		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;

	addr.quad_part = (unsigned long long) adw_info->watch_address[index];

	addrHi->bitfields.addr = addr.u.high_part &
					ADDRESS_WATCH_REG_ADDHIGH_MASK;
	addrLo->bitfields.addr =
			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);

	cntl->bitfields.mode = adw_info->watch_mode[index];
	cntl->bitfields.vmid = (uint32_t) vmid;
	/* for now assume it is an ATC address */
	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;

	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
	pr_debug("\t\t%20s %08x\n", "set reg add high :",
			addrHi->bitfields.addr);
	pr_debug("\t\t%20s %08x\n", "set reg add low :",
			addrLo->bitfields.addr);
}

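/*
 * Program the address-watch registers directly through the kfd2kgd
 * interface. The VMID comes from the per-process device data, so the
 * watch points fire only for waves of the debugged process.
 */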
static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
				      struct dbg_address_watch_info *adw_info)
{
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_process_device *pdd;
	unsigned int i;

	/* taking the vmid for that process on the safe way using pdd */
	pdd = kfd_get_process_device_data(dbgdev->dev,
					adw_info->process);
	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
						&cntl, i, pdd->qpd.vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		pdd->dev->kfd2kgd->address_watch_execute(
						dbgdev->dev->kgd,
						i,
						cntl.u32All,
						addrHi.u32All,
						addrLo.u32All);
	}

	return 0;
}

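/*
 * Program the address-watch registers through the DIQ. Each watch point
 * is written with four SET_CONFIG_REG packets in a GART-backed IB:
 * control word (valid bit clear), address high, address low, then the
 * control word again with the valid bit set, so the watch point is armed
 * only after the full address is in place. The CP patches the VMID into
 * the control writes (insert_vmid = 1) from the PASID this IB is
 * submitted for.
 */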
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				    struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a placeholder */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
				&addrHi,
				&addrLo,
				&cntl,
				i,
				vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[0].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[0].reg_data[0] = cntl.u32All;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero */
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
				dbgdev,
				adw_info->process->pasid,
				mem_obj->gpu_addr,
				packet_buff_uint,
				ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}

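/*
 * Build the SQ_CMD and GRBM_GFX_INDEX values for a wave-control request.
 * The mode selects the target (a single wave, all waves of the process,
 * or all process waves on one CU) and the operand selects the SQ indirect
 * command (halt, resume, kill, debug or trap). The SQ_CMD VMID field is
 * left zero for the caller to patch.
 */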
static int dbgdev_wave_control_set_registers(
				struct dbg_wave_control_info *wac_info,
				union SQ_CMD_BITS *in_reg_sq_cmd,
				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
{
	int status = 0;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct HsaDbgWaveMsgAMDGen2 *pMsg;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;
	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;

	switch (wac_info->mode) {
	/* Send command to single wave */
	case HSA_DBG_WAVEMODE_SINGLE:
		/*
		 * Limit access to the process waves only,
		 * by setting vmid check
		 */
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
		break;

	/* Send command to all waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
		reg_gfx_index.bits.sh_broadcast_writes = 1;
		reg_gfx_index.bits.se_broadcast_writes = 1;
		reg_gfx_index.bits.instance_broadcast_writes = 1;

		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
		break;

	/* Send command to all CU waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
		break;

	default:
		return -EINVAL;
	}

	switch (wac_info->operand) {
	case HSA_DBG_WAVEOP_HALT:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
		break;

	case HSA_DBG_WAVEOP_RESUME:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
		break;

	case HSA_DBG_WAVEOP_KILL:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
		break;

	case HSA_DBG_WAVEOP_DEBUG:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
		break;

	case HSA_DBG_WAVEOP_TRAP:
		if (wac_info->trapId < MAX_TRAPID) {
			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
			reg_sq_cmd.bits.trap_id = wac_info->trapId;
		} else {
			status = -EINVAL;
		}
		break;

	default:
		status = -EINVAL;
		break;
	}

	if (status == 0) {
		*in_reg_sq_cmd = reg_sq_cmd;
		*in_reg_gfx_index = reg_gfx_index;
	}

	return status;
}

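/*
 * Execute a wave-control command through the DIQ using three packets:
 * select the target CU via GRBM_GFX_INDEX (a uconfig register), write
 * SQ_CMD with the VMID inserted by the CP from the submitting PASID,
 * then restore GRBM_GFX_INDEX to broadcast mode.
 */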
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
				   struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
			&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: N/A\n");

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
	if (status != 0) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */
	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;

	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status)
		pr_err("Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

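/*
 * Execute a wave-control command directly through the kfd2kgd interface,
 * patching the VMID taken from the per-process device data.
 */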
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
				     struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
			&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
			reg_gfx_index.u32All,
			reg_sq_cmd.u32All);
}

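/*
 * Broadcast a KILL command to all wavefronts of process p on device dev.
 * The process may no longer own a user queue, so its VMID is recovered by
 * scanning the ATC VMID<->PASID mapping registers instead of reading it
 * from a queue's qpd.
 */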
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	struct dbg_wave_control_info wac_info;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;

	reg_sq_cmd.u32All = 0;

	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
	wac_info.operand = HSA_DBG_WAVEOP_KILL;

	pr_debug("Killing all process wavefronts\n");

	/*
	 * Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING to check which VMID the current process
	 * is mapped to.
	 */
	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
				(dev->kgd, vmid)) {
			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
					(dev->kgd, vmid) == p->pasid) {
				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
						vmid, p->pasid);
				break;
			}
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
		return -EFAULT;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
			&reg_gfx_index);
	if (status != 0)
		return -EINVAL;

	/* for non DIQ we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = vmid;

	dev->kfd2kgd->wave_control_execute(dev->kgd,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All);

	return 0;
}

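/*
 * Bind a dbgdev to a device and select the back end: NODIQ programs the
 * hardware registers directly, anything else goes through the DIQ.
 */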
void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
			enum DBGDEV_TYPE type)
{
	pdbgdev->dev = pdev;
	pdbgdev->kq = NULL;
	pdbgdev->type = type;
	pdbgdev->pqm = NULL;

	switch (type) {
	case DBGDEV_TYPE_NODIQ:
		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
		break;
	case DBGDEV_TYPE_DIQ:
	default:
		pdbgdev->dbgdev_register = dbgdev_register_diq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
		break;
	}
}