kfd_dbgdev.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/device.h>

#include "kfd_pm4_headers.h"
#include "kfd_pm4_headers_diq.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_opcodes.h"
#include "cik_regs.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
#include "kfd_device_queue_manager.h"
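
/*
 * In no-DIQ mode the address watch hardware is programmed directly
 * through the kfd2kgd interface, so disabling all watch points is a
 * single call into KGD.
 */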
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
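
/*
 * Submit an indirect buffer (IB) through the debug interface queue
 * (DIQ): place an INDIRECT_BUFFER_PASID packet that points at the
 * caller's packet buffer on the DIQ, followed by a RELEASE_MEM packet
 * that makes the CP write a fence value to a GART-mapped sync variable,
 * then wait (with timeout) until that fence arrives.
 */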
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from the DIQ. The caller's packet buffer is
	 * referenced by the indirect buffer packet, so on the PQ itself we
	 * only place the IB packet plus the sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	largep = (union ULARGE_INTEGER *) &vmid0_address;

	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / 4) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * For now we use RELEASE_MEM for GPU-CPU synchronization
	 * (WAIT_REG_MEM + WRITE_DATA may be a better alternative).
	 * We get a GART allocation (GPU/CPU mapping) for the sync
	 * variable and wait until:
	 * (a) the CP has processed the IB, and
	 * (b) the sync variable is written by the CP to memory.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);
	if (status) {
		pr_err("Failed to allocate GART memory\n");
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
	/*
	 * No action is needed in this case;
	 * just make sure the DIQ will not be used.
	 */
	dbgdev->kq = NULL;

	return 0;
}
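
/*
 * DIQ-mode registration: create a debug interface queue through the
 * process queue manager (PQM) and keep a reference to its kernel queue.
 */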
static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
{
	struct queue_properties properties;
	unsigned int qid;
	struct kernel_queue *kq = NULL;
	int status;

	properties.type = KFD_QUEUE_TYPE_DIQ;

	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
				&properties, &qid);
	if (status) {
		pr_err("Failed to create DIQ\n");
		return status;
	}

	pr_debug("DIQ Created with queue id: %d\n", qid);

	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
	if (!kq) {
		pr_err("Error getting DIQ\n");
		pqm_destroy_queue(dbgdev->pqm, qid);
		return -EFAULT;
	}

	dbgdev->kq = kq;

	return status;
}

static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
	/* disable watch address */
	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
	return 0;
}

static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
{
	/* todo - disable address watch */
	int status;

	status = pqm_destroy_queue(dbgdev->pqm,
			dbgdev->kq->queue->properties.queue_id);
	dbgdev->kq = NULL;

	return status;
}
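
/*
 * Translate watch point 'index' from adw_info into the
 * TCP_WATCH_ADDR_H/ADDR_L/CNTL register images, keeping only the
 * address bits the registers can hold. For now the address is assumed
 * to be an ATC address.
 */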
static void dbgdev_address_watch_set_registers(
			const struct dbg_address_watch_info *adw_info,
			union TCP_WATCH_ADDR_H_BITS *addrHi,
			union TCP_WATCH_ADDR_L_BITS *addrLo,
			union TCP_WATCH_CNTL_BITS *cntl,
			unsigned int index, unsigned int vmid)
{
	union ULARGE_INTEGER addr;

	addr.quad_part = 0;
	addrHi->u32All = 0;
	addrLo->u32All = 0;
	cntl->u32All = 0;

	if (adw_info->watch_mask)
		cntl->bitfields.mask =
			(uint32_t) (adw_info->watch_mask[index] &
					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
	else
		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;

	addr.quad_part = (unsigned long long) adw_info->watch_address[index];

	addrHi->bitfields.addr = addr.u.high_part &
					ADDRESS_WATCH_REG_ADDHIGH_MASK;
	addrLo->bitfields.addr =
			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);

	cntl->bitfields.mode = adw_info->watch_mode[index];
	cntl->bitfields.vmid = (uint32_t) vmid;
	/* for now assume it is an ATC address */
	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;

	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
	pr_debug("\t\t%20s %08x\n", "set reg add high :",
			addrHi->bitfields.addr);
	pr_debug("\t\t%20s %08x\n", "set reg add low :",
			addrLo->bitfields.addr);
}
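
/*
 * No-DIQ path: program each watch point's registers directly through
 * kfd2kgd, using the VMID taken from the process device data (pdd).
 */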
static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
				struct dbg_address_watch_info *adw_info)
{
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_process_device *pdd;
	unsigned int i;

	/* taking the vmid for that process on the safe way using pdd */
	pdd = kfd_get_process_device_data(dbgdev->dev,
					adw_info->process);
	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
				&cntl, i, pdd->qpd.vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		pdd->dev->kfd2kgd->address_watch_execute(
						dbgdev->dev->kgd,
						i,
						cntl.u32All,
						addrHi.u32All,
						addrLo.u32All);
	}

	return 0;
}
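
/*
 * DIQ path: for each watch point, build four SET_CONFIG_REG packets in
 * a GART buffer (CNTL, ADDR_HI, ADDR_LO, then CNTL again with the valid
 * bit set for non-zero addresses) and submit them as an indirect buffer
 * on the DIQ. Packets flagged with insert_vmid have the process VMID
 * patched in by the CP, since the VMID is not known here.
 */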
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a placeholder */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[0].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero */
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
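
/*
 * Build the SQ_CMD and GRBM_GFX_INDEX register images for a wave
 * control operation: wac_info->mode selects single-wave or broadcast
 * targeting, wac_info->operand selects the SQ indirect command
 * (halt, resume, kill, debug or trap). The VMID is left for the
 * callers to patch in.
 */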
static int dbgdev_wave_control_set_registers(
				struct dbg_wave_control_info *wac_info,
				union SQ_CMD_BITS *in_reg_sq_cmd,
				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
{
	int status = 0;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct HsaDbgWaveMsgAMDGen2 *pMsg;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;
	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;

	switch (wac_info->mode) {
	/* Send command to single wave */
	case HSA_DBG_WAVEMODE_SINGLE:
		/*
		 * Limit access to the process waves only,
		 * by setting vmid check
		 */
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
		break;

	/* Send command to all waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
		reg_gfx_index.bits.sh_broadcast_writes = 1;
		reg_gfx_index.bits.se_broadcast_writes = 1;
		reg_gfx_index.bits.instance_broadcast_writes = 1;

		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
		break;

	/* Send command to all CU waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
		break;

	default:
		return -EINVAL;
	}

	switch (wac_info->operand) {
	case HSA_DBG_WAVEOP_HALT:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
		break;

	case HSA_DBG_WAVEOP_RESUME:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
		break;

	case HSA_DBG_WAVEOP_KILL:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
		break;

	case HSA_DBG_WAVEOP_DEBUG:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
		break;

	case HSA_DBG_WAVEOP_TRAP:
		if (wac_info->trapId < MAX_TRAPID) {
			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
			reg_sq_cmd.bits.trap_id = wac_info->trapId;
		} else {
			status = -EINVAL;
		}
		break;

	default:
		status = -EINVAL;
		break;
	}

	if (status == 0) {
		*in_reg_sq_cmd = reg_sq_cmd;
		*in_reg_gfx_index = reg_gfx_index;
	}

	return status;
}
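
/*
 * DIQ path: submit a three-packet IB - set GRBM_GFX_INDEX to target
 * the waves, write SQ_CMD (with the VMID patched in by the CP), then
 * restore GRBM_GFX_INDEX to broadcast mode.
 */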
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: N/A\n");

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
	if (status != 0) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */
	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;

	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
				dbgdev,
				wac_info->process->pasid,
				mem_obj->gpu_addr,
				packet_buff_uint,
				ib_size);

	if (status)
		pr_err("Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
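
/*
 * No-DIQ path: patch the VMID from the process device data into SQ_CMD
 * and execute the wave control operation directly through kfd2kgd.
 */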
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
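
/*
 * Kill all wavefronts of process p on device dev. The VMID is recovered
 * by scanning the ATC VMID<->PASID mapping registers for this process'
 * PASID, rather than taken from the pdd.
 */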
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	struct dbg_wave_control_info wac_info;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;

	reg_sq_cmd.u32All = 0;

	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
	wac_info.operand = HSA_DBG_WAVEOP_KILL;

	pr_debug("Killing all process wavefronts\n");

	/*
	 * Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING to check which VMID the current process
	 * is mapped to.
	 */
	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
				(dev->kgd, vmid)) {
			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
					(dev->kgd, vmid) == p->pasid) {
				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
						vmid, p->pasid);
				break;
			}
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
		return -EFAULT;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status != 0)
		return -EINVAL;

	/* for non DIQ we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = vmid;

	dev->kfd2kgd->wave_control_execute(dev->kgd,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All);

	return 0;
}
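
/*
 * Initialize a dbgdev instance, wiring its register/unregister,
 * wave-control and address-watch hooks to either the no-DIQ (direct
 * register access) or the DIQ (packet-based) implementation.
 */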
void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
			enum DBGDEV_TYPE type)
{
	pdbgdev->dev = pdev;
	pdbgdev->kq = NULL;
	pdbgdev->type = type;
	pdbgdev->pqm = NULL;

	switch (type) {
	case DBGDEV_TYPE_NODIQ:
		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
		break;
	case DBGDEV_TYPE_DIQ:
	default:
		pdbgdev->dbgdev_register = dbgdev_register_diq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
		break;
	}
}