/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

#define RB_SIZE    SZ_32K
#define RB_BLKSIZE 16
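
/* GPU parameters queried from userspace through the driver's get_param interface: */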
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		return 0;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->gmem;
		return 0;
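	/* chip id is packed one byte per field: core.major.minor.patchid */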
	case MSM_PARAM_CHIP_ID:
		*value = adreno_gpu->rev.patchid |
				(adreno_gpu->rev.minor << 8) |
				(adreno_gpu->rev.major << 16) |
				(adreno_gpu->rev.core << 24);
		return 0;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}
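
/*
 * rbmemptr() gives the GPU address (iova) of a field in the shared
 * adreno_rbmemptrs buffer that the CP writes back to (rptr, fence, ...):
 */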
#define rbmemptr(adreno_gpu, member) \
	((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member))

int adreno_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	DBG("%s", gpu->name);

	ret = msm_gem_get_iova(gpu->rb->bo, gpu->id, &gpu->rb_iova);
	if (ret) {
		gpu->rb_iova = 0;
		dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret);
		return ret;
	}

	/* Setup REG_CP_RB_CNTL: */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
			/* size is log2(quad-words): */
			AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
			AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)));

	/* Setup ringbuffer address: */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, gpu->rb_iova);
	gpu_write(gpu, REG_AXXX_CP_RB_RPTR_ADDR, rbmemptr(adreno_gpu, rptr));

	/* Setup scratch/timestamp: */
	gpu_write(gpu, REG_AXXX_SCRATCH_ADDR, rbmemptr(adreno_gpu, fence));
	gpu_write(gpu, REG_AXXX_SCRATCH_UMSK, 0x1);

	return 0;
}
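
/* wptr as a dword offset from the start of the ringbuffer: */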
static uint32_t get_wptr(struct msm_ringbuffer *ring)
{
	return ring->cur - ring->start;
}

uint32_t adreno_last_fence(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	return adreno_gpu->memptrs->fence;
}
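
/*
 * Recover from a GPU hang: reset the ringbuffer, treat anything that was
 * still pending as completed, and re-run hw_init to bring the GPU back up:
 */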
void adreno_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct drm_device *dev = gpu->dev;
	int ret;

	gpu->funcs->pm_suspend(gpu);

	/* reset ringbuffer: */
	gpu->rb->cur = gpu->rb->start;

	/* reset completed fence seqno, just discard anything pending: */
	adreno_gpu->memptrs->fence = gpu->submitted_fence;
	adreno_gpu->memptrs->rptr = 0;
	adreno_gpu->memptrs->wptr = 0;

	gpu->funcs->pm_resume(gpu);
	ret = gpu->funcs->hw_init(gpu);
	if (ret) {
		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
		/* hmm, oh well? */
	}
}

int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = gpu->rb;
	unsigned i, ibs = 0;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
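			/* otherwise fall through and emit the restore IB: */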
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, submit->cmd[i].iova);
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/* on a320, at least, we seem to need to pad things out to an
	 * even number of qwords to avoid issue w/ CP hanging on wrap-
	 * around:
	 */
	if (ibs % 2)
		OUT_PKT2(ring);

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->fence);

	if (adreno_is_a3xx(adreno_gpu)) {
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);

		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
	OUT_RING(ring, submit->fence);

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

	gpu->funcs->flush(gpu);

	return 0;
}

void adreno_flush(struct msm_gpu *gpu)
{
	uint32_t wptr = get_wptr(gpu->rb);

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

	gpu_write(gpu, REG_AXXX_CP_RB_WPTR, wptr);
}

void adreno_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr = get_wptr(gpu->rb);

	/* wait for CP to drain ringbuffer: */
	if (spin_until(adreno_gpu->memptrs->rptr == wptr))
		DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);

	/* TODO maybe we need to reset GPU here to recover from hang? */
}

#ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->submitted_fence);
	seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr);
	seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr);
	seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));

	gpu->funcs->pm_resume(gpu);

	/* dump these out in a form that can be parsed by demsm: */
	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
		}
	}

	gpu->funcs->pm_suspend(gpu);
}
#endif

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	printk("revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	printk("fence: %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->submitted_fence);
	printk("rptr: %d\n", adreno_gpu->memptrs->rptr);
	printk("wptr: %d\n", adreno_gpu->memptrs->wptr);
	printk("rb wptr: %d\n", get_wptr(gpu->rb));

	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr<<2, val);
		}
	}
}
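
/*
 * Free space (in dwords) between wptr and the last rptr the CP wrote back,
 * keeping one dword unused so a full ring is distinguishable from an empty
 * one:
 */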
static uint32_t ring_freewords(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t size = gpu->rb->size / 4;
	uint32_t wptr = get_wptr(gpu->rb);
	uint32_t rptr = adreno_gpu->memptrs->rptr;
	return (rptr + (size - 1) - wptr) % size;
}

void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
{
	if (spin_until(ring_freewords(gpu) >= ndwords))
		DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
}
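
/* IOMMU port names passed to the MMU attach in adreno_gpu_init() below: */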
static const char *iommu_ports[] = {
		"gfx3d_user", "gfx3d_priv",
		"gfx3d1_user", "gfx3d1_priv",
};
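
/*
 * Common init shared by the per-generation backends: read the platform
 * config, load PM4/PFP microcode, create the ringbuffer via msm_gpu_init(),
 * attach the IOMMU (if any), and allocate the shared memptrs buffer:
 */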
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs)
{
	struct adreno_platform_config *config = pdev->dev.platform_data;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct msm_mmu *mmu;
	int ret;

	adreno_gpu->funcs = funcs;
	adreno_gpu->info = adreno_info(config->rev);
	adreno_gpu->gmem = adreno_gpu->info->gmem;
	adreno_gpu->revn = adreno_gpu->info->revn;
	adreno_gpu->rev = config->rev;

	gpu->fast_rate = config->fast_rate;
	gpu->slow_rate = config->slow_rate;
	gpu->bus_freq = config->bus_freq;
#ifdef CONFIG_MSM_BUS_SCALING
	gpu->bus_scale_table = config->bus_scale_table;
#endif

	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);

	ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
				adreno_gpu->info->pm4fw, ret);
		return ret;
	}

	ret = request_firmware(&adreno_gpu->pfp, adreno_gpu->info->pfpfw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PFP firmware: %d\n",
				adreno_gpu->info->pfpfw, ret);
		return ret;
	}

	ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
			adreno_gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
			RB_SIZE);
	if (ret)
		return ret;

	mmu = gpu->mmu;
	if (mmu) {
		ret = mmu->funcs->attach(mmu, iommu_ports,
				ARRAY_SIZE(iommu_ports));
		if (ret)
			return ret;
	}

	mutex_lock(&drm->struct_mutex);
	adreno_gpu->memptrs_bo = msm_gem_new(drm, sizeof(*adreno_gpu->memptrs),
			MSM_BO_UNCACHED);
	mutex_unlock(&drm->struct_mutex);
	if (IS_ERR(adreno_gpu->memptrs_bo)) {
		ret = PTR_ERR(adreno_gpu->memptrs_bo);
		adreno_gpu->memptrs_bo = NULL;
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		return ret;
	}

	adreno_gpu->memptrs = msm_gem_vaddr(adreno_gpu->memptrs_bo);
	if (!adreno_gpu->memptrs) {
		dev_err(drm->dev, "could not vmap memptrs\n");
		return -ENOMEM;
	}

	ret = msm_gem_get_iova(adreno_gpu->memptrs_bo, gpu->id,
			&adreno_gpu->memptrs_iova);
	if (ret) {
		dev_err(drm->dev, "could not map memptrs: %d\n", ret);
		return ret;
	}

	return 0;
}

void adreno_gpu_cleanup(struct adreno_gpu *gpu)
{
	if (gpu->memptrs_bo) {
		if (gpu->memptrs_iova)
			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
		drm_gem_object_unreference(gpu->memptrs_bo);
	}
	if (gpu->pm4)
		release_firmware(gpu->pm4);
	if (gpu->pfp)
		release_firmware(gpu->pfp);
	msm_gpu_cleanup(&gpu->base);
}