msm_gpu.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. /*
  2. * Copyright (C) 2013 Red Hat
  3. * Author: Rob Clark <robdclark@gmail.com>
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms of the GNU General Public License version 2 as published by
  7. * the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it will be useful, but WITHOUT
  10. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. * more details.
  13. *
  14. * You should have received a copy of the GNU General Public License along with
  15. * this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include "msm_gpu.h"
  18. #include "msm_gem.h"
  19. #include "msm_mmu.h"
  20. /*
  21. * Power Management:
  22. */
  23. #ifdef CONFIG_MSM_BUS_SCALING
  24. #include <mach/board.h>
  25. static void bs_init(struct msm_gpu *gpu)
  26. {
  27. if (gpu->bus_scale_table) {
  28. gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table);
  29. DBG("bus scale client: %08x", gpu->bsc);
  30. }
  31. }
  32. static void bs_fini(struct msm_gpu *gpu)
  33. {
  34. if (gpu->bsc) {
  35. msm_bus_scale_unregister_client(gpu->bsc);
  36. gpu->bsc = 0;
  37. }
  38. }
  39. static void bs_set(struct msm_gpu *gpu, int idx)
  40. {
  41. if (gpu->bsc) {
  42. DBG("set bus scaling: %d", idx);
  43. msm_bus_scale_client_update_request(gpu->bsc, idx);
  44. }
  45. }
  46. #else
  47. static void bs_init(struct msm_gpu *gpu) {}
  48. static void bs_fini(struct msm_gpu *gpu) {}
  49. static void bs_set(struct msm_gpu *gpu, int idx) {}
  50. #endif
  51. static int enable_pwrrail(struct msm_gpu *gpu)
  52. {
  53. struct drm_device *dev = gpu->dev;
  54. int ret = 0;
  55. if (gpu->gpu_reg) {
  56. ret = regulator_enable(gpu->gpu_reg);
  57. if (ret) {
  58. dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
  59. return ret;
  60. }
  61. }
  62. if (gpu->gpu_cx) {
  63. ret = regulator_enable(gpu->gpu_cx);
  64. if (ret) {
  65. dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
  66. return ret;
  67. }
  68. }
  69. return 0;
  70. }
  71. static int disable_pwrrail(struct msm_gpu *gpu)
  72. {
  73. if (gpu->gpu_cx)
  74. regulator_disable(gpu->gpu_cx);
  75. if (gpu->gpu_reg)
  76. regulator_disable(gpu->gpu_reg);
  77. return 0;
  78. }
  79. static int enable_clk(struct msm_gpu *gpu)
  80. {
  81. struct clk *rate_clk = NULL;
  82. int i;
  83. /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
  84. for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
  85. if (gpu->grp_clks[i]) {
  86. clk_prepare(gpu->grp_clks[i]);
  87. rate_clk = gpu->grp_clks[i];
  88. }
  89. }
  90. if (rate_clk && gpu->fast_rate)
  91. clk_set_rate(rate_clk, gpu->fast_rate);
  92. for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
  93. if (gpu->grp_clks[i])
  94. clk_enable(gpu->grp_clks[i]);
  95. return 0;
  96. }
  97. static int disable_clk(struct msm_gpu *gpu)
  98. {
  99. struct clk *rate_clk = NULL;
  100. int i;
  101. /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
  102. for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
  103. if (gpu->grp_clks[i]) {
  104. clk_disable(gpu->grp_clks[i]);
  105. rate_clk = gpu->grp_clks[i];
  106. }
  107. }
  108. if (rate_clk && gpu->slow_rate)
  109. clk_set_rate(rate_clk, gpu->slow_rate);
  110. for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
  111. if (gpu->grp_clks[i])
  112. clk_unprepare(gpu->grp_clks[i]);
  113. return 0;
  114. }
  115. static int enable_axi(struct msm_gpu *gpu)
  116. {
  117. if (gpu->ebi1_clk)
  118. clk_prepare_enable(gpu->ebi1_clk);
  119. if (gpu->bus_freq)
  120. bs_set(gpu, gpu->bus_freq);
  121. return 0;
  122. }
  123. static int disable_axi(struct msm_gpu *gpu)
  124. {
  125. if (gpu->ebi1_clk)
  126. clk_disable_unprepare(gpu->ebi1_clk);
  127. if (gpu->bus_freq)
  128. bs_set(gpu, 0);
  129. return 0;
  130. }
  131. int msm_gpu_pm_resume(struct msm_gpu *gpu)
  132. {
  133. struct drm_device *dev = gpu->dev;
  134. int ret;
  135. DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);
  136. WARN_ON(!mutex_is_locked(&dev->struct_mutex));
  137. if (gpu->active_cnt++ > 0)
  138. return 0;
  139. if (WARN_ON(gpu->active_cnt <= 0))
  140. return -EINVAL;
  141. ret = enable_pwrrail(gpu);
  142. if (ret)
  143. return ret;
  144. ret = enable_clk(gpu);
  145. if (ret)
  146. return ret;
  147. ret = enable_axi(gpu);
  148. if (ret)
  149. return ret;
  150. return 0;
  151. }
  152. int msm_gpu_pm_suspend(struct msm_gpu *gpu)
  153. {
  154. struct drm_device *dev = gpu->dev;
  155. int ret;
  156. DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);
  157. WARN_ON(!mutex_is_locked(&dev->struct_mutex));
  158. if (--gpu->active_cnt > 0)
  159. return 0;
  160. if (WARN_ON(gpu->active_cnt < 0))
  161. return -EINVAL;
  162. ret = disable_axi(gpu);
  163. if (ret)
  164. return ret;
  165. ret = disable_clk(gpu);
  166. if (ret)
  167. return ret;
  168. ret = disable_pwrrail(gpu);
  169. if (ret)
  170. return ret;
  171. return 0;
  172. }
  173. /*
  174. * Inactivity detection (for suspend):
  175. */
  176. static void inactive_worker(struct work_struct *work)
  177. {
  178. struct msm_gpu *gpu = container_of(work, struct msm_gpu, inactive_work);
  179. struct drm_device *dev = gpu->dev;
  180. if (gpu->inactive)
  181. return;
  182. DBG("%s: inactive!\n", gpu->name);
  183. mutex_lock(&dev->struct_mutex);
  184. if (!(msm_gpu_active(gpu) || gpu->inactive)) {
  185. disable_axi(gpu);
  186. disable_clk(gpu);
  187. gpu->inactive = true;
  188. }
  189. mutex_unlock(&dev->struct_mutex);
  190. }
  191. static void inactive_handler(unsigned long data)
  192. {
  193. struct msm_gpu *gpu = (struct msm_gpu *)data;
  194. struct msm_drm_private *priv = gpu->dev->dev_private;
  195. queue_work(priv->wq, &gpu->inactive_work);
  196. }
  197. /* cancel inactive timer and make sure we are awake: */
  198. static void inactive_cancel(struct msm_gpu *gpu)
  199. {
  200. DBG("%s", gpu->name);
  201. del_timer(&gpu->inactive_timer);
  202. if (gpu->inactive) {
  203. enable_clk(gpu);
  204. enable_axi(gpu);
  205. gpu->inactive = false;
  206. }
  207. }
  208. static void inactive_start(struct msm_gpu *gpu)
  209. {
  210. DBG("%s", gpu->name);
  211. mod_timer(&gpu->inactive_timer,
  212. round_jiffies_up(jiffies + DRM_MSM_INACTIVE_JIFFIES));
  213. }
  214. /*
  215. * Hangcheck detection for locked gpu:
  216. */
  217. static void recover_worker(struct work_struct *work)
  218. {
  219. struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
  220. struct drm_device *dev = gpu->dev;
  221. dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
  222. mutex_lock(&dev->struct_mutex);
  223. if (msm_gpu_active(gpu)) {
  224. inactive_cancel(gpu);
  225. gpu->funcs->recover(gpu);
  226. }
  227. mutex_unlock(&dev->struct_mutex);
  228. msm_gpu_retire(gpu);
  229. }
  230. static void hangcheck_timer_reset(struct msm_gpu *gpu)
  231. {
  232. DBG("%s", gpu->name);
  233. mod_timer(&gpu->hangcheck_timer,
  234. round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
  235. }
  236. static void hangcheck_handler(unsigned long data)
  237. {
  238. struct msm_gpu *gpu = (struct msm_gpu *)data;
  239. struct drm_device *dev = gpu->dev;
  240. struct msm_drm_private *priv = dev->dev_private;
  241. uint32_t fence = gpu->funcs->last_fence(gpu);
  242. if (fence != gpu->hangcheck_fence) {
  243. /* some progress has been made.. ya! */
  244. gpu->hangcheck_fence = fence;
  245. } else if (fence < gpu->submitted_fence) {
  246. /* no progress and not done.. hung! */
  247. gpu->hangcheck_fence = fence;
  248. dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
  249. gpu->name);
  250. dev_err(dev->dev, "%s: completed fence: %u\n",
  251. gpu->name, fence);
  252. dev_err(dev->dev, "%s: submitted fence: %u\n",
  253. gpu->name, gpu->submitted_fence);
  254. queue_work(priv->wq, &gpu->recover_work);
  255. }
  256. /* if still more pending work, reset the hangcheck timer: */
  257. if (gpu->submitted_fence > gpu->hangcheck_fence)
  258. hangcheck_timer_reset(gpu);
  259. /* workaround for missing irq: */
  260. queue_work(priv->wq, &gpu->retire_work);
  261. }
  262. /*
  263. * Performance Counters:
  264. */
  265. /* called under perf_lock */
  266. static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
  267. {
  268. uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
  269. int i, n = min(ncntrs, gpu->num_perfcntrs);
  270. /* read current values: */
  271. for (i = 0; i < gpu->num_perfcntrs; i++)
  272. current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);
  273. /* update cntrs: */
  274. for (i = 0; i < n; i++)
  275. cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];
  276. /* save current values: */
  277. for (i = 0; i < gpu->num_perfcntrs; i++)
  278. gpu->last_cntrs[i] = current_cntrs[i];
  279. return n;
  280. }
  281. static void update_sw_cntrs(struct msm_gpu *gpu)
  282. {
  283. ktime_t time;
  284. uint32_t elapsed;
  285. unsigned long flags;
  286. spin_lock_irqsave(&gpu->perf_lock, flags);
  287. if (!gpu->perfcntr_active)
  288. goto out;
  289. time = ktime_get();
  290. elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));
  291. gpu->totaltime += elapsed;
  292. if (gpu->last_sample.active)
  293. gpu->activetime += elapsed;
  294. gpu->last_sample.active = msm_gpu_active(gpu);
  295. gpu->last_sample.time = time;
  296. out:
  297. spin_unlock_irqrestore(&gpu->perf_lock, flags);
  298. }
  299. void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
  300. {
  301. unsigned long flags;
  302. spin_lock_irqsave(&gpu->perf_lock, flags);
  303. /* we could dynamically enable/disable perfcntr registers too.. */
  304. gpu->last_sample.active = msm_gpu_active(gpu);
  305. gpu->last_sample.time = ktime_get();
  306. gpu->activetime = gpu->totaltime = 0;
  307. gpu->perfcntr_active = true;
  308. update_hw_cntrs(gpu, 0, NULL);
  309. spin_unlock_irqrestore(&gpu->perf_lock, flags);
  310. }
  311. void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
  312. {
  313. gpu->perfcntr_active = false;
  314. }
  315. /* returns -errno or # of cntrs sampled */
  316. int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
  317. uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
  318. {
  319. unsigned long flags;
  320. int ret;
  321. spin_lock_irqsave(&gpu->perf_lock, flags);
  322. if (!gpu->perfcntr_active) {
  323. ret = -EINVAL;
  324. goto out;
  325. }
  326. *activetime = gpu->activetime;
  327. *totaltime = gpu->totaltime;
  328. gpu->activetime = gpu->totaltime = 0;
  329. ret = update_hw_cntrs(gpu, ncntrs, cntrs);
  330. out:
  331. spin_unlock_irqrestore(&gpu->perf_lock, flags);
  332. return ret;
  333. }
  334. /*
  335. * Cmdstream submission/retirement:
  336. */
  337. static void retire_worker(struct work_struct *work)
  338. {
  339. struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
  340. struct drm_device *dev = gpu->dev;
  341. uint32_t fence = gpu->funcs->last_fence(gpu);
  342. msm_update_fence(gpu->dev, fence);
  343. mutex_lock(&dev->struct_mutex);
  344. while (!list_empty(&gpu->active_list)) {
  345. struct msm_gem_object *obj;
  346. obj = list_first_entry(&gpu->active_list,
  347. struct msm_gem_object, mm_list);
  348. if ((obj->read_fence <= fence) &&
  349. (obj->write_fence <= fence)) {
  350. /* move to inactive: */
  351. msm_gem_move_to_inactive(&obj->base);
  352. msm_gem_put_iova(&obj->base, gpu->id);
  353. drm_gem_object_unreference(&obj->base);
  354. } else {
  355. break;
  356. }
  357. }
  358. mutex_unlock(&dev->struct_mutex);
  359. if (!msm_gpu_active(gpu))
  360. inactive_start(gpu);
  361. }
  362. /* call from irq handler to schedule work to retire bo's */
  363. void msm_gpu_retire(struct msm_gpu *gpu)
  364. {
  365. struct msm_drm_private *priv = gpu->dev->dev_private;
  366. queue_work(priv->wq, &gpu->retire_work);
  367. update_sw_cntrs(gpu);
  368. }
  369. /* add bo's to gpu's ring, and kick gpu: */
  370. int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
  371. struct msm_file_private *ctx)
  372. {
  373. struct drm_device *dev = gpu->dev;
  374. struct msm_drm_private *priv = dev->dev_private;
  375. int i, ret;
  376. submit->fence = ++priv->next_fence;
  377. gpu->submitted_fence = submit->fence;
  378. inactive_cancel(gpu);
  379. msm_rd_dump_submit(submit);
  380. gpu->submitted_fence = submit->fence;
  381. update_sw_cntrs(gpu);
  382. ret = gpu->funcs->submit(gpu, submit, ctx);
  383. priv->lastctx = ctx;
  384. for (i = 0; i < submit->nr_bos; i++) {
  385. struct msm_gem_object *msm_obj = submit->bos[i].obj;
  386. /* can't happen yet.. but when we add 2d support we'll have
  387. * to deal w/ cross-ring synchronization:
  388. */
  389. WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));
  390. if (!is_active(msm_obj)) {
  391. uint32_t iova;
  392. /* ring takes a reference to the bo and iova: */
  393. drm_gem_object_reference(&msm_obj->base);
  394. msm_gem_get_iova_locked(&msm_obj->base,
  395. submit->gpu->id, &iova);
  396. }
  397. if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
  398. msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
  399. if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
  400. msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
  401. }
  402. hangcheck_timer_reset(gpu);
  403. return ret;
  404. }
  405. /*
  406. * Init/Cleanup:
  407. */
  408. static irqreturn_t irq_handler(int irq, void *data)
  409. {
  410. struct msm_gpu *gpu = data;
  411. return gpu->funcs->irq(gpu);
  412. }
  413. static const char *clk_names[] = {
  414. "src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk",
  415. };
  416. int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
  417. struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
  418. const char *name, const char *ioname, const char *irqname, int ringsz)
  419. {
  420. struct iommu_domain *iommu;
  421. int i, ret;
  422. if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
  423. gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);
  424. gpu->dev = drm;
  425. gpu->funcs = funcs;
  426. gpu->name = name;
  427. gpu->inactive = true;
  428. INIT_LIST_HEAD(&gpu->active_list);
  429. INIT_WORK(&gpu->retire_work, retire_worker);
  430. INIT_WORK(&gpu->inactive_work, inactive_worker);
  431. INIT_WORK(&gpu->recover_work, recover_worker);
  432. setup_timer(&gpu->inactive_timer, inactive_handler,
  433. (unsigned long)gpu);
  434. setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
  435. (unsigned long)gpu);
  436. spin_lock_init(&gpu->perf_lock);
  437. BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));
  438. /* Map registers: */
  439. gpu->mmio = msm_ioremap(pdev, ioname, name);
  440. if (IS_ERR(gpu->mmio)) {
  441. ret = PTR_ERR(gpu->mmio);
  442. goto fail;
  443. }
  444. /* Get Interrupt: */
  445. gpu->irq = platform_get_irq_byname(pdev, irqname);
  446. if (gpu->irq < 0) {
  447. ret = gpu->irq;
  448. dev_err(drm->dev, "failed to get irq: %d\n", ret);
  449. goto fail;
  450. }
  451. ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
  452. IRQF_TRIGGER_HIGH, gpu->name, gpu);
  453. if (ret) {
  454. dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
  455. goto fail;
  456. }
  457. /* Acquire clocks: */
  458. for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
  459. gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]);
  460. DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
  461. if (IS_ERR(gpu->grp_clks[i]))
  462. gpu->grp_clks[i] = NULL;
  463. }
  464. gpu->ebi1_clk = devm_clk_get(&pdev->dev, "bus_clk");
  465. DBG("ebi1_clk: %p", gpu->ebi1_clk);
  466. if (IS_ERR(gpu->ebi1_clk))
  467. gpu->ebi1_clk = NULL;
  468. /* Acquire regulators: */
  469. gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
  470. DBG("gpu_reg: %p", gpu->gpu_reg);
  471. if (IS_ERR(gpu->gpu_reg))
  472. gpu->gpu_reg = NULL;
  473. gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
  474. DBG("gpu_cx: %p", gpu->gpu_cx);
  475. if (IS_ERR(gpu->gpu_cx))
  476. gpu->gpu_cx = NULL;
  477. /* Setup IOMMU.. eventually we will (I think) do this once per context
  478. * and have separate page tables per context. For now, to keep things
  479. * simple and to get something working, just use a single address space:
  480. */
  481. iommu = iommu_domain_alloc(&platform_bus_type);
  482. if (iommu) {
  483. dev_info(drm->dev, "%s: using IOMMU\n", name);
  484. gpu->mmu = msm_iommu_new(&pdev->dev, iommu);
  485. } else {
  486. dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
  487. }
  488. gpu->id = msm_register_mmu(drm, gpu->mmu);
  489. /* Create ringbuffer: */
  490. mutex_lock(&drm->struct_mutex);
  491. gpu->rb = msm_ringbuffer_new(gpu, ringsz);
  492. mutex_unlock(&drm->struct_mutex);
  493. if (IS_ERR(gpu->rb)) {
  494. ret = PTR_ERR(gpu->rb);
  495. gpu->rb = NULL;
  496. dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
  497. goto fail;
  498. }
  499. bs_init(gpu);
  500. return 0;
  501. fail:
  502. return ret;
  503. }
  504. void msm_gpu_cleanup(struct msm_gpu *gpu)
  505. {
  506. DBG("%s", gpu->name);
  507. WARN_ON(!list_empty(&gpu->active_list));
  508. bs_fini(gpu);
  509. if (gpu->rb) {
  510. if (gpu->rb_iova)
  511. msm_gem_put_iova(gpu->rb->bo, gpu->id);
  512. msm_ringbuffer_destroy(gpu->rb);
  513. }
  514. if (gpu->mmu)
  515. gpu->mmu->funcs->destroy(gpu->mmu);
  516. }