/*
 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "gf100.h"
#include "ctxgf100.h"

#include <subdev/timer.h>

#include <nvif/class.h>
/* Free a pack list built by one of the gk20a_gr_*_to_* converters below.
 * Those converters allocate the pack headers and init entries as a single
 * vzalloc() region, so a single vfree() releases everything; vfree(NULL)
 * is a no-op, so never-loaded packs are safe to pass here.
 */
static void
gk20a_gr_init_dtor(struct gf100_gr_pack *pack)
{
	vfree(pack);
}
/* One record of an "av" (address/value) firmware image: a register
 * address paired with the value to write there.
 */
struct gk20a_fw_av
{
	u32 addr;
	u32 data;
};
  36. static struct gf100_gr_pack *
  37. gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc)
  38. {
  39. struct gf100_gr_init *init;
  40. struct gf100_gr_pack *pack;
  41. const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
  42. int i;
  43. pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
  44. if (!pack)
  45. return ERR_PTR(-ENOMEM);
  46. init = (void *)(pack + 2);
  47. pack[0].init = init;
  48. for (i = 0; i < nent; i++) {
  49. struct gf100_gr_init *ent = &init[i];
  50. struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
  51. ent->addr = av->addr;
  52. ent->data = av->data;
  53. ent->count = 1;
  54. ent->pitch = 1;
  55. }
  56. return pack;
  57. }
/* One record of an "aiv" (address/index/value) firmware image.
 * NOTE(review): gk20a_gr_aiv_to_init() below consumes only addr and
 * data; the index field is currently ignored — confirm that is intended.
 */
struct gk20a_fw_aiv
{
	u32 addr;
	u32 index;
	u32 data;
};
  64. static struct gf100_gr_pack *
  65. gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc)
  66. {
  67. struct gf100_gr_init *init;
  68. struct gf100_gr_pack *pack;
  69. const int nent = (fuc->size / sizeof(struct gk20a_fw_aiv));
  70. int i;
  71. pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
  72. if (!pack)
  73. return ERR_PTR(-ENOMEM);
  74. init = (void *)(pack + 2);
  75. pack[0].init = init;
  76. for (i = 0; i < nent; i++) {
  77. struct gf100_gr_init *ent = &init[i];
  78. struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc->data)[i];
  79. ent->addr = av->addr;
  80. ent->data = av->data;
  81. ent->count = 1;
  82. ent->pitch = 1;
  83. }
  84. return pack;
  85. }
/* Convert a "method init" firmware image into a pack list with one pack
 * per run of consecutive records sharing the same object class.  Each
 * record encodes the class in the low 16 bits of addr and the method in
 * the high 16 bits; the >>14 shift (rather than >>16) scales the method
 * field by 4 — presumably converting it to a byte offset, TODO confirm
 * against the class method headers.
 *
 * Returns the pack list (free with gk20a_gr_init_dtor()) or an ERR_PTR:
 * -ENOMEM on allocation failure, -ENOSPC if the image contains more
 * distinct class runs than fit in max_classes (one slot is reserved for
 * the zero terminator, hence the >= comparison).
 */
static struct gf100_gr_pack *
gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc)
{
	struct gf100_gr_init *init;
	struct gf100_gr_pack *pack;
	/* We don't suppose we will initialize more than 16 classes here... */
	static const unsigned int max_classes = 16;
	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
	int i, classidx = 0;
	u32 prevclass = 0;

	/* Pack headers and init entries share one allocation, as in
	 * gk20a_gr_av_to_init().
	 */
	pack = vzalloc((sizeof(*pack) * max_classes) +
		       (sizeof(*init) * (nent + 1)));
	if (!pack)
		return ERR_PTR(-ENOMEM);

	init = (void *)(pack + max_classes);

	for (i = 0; i < nent; i++) {
		struct gf100_gr_init *ent = &init[i];
		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
		u32 class = av->addr & 0xffff;
		u32 addr = (av->addr & 0xffff0000) >> 14;

		/* Start a new pack whenever the class changes. */
		if (prevclass != class) {
			pack[classidx].init = ent;
			pack[classidx].type = class;
			prevclass = class;
			if (++classidx >= max_classes) {
				vfree(pack);
				return ERR_PTR(-ENOSPC);
			}
		}

		ent->addr = addr;
		ent->data = av->data;
		ent->count = 1;
		ent->pitch = 1;
	}

	return pack;
}
/* Wait for the FECS and GPCCS falcons to finish scrubbing their
 * internal memories: poll each status register until bits 1-2 (0x6)
 * clear, with a 2000ms budget per falcon via nvkm_msec().
 *
 * Returns 0 when both falcons are ready, -ETIMEDOUT otherwise.
 */
static int
gk20a_gr_wait_mem_scrubbing(struct gf100_gr *gr)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;

	/* FECS scrub status at 0x40910c. */
	if (nvkm_msec(device, 2000,
		if (!(nvkm_rd32(device, 0x40910c) & 0x00000006))
			break;
	) < 0) {
		nvkm_error(subdev, "FECS mem scrubbing timeout\n");
		return -ETIMEDOUT;
	}

	/* GPCCS scrub status at 0x41a10c. */
	if (nvkm_msec(device, 2000,
		if (!(nvkm_rd32(device, 0x41a10c) & 0x00000006))
			break;
	) < 0) {
		nvkm_error(subdev, "GPCCS mem scrubbing timeout\n");
		return -ETIMEDOUT;
	}

	return 0;
}
/* Program which hardware-warning (HWW) ESR conditions the SM reports.
 * NOTE(review): the 0x1ffffe / 0x7f masks are taken as-is from the
 * GK20A bring-up values; confirm bit meanings against the register
 * documentation before changing them.
 */
static void
gk20a_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;

	nvkm_wr32(device, 0x419e44, 0x1ffffe);
	nvkm_wr32(device, 0x419e4c, 0x7f);
}
/* One-time GK20A graphics engine initialisation: load the non-context
 * register state from firmware, wait for falcon memory scrubbing,
 * program the MMU debug buffers and zcull/TPC layout, enable interrupts
 * and exceptions, then hand off to the ctxctl firmware bootstrap.
 *
 * Returns 0 on success or a negative errno from any of the wait or
 * ctxctl steps.  Register-write order mirrors the hardware bring-up
 * sequence and should not be rearranged.
 */
int
gk20a_gr_init(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	/* Per-GPC constant derived from the total TPC count; written to
	 * GPC_UNIT 0x0918 and GPC_BCAST 0x3fd4 below.
	 */
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
	u32 data[TPC_MAX / 8] = {};
	u8 tpcnr[GPC_MAX];
	int gpc, tpc;
	int ret, i;

	/* Clear SCC RAM */
	nvkm_wr32(device, 0x40802c, 0x1);

	/* Load non-context register state straight from firmware. */
	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);

	ret = gk20a_gr_wait_mem_scrubbing(gr);
	if (ret)
		return ret;

	ret = gf100_gr_wait_idle(gr);
	if (ret)
		return ret;

	/* MMU debug buffer */
	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(gr->unk4188b8) >> 8);

	if (gr->func->init_gpc_mmu)
		gr->func->init_gpc_mmu(gr);

	/* Set the PE as stream master */
	nvkm_mask(device, 0x503018, 0x1, 0x1);

	/* Zcull init: distribute TPCs round-robin across the GPCs,
	 * packing one 4-bit TPC index per slot into data[].
	 */
	memset(data, 0x00, sizeof(data));
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while (!tpcnr[gpc]);
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);

	/* Per-GPC TPC counts and the magic ratio computed above. */
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
			  gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
			  gr->tpc_total);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
	}

	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);

	/* Enable FIFO access */
	nvkm_wr32(device, 0x400500, 0x00010001);

	/* Enable interrupts */
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);

	/* Enable FECS error interrupts */
	nvkm_wr32(device, 0x409c24, 0x000f0000);

	/* Enable hardware warning exceptions */
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);

	if (gr->func->set_hww_esr_report_mask)
		gr->func->set_hww_esr_report_mask(gr);

	/* Enable TPC exceptions per GPC */
	nvkm_wr32(device, 0x419d0c, 0x2);
	nvkm_wr32(device, 0x41ac94, (((1 << gr->tpc_total) - 1) & 0xff) << 16);

	/* Reset and enable all exceptions */
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);

	gf100_gr_zbc_init(gr);

	return gf100_gr_init_ctxctl(gr);
}
/* Release the firmware-derived pack lists loaded by gk20a_gr_new_().
 * Each member may be NULL if construction failed early; that is safe
 * because gk20a_gr_init_dtor() tolerates NULL.
 */
void
gk20a_gr_dtor(struct gf100_gr *gr)
{
	gk20a_gr_init_dtor(gr->fuc_method);
	gk20a_gr_init_dtor(gr->fuc_bundle);
	gk20a_gr_init_dtor(gr->fuc_sw_ctx);
	gk20a_gr_init_dtor(gr->fuc_sw_nonctx);
}
  230. int
  231. gk20a_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
  232. int index, struct nvkm_gr **pgr)
  233. {
  234. struct gf100_gr_fuc fuc;
  235. struct gf100_gr *gr;
  236. int ret;
  237. if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
  238. return -ENOMEM;
  239. *pgr = &gr->base;
  240. ret = gf100_gr_ctor(func, device, index, gr);
  241. if (ret)
  242. return ret;
  243. ret = gf100_gr_ctor_fw(gr, "sw_nonctx", &fuc);
  244. if (ret)
  245. return ret;
  246. gr->fuc_sw_nonctx = gk20a_gr_av_to_init(&fuc);
  247. gf100_gr_dtor_fw(&fuc);
  248. if (IS_ERR(gr->fuc_sw_nonctx))
  249. return PTR_ERR(gr->fuc_sw_nonctx);
  250. ret = gf100_gr_ctor_fw(gr, "sw_ctx", &fuc);
  251. if (ret)
  252. return ret;
  253. gr->fuc_sw_ctx = gk20a_gr_aiv_to_init(&fuc);
  254. gf100_gr_dtor_fw(&fuc);
  255. if (IS_ERR(gr->fuc_sw_ctx))
  256. return PTR_ERR(gr->fuc_sw_ctx);
  257. ret = gf100_gr_ctor_fw(gr, "sw_bundle_init", &fuc);
  258. if (ret)
  259. return ret;
  260. gr->fuc_bundle = gk20a_gr_av_to_init(&fuc);
  261. gf100_gr_dtor_fw(&fuc);
  262. if (IS_ERR(gr->fuc_bundle))
  263. return PTR_ERR(gr->fuc_bundle);
  264. ret = gf100_gr_ctor_fw(gr, "sw_method_init", &fuc);
  265. if (ret)
  266. return ret;
  267. gr->fuc_method = gk20a_gr_av_to_method(&fuc);
  268. gf100_gr_dtor_fw(&fuc);
  269. if (IS_ERR(gr->fuc_method))
  270. return PTR_ERR(gr->fuc_method);
  271. return 0;
  272. }
/* GK20A engine description: gf100 base implementation with the
 * firmware-driven init/teardown path above and Kepler-class objects
 * exposed to userspace.
 */
static const struct gf100_gr_func
gk20a_gr = {
	.dtor = gk20a_gr_dtor,
	.init = gk20a_gr_init,
	.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
	.ppc_nr = 1,
	.grctx = &gk20a_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, KEPLER_INLINE_TO_MEMORY_A },
		{ -1, -1, KEPLER_C, &gf100_fermi },
		{ -1, -1, KEPLER_COMPUTE_A },
		{}
	}
};
/* Public constructor: instantiate a GK20A GR engine using the function
 * table above.
 */
int
gk20a_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	return gk20a_gr_new_(&gk20a_gr, device, index, pgr);
}