/*
 * Copyright 2012 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */
#include "gf100.h"
#include "ctxgf100.h"
#include "fuc/os.h"

#include <core/client.h>
#include <core/option.h>
#include <subdev/fb.h>
#include <subdev/mc.h>
#include <subdev/pmu.h>
#include <subdev/timer.h>
#include <engine/fifo.h>

#include <nvif/class.h>
#include <nvif/unpack.h>

/*******************************************************************************
 * Zero Bandwidth Clear
 ******************************************************************************/

static void
gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;

	if (gr->zbc_color[zbc].format) {
		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
	}
	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
}
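
/*
 * Look up (or allocate) a ZBC table slot for the given colour format/value
 * pair.  An existing slot is reused only when both the DS and L2 values
 * match; a DS match with a differing L2 value indicates an inconsistent
 * table and is warned about.  Returns the slot index, or -ENOSPC once the
 * LTC's ZBC table is full.
 */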
static int
gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
		       const u32 ds[4], const u32 l2[4])
{
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
		if (gr->zbc_color[i].format) {
			if (gr->zbc_color[i].format != format)
				continue;
			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
				   gr->zbc_color[i].ds)))
				continue;
			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
				   gr->zbc_color[i].l2))) {
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

	if (zbc < 0)
		return zbc;

	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
	gr->zbc_color[zbc].format = format;
	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
	gf100_gr_zbc_clear_color(gr, zbc);
	return zbc;
}
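
/* Depth counterpart of the colour path above; same match/allocate rules. */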
static void
gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;

	if (gr->zbc_depth[zbc].format)
		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
}

static int
gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
		       const u32 ds, const u32 l2)
{
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
		if (gr->zbc_depth[i].format) {
			if (gr->zbc_depth[i].format != format)
				continue;
			if (gr->zbc_depth[i].ds != ds)
				continue;
			if (gr->zbc_depth[i].l2 != l2) {
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

	if (zbc < 0)
		return zbc;

	gr->zbc_depth[zbc].format = format;
	gr->zbc_depth[zbc].ds = ds;
	gr->zbc_depth[zbc].l2 = l2;
	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
	gf100_gr_zbc_clear_depth(gr, zbc);
	return zbc;
}

/*******************************************************************************
 * Graphics object classes
 ******************************************************************************/
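
/*
 * Handler for the FERMI_A_ZBC_COLOR object method: validates the requested
 * clear-colour format against the known list before programming a ZBC slot,
 * and returns the allocated table index to the client.
 */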
static int
gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
{
	struct gf100_gr *gr = (void *)object->engine;
	union {
		struct fermi_a_zbc_color_v0 v0;
	} *args = data;
	int ret;

	if (nvif_unpack(args->v0, 0, 0, false)) {
		switch (args->v0.format) {
		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
						     args->v0.ds,
						     args->v0.l2);
			if (ret >= 0) {
				args->v0.index = ret;
				return 0;
			}
			break;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
{
	struct gf100_gr *gr = (void *)object->engine;
	union {
		struct fermi_a_zbc_depth_v0 v0;
	} *args = data;
	int ret;

	if (nvif_unpack(args->v0, 0, 0, false)) {
		switch (args->v0.format) {
		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
						     args->v0.ds,
						     args->v0.l2);
			return (ret >= 0) ? 0 : -ENOSPC;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
{
	switch (mthd) {
	case FERMI_A_ZBC_COLOR:
		return gf100_fermi_mthd_zbc_color(object, data, size);
	case FERMI_A_ZBC_DEPTH:
		return gf100_fermi_mthd_zbc_depth(object, data, size);
	default:
		break;
	}
	return -EINVAL;
}

const struct nvkm_object_func
gf100_fermi = {
	.mthd = gf100_fermi_mthd,
};
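
/*
 * A small set of methods is handled in software rather than by the class
 * itself: method 0x1528 (set shader exceptions) on the 0x..97/0x..c0
 * classes toggles what appear to be broadcast SM exception-enable masks.
 */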
static void
gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
{
	nvkm_wr32(device, 0x419e44, data ? 0xffffffff : 0x00000000);
	nvkm_wr32(device, 0x419e4c, data ? 0xffffffff : 0x00000000);
}

static bool
gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
{
	switch (class & 0x00ff) {
	case 0x97:
	case 0xc0:
		switch (mthd) {
		case 0x1528:
			gf100_gr_mthd_set_shader_exceptions(device, data);
			return true;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return false;
}

static int
gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
{
	struct gf100_gr *gr = gf100_gr(base);
	int c = 0;

	while (gr->func->sclass[c].oclass) {
		if (c++ == index) {
			*sclass = gr->func->sclass[index];
			return index;
		}
	}
	return c;
}

/*******************************************************************************
 * PGRAPH context
 ******************************************************************************/
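
/*
 * Copy the golden context image into the channel's context object and patch
 * in the pointer to its mmio list buffer.  The patch offsets differ between
 * the built-in ucode and NVIDIA's external firmware, hence the two layouts
 * below.
 */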
static int
gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
		   int align, struct nvkm_gpuobj **pgpuobj)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	struct gf100_gr *gr = chan->gr;
	int ret, i;

	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
			      align, false, parent, pgpuobj);
	if (ret)
		return ret;

	nvkm_kmap(*pgpuobj);
	for (i = 0; i < gr->size; i += 4)
		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);

	if (!gr->firmware) {
		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8);
	} else {
		nvkm_wo32(*pgpuobj, 0xf4, 0);
		nvkm_wo32(*pgpuobj, 0xf8, 0);
		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x1c, 1);
		nvkm_wo32(*pgpuobj, 0x20, 0);
		nvkm_wo32(*pgpuobj, 0x28, 0);
		nvkm_wo32(*pgpuobj, 0x2c, 0);
	}
	nvkm_done(*pgpuobj);
	return 0;
}

static void *
gf100_gr_chan_dtor(struct nvkm_object *object)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	int i;

	for (i = 0; i < ARRAY_SIZE(chan->data); i++) {
		if (chan->data[i].vma.node) {
			nvkm_vm_unmap(&chan->data[i].vma);
			nvkm_vm_put(&chan->data[i].vma);
		}
		nvkm_memory_del(&chan->data[i].mem);
	}

	if (chan->mmio_vma.node) {
		nvkm_vm_unmap(&chan->mmio_vma);
		nvkm_vm_put(&chan->mmio_vma);
	}
	nvkm_memory_del(&chan->mmio);
	return chan;
}

static const struct nvkm_object_func
gf100_gr_chan = {
	.dtor = gf100_gr_chan_dtor,
	.bind = gf100_gr_chan_bind,
};

static int
gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
		  const struct nvkm_oclass *oclass,
		  struct nvkm_object **pobject)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct gf100_gr_data *data = gr->mmio_data;
	struct gf100_gr_mmio *mmio = gr->mmio_list;
	struct gf100_gr_chan *chan;
	struct nvkm_device *device = gr->base.engine.subdev.device;
	int ret, i;

	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
	chan->gr = gr;
	*pobject = &chan->object;

	/* allocate memory for a "mmio list" buffer that's used by the HUB
	 * fuc to modify some per-context register settings on first load
	 * of the context.
	 */
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
			      false, &chan->mmio);
	if (ret)
		return ret;

	ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW |
			  NV_MEM_ACCESS_SYS, &chan->mmio_vma);
	if (ret)
		return ret;

	nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0);

	/* allocate buffers referenced by mmio list */
	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
				      data->size, data->align, false,
				      &chan->data[i].mem);
		if (ret)
			return ret;

		ret = nvkm_vm_get(fifoch->vm,
				  nvkm_memory_size(chan->data[i].mem), 12,
				  data->access, &chan->data[i].vma);
		if (ret)
			return ret;

		nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0);
		data++;
	}

	/* finally, fill in the mmio list and point the context at it */
	nvkm_kmap(chan->mmio);
	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
		u32 addr = mmio->addr;
		u32 data = mmio->data;

		if (mmio->buffer >= 0) {
			u64 info = chan->data[mmio->buffer].vma.offset;
			data |= info >> mmio->shift;
		}

		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
		mmio++;
	}
	nvkm_done(chan->mmio);
	return 0;
}

/*******************************************************************************
 * PGRAPH register lists
 ******************************************************************************/
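
/*
 * Each gf100_gr_init entry is { addr, count, pitch, data }: write 'data' to
 * 'count' registers starting at 'addr', stepping by 'pitch' bytes (see
 * gf100_gr_mmio() below).  A gf100_gr_pack groups several such lists so
 * they can be replayed in one pass.
 */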
const struct gf100_gr_init
gf100_gr_init_main_0[] = {
	{ 0x400080, 1, 0x04, 0x003083c2 },
	{ 0x400088, 1, 0x04, 0x00006fe7 },
	{ 0x40008c, 1, 0x04, 0x00000000 },
	{ 0x400090, 1, 0x04, 0x00000030 },
	{ 0x40013c, 1, 0x04, 0x013901f7 },
	{ 0x400140, 1, 0x04, 0x00000100 },
	{ 0x400144, 1, 0x04, 0x00000000 },
	{ 0x400148, 1, 0x04, 0x00000110 },
	{ 0x400138, 1, 0x04, 0x00000000 },
	{ 0x400130, 2, 0x04, 0x00000000 },
	{ 0x400124, 1, 0x04, 0x00000002 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_fe_0[] = {
	{ 0x40415c, 1, 0x04, 0x00000000 },
	{ 0x404170, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_pri_0[] = {
	{ 0x404488, 2, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_rstr2d_0[] = {
	{ 0x407808, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_pd_0[] = {
	{ 0x406024, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_ds_0[] = {
	{ 0x405844, 1, 0x04, 0x00ffffff },
	{ 0x405850, 1, 0x04, 0x00000000 },
	{ 0x405908, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_scc_0[] = {
	{ 0x40803c, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_prop_0[] = {
	{ 0x4184a0, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_gpc_unk_0[] = {
	{ 0x418604, 1, 0x04, 0x00000000 },
	{ 0x418680, 1, 0x04, 0x00000000 },
	{ 0x418714, 1, 0x04, 0x80000000 },
	{ 0x418384, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_setup_0[] = {
	{ 0x418814, 3, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_crstr_0[] = {
	{ 0x418b04, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_setup_1[] = {
	{ 0x4188c8, 1, 0x04, 0x80000000 },
	{ 0x4188cc, 1, 0x04, 0x00000000 },
	{ 0x4188d0, 1, 0x04, 0x00010000 },
	{ 0x4188d4, 1, 0x04, 0x00000001 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_zcull_0[] = {
	{ 0x418910, 1, 0x04, 0x00010001 },
	{ 0x418914, 1, 0x04, 0x00000301 },
	{ 0x418918, 1, 0x04, 0x00800000 },
	{ 0x418980, 1, 0x04, 0x77777770 },
	{ 0x418984, 3, 0x04, 0x77777777 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_gpm_0[] = {
	{ 0x418c04, 1, 0x04, 0x00000000 },
	{ 0x418c88, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_gpc_unk_1[] = {
	{ 0x418d00, 1, 0x04, 0x00000000 },
	{ 0x418f08, 1, 0x04, 0x00000000 },
	{ 0x418e00, 1, 0x04, 0x00000050 },
	{ 0x418e08, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_gcc_0[] = {
	{ 0x41900c, 1, 0x04, 0x00000000 },
	{ 0x419018, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_tpccs_0[] = {
	{ 0x419d08, 2, 0x04, 0x00000000 },
	{ 0x419d10, 1, 0x04, 0x00000014 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_tex_0[] = {
	{ 0x419ab0, 1, 0x04, 0x00000000 },
	{ 0x419ab8, 1, 0x04, 0x000000e7 },
	{ 0x419abc, 2, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_pe_0[] = {
	{ 0x41980c, 3, 0x04, 0x00000000 },
	{ 0x419844, 1, 0x04, 0x00000000 },
	{ 0x41984c, 1, 0x04, 0x00005bc5 },
	{ 0x419850, 4, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_l1c_0[] = {
	{ 0x419c98, 1, 0x04, 0x00000000 },
	{ 0x419ca8, 1, 0x04, 0x80000000 },
	{ 0x419cb4, 1, 0x04, 0x00000000 },
	{ 0x419cb8, 1, 0x04, 0x00008bf4 },
	{ 0x419cbc, 1, 0x04, 0x28137606 },
	{ 0x419cc0, 2, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_wwdx_0[] = {
	{ 0x419bd4, 1, 0x04, 0x00800000 },
	{ 0x419bdc, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_tpccs_1[] = {
	{ 0x419d2c, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_mpc_0[] = {
	{ 0x419c0c, 1, 0x04, 0x00000000 },
	{}
};

static const struct gf100_gr_init
gf100_gr_init_sm_0[] = {
	{ 0x419e00, 1, 0x04, 0x00000000 },
	{ 0x419ea0, 1, 0x04, 0x00000000 },
	{ 0x419ea4, 1, 0x04, 0x00000100 },
	{ 0x419ea8, 1, 0x04, 0x00001100 },
	{ 0x419eac, 1, 0x04, 0x11100702 },
	{ 0x419eb0, 1, 0x04, 0x00000003 },
	{ 0x419eb4, 4, 0x04, 0x00000000 },
	{ 0x419ec8, 1, 0x04, 0x06060618 },
	{ 0x419ed0, 1, 0x04, 0x0eff0e38 },
	{ 0x419ed4, 1, 0x04, 0x011104f1 },
	{ 0x419edc, 1, 0x04, 0x00000000 },
	{ 0x419f00, 1, 0x04, 0x00000000 },
	{ 0x419f2c, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_be_0[] = {
	{ 0x40880c, 1, 0x04, 0x00000000 },
	{ 0x408910, 9, 0x04, 0x00000000 },
	{ 0x408950, 1, 0x04, 0x00000000 },
	{ 0x408954, 1, 0x04, 0x0000ffff },
	{ 0x408984, 1, 0x04, 0x00000000 },
	{ 0x408988, 1, 0x04, 0x08040201 },
	{ 0x40898c, 1, 0x04, 0x80402010 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_fe_1[] = {
	{ 0x4040f0, 1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_pe_1[] = {
	{ 0x419880, 1, 0x04, 0x00000002 },
	{}
};

static const struct gf100_gr_pack
gf100_gr_pack_mmio[] = {
	{ gf100_gr_init_main_0 },
	{ gf100_gr_init_fe_0 },
	{ gf100_gr_init_pri_0 },
	{ gf100_gr_init_rstr2d_0 },
	{ gf100_gr_init_pd_0 },
	{ gf100_gr_init_ds_0 },
	{ gf100_gr_init_scc_0 },
	{ gf100_gr_init_prop_0 },
	{ gf100_gr_init_gpc_unk_0 },
	{ gf100_gr_init_setup_0 },
	{ gf100_gr_init_crstr_0 },
	{ gf100_gr_init_setup_1 },
	{ gf100_gr_init_zcull_0 },
	{ gf100_gr_init_gpm_0 },
	{ gf100_gr_init_gpc_unk_1 },
	{ gf100_gr_init_gcc_0 },
	{ gf100_gr_init_tpccs_0 },
	{ gf100_gr_init_tex_0 },
	{ gf100_gr_init_pe_0 },
	{ gf100_gr_init_l1c_0 },
	{ gf100_gr_init_wwdx_0 },
	{ gf100_gr_init_tpccs_1 },
	{ gf100_gr_init_mpc_0 },
	{ gf100_gr_init_sm_0 },
	{ gf100_gr_init_be_0 },
	{ gf100_gr_init_fe_1 },
	{ gf100_gr_init_pe_1 },
	{}
};

/*******************************************************************************
 * PGRAPH engine/subdev functions
 ******************************************************************************/
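
/*
 * Seed the ZBC tables with default clear values on first use, then replay
 * every slot into PGRAPH so the hardware tables match the driver's copy.
 */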
void
gf100_gr_zbc_init(struct gf100_gr *gr)
{
	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
	int index;

	if (!gr->zbc_color[0].format) {
		gf100_gr_zbc_color_get(gr, 1, &zero[0], &zero[4]);
		gf100_gr_zbc_color_get(gr, 2, &one[0], &one[4]);
		gf100_gr_zbc_color_get(gr, 4, &f32_0[0], &f32_0[4]);
		gf100_gr_zbc_color_get(gr, 4, &f32_1[0], &f32_1[4]);
		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
	}

	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
		gf100_gr_zbc_clear_color(gr, index);
	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
		gf100_gr_zbc_clear_depth(gr, index);
}

/**
 * Wait until GR goes idle. GR is considered idle if it is disabled by the
 * MC (0x200) register, or GR is not busy and a context switch is not in
 * progress.
 */
int
gf100_gr_wait_idle(struct gf100_gr *gr)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
	bool gr_enabled, ctxsw_active, gr_busy;

	do {
		/*
		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
		 * up-to-date
		 */
		nvkm_rd32(device, 0x400700);

		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;

		if (!gr_enabled || (!gr_busy && !ctxsw_active))
			return 0;
	} while (time_before(jiffies, end_jiffies));

	nvkm_error(subdev,
		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
		   gr_enabled, ctxsw_active, gr_busy);
	return -EAGAIN;
}
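
/* Replay a list of mmio writes described by gf100_gr_init/gf100_gr_pack. */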
void
gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;
		while (addr < next) {
			nvkm_wr32(device, addr, init->data);
			addr += init->pitch;
		}
	}
}
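
/*
 * Submit init values through the FE's icmd interface at 0x400200/0x400204;
 * 0x400208 bit 31 appears to gate the transfer, and each write is polled
 * for completion via 0x400700.
 */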
void
gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
	u32 data = 0;

	nvkm_wr32(device, 0x400208, 0x80000000);

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
			nvkm_wr32(device, 0x400204, init->data);
			data = init->data;
		}

		while (addr < next) {
			nvkm_wr32(device, 0x400200, addr);
			/*
			 * Wait for GR to go idle after submitting a
			 * GO_IDLE bundle
			 */
			if ((addr & 0xffff) == 0xe100)
				gf100_gr_wait_idle(gr);
			nvkm_msec(device, 2000,
				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
					break;
			);
			addr += init->pitch;
		}
	}

	nvkm_wr32(device, 0x400208, 0x00000000);
}
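
/*
 * Replay method (mthd) init lists by injecting them through 0x404488, with
 * the object class taken from pack->type and the data staged at 0x40448c.
 */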
void
gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
	u32 data = 0;

	pack_for_each_init(init, pack, p) {
		u32 ctrl = 0x80000000 | pack->type;
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
			nvkm_wr32(device, 0x40448c, init->data);
			data = init->data;
		}

		while (addr < next) {
			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
			addr += init->pitch;
		}
	}
}
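
/*
 * Pack the unit counts into a single value: GPC count in bits 7:0, total
 * TPC count in bits 15:8, ROP count in bits 39:32.
 */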
u64
gf100_gr_units(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	u64 cfg;

	cfg  = (u32)gr->gpc_nr;
	cfg |= (u32)gr->tpc_total << 8;
	cfg |= (u64)gr->rop_nr << 32;
	return cfg;
}

static const struct nvkm_bitfield gk104_sked_error[] = {
	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
	{ 0x00000800, "WARP_CSTACK_SIZE" },
	{ 0x00001000, "TOTAL_TEMP_SIZE" },
	{ 0x00002000, "REGISTER_COUNT" },
	{ 0x00040000, "TOTAL_THREADS" },
	{ 0x00100000, "PROGRAM_OFFSET" },
	{ 0x00200000, "SHARED_MEMORY_SIZE" },
	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
	{}
};

static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
	{ 0x00000002, "RT_PITCH_OVERRUN" },
	{ 0x00000010, "RT_WIDTH_OVERRUN" },
	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_LINEAR_MISMATCH" },
	{}
};

static void
gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
	u32 trap[4];

	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));

	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);

	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
			   "format = %x, storage type = %x\n",
		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
}

static const struct nvkm_enum gf100_mp_warp_error[] = {
	{ 0x00, "NO_ERROR" },
	{ 0x01, "STACK_MISMATCH" },
	{ 0x05, "MISALIGNED_PC" },
	{ 0x08, "MISALIGNED_GPR" },
	{ 0x09, "INVALID_OPCODE" },
	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
	{ 0x11, "INVALID_PARAM" },
	{}
};

static const struct nvkm_bitfield gf100_mp_global_error[] = {
	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
	{ 0x00000008, "OUT_OF_STACK_SPACE" },
	{}
};

static void
gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
	const struct nvkm_enum *warp;
	char glob[128];

	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);

	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
			   "global %08x [%s] warp %04x [%s]\n",
		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");

	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
}

static void
gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));

	if (stat & 0x00000001) {
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
		gf100_gr_trap_mp(gr, gpc, tpc);
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
		stat &= ~0x00000008;
	}

	if (stat) {
		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
	}
}

static void
gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
	int tpc;

	if (stat & 0x00000001) {
		gf100_gr_trap_gpc_rop(gr, gpc);
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
		stat &= ~0x00000008;
	}

	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
		u32 mask = 0x00010000 << tpc;
		if (stat & mask) {
			gf100_gr_trap_tpc(gr, gpc, tpc);
			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
			stat &= ~mask;
		}
	}

	if (stat) {
		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
	}
}

static void
gf100_gr_trap_intr(struct gf100_gr *gr)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	u32 trap = nvkm_rd32(device, 0x400108);
	int rop, gpc;

	if (trap & 0x00000001) {
		u32 stat = nvkm_rd32(device, 0x404000);
		nvkm_error(subdev, "DISPATCH %08x\n", stat);
		nvkm_wr32(device, 0x404000, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000001);
		trap &= ~0x00000001;
	}

	if (trap & 0x00000002) {
		u32 stat = nvkm_rd32(device, 0x404600);
		nvkm_error(subdev, "M2MF %08x\n", stat);
		nvkm_wr32(device, 0x404600, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000002);
		trap &= ~0x00000002;
	}

	if (trap & 0x00000008) {
		u32 stat = nvkm_rd32(device, 0x408030);
		nvkm_error(subdev, "CCACHE %08x\n", stat);
		nvkm_wr32(device, 0x408030, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000008);
		trap &= ~0x00000008;
	}

	if (trap & 0x00000010) {
		u32 stat = nvkm_rd32(device, 0x405840);
		nvkm_error(subdev, "SHADER %08x\n", stat);
		nvkm_wr32(device, 0x405840, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000010);
		trap &= ~0x00000010;
	}

	if (trap & 0x00000040) {
		u32 stat = nvkm_rd32(device, 0x40601c);
		nvkm_error(subdev, "UNK6 %08x\n", stat);
		nvkm_wr32(device, 0x40601c, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000040);
		trap &= ~0x00000040;
	}

	if (trap & 0x00000080) {
		u32 stat = nvkm_rd32(device, 0x404490);
		nvkm_error(subdev, "MACRO %08x\n", stat);
		nvkm_wr32(device, 0x404490, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000080);
		trap &= ~0x00000080;
	}

	if (trap & 0x00000100) {
		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;
		char sked[128];

		nvkm_snprintbf(sked, sizeof(sked), gk104_sked_error, stat);
		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, sked);

		if (stat)
			nvkm_wr32(device, 0x407020, 0x40000000);
		nvkm_wr32(device, 0x400108, 0x00000100);
		trap &= ~0x00000100;
	}

	if (trap & 0x01000000) {
		u32 stat = nvkm_rd32(device, 0x400118);
		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
			u32 mask = 0x00000001 << gpc;
			if (stat & mask) {
				gf100_gr_trap_gpc(gr, gpc);
				nvkm_wr32(device, 0x400118, mask);
				stat &= ~mask;
			}
		}
		nvkm_wr32(device, 0x400108, 0x01000000);
		trap &= ~0x01000000;
	}

	if (trap & 0x02000000) {
		for (rop = 0; rop < gr->rop_nr; rop++) {
			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
			nvkm_error(subdev, "ROP%d %08x %08x\n",
				   rop, statz, statc);
			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		}
		nvkm_wr32(device, 0x400108, 0x02000000);
		trap &= ~0x02000000;
	}

	if (trap) {
		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
		nvkm_wr32(device, 0x400108, trap);
	}
}

static void
gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	nvkm_error(subdev, "%06x - done %08x\n", base,
		   nvkm_rd32(device, base + 0x400));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x800),
		   nvkm_rd32(device, base + 0x804),
		   nvkm_rd32(device, base + 0x808),
		   nvkm_rd32(device, base + 0x80c));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x810),
		   nvkm_rd32(device, base + 0x814),
		   nvkm_rd32(device, base + 0x818),
		   nvkm_rd32(device, base + 0x81c));
}

void
gf100_gr_ctxctl_debug(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
	u32 gpc;

	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
	for (gpc = 0; gpc < gpcnr; gpc++)
		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
}

static void
gf100_gr_ctxctl_isr(struct gf100_gr *gr)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	u32 stat = nvkm_rd32(device, 0x409c18);

	if (stat & 0x00000001) {
		u32 code = nvkm_rd32(device, 0x409814);
		if (code == E_BAD_FWMTHD) {
			u32 class = nvkm_rd32(device, 0x409808);
			u32 addr = nvkm_rd32(device, 0x40980c);
			u32 subc = (addr & 0x00070000) >> 16;
			u32 mthd = (addr & 0x00003ffc);
			u32 data = nvkm_rd32(device, 0x409810);

			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
					   "mthd %04x data %08x\n",
				   subc, class, mthd, data);

			nvkm_wr32(device, 0x409c20, 0x00000001);
			stat &= ~0x00000001;
		} else {
			nvkm_error(subdev, "FECS ucode error %d\n", code);
		}
	}

	if (stat & 0x00080000) {
		nvkm_error(subdev, "FECS watchdog timeout\n");
		gf100_gr_ctxctl_debug(gr);
		nvkm_wr32(device, 0x409c20, 0x00080000);
		stat &= ~0x00080000;
	}

	if (stat) {
		nvkm_error(subdev, "FECS %08x\n", stat);
		gf100_gr_ctxctl_debug(gr);
		nvkm_wr32(device, 0x409c20, stat);
	}
}
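
/*
 * Top-level PGRAPH interrupt handler: decode the status bits in 0x400100,
 * attribute faults to a channel by looking up the instance address from
 * 0x409b00, and acknowledge each condition as it's handled.
 */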
static void
gf100_gr_intr(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	struct nvkm_fifo_chan *chan;
	unsigned long flags;
	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
	u32 stat = nvkm_rd32(device, 0x400100);
	u32 addr = nvkm_rd32(device, 0x400704);
	u32 mthd = (addr & 0x00003ffc);
	u32 subc = (addr & 0x00070000) >> 16;
	u32 data = nvkm_rd32(device, 0x400708);
	u32 code = nvkm_rd32(device, 0x400110);
	u32 class;
	const char *name = "unknown";
	int chid = -1;

	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
	if (chan) {
		name = chan->object.client->name;
		chid = chan->chid;
	}

	if (device->card_type < NV_E0 || subc < 4)
		class = nvkm_rd32(device, 0x404200 + (subc * 4));
	else
		class = 0x0000;

	if (stat & 0x00000001) {
		/*
		 * notifier interrupt, only needed for cyclestats
		 * can be safely ignored
		 */
		nvkm_wr32(device, 0x400100, 0x00000001);
		stat &= ~0x00000001;
	}

	if (stat & 0x00000010) {
		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
				   chid, inst << 12, name, subc,
				   class, mthd, data);
		}
		nvkm_wr32(device, 0x400100, 0x00000010);
		stat &= ~0x00000010;
	}

	if (stat & 0x00000020) {
		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
			   "subc %d class %04x mthd %04x data %08x\n",
			   chid, inst << 12, name, subc, class, mthd, data);
		nvkm_wr32(device, 0x400100, 0x00000020);
		stat &= ~0x00000020;
	}

	if (stat & 0x00100000) {
		const struct nvkm_enum *en =
			nvkm_enum_find(nv50_data_error_names, code);
		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
			   "subc %d class %04x mthd %04x data %08x\n",
			   code, en ? en->name : "", chid, inst << 12,
			   name, subc, class, mthd, data);
		nvkm_wr32(device, 0x400100, 0x00100000);
		stat &= ~0x00100000;
	}

	if (stat & 0x00200000) {
		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
			   chid, inst << 12, name);
		gf100_gr_trap_intr(gr);
		nvkm_wr32(device, 0x400100, 0x00200000);
		stat &= ~0x00200000;
	}

	if (stat & 0x00080000) {
		gf100_gr_ctxctl_isr(gr);
		nvkm_wr32(device, 0x400100, 0x00080000);
		stat &= ~0x00080000;
	}

	if (stat) {
		nvkm_error(subdev, "intr %08x\n", stat);
		nvkm_wr32(device, 0x400100, stat);
	}

	nvkm_wr32(device, 0x400500, 0x00010001);
	nvkm_fifo_chan_put(device->fifo, flags, &chan);
}
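
/*
 * Upload a falcon's data and code segments.  0x01c0/0x0180 appear to set an
 * auto-incrementing upload base, with the payload streamed through
 * 0x01c4/0x0184 and the code padded out to a 0x40-word boundary.
 */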
void
gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	int i;

	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
	for (i = 0; i < data->size / 4; i++)
		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);

	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
	for (i = 0; i < code->size / 4; i++) {
		if ((i & 0x3f) == 0)
			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
	}

	/* code must be padded to 0x40 words */
	for (; i & 0x3f; i++)
		nvkm_wr32(device, fuc_base + 0x0184, 0);
}
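
/*
 * Build a falcon context-save transfer list from a register pack.  Runs of
 * contiguous registers are coalesced into entries of up to 32 words,
 * encoded as ((count - 1) << 26 | addr); the list pointer at 'starstar' is
 * then advanced past the new entries.
 */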
static void
gf100_gr_init_csdata(struct gf100_gr *gr,
		     const struct gf100_gr_pack *pack,
		     u32 falcon, u32 starstar, u32 base)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const struct gf100_gr_pack *iter;
	const struct gf100_gr_init *init;
	u32 addr = ~0, prev = ~0, xfer = 0;
	u32 star, temp;

	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
	star = nvkm_rd32(device, falcon + 0x01c4);
	temp = nvkm_rd32(device, falcon + 0x01c4);
	if (temp > star)
		star = temp;
	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);

	pack_for_each_init(init, iter, pack) {
		u32 head = init->addr - base;
		u32 tail = head + init->count * init->pitch;
		while (head < tail) {
			if (head != prev + 4 || xfer >= 32) {
				if (xfer) {
					u32 data = ((--xfer << 26) | addr);
					nvkm_wr32(device, falcon + 0x01c4, data);
					star += 4;
				}
				addr = head;
				xfer = 0;
			}
			prev = head;
			xfer = xfer + 1;
			head = head + init->pitch;
		}
	}

	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
	nvkm_wr32(device, falcon + 0x01c4, star + 4);
}
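
/*
 * Bring up the context-switching ucode (FECS/GPCCS).  With external
 * firmware the boot handshake is driven through mailbox-style registers
 * (0x409500/0x409504); with the built-in ucode the HUB falcon initialises
 * the GPCs itself and reports the context size in 0x409804.
 */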
int
gf100_gr_init_ctxctl(struct gf100_gr *gr)
{
	const struct gf100_grctx_func *grctx = gr->func->grctx;
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	int i;

	if (gr->firmware) {
		/* load fuc microcode */
		nvkm_mc_unk260(device->mc, 0);
		gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d);
		gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad);
		nvkm_mc_unk260(device->mc, 1);

		/* start both of them running */
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x41a10c, 0x00000000);
		nvkm_wr32(device, 0x40910c, 0x00000000);
		nvkm_wr32(device, 0x41a100, 0x00000002);
		nvkm_wr32(device, 0x409100, 0x00000002);
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800) & 0x00000001)
				break;
		) < 0)
			return -EBUSY;

		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x7fffffff);
		nvkm_wr32(device, 0x409504, 0x00000021);

		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000010);
		if (nvkm_msec(device, 2000,
			if ((gr->size = nvkm_rd32(device, 0x409800)))
				break;
		) < 0)
			return -EBUSY;

		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000016);
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
			return -EBUSY;

		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000025);
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
			return -EBUSY;

		if (device->chipset >= 0xe0) {
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000030);
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
				return -EBUSY;

			nvkm_wr32(device, 0x409810, 0xb00095c8);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000031);
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
				return -EBUSY;

			nvkm_wr32(device, 0x409810, 0x00080420);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000032);
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
				return -EBUSY;

			nvkm_wr32(device, 0x409614, 0x00000070);
			nvkm_wr32(device, 0x409614, 0x00000770);
			nvkm_wr32(device, 0x40802c, 0x00000001);
		}

		if (gr->data == NULL) {
			int ret = gf100_grctx_generate(gr);
			if (ret) {
				nvkm_error(subdev, "failed to construct context\n");
				return ret;
			}
		}

		return 0;
	} else
	if (!gr->func->fecs.ucode) {
		return -ENOSYS;
	}

	/* load HUB microcode */
	nvkm_mc_unk260(device->mc, 0);
	nvkm_wr32(device, 0x4091c0, 0x01000000);
	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);

	nvkm_wr32(device, 0x409180, 0x01000000);
	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
		if ((i & 0x3f) == 0)
			nvkm_wr32(device, 0x409188, i >> 6);
		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
	}

	/* load GPC microcode */
	nvkm_wr32(device, 0x41a1c0, 0x01000000);
	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);

	nvkm_wr32(device, 0x41a180, 0x01000000);
	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
		if ((i & 0x3f) == 0)
			nvkm_wr32(device, 0x41a188, i >> 6);
		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
	}
	nvkm_mc_unk260(device->mc, 1);

	/* load register lists */
	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);

	/* start HUB ucode running, it'll init the GPCs */
	nvkm_wr32(device, 0x40910c, 0x00000000);
	nvkm_wr32(device, 0x409100, 0x00000002);
	if (nvkm_msec(device, 2000,
		if (nvkm_rd32(device, 0x409800) & 0x80000000)
			break;
	) < 0) {
		gf100_gr_ctxctl_debug(gr);
		return -EBUSY;
	}

	gr->size = nvkm_rd32(device, 0x409804);
	if (gr->data == NULL) {
		int ret = gf100_grctx_generate(gr);
		if (ret) {
			nvkm_error(subdev, "failed to construct context\n");
			return ret;
		}
	}

	return 0;
}
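
/*
 * One-time setup: probe the GPC/TPC/ROP/PPC topology from PGRAPH registers
 * and select the chipset-specific "magic" value used during context setup.
 */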
  1307. static int
  1308. gf100_gr_oneinit(struct nvkm_gr *base)
  1309. {
  1310. struct gf100_gr *gr = gf100_gr(base);
  1311. struct nvkm_device *device = gr->base.engine.subdev.device;
  1312. int ret, i, j;
  1313. nvkm_pmu_pgob(device->pmu, false);
  1314. ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
  1315. &gr->unk4188b4);
  1316. if (ret)
  1317. return ret;
  1318. ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
  1319. &gr->unk4188b8);
  1320. if (ret)
  1321. return ret;
  1322. nvkm_kmap(gr->unk4188b4);
  1323. for (i = 0; i < 0x1000; i += 4)
  1324. nvkm_wo32(gr->unk4188b4, i, 0x00000010);
  1325. nvkm_done(gr->unk4188b4);
  1326. nvkm_kmap(gr->unk4188b8);
  1327. for (i = 0; i < 0x1000; i += 4)
  1328. nvkm_wo32(gr->unk4188b8, i, 0x00000010);
  1329. nvkm_done(gr->unk4188b8);
  1330. gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
  1331. gr->gpc_nr = nvkm_rd32(device, 0x409604) & 0x0000001f;
  1332. for (i = 0; i < gr->gpc_nr; i++) {
  1333. gr->tpc_nr[i] = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
  1334. gr->tpc_total += gr->tpc_nr[i];
  1335. gr->ppc_nr[i] = gr->func->ppc_nr;
  1336. for (j = 0; j < gr->ppc_nr[i]; j++) {
  1337. u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
  1338. gr->ppc_tpc_nr[i][j] = hweight8(mask);
  1339. }
  1340. }
  1341. /*XXX: these need figuring out... though it might not even matter */
  1342. switch (device->chipset) {
  1343. case 0xc0:
  1344. if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
  1345. gr->magic_not_rop_nr = 0x07;
  1346. } else
  1347. if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
  1348. gr->magic_not_rop_nr = 0x05;
  1349. } else
  1350. if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
  1351. gr->magic_not_rop_nr = 0x06;
  1352. }
  1353. break;
  1354. case 0xc3: /* 450, 4/0/0/0, 2 */
  1355. gr->magic_not_rop_nr = 0x03;
  1356. break;
  1357. case 0xc4: /* 460, 3/4/0/0, 4 */
  1358. gr->magic_not_rop_nr = 0x01;
  1359. break;
  1360. case 0xc1: /* 2/0/0/0, 1 */
  1361. gr->magic_not_rop_nr = 0x01;
  1362. break;
  1363. case 0xc8: /* 4/4/3/4, 5 */
  1364. gr->magic_not_rop_nr = 0x06;
  1365. break;
  1366. case 0xce: /* 4/4/0/0, 4 */
  1367. gr->magic_not_rop_nr = 0x03;
  1368. break;
  1369. case 0xcf: /* 4/0/0/0, 3 */
  1370. gr->magic_not_rop_nr = 0x03;
  1371. break;
  1372. case 0xd7:
  1373. case 0xd9: /* 1/0/0/0, 1 */
  1374. case 0xea: /* gk20a */
  1375. case 0x12b: /* gm20b */
  1376. gr->magic_not_rop_nr = 0x01;
  1377. break;
  1378. }
  1379. return 0;
  1380. }
  1381. int
  1382. gf100_gr_init_(struct nvkm_gr *base)
  1383. {
  1384. struct gf100_gr *gr = gf100_gr(base);
  1385. nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
  1386. return gr->func->init(gr);
  1387. }
  1388. void
  1389. gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
  1390. {
  1391. kfree(fuc->data);
  1392. fuc->data = NULL;
  1393. }
void *
gf100_gr_dtor(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);

	if (gr->func->dtor)
		gr->func->dtor(gr);
	kfree(gr->data);

	gf100_gr_dtor_fw(&gr->fuc409c);
	gf100_gr_dtor_fw(&gr->fuc409d);
	gf100_gr_dtor_fw(&gr->fuc41ac);
	gf100_gr_dtor_fw(&gr->fuc41ad);

	nvkm_memory_del(&gr->unk4188b8);
	nvkm_memory_del(&gr->unk4188b4);
	return gr;
}
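/* Base nvkm_gr_func implementation shared by every gf100-derived GR
 * engine; per-chipset behaviour is supplied through struct gf100_gr_func.
 */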
static const struct nvkm_gr_func
gf100_gr_ = {
	.dtor = gf100_gr_dtor,
	.oneinit = gf100_gr_oneinit,
	.init = gf100_gr_init_,
	.intr = gf100_gr_intr,
	.units = gf100_gr_units,
	.chan_new = gf100_gr_chan_new,
	.object_get = gf100_gr_object_get,
};
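/* Load one external firmware image from userspace, e.g.
 * "nvidia/gf100/fecs_inst.bin", and keep a kernel copy of its contents.
 * The chip name is lowercased first, as device->chip->name appears to be
 * stored in uppercase.
 */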
int
gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
		 struct gf100_gr_fuc *fuc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	const struct firmware *fw;
	char f[64];
	char cname[16];
	int ret;
	int i;

	/* Convert device name to lowercase */
	strncpy(cname, device->chip->name, sizeof(cname));
	cname[sizeof(cname) - 1] = '\0';
	i = strlen(cname);
	while (i) {
		--i;
		cname[i] = tolower(cname[i]);
	}

	snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname);
	ret = request_firmware(&fw, f, device->dev);
	if (ret) {
		nvkm_error(subdev, "failed to load %s\n", fwname);
		return ret;
	}

	fuc->size = fw->size;
	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
	release_firmware(fw);
	return (fuc->data != NULL) ? 0 : -ENOMEM;
}
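/* Common constructor.  The "NvGrUseFW" config option selects external
 * firmware, defaulting to true when the chipset provides no built-in
 * ucode; in that case all four FECS/GPCCS images must load successfully
 * or the engine is left unusable (-ENODEV).
 */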
int
gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct gf100_gr *gr)
{
	int ret;

	gr->func = func;
	gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW",
				    func->fecs.ucode == NULL);

	ret = nvkm_gr_ctor(&gf100_gr_, device, index, 0x08001000,
			   gr->firmware || func->fecs.ucode != NULL,
			   &gr->base);
	if (ret)
		return ret;

	if (gr->firmware) {
		nvkm_info(&gr->base.engine.subdev, "using external firmware\n");
		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
			return -ENODEV;
	}

	return 0;
}
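/* Allocate a gf100_gr instance and hand it to the common constructor;
 * used by gf100_gr_new() and the later-chipset variants.
 */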
int
gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct nvkm_gr **pgr)
{
	struct gf100_gr *gr;
	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
		return -ENOMEM;
	*pgr = &gr->base;
	return gf100_gr_ctor(func, device, index, gr);
}
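/* GF100 init.  magicgpc918 is ceil(2^23 / tpc_total), e.g. 0x88889 for the
 * 15-TPC GF100; seemingly a fixed-point reciprocal the hardware uses to
 * divide work across TPCs, though its exact meaning is undocumented.  The
 * data[] table below packs one nibble per TPC, in round-robin GPC order,
 * giving each TPC's index within its own GPC.
 */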
int
gf100_gr_init(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc, rop;
	int i;

	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8);

	gf100_gr_mmio(gr, gr->func->mmio);

	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while (!tpcnr[gpc]);
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
			  gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
			  gr->tpc_total);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
	}

	if (device->chipset != 0xd7)
		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
	else
		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);

	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));

	nvkm_wr32(device, 0x400500, 0x00010001);

	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);

	nvkm_wr32(device, 0x409c24, 0x000f0000);
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
	nvkm_wr32(device, 0x408030, 0xc0000000);
	nvkm_wr32(device, 0x40601c, 0xc0000000);
	nvkm_wr32(device, 0x404490, 0xc0000000);
	nvkm_wr32(device, 0x406018, 0xc0000000);
	nvkm_wr32(device, 0x405840, 0xc0000000);
	nvkm_wr32(device, 0x405844, 0x00ffffff);
	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
		}
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
	}

	for (rop = 0; rop < gr->rop_nr; rop++) {
		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
	}

	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);

	nvkm_wr32(device, 0x400054, 0x34ce3464);

	gf100_gr_zbc_init(gr);

	return gf100_gr_init_ctxctl(gr);
}
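/* Built-in FECS (hub) and GPCCS (per-GPC) ucode images, generated from the
 * falcon assembly sources under fuc/.
 */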
#include "fuc/hubgf100.fuc3.h"

struct gf100_gr_ucode
gf100_gr_fecs_ucode = {
	.code.data = gf100_grhub_code,
	.code.size = sizeof(gf100_grhub_code),
	.data.data = gf100_grhub_data,
	.data.size = sizeof(gf100_grhub_data),
};

#include "fuc/gpcgf100.fuc3.h"

struct gf100_gr_ucode
gf100_gr_gpccs_ucode = {
	.code.data = gf100_grgpc_code,
	.code.size = sizeof(gf100_grgpc_code),
	.data.data = gf100_grgpc_data,
	.data.size = sizeof(gf100_grgpc_data),
};
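/* GF100-specific configuration: built-in ucode for both falcons, the gf100
 * context template, and the Fermi 2D/M2MF/3D/compute object classes.
 */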
static const struct gf100_gr_func
gf100_gr = {
	.init = gf100_gr_init,
	.mmio = gf100_gr_pack_mmio,
	.fecs.ucode = &gf100_gr_fecs_ucode,
	.gpccs.ucode = &gf100_gr_gpccs_ucode,
	.grctx = &gf100_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
		{ -1, -1, FERMI_A, &gf100_fermi },
		{ -1, -1, FERMI_COMPUTE_A },
		{}
	}
};
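/* Entry point referenced by the per-chipset device tables to instantiate
 * the GF100 GR engine.
 */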
int
gf100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	return gf100_gr_new_(&gf100_gr, device, index, pgr);
}