hub.fuc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696
  1. /* fuc microcode for gf100 PGRAPH/HUB
  2. *
  3. * Copyright 2011 Red Hat Inc.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice shall be included in
  13. * all copies or substantial portions of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21. * OTHER DEALINGS IN THE SOFTWARE.
  22. *
  23. * Authors: Ben Skeggs
  24. */
  25. #ifdef INCLUDE_DATA
  26. hub_mmio_list_head: .b32 #hub_mmio_list_base // address of the first hub mmio list entry
  27. hub_mmio_list_tail: .b32 #hub_mmio_list_next // address just past the last hub mmio list entry
  28. gpc_count: .b32 0 // filled in by init from bits 0:4 of register 0x409604
  29. rop_count: .b32 0 // filled in by init from bits 16:20 of register 0x409604
  30. cmd_queue: queue_init // ih enqueues commands here (queue_put), main dequeues them (queue_get)
  31. ctx_current: .b32 0 // context pointer of the loaded channel, written by ctx_load
  32. .align 256
  33. chan_data: // 256-byte aligned buffer, ctx_load transfers the context header into it
  34. chan_mmio_count: .b32 0 // first header word, tested by ctx_xfer before calling ctx_mmio_exec
  35. chan_mmio_address: .b32 0 // second header word, written to MEM_BASE by ctx_mmio_exec
  36. .align 256
  37. xfer_data: .skip 256 // scratch buffer for xdld transfers (channel header, mmio list chunks)
  38. hub_mmio_list_base:
  39. .b32 0x0417e91c // 0x17e91c, 2 (NOTE(review): entry encoding is interpreted by mmctx_size/mmctx_xfer, not shown here)
  40. hub_mmio_list_next:
  41. #endif
  42. #ifdef INCLUDE_CODE
  43. // error - reports an exception to the host
  44. //
  45. // In: $r15 error code (see os.h)
  46. // Clobbers: $r15 (overwritten with 1 before returning)
  47. error:
  48. nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(5), 0, $r15) // leave the error code in CC_SCRATCH_VAL(5)
  49. mov $r15 1
  50. nv_iowr(NV_PGRAPH_FECS_INTR_UP_SET, 0, $r15) // write 1 to INTR_UP_SET, presumably raising bit 0 towards the host
  51. ret
  52. // HUB fuc initialisation, executed by triggering ucode start, will
  53. // fall through to main loop after completion.
  54. //
  55. // Output:
  56. // CC_SCRATCH[0]:
  57. // 31:31: set to signal completion
  58. // CC_SCRATCH[1]:
  59. // 31:0: total PGRAPH context size
  60. // Registers: $r0 is zeroed and used as the zero base, $r1 accumulates the context size
  61. init:
  62. clear b32 $r0 // $r0 = 0, the base of every D[$r0 + #label] access below
  63. mov $xdbase $r0 // zero the data transfer base
  64. // setup stack: $sp = (CAPS bits 9:17) << 8
  65. nv_iord($r1, NV_PGRAPH_FECS_CAPS, 0)
  66. extr $r1 $r1 9:17 // presumably the data segment size in 256-byte units - TODO confirm
  67. shl b32 $r1 8
  68. mov $sp $r1
  69. // enable fifo access
  70. mov $r2 NV_PGRAPH_FECS_ACCESS_FIFO
  71. nv_iowr(NV_PGRAPH_FECS_ACCESS, 0, $r2)
  72. // setup i0 handler, and route all interrupts to it
  73. mov $r1 #ih
  74. mov $iv0 $r1 // interrupt vector 0 = address of ih
  75. clear b32 $r2
  76. nv_iowr(NV_PGRAPH_FECS_INTR_ROUTE, 0, $r2) // INTR_ROUTE = 0
  77. // route HUB_CHSW_PULSE to fuc interrupt 8
  78. mov $r2 0x2003 // { HUB_CHSW_PULSE, ZERO } -> intr 8
  79. nv_iowr(NV_PGRAPH_FECS_IROUTE, 0, $r2)
  80. // not sure what these are, route them because NVIDIA does, and
  81. // the IRQ handler will signal the host if we ever get one.. we
  82. // may find out if/why we need to handle these if so..
  83. //
  84. mov $r2 0x2004 // { 0x04, ZERO } -> intr 9
  85. nv_iowr(NV_PGRAPH_FECS_IROUTE, 1, $r2)
  86. mov $r2 0x200b // { HUB_FIRMWARE_MTHD, ZERO } -> intr 10
  87. nv_iowr(NV_PGRAPH_FECS_IROUTE, 2, $r2)
  88. mov $r2 0x200c // { 0x0c, ZERO } -> intr 15
  89. nv_iowr(NV_PGRAPH_FECS_IROUTE, 7, $r2)
  90. // enable all INTR_UP interrupts
  91. sub b32 $r3 $r0 1 // $r3 = 0 - 1 = 0xffffffff
  92. nv_iowr(NV_PGRAPH_FECS_INTR_UP_EN, 0, $r3)
  93. // enable fifo, ctxsw, 9, fwmthd, 15 interrupts (bits 2, 8, 9, 10 and 15)
  94. imm32($r2, 0x8704)
  95. nv_iowr(NV_PGRAPH_FECS_INTR_EN_SET, 0, $r2)
  96. // fifo level triggered, rest edge
  97. mov $r2 NV_PGRAPH_FECS_INTR_MODE_FIFO_LEVEL
  98. nv_iowr(NV_PGRAPH_FECS_INTR_MODE, 0, $r2)
  99. // enable interrupts
  100. bset $flags ie0
  101. // fetch enabled GPC/ROP counts
  102. nv_rd32($r14, 0x409604) // NOTE(review): the value is consumed from $r15 below, presumably the nv_rd32 helper leaves it there - confirm against the macro
  103. extr $r1 $r15 16:20 // ROP count = bits 16:20
  104. st b32 D[$r0 + #rop_count] $r1
  105. and $r15 0x1f // GPC count = bits 0:4
  106. st b32 D[$r0 + #gpc_count] $r15
  107. // set BAR_REQMASK to GPC mask, ie. (1 << gpc_count) - 1
  108. mov $r1 1
  109. shl b32 $r1 $r15
  110. sub b32 $r1 1
  111. nv_iowr(NV_PGRAPH_FECS_BAR_MASK0, 0, $r1)
  112. nv_iowr(NV_PGRAPH_FECS_BAR_MASK1, 0, $r1)
  113. // context size calculation, reserve first 256 bytes for use by fuc
  114. mov $r1 256 // $r1 = running context size from here on
  115. // write 2 (ored with 0x10) to 0x404170 and wait for bit 4 to clear, then set the 0x86c registers to 0x10
  116. mov $r15 2
  117. call(ctx_4170s)
  118. call(ctx_4170w)
  119. mov $r15 0x10
  120. call(ctx_86c)
  121. // calculate size of mmio context data
  122. ld b32 $r14 D[$r0 + #hub_mmio_list_head]
  123. ld b32 $r15 D[$r0 + #hub_mmio_list_tail]
  124. call(mmctx_size) // result is taken from $r15 below
  125. // set mmctx base addresses now so we don't have to do it later,
  126. // they don't (currently) ever change
  127. shr b32 $r4 $r1 8 // current context size in 256-byte units
  128. nv_iowr(NV_PGRAPH_FECS_MMCTX_SAVE_SWBASE, 0, $r4)
  129. nv_iowr(NV_PGRAPH_FECS_MMCTX_LOAD_SWBASE, 0, $r4)
  130. add b32 $r3 0x1300 // NOTE(review): $r3 is reloaded from gpc_count before its next visible read, looks vestigial unless strand_ctx_init reads it - confirm
  131. add b32 $r1 $r15 // account for the mmio context data
  132. shr b32 $r15 2 // mmio size divided by 4
  133. nv_iowr(NV_PGRAPH_FECS_MMCTX_LOAD_COUNT, 0, $r15) // wtf??
  134. // strands, base offset needs to be aligned to 256 bytes
  135. shr b32 $r1 8
  136. add b32 $r1 1 // always advances to the next 256-byte boundary, even when already aligned
  137. shl b32 $r1 8
  138. mov b32 $r15 $r1 // strand_ctx_init is passed the base offset in $r15
  139. call(strand_ctx_init)
  140. add b32 $r1 $r15 // and its $r15 result is added to the context size
  141. // initialise each GPC in sequence by passing in the offset of its
  142. // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
  143. // has previously been uploaded by the host) running.
  144. //
  145. // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
  146. // when it has completed, and return the size of its context data
  147. // in GPCn_CC_SCRATCH[1]
  148. //
  149. ld b32 $r3 D[$r0 + #gpc_count] // $r3 = GPCs left to start, the loop below assumes at least one
  150. imm32($r4, 0x502000) // $r4 = register base of the current GPC, advanced by 0x8000 per GPC
  151. init_gpc:
  152. // setup, and start GPC ucode running
  153. add b32 $r14 $r4 0x804
  154. mov b32 $r15 $r1
  155. call(nv_wr32) // CC_SCRATCH[1] = ctx offset
  156. add b32 $r14 $r4 0x10c
  157. clear b32 $r15
  158. call(nv_wr32) // write 0 to base + 0x10c
  159. add b32 $r14 $r4 0x104 // $r15 is not reloaded here, assumes nv_wr32 preserved the 0 - TODO confirm
  160. call(nv_wr32) // ENTRY
  161. add b32 $r14 $r4 0x100
  162. mov $r15 2 // CTRL_START_TRIGGER
  163. call(nv_wr32) // CTRL
  164. // wait for it to complete, and adjust context size
  165. add b32 $r14 $r4 0x800 // CC_SCRATCH[0] of this GPC
  166. init_gpc_wait:
  167. call(nv_rd32) // re-reads the address in $r14 each pass, assumes nv_rd32 preserves $r14 - TODO confirm
  168. xbit $r15 $r15 31
  169. bra e #init_gpc_wait // spin while bit 31 is still clear
  170. add b32 $r14 $r4 0x804
  171. call(nv_rd32) // CC_SCRATCH[1] now holds the size of this GPC's context data
  172. add b32 $r1 $r15
  173. // next!
  174. add b32 $r4 0x8000
  175. sub b32 $r3 1
  176. bra ne #init_gpc
  177. // call ctx_86c and ctx_4170s again, this time with 0
  178. mov $r15 0
  179. call(ctx_86c)
  180. mov $r15 0
  181. call(ctx_4170s)
  182. // save context size, and tell host we're ready
  183. nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(1), 0, $r1)
  184. clear b32 $r1
  185. bset $r1 31
  186. nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_SET(0), 0, $r1) // set CC_SCRATCH[0] bit 31, then fall through into main
  187. // Main program loop, very simple, sleeps until woken up by the interrupt
  188. // handler, pulls a command from the queue and executes its handler
  189. // Never returns: every path branches back to main.
  190. main:
  191. // sleep until we have something to do
  192. bset $flags $p0
  193. sleep $p0 // ih clears $p0 just before its iret, which ends the sleep
  194. mov $r13 #cmd_queue
  195. call(queue_get) // command is taken from $r14 and its data word from $r15 below
  196. bra $p1 #main // presumably queue_get sets $p1 when the queue was empty - TODO confirm
  197. // context switch, requested by GPU? (0x4001 is the command ih enqueues for a CHSW interrupt)
  198. cmpu b32 $r14 0x4001
  199. bra ne #main_not_ctx_switch
  200. trace_set(T_AUTO)
  201. nv_iord($r1, NV_PGRAPH_FECS_CHAN_ADDR, 0) // $r1 = channel being switched away from
  202. nv_iord($r2, NV_PGRAPH_FECS_CHAN_NEXT, 0) // $r2 = channel being switched to
  203. xbit $r3 $r1 31 // bit 31 of a channel value is used as its valid flag
  204. bra e #chsw_no_prev
  205. xbit $r3 $r2 31
  206. bra e #chsw_prev_no_next
  207. push $r2 // both valid: keep the next channel across the save
  208. mov b32 $r2 $r1
  209. trace_set(T_SAVE)
  210. bclr $flags $p1 // $p1 clear = save
  211. bset $flags $p2 // $p2 set = a load will follow
  212. call(ctx_xfer)
  213. trace_clr(T_SAVE);
  214. pop $r2
  215. trace_set(T_LOAD);
  216. bset $flags $p1 // $p1 set = load, $p2 is not touched again and is expected to still be set
  217. call(ctx_xfer)
  218. trace_clr(T_LOAD);
  219. bra #chsw_done
  220. chsw_prev_no_next:
  221. push $r2 // previous channel only: save it, nothing is loaded afterwards
  222. mov b32 $r2 $r1
  223. bclr $flags $p1
  224. bclr $flags $p2
  225. call(ctx_xfer)
  226. pop $r2
  227. nv_iowr(NV_PGRAPH_FECS_CHAN_ADDR, 0, $r2) // CHAN_ADDR = the next value, whose bit 31 is clear on this path
  228. bra #chsw_done
  229. chsw_no_prev:
  230. xbit $r3 $r2 31
  231. bra e #chsw_done // neither channel valid: just ack
  232. bset $flags $p1 // next channel only: load with no preceding save
  233. bclr $flags $p2
  234. call(ctx_xfer)
  235. // ack the context switch request
  236. chsw_done:
  237. mov $r2 NV_PGRAPH_FECS_CHSW_ACK
  238. nv_iowr(NV_PGRAPH_FECS_CHSW, 0, $r2)
  239. trace_clr(T_AUTO)
  240. bra #main // context switches do not signal CC_SCRATCH[0] bit 31
  241. // request to set current channel? (*not* a context switch)
  242. main_not_ctx_switch:
  243. cmpu b32 $r14 0x0001
  244. bra ne #main_not_ctx_chan
  245. mov b32 $r2 $r15 // the command's data word is the channel address
  246. call(ctx_chan)
  247. bra #main_done
  248. // request to store current channel context?
  249. main_not_ctx_chan:
  250. cmpu b32 $r14 0x0002
  251. bra ne #main_not_ctx_save
  252. trace_set(T_SAVE)
  253. bclr $flags $p1 // save, with no load to follow; $r2 is not set up, the save path of ctx_xfer never reaches ctx_load
  254. bclr $flags $p2
  255. call(ctx_xfer)
  256. trace_clr(T_SAVE)
  257. bra #main_done
  258. main_not_ctx_save:
  259. shl b32 $r15 $r14 16 // unknown command: error code = (command << 16) | E_BAD_COMMAND
  260. or $r15 E_BAD_COMMAND
  261. call(error)
  262. bra #main // completion is not signalled for a rejected command
  263. main_done:
  264. clear b32 $r2
  265. bset $r2 31
  266. nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_SET(0), 0, $r2) // set CC_SCRATCH[0] bit 31 to signal completion
  267. bra #main
  268. // interrupt handler: queues fifo and context switch requests for main, reports firmware methods and anything unhandled to the host
  269. ih:
  270. push $r8
  271. mov $r8 $flags // $flags is saved by way of $r8
  272. push $r8
  273. push $r9
  274. push $r10
  275. push $r11
  276. push $r13
  277. push $r14
  278. push $r15
  279. clear b32 $r0 // $r0 is forced to zero here, it is not saved or restored
  280. // incoming fifo command?
  281. nv_iord($r10, NV_PGRAPH_FECS_INTR, 0) // $r10 = pending interrupt bits, kept until the final ack
  282. and $r11 $r10 NV_PGRAPH_FECS_INTR_FIFO
  283. bra e #ih_no_fifo
  284. // queue incoming fifo command for later processing
  285. mov $r13 #cmd_queue
  286. nv_iord($r14, NV_PGRAPH_FECS_FIFO_CMD, 0)
  287. nv_iord($r15, NV_PGRAPH_FECS_FIFO_DATA, 0)
  288. call(queue_put)
  289. add b32 $r11 0x400 // NOTE(review): $r11 is overwritten at ih_no_fifo before any visible read, looks like dead code - confirm the macros do not use it
  290. mov $r14 1
  291. nv_iowr(NV_PGRAPH_FECS_FIFO_ACK, 0, $r14)
  292. // context switch request?
  293. ih_no_fifo:
  294. and $r11 $r10 NV_PGRAPH_FECS_INTR_CHSW
  295. bra e #ih_no_ctxsw
  296. // enqueue a context switch for later processing
  297. mov $r13 #cmd_queue
  298. mov $r14 0x4001 // command code main checks for; $r15 is not set here and main does not visibly read it for 0x4001
  299. call(queue_put)
  300. // firmware method?
  301. ih_no_ctxsw:
  302. and $r11 $r10 NV_PGRAPH_FECS_INTR_FWMTHD
  303. bra e #ih_no_fwmthd
  304. // none we handle; report to host and ack
  305. nv_rd32($r15, NV_PGRAPH_TRAPPED_DATA_LO)
  306. nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(4), 0, $r15) // CC_SCRATCH_VAL(4) = TRAPPED_DATA_LO
  307. nv_rd32($r15, NV_PGRAPH_TRAPPED_ADDR)
  308. nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(3), 0, $r15) // CC_SCRATCH_VAL(3) = TRAPPED_ADDR
  309. extr $r14 $r15 16:18 // bits 16:18 of TRAPPED_ADDR select an FE_OBJECT_TABLE entry
  310. shl b32 $r14 $r14 2 // entries are 4 bytes apart
  311. imm32($r15, NV_PGRAPH_FE_OBJECT_TABLE(0))
  312. add b32 $r14 $r15
  313. call(nv_rd32)
  314. nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(2), 0, $r15) // CC_SCRATCH_VAL(2) = that table entry
  315. mov $r15 E_BAD_FWMTHD
  316. call(error)
  317. mov $r11 0x100
  318. nv_wr32(0x400144, $r11) // the ack mentioned above: write 0x100 to 0x400144
  319. // anything we didn't handle, bring it to the host's attention
  320. ih_no_fwmthd:
  321. mov $r11 0x504 // FIFO | CHSW | FWMTHD
  322. not b32 $r11
  323. and $r11 $r10 $r11 // pending bits other than the three handled above
  324. bra e #ih_no_other
  325. nv_iowr(NV_PGRAPH_FECS_INTR_UP_SET, 0, $r11)
  326. // ack, and wake up main()
  327. ih_no_other:
  328. nv_iowr(NV_PGRAPH_FECS_INTR_ACK, 0, $r10) // ack every bit that was read at entry
  329. pop $r15
  330. pop $r14
  331. pop $r13
  332. pop $r11
  333. pop $r10
  334. pop $r9
  335. pop $r8
  336. mov $flags $r8 // restore the interrupted code's $flags
  337. pop $r8
  338. bclr $flags $p0 // done after the restore so that main's sleep on $p0 ends
  339. iret
  340. #if CHIPSET < GK100
  341. // Not real sure, but, MEM_CMD 7 will hang forever if this isn't done. Writes 1 to 0x404160 and waits for bit 4 to read back set. Clobbers $r15.
  342. ctx_4160s:
  343. mov $r15 1
  344. nv_wr32(0x404160, $r15)
  345. ctx_4160s_wait:
  346. nv_rd32($r15, 0x404160)
  347. xbit $r15 $r15 4
  348. bra e #ctx_4160s_wait // spin while bit 4 is still clear
  349. ret
  350. // Without clearing again at end of xfer, some things cause PGRAPH
  351. // to hang with STATUS=0x00000007 until it's cleared.. fbcon can
  352. // still function with it set however... Writes 0 to 0x404160, undoing ctx_4160s. Clobbers $r15.
  353. ctx_4160c:
  354. clear b32 $r15
  355. nv_wr32(0x404160, $r15)
  356. ret
  357. #endif
  358. // Again, not real sure
  359. //
  360. // In: $r15 value to set 0x404170 to
  361. // Bit 4 (0x10) is always ored into the value written, ctx_4170w waits for it to clear. Modifies $r15.
  362. ctx_4170s:
  363. or $r15 0x10
  364. nv_wr32(0x404170, $r15)
  365. ret
  366. // Waits for a ctx_4170s() call to complete
  367. // Polls 0x404170 until bit 4 (0x10) reads back clear. Clobbers $r15.
  368. ctx_4170w:
  369. nv_rd32($r15, 0x404170)
  370. and $r15 0x10
  371. bra ne #ctx_4170w // bit 4 still set, read again
  372. ret
  373. // Disables various things, waits a bit, and re-enables them..
  374. //
  375. // Not sure how exactly this helps, perhaps "ENABLE" is not such a
  376. // good description for the bits we turn off? Anyways, without this,
  377. // funny things happen.
  378. // Clobbers $r14 and $r15.
  379. ctx_redswitch:
  380. mov $r14 NV_PGRAPH_FECS_RED_SWITCH_ENABLE_GPC // first write: ENABLE_GPC plus all three POWER bits, ENABLE_ROP and ENABLE_MAIN left out
  381. or $r14 NV_PGRAPH_FECS_RED_SWITCH_POWER_ROP
  382. or $r14 NV_PGRAPH_FECS_RED_SWITCH_POWER_GPC
  383. or $r14 NV_PGRAPH_FECS_RED_SWITCH_POWER_MAIN
  384. nv_iowr(NV_PGRAPH_FECS_RED_SWITCH, 0, $r14)
  385. mov $r15 8 // short busy-wait of 8 loop iterations
  386. ctx_redswitch_delay:
  387. sub b32 $r15 1
  388. bra ne #ctx_redswitch_delay
  389. or $r14 NV_PGRAPH_FECS_RED_SWITCH_ENABLE_ROP // second write adds the two missing ENABLE bits, assumes nv_iowr left $r14 intact - TODO confirm
  390. or $r14 NV_PGRAPH_FECS_RED_SWITCH_ENABLE_MAIN
  391. nv_iowr(NV_PGRAPH_FECS_RED_SWITCH, 0, $r14)
  392. ret
  393. // Not a clue what this is for, except that unless the value is 0x10, the
  394. // strand context is saved (and presumably restored) incorrectly..
  395. //
  396. // In: $r15 value to set to (0x00/0x10 are used)
  397. // The same value is written to three registers in turn.
  398. ctx_86c:
  399. nv_iowr(NV_PGRAPH_FECS_UNK86C, 0, $r15)
  400. nv_wr32(0x408a14, $r15)
  401. nv_wr32(NV_PGRAPH_GPCX_GPCCS_UNK86C, $r15) // assumes the two writes above left $r15 unchanged - TODO confirm against the macros
  402. ret
  403. // ctx_mem - issue a MEM_CMD and wait until the register reads back as zero. In: $r15 NV_PGRAPH_FECS_MEM_CMD_*; clobbers $r15
  404. ctx_mem:
  405. nv_iowr(NV_PGRAPH_FECS_MEM_CMD, 0, $r15)
  406. ctx_mem_wait:
  407. nv_iord($r15, NV_PGRAPH_FECS_MEM_CMD, 0)
  408. or $r15 $r15 // value unchanged, done only to test it for zero
  409. bra ne #ctx_mem_wait
  410. ret
  411. // ctx_load - loads a channel's ctxctl data, and selects its vm
  412. //
  413. // In: $r2 channel address
  414. // Out: ctx_current and chan_data are updated; $r1, $r2, $r10 and $r15 are overwritten
  415. ctx_load:
  416. trace_set(T_CHAN)
  417. // switch to channel, somewhat magic in parts..
  418. mov $r10 12 // DONE_UNK12
  419. call(wait_donez)
  420. clear b32 $r15
  421. nv_iowr(0x409a24, 0, $r15) // write 0 to 0x409a24
  422. nv_iowr(NV_PGRAPH_FECS_CHAN_NEXT, 0, $r2)
  423. nv_iowr(NV_PGRAPH_FECS_MEM_CHAN, 0, $r2)
  424. mov $r15 NV_PGRAPH_FECS_MEM_CMD_LOAD_CHAN
  425. call(ctx_mem) // returns once MEM_CMD reads back zero
  426. nv_iowr(NV_PGRAPH_FECS_CHAN_ADDR, 0, $r2)
  427. // load channel header, fetch PGRAPH context pointer
  428. mov $xtargets $r0 // $xtargets = 0
  429. bclr $r2 31 // drop the valid flag from the channel value
  430. shl b32 $r2 4
  431. add b32 $r2 2 // presumably MEM_BASE counts 256-byte units, making this chan + 0x200 - TODO confirm
  432. trace_set(T_LCHAN)
  433. nv_iowr(NV_PGRAPH_FECS_MEM_BASE, 0, $r2)
  434. imm32($r2, NV_PGRAPH_FECS_MEM_TARGET_UNK31)
  435. or $r2 NV_PGRAPH_FECS_MEM_TARGET_AS_VRAM
  436. nv_iowr(NV_PGRAPH_FECS_MEM_TARGET, 0, $r2)
  437. mov $r1 0x10 // chan + 0x0210
  438. mov $r2 #xfer_data
  439. sethi $r2 0x00020000 // 16 bytes
  440. xdld $r1 $r2 // transfer from offset $r1 of MEM_BASE into xfer_data
  441. xdwait
  442. trace_clr(T_LCHAN)
  443. // update current context: ctx_current = (word 1 << 24) | (word 0 >> 8)
  444. ld b32 $r1 D[$r0 + #xfer_data + 4]
  445. shl b32 $r1 24
  446. ld b32 $r2 D[$r0 + #xfer_data + 0]
  447. shr b32 $r2 8
  448. or $r1 $r2
  449. st b32 D[$r0 + #ctx_current] $r1
  450. // set transfer base to start of context, and fetch context header
  451. trace_set(T_LCTXH)
  452. nv_iowr(NV_PGRAPH_FECS_MEM_BASE, 0, $r1)
  453. mov $r2 NV_PGRAPH_FECS_MEM_TARGET_AS_VM
  454. nv_iowr(NV_PGRAPH_FECS_MEM_TARGET, 0, $r2)
  455. mov $r1 #chan_data
  456. sethi $r1 0x00060000 // 256 bytes
  457. xdld $r0 $r1 // offset 0 of the context into chan_data
  458. xdwait
  459. trace_clr(T_LCTXH)
  460. trace_clr(T_CHAN)
  461. ret
  462. // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
  463. // the active channel for ctxctl, but not actually transfer
  464. // any context data. intended for use only during initial
  465. // context construction.
  466. //
  467. // In: $r2 channel address
  468. // Clobbers whatever ctx_load, wait_donez and ctx_mem clobber ($r10 and $r15 at least)
  469. ctx_chan:
  470. #if CHIPSET < GK100
  471. call(ctx_4160s)
  472. #endif
  473. call(ctx_load)
  474. mov $r10 12 // DONE_UNK12
  475. call(wait_donez)
  476. mov $r15 5 // MEM_CMD 5 ???
  477. call(ctx_mem)
  478. #if CHIPSET < GK100
  479. call(ctx_4160c)
  480. #endif
  481. ret
  482. // Execute per-context state overrides list
  483. //
  484. // Only executed on the first load of a channel. Might want to look into
  485. // removing this and having the host directly modify the channel's context
  486. // to change this state... The nouveau DRM already builds this list as
  487. // it's definitely needed for NVIDIA's, so we may as well use it for now
  488. //
  489. // Input: $r1 mmio list length in entries, must be non-zero as the count is only tested after an entry has run
  490. // Clobbers: $r1, $r3, $r4, $r5, $r14, $r15
  491. ctx_mmio_exec:
  492. // set transfer base to be the mmio list
  493. ld b32 $r3 D[$r0 + #chan_mmio_address]
  494. nv_iowr(NV_PGRAPH_FECS_MEM_BASE, 0, $r3)
  495. clear b32 $r3 // $r3 = byte offset into the list from here on
  496. ctx_mmio_loop:
  497. // fetch next 256 bytes of mmio list if necessary
  498. and $r4 $r3 0xff // $r4 = offset inside the current 256-byte chunk
  499. bra ne #ctx_mmio_pull // not at a chunk boundary, the data is already in xfer_data
  500. mov $r5 #xfer_data
  501. sethi $r5 0x00060000 // 256 bytes
  502. xdld $r3 $r5
  503. xdwait
  504. // execute a single list entry
  505. ctx_mmio_pull:
  506. ld b32 $r14 D[$r4 + #xfer_data + 0x00] // entry word 0: register address
  507. ld b32 $r15 D[$r4 + #xfer_data + 0x04] // entry word 1: value to write
  508. call(nv_wr32)
  509. // next!
  510. add b32 $r3 8 // entries are 8 bytes each
  511. sub b32 $r1 1
  512. bra ne #ctx_mmio_loop
  513. // set transfer base back to the current context
  514. ctx_mmio_done:
  515. ld b32 $r3 D[$r0 + #ctx_current]
  516. nv_iowr(NV_PGRAPH_FECS_MEM_BASE, 0, $r3)
  517. // disable the mmio list now, we don't need/want to execute it again
  518. st b32 D[$r0 + #chan_mmio_count] $r0 // count = 0 in the local copy of the header
  519. mov $r1 #chan_data
  520. sethi $r1 0x00060000 // 256 bytes
  521. xdst $r0 $r1 // write chan_data back to offset 0 of the context
  522. xdwait
  523. ret
  524. // Transfer HUB context data between GPU and storage area
  525. //
  526. // In: $r2 channel address (only read on the load path, by ctx_load)
  527. // $p1 clear on save, set on load
  528. // $p2 set if opposite direction done/will be done, so:
  529. // on save it means: "a load will follow this save"
  530. // on load it means: "a save preceded this load"
  531. // Clobbers: $r1, $r2, $r10-$r15 at least, plus whatever the helpers called here clobber
  532. ctx_xfer:
  533. // according to mwk, some kind of wait for idle
  534. mov $r14 4
  535. nv_iowr(0x409c08, 0, $r14)
  536. ctx_xfer_idle:
  537. nv_iord($r14, 0x409c00, 0)
  538. and $r14 0x2000
  539. bra ne #ctx_xfer_idle // spin while bit 13 of 0x409c00 is set
  540. bra not $p1 #ctx_xfer_pre // a save always runs the pre-op
  541. bra $p2 #ctx_xfer_pre_load // a load that follows a save skips it, the save already ran it
  542. ctx_xfer_pre:
  543. mov $r15 0x10
  544. call(ctx_86c)
  545. #if CHIPSET < GK100
  546. call(ctx_4160s)
  547. #endif
  548. bra not $p1 #ctx_xfer_exec // saves need no load preparation
  549. ctx_xfer_pre_load:
  550. mov $r15 2
  551. call(ctx_4170s)
  552. call(ctx_4170w)
  553. call(ctx_redswitch)
  554. clear b32 $r15
  555. call(ctx_4170s)
  556. call(ctx_load) // switches to the channel in $r2 and updates ctx_current
  557. // fetch context pointer, and initiate xfer on all GPCs
  558. ctx_xfer_exec:
  559. ld b32 $r1 D[$r0 + #ctx_current]
  560. clear b32 $r2
  561. nv_iowr(NV_PGRAPH_FECS_BAR, 0, $r2) // BAR = 0 before starting the GPCs
  562. nv_wr32(0x41a500, $r1) // GPC_BCAST_WRCMD_DATA = ctx pointer
  563. xbit $r15 $flags $p1 // type bit 0 = $p1
  564. xbit $r2 $flags $p2
  565. shl b32 $r2 1 // type bit 1 = $p2
  566. or $r15 $r2
  567. nv_wr32(0x41a504, $r15) // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
  568. // strands
  569. call(strand_pre)
  570. clear b32 $r2
  571. nv_iowr(NV_PGRAPH_FECS_STRAND_SELECT, 0x3f, $r2)
  572. xbit $r2 $flags $p1 // SAVE/LOAD
  573. add b32 $r2 NV_PGRAPH_FECS_STRAND_CMD_SAVE // command = STRAND_CMD_SAVE + $p1, presumably the load command is SAVE + 1 - TODO confirm
  574. nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r2)
  575. // mmio context
  576. xbit $r10 $flags $p1 // direction
  577. or $r10 6 // first, last
  578. mov $r11 0 // base = 0
  579. ld b32 $r12 D[$r0 + #hub_mmio_list_head]
  580. ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
  581. mov $r14 0 // not multi
  582. call(mmctx_xfer)
  583. // wait for GPCs to all complete
  584. mov $r10 8 // DONE_BAR
  585. call(wait_doneo)
  586. // wait for strand xfer to complete
  587. call(strand_wait)
  588. // post-op
  589. bra $p1 #ctx_xfer_post // loads go straight to the post-op
  590. mov $r10 12 // DONE_UNK12
  591. call(wait_donez)
  592. mov $r15 5 // MEM_CMD 5 ???
  593. call(ctx_mem)
  594. bra $p2 #ctx_xfer_done // a load will follow this save, it runs the post-op instead
  595. ctx_xfer_post:
  596. mov $r15 2
  597. call(ctx_4170s)
  598. clear b32 $r15
  599. call(ctx_86c)
  600. call(strand_post)
  601. call(ctx_4170w)
  602. clear b32 $r15
  603. call(ctx_4170s)
  604. bra not $p1 #ctx_xfer_no_post_mmio // the mmio list only runs after a load
  605. ld b32 $r1 D[$r0 + #chan_mmio_count]
  606. or $r1 $r1 // value unchanged, done only to test it for zero
  607. bra e #ctx_xfer_no_post_mmio
  608. call(ctx_mmio_exec) // $r1 = entry count, known non-zero here
  609. ctx_xfer_no_post_mmio:
  610. #if CHIPSET < GK100
  611. call(ctx_4160c)
  612. #endif
  613. ctx_xfer_done:
  614. ret
  615. #endif