com.fuc 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864
  1. /* fuc microcode for copy engine on gt215- chipsets
  2. *
  3. * Copyright 2011 Red Hat Inc.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice shall be included in
  13. * all copies or substantial portions of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21. * OTHER DEALINGS IN THE SOFTWARE.
  22. *
  23. * Authors: Ben Skeggs
  24. */
  // Data segment, part 1: per-channel method state.
  // The dispatch table further down stores method data straight into these
  // words, and swctx saves/restores the context on a channel switch.
  25. #ifdef GT215
  26. .section #gt215_ce_data
  27. #else
  28. .section #gf100_ce_data
  29. #endif
  30. ctx_object: .b32 0
  31. #ifdef GT215
  // ctx_dma and ctx_dma_query label the same word: ctx_dma is the base of the
  // (query, src, dst) array that cmd_dma and chsw index.
  32. ctx_dma:
  33. ctx_dma_query: .b32 0
  34. ctx_dma_src: .b32 0
  35. ctx_dma_dst: .b32 0
  36. #endif
  // Defined for both chipsets, although only GT215-only code references it.
  37. .equ #ctx_dma_count 3
  38. ctx_query_address_high: .b32 0
  39. ctx_query_address_low: .b32 0
  40. ctx_query_counter: .b32 0
  // Source surface state.  cmd_exec reaches the matching dst field by adding
  // (ctx_dst_address_high - ctx_src_address_high) to a src label, so the src
  // and dst groups must keep the same field order and size.
  41. ctx_src_address_high: .b32 0
  42. ctx_src_address_low: .b32 0
  43. ctx_src_pitch: .b32 0
  44. ctx_src_tile_mode: .b32 0
  45. ctx_src_xsize: .b32 0
  46. ctx_src_ysize: .b32 0
  47. ctx_src_zsize: .b32 0
  48. ctx_src_zoff: .b32 0
  49. ctx_src_xoff: .b32 0
  50. ctx_src_yoff: .b32 0
  // Not a method target: written by cmd_exec / cmd_exec_set_format.
  51. ctx_src_cpp: .b32 0
  // Destination surface state, same layout as the source group above.
  52. ctx_dst_address_high: .b32 0
  53. ctx_dst_address_low: .b32 0
  54. ctx_dst_pitch: .b32 0
  55. ctx_dst_tile_mode: .b32 0
  56. ctx_dst_xsize: .b32 0
  57. ctx_dst_ysize: .b32 0
  58. ctx_dst_zsize: .b32 0
  59. ctx_dst_zoff: .b32 0
  60. ctx_dst_xoff: .b32 0
  61. ctx_dst_yoff: .b32 0
  // Not a method target: written by cmd_exec / cmd_exec_set_format.
  62. ctx_dst_cpp: .b32 0
  63. ctx_format: .b32 0
  64. ctx_swz_const0: .b32 0
  65. ctx_swz_const1: .b32 0
  66. ctx_xcnt: .b32 0
  67. ctx_ycnt: .b32 0
  // Pad to a 256-byte boundary before the dispatch table (swctx treats the
  // context as a 256-byte block at the start of the data segment).
  68. .align 256
  // Method dispatch table, walked by dispatch.
  //
  // The table is a list of ranges sorted by ascending method index, where the
  // index is the method offset divided by four (mthd 0x0100 -> 0x040).
  // Each range is:
  //   .b16 first_index count
  //   followed by count entries of two 32-bit words:
  //     word 0: low 16 bits  = data offset to store into, or handler address
  //             high 16 bits = non-zero (the 0x00010000 term) to call the
  //                            handler instead of storing the data
  //     word 1: bits that must be zero in the method data, a set bit here
  //             that is also set in the data raises INVALID_BITFIELD
  // The final ".b16 0x800 0" is a sentinel: dispatch masks the index to 0x7ff,
  // so every index compares below 0x800 and the walk ends with ILLEGAL_MTHD.
  69. dispatch_table:
  70. // mthd 0x0000, NAME
  71. .b16 0x000 1
  72. .b32 #ctx_object ~0xffffffff
  73. // mthd 0x0100, NOP
  74. .b16 0x040 1
  75. .b32 0x00010000 + #cmd_nop ~0xffffffff
  76. // mthd 0x0140, PM_TRIGGER
  77. .b16 0x050 1
  78. .b32 0x00010000 + #cmd_pm_trigger ~0xffffffff
  79. #ifdef GT215
  80. // mthd 0x0180-0x018c, DMA_
  81. .b16 0x060 #ctx_dma_count
  // cmd_dma derives the DMA slot number from the distance between the entry
  // it was called for and this label.
  82. dispatch_dma:
  83. .b32 0x00010000 + #cmd_dma ~0xffffffff
  84. .b32 0x00010000 + #cmd_dma ~0xffffffff
  85. .b32 0x00010000 + #cmd_dma ~0xffffffff
  86. #endif
  87. // mthd 0x0200-0x0218, SRC_TILE
  88. .b16 0x80 7
  89. .b32 #ctx_src_tile_mode ~0x00000fff
  90. .b32 #ctx_src_xsize ~0x0007ffff
  91. .b32 #ctx_src_ysize ~0x00001fff
  92. .b32 #ctx_src_zsize ~0x000007ff
  93. .b32 #ctx_src_zoff ~0x00000fff
  94. .b32 #ctx_src_xoff ~0x0007ffff
  95. .b32 #ctx_src_yoff ~0x00001fff
  96. // mthd 0x0220-0x0238, DST_TILE
  97. .b16 0x88 7
  98. .b32 #ctx_dst_tile_mode ~0x00000fff
  99. .b32 #ctx_dst_xsize ~0x0007ffff
  100. .b32 #ctx_dst_ysize ~0x00001fff
  101. .b32 #ctx_dst_zsize ~0x000007ff
  102. .b32 #ctx_dst_zoff ~0x00000fff
  103. .b32 #ctx_dst_xoff ~0x0007ffff
  104. .b32 #ctx_dst_yoff ~0x00001fff
  105. // mthd 0x0300-0x0304, EXEC, WRCACHE_FLUSH
  106. .b16 0xc0 2
  107. .b32 0x00010000 + #cmd_exec ~0xffffffff
  108. .b32 0x00010000 + #cmd_wrcache_flush ~0xffffffff
  109. // mthd 0x030c-0x0340, various stuff
  110. .b16 0xc3 14
  111. .b32 #ctx_src_address_high ~0x000000ff
  112. .b32 #ctx_src_address_low ~0xffffffff
  113. .b32 #ctx_dst_address_high ~0x000000ff
  114. .b32 #ctx_dst_address_low ~0xffffffff
  115. .b32 #ctx_src_pitch ~0x0007ffff
  116. .b32 #ctx_dst_pitch ~0x0007ffff
  117. .b32 #ctx_xcnt ~0x0000ffff
  118. .b32 #ctx_ycnt ~0x00001fff
  119. .b32 #ctx_format ~0x0333ffff
  120. .b32 #ctx_swz_const0 ~0xffffffff
  121. .b32 #ctx_swz_const1 ~0xffffffff
  122. .b32 #ctx_query_address_high ~0x000000ff
  123. .b32 #ctx_query_address_low ~0xffffffff
  124. .b32 #ctx_query_counter ~0xffffffff
  // end-of-table sentinel (see header comment)
  125. .b16 0x800 0
  // Code segment entry point.
  //
  // Installs ih as the interrupt 0 handler, enables the fifo and context
  // switch interrupts, then sleeps forever.  All further work happens inside
  // ih.
  126. #ifdef GT215
  127. .section #gt215_ce_code
  128. #else
  129. .section #gf100_ce_code
  130. #endif
  131. main:
  // $r0 is zeroed once here and then used as a constant zero base by the
  // rest of the code (I[$r0 + ...], D[$r0 + ...]).
  132. clear b32 $r0
  // NOTE(review): stack pointer starts at 0, presumably so the first push
  // wraps to the top of data memory -- confirm against the Falcon docs.
  133. mov $sp $r0
  134. // setup i0 handler and route fifo and ctxswitch to it
  135. mov $r1 #ih
  136. mov $iv0 $r1
  137. mov $r1 0x400
  // $r2 = 0x0000fff3: every low-half bit set except bits 2 and 3, the two
  // bits ih tests for (0x4 -> dispatch, 0x8 -> chsw).
  138. movw $r2 0xfff3
  139. sethi $r2 0
  140. iowr I[$r1 + 0x300] $r2
  141. // enable interrupts
  // $r2 becomes 0x0000ffff
  142. or $r2 0xc
  143. iowr I[$r1] $r2
  144. bset $flags ie0
  145. // enable fifo access and context switching
  146. mov $r1 0x1200
  147. mov $r2 3
  148. iowr I[$r1] $r2
  149. // sleep forever, waking for interrupts
  150. bset $flags $p0
  151. spin:
  // $p0 is never cleared in this file, so this loop does not exit.
  152. sleep $p0
  153. bra #spin
  154. // i0 handler
  //
  // Reads the pending-interrupt word from I[0x200] into $r1, services a
  // context switch request (bit 3) and/or a fifo method (bit 2), then writes
  // the serviced bits to I[0x100] (presumably the acknowledge register --
  // confirm) and returns from the interrupt.
  //
  // $r1 must survive the calls to chsw and dispatch: it is re-tested after
  // chsw and used for the final write.  No registers are saved on entry;
  // main only sleeps, so it holds no live state.
  155. ih:
  156. iord $r1 I[$r0 + 0x200]
  157. and $r2 $r1 0x00000008
  158. bra e #ih_no_chsw
  159. call #chsw
  160. ih_no_chsw:
  161. and $r2 $r1 0x00000004
  162. bra e #ih_no_cmd
  163. call #dispatch
  164. ih_no_cmd:
  // keep only the two bits handled above
  165. and $r1 $r1 0x0000000c
  166. iowr I[$r0 + 0x100] $r1
  167. iret
  // Save or restore the engine context for a channel.
  //
  // Inputs:
  168. // $p1 direction (0 = unload, 1 = load)
  169. // $r3 channel
  // Clobbers: $r4 on both chipsets, plus $r5, $r6, $r14, $r15 on non-GT215.
  // $r1, $r2 and $r3 are left untouched, which chsw relies on.
  170. swctx:
  171. mov $r4 0x7700
  172. mov $xtargets $r4
  173. #ifdef GT215
  174. // target 7 hardcoded to ctx dma object
  175. mov $xdbase $r0
  176. #else
  177. // read SCRATCH3 to decide if we are PCOPY0 or PCOPY1
  178. mov $r4 0x2100
  179. iord $r4 I[$r4 + 0]
  // $r4 = 0x30 or 0x40: offset of this engine's 16-byte info block, selected
  // by bit 0 of the value just read
  180. and $r4 1
  181. shl b32 $r4 4
  182. add b32 $r4 0x30
  183. // channel is in vram
  // $r15 = 0x61c << 6 = 0x18700
  184. mov $r15 0x61c
  185. shl b32 $r15 6
  186. mov $r5 0x114
  187. iowrs I[$r15] $r5
  188. // read 16-byte PCOPYn info, containing context pointer, from channel
  189. shl b32 $r5 $r3 4
  190. add b32 $r5 2
  191. mov $xdbase $r5
  192. mov $r5 $sp
  193. // get a chunk of stack space, aligned to 256 byte boundary
  // $sp itself is not moved: the buffer lies below the current stack top
  // and is only used until the two loads further down.
  194. sub b32 $r5 0x100
  195. mov $r6 0xff
  196. not b32 $r6
  197. and $r5 $r6
  // the high half of $r5 carries the transfer size code for xdld
  // (NOTE(review): 0x0002 presumably selects 16 bytes -- confirm)
  198. sethi $r5 0x00020000
  199. xdld $r4 $r5
  200. xdwait
  // drop the size code so $r5 is a plain data address again
  201. sethi $r5 0
  202. // set context pointer, from within channel VM
  203. mov $r14 0
  204. iowrs I[$r15] $r14
  // xdbase = (dword1 << 24) | (dword0 >> 8), i.e. the 64-bit value held in
  // the first two dwords of the info block shifted right by 8
  205. ld b32 $r4 D[$r5 + 0]
  206. shr b32 $r4 8
  207. ld b32 $r6 D[$r5 + 4]
  208. shl b32 $r6 24
  209. or $r4 $r6
  210. mov $xdbase $r4
  211. #endif
  212. // 256-byte context, at start of data segment
  // $r4 = data address 0 with size code 0x0006 in the high half
  213. mov b32 $r4 $r0
  214. sethi $r4 0x60000
  215. // swap!
  216. bra $p1 #swctx_load
  // $p1 clear: store the local context out
  217. xdst $r0 $r4
  218. bra #swctx_done
  219. swctx_load:
  // $p1 set: load the context in
  220. xdld $r0 $r4
  221. swctx_done:
  // block until the transfer has completed
  222. xdwait
  223. ret
  // Context switch request handler, called from ih.
  //
  // If the current channel (I[0x1400]) has bit 30 set, its context is
  // unloaded, bit 30 is cleared, 1 is written to I[0x1600] and the routine
  // returns.  Otherwise the next channel (I[0x1500]) is examined: if its
  // bit 30 is set its context is loaded, and in every case 2 is written to
  // I[0x1600].
  //
  // Clobbers $r2-$r8, $r13-$r15 and $p1.  Preserves $r1 for ih.
  224. chsw:
  225. // read current channel
  226. mov $r2 0x1400
  227. iord $r3 I[$r2]
  228. // if it's active, unload it and return
  229. xbit $r15 $r3 0x1e
  230. bra e #chsw_no_unload
  // $p1 = 0 selects the unload direction in swctx
  231. bclr $flags $p1
  232. call #swctx
  233. bclr $r3 0x1e
  234. iowr I[$r2] $r3
  235. mov $r4 1
  236. iowr I[$r2 + 0x200] $r4
  237. ret
  238. // read next channel
  239. chsw_no_unload:
  240. iord $r3 I[$r2 + 0x100]
  241. // is there a channel waiting to be loaded?
  242. xbit $r13 $r3 0x1e
  243. bra e #chsw_finish_load
  // $p1 = 1 selects the load direction in swctx
  244. bset $flags $p1
  245. call #swctx
  246. #ifdef GT215
  247. // load dma objects back into TARGET regs
  248. mov $r5 #ctx_dma
  249. mov $r6 #ctx_dma_count
  // Writes ctx_dma[$r6] to I[(0x180 + $r6) << 8], the same addresses cmd_dma
  // uses (0x18000 + slot * 0x100).
  // NOTE(review): $r6 starts at ctx_dma_count (3) and the loop only exits
  // once the subtraction borrows, so it runs for $r6 = 3, 2, 1, 0.  The
  // first pass reads the word after the three DMA slots
  // (ctx_query_address_high) and writes it to I[0x18300].  Looks like an
  // off-by-one -- confirm whether that write is harmless before changing.
  250. chsw_load_ctx_dma:
  251. ld b32 $r7 D[$r5 + $r6 * 4]
  252. add b32 $r8 $r6 0x180
  253. shl b32 $r8 8
  254. iowr I[$r8] $r7
  255. sub b32 $r6 1
  256. bra nc #chsw_load_ctx_dma
  257. #endif
  258. chsw_finish_load:
  259. mov $r3 2
  260. iowr I[$r2 + 0x200] $r3
  261. ret
  // Fifo method handler, called from ih.
  //
  // Reads one method (index in the low 11 bits of I[0x1a00], data in
  // I[0x1900]), looks it up in dispatch_table, checks the data against the
  // entry's reserved-bit mask, and then either stores the data into the
  // context or calls the entry's handler.
  //
  // Register use:
  //   $r2  exception data: (raw method word << 16) | error code
  //        1 = ILLEGAL_MTHD, 3 = INVALID_BITFIELD (2 or-ed in, then 1)
  //   $r3  method data
  //   $r4  method index, then address of the matching table entry
  //   $r5  table cursor, then store offset / handler address
  //   $r6  first index of current range, then the entry's "call" flag
  //   $r7  entry count of current range
  // Handlers are entered with $p1 clear and report an error by setting it.
  // Preserves $r1 for ih.
  262. dispatch:
  263. // read incoming fifo command
  264. mov $r3 0x1900
  265. iord $r2 I[$r3 + 0x100]
  266. iord $r3 I[$r3 + 0x000]
  267. and $r4 $r2 0x7ff
  268. // $r2 will be used to store exception data
  269. shl b32 $r2 0x10
  270. // lookup method in the dispatch table, ILLEGAL_MTHD if not found
  271. mov $r5 #dispatch_table
  // cleared up front so the 16-bit loads below leave the high halves zero
  272. clear b32 $r6
  273. clear b32 $r7
  274. dispatch_loop:
  275. ld b16 $r6 D[$r5 + 0]
  276. ld b16 $r7 D[$r5 + 2]
  // step over the 4-byte range header: $r5 -> first entry of the range
  277. add b32 $r5 4
  // index below the start of this range: ranges are sorted, so no later
  // range can match either
  278. cmpu b32 $r4 $r6
  279. bra c #dispatch_illegal_mthd
  // index below first + count: it falls inside this range
  280. add b32 $r7 $r6
  281. cmpu b32 $r4 $r7
  282. bra c #dispatch_valid_mthd
  // otherwise skip count * 8 bytes of entries to reach the next header
  283. sub b32 $r7 $r6
  284. shl b32 $r7 3
  285. add b32 $r5 $r7
  286. bra #dispatch_loop
  287. // ensure no bits set in reserved fields, INVALID_BITFIELD
  288. dispatch_valid_mthd:
  // $r4 = address of entry = range base + (index - first) * 8
  289. sub b32 $r4 $r6
  290. shl b32 $r4 3
  291. add b32 $r4 $r5
  292. ld b32 $r5 D[$r4 + 4]
  293. and $r5 $r3
  294. cmpu b32 $r5 0
  295. bra ne #dispatch_invalid_bitfield
  296. // depending on dispatch flags: execute method, or save data as state
  297. ld b16 $r5 D[$r4 + 0]
  298. ld b16 $r6 D[$r4 + 2]
  299. cmpu b32 $r6 0
  300. bra ne #dispatch_cmd
  301. st b32 D[$r5] $r3
  302. bra #dispatch_done
  303. dispatch_cmd:
  304. bclr $flags $p1
  305. call $r5
  306. bra $p1 #dispatch_error
  307. bra #dispatch_done
  308. dispatch_invalid_bitfield:
  309. or $r2 2
  // deliberate fall-through: INVALID_BITFIELD ends up with code 3
  310. dispatch_illegal_mthd:
  311. or $r2 1
  312. // store exception data in SCRATCH0/SCRATCH1, signal hostirq
  313. dispatch_error:
  314. mov $r4 0x1000
  315. iowr I[$r4 + 0x000] $r2
  316. iowr I[$r4 + 0x100] $r3
  317. mov $r2 0x40
  318. iowr I[$r0] $r2
  // busy-wait until bit 6 of I[0x200] reads back as zero
  319. hostirq_wait:
  320. iord $r2 I[$r0 + 0x200]
  321. and $r2 0x40
  322. cmpu b32 $r2 0
  323. bra ne #hostirq_wait
  // common exit: write 1 to I[0x1d00]
  // (NOTE(review): presumably acknowledges the fifo command -- confirm)
  324. dispatch_done:
  325. mov $r2 0x1d00
  326. mov $r3 1
  327. iowr I[$r2] $r3
  328. ret
  329. // No-operation
  330. //
  // Called through the dispatch table for mthd 0x0100.  Returns immediately
  // without touching any register or flag, so $p1 stays clear (no error).
  //
  331. // Inputs:
  332. // $r1: irqh state
  333. // $r2: hostirq state
  334. // $r3: data
  335. // $r4: dispatch table entry
  336. // Outputs:
  337. // $r1: irqh state
  338. // $p1: set on error
  339. // $r2: hostirq state
  340. // $r3: data
  341. cmd_nop:
  342. ret
  343. // PM_TRIGGER
  344. //
  // Writes 0x00020000 to I[0x2200].  Never sets $p1.
  //
  // NOTE(review): the list below names $r2 and $r3 as outputs, but this
  // routine overwrites both.  dispatch only reads them after a handler
  // returns with $p1 set, which cannot happen here.
  //
  345. // Inputs:
  346. // $r1: irqh state
  347. // $r2: hostirq state
  348. // $r3: data
  349. // $r4: dispatch table entry
  350. // Outputs:
  351. // $r1: irqh state
  352. // $p1: set on error
  353. // $r2: hostirq state
  354. // $r3: data
  355. cmd_pm_trigger:
  356. mov $r2 0x2200
  // $r3 = 0x00020000
  357. clear b32 $r3
  358. sethi $r3 0x20000
  359. iowr I[$r2] $r3
  360. ret
  361. #ifdef GT215
  362. // SET_DMA_* method handler
  363. //
  // Sets bit 30 in the method data, saves the result in the ctx_dma slot for
  // this method and writes it to the matching I/O register.  The slot is
  // derived from which of the three dispatch_dma table entries was used.
  // Never sets $p1.  Modifies $r3 (bit 30 set) and $r4.
  //
  364. // Inputs:
  365. // $r1: irqh state
  366. // $r2: hostirq state
  367. // $r3: data
  368. // $r4: dispatch table entry
  369. // Outputs:
  370. // $r1: irqh state
  371. // $p1: set on error
  372. // $r2: hostirq state
  373. // $r3: data
  374. cmd_dma:
  // table entries are 8 bytes apart, so (entry - dispatch_dma) >> 1 is
  // slot * 4, the byte offset of the slot inside ctx_dma
  375. sub b32 $r4 #dispatch_dma
  376. shr b32 $r4 1
  377. bset $r3 0x1e
  378. st b32 D[$r4 + #ctx_dma] $r3
  // I/O address = (0x600 + slot * 4) << 6 = 0x18000 + slot * 0x100
  379. add b32 $r4 0x600
  380. shl b32 $r4 6
  381. iowr I[$r4] $r3
  382. ret
  383. #endif
  384. // Calculates the hw swizzle mask and adjusts the surface's xcnt to match
  385. //
  // Called from cmd_exec when the FORMAT bit is set in the method data.
  //
  // Reads ctx_format, ctx_xcnt, ctx_swz_const0 and ctx_swz_const1.
  // Writes ctx_src_cpp and ctx_dst_cpp, and programs the xcnt, cpp, swizzle
  // and swizzle-constant I/O registers (0x20400 .. 0x21200).
  //
  // Register use:
  //   $r4   FORMAT, consumed four bits at a time (one nibble per component)
  //   $r5   cpp: FORMAT bits 16:17 plus one
  //   $r6   src_ncomp: FORMAT bits 20:21 plus one
  //   $r7   dst_ncomp: FORMAT bits 24:25 plus one
  //   $r8   byte index into the 16-byte swizzle buffer on the stack
  //   $r9   destination component counter
  //   $r10  selector nibble for the current component
  //   $r11  byte counter within the current component
  //   $r12  swizzle byte being produced
  //   $p2   set once any output byte references the source
  // Clobbers $r4-$r12 and $p2.  $r3 is not touched.
  // At most 4 * 4 = 16 swizzle bytes are produced, matching the buffer size.
  386. cmd_exec_set_format:
  387. // zero out a chunk of the stack to store the swizzle into
  388. add $sp -0x10
  389. st b32 D[$sp + 0x00] $r0
  390. st b32 D[$sp + 0x04] $r0
  391. st b32 D[$sp + 0x08] $r0
  392. st b32 D[$sp + 0x0c] $r0
  393. // extract cpp, src_ncomp and dst_ncomp from FORMAT
  394. ld b32 $r4 D[$r0 + #ctx_format]
  395. extr $r5 $r4 16:17
  396. add b32 $r5 1
  397. extr $r6 $r4 20:21
  398. add b32 $r6 1
  399. extr $r7 $r4 24:25
  400. add b32 $r7 1
  401. // convert FORMAT swizzle mask to hw swizzle mask
  402. bclr $flags $p2
  403. clear b32 $r8
  404. clear b32 $r9
  // outer loop: one pass per destination component
  405. ncomp_loop:
  406. and $r10 $r4 0xf
  407. shr b32 $r4 4
  408. clear b32 $r11
  // inner loop: one pass per byte of the component
  409. bpc_loop:
  // selector 0-3: source component, byte = selector * cpp + byte index
  410. cmpu b8 $r10 4
  411. bra nc #cmp_c0
  412. mulu $r12 $r10 $r5
  413. add b32 $r12 $r11
  414. bset $flags $p2
  415. bra #bpc_next
  416. cmp_c0:
  // still using the flags of the compare against 4 above:
  // selector 4 falls through (byte = 0x10 + byte index)
  417. bra ne #cmp_c1
  418. mov $r12 0x10
  419. add b32 $r12 $r11
  420. bra #bpc_next
  421. cmp_c1:
  // selector 5: byte = 0x14 + byte index
  422. cmpu b8 $r10 6
  423. bra nc #cmp_zero
  424. mov $r12 0x14
  425. add b32 $r12 $r11
  426. bra #bpc_next
  427. cmp_zero:
  // selector 6 and above: byte = 0x80
  428. mov $r12 0x80
  429. bpc_next:
  430. st b8 D[$sp + $r8] $r12
  431. add b32 $r8 1
  432. add b32 $r11 1
  433. cmpu b32 $r11 $r5
  434. bra c #bpc_loop
  435. add b32 $r9 1
  436. cmpu b32 $r9 $r7
  437. bra c #ncomp_loop
  438. // SRC_XCNT = (xcnt * src_cpp), or 0 if no src ref in swz (hw will hang)
  // src_cpp = src_ncomp * cpp
  439. mulu $r6 $r5
  440. st b32 D[$r0 + #ctx_src_cpp] $r6
  441. ld b32 $r8 D[$r0 + #ctx_xcnt]
  442. mulu $r6 $r8
  443. bra $p2 #dst_xcnt
  444. clear b32 $r6
  445. dst_xcnt:
  // dst_cpp = dst_ncomp * cpp, DST_XCNT = xcnt * dst_cpp
  446. mulu $r7 $r5
  447. st b32 D[$r0 + #ctx_dst_cpp] $r7
  448. mulu $r7 $r8
  // $r5 = 0x810 << 6 = 0x20400: source xcnt at +0x000, dest xcnt at +0x100
  449. mov $r5 0x810
  450. shl b32 $r5 6
  451. iowr I[$r5 + 0x000] $r6
  452. iowr I[$r5 + 0x100] $r7
  // 0x20c00: ((dst_cpp - 1) << 8) | (src_cpp - 1)
  453. add b32 $r5 0x800
  454. ld b32 $r6 D[$r0 + #ctx_dst_cpp]
  455. sub b32 $r6 1
  456. shl b32 $r6 8
  457. ld b32 $r7 D[$r0 + #ctx_src_cpp]
  458. sub b32 $r7 1
  459. or $r6 $r7
  460. iowr I[$r5 + 0x000] $r6
  // 0x20d00 .. 0x21000: the four swizzle words built on the stack
  461. add b32 $r5 0x100
  462. ld b32 $r6 D[$sp + 0x00]
  463. iowr I[$r5 + 0x000] $r6
  464. ld b32 $r6 D[$sp + 0x04]
  465. iowr I[$r5 + 0x100] $r6
  466. ld b32 $r6 D[$sp + 0x08]
  467. iowr I[$r5 + 0x200] $r6
  468. ld b32 $r6 D[$sp + 0x0c]
  469. iowr I[$r5 + 0x300] $r6
  // 0x21100 / 0x21200: swizzle constants
  470. add b32 $r5 0x400
  471. ld b32 $r6 D[$r0 + #ctx_swz_const0]
  472. iowr I[$r5 + 0x000] $r6
  473. ld b32 $r6 D[$r0 + #ctx_swz_const1]
  474. iowr I[$r5 + 0x100] $r6
  // release the swizzle buffer
  475. add $sp 0x10
  476. ret
  477. // Setup to handle a tiled surface
  478. //
  479. // Calculates a number of parameters the hardware requires in order
  480. // to correctly handle tiling.
  481. //
  482. // Offset calculation is performed as follows (Tp/Th/Td from TILE_MODE):
  483. // nTx = round_up(w * cpp, 1 << Tp) >> Tp
  484. // nTy = round_up(h, 1 << Th) >> Th
  485. // Txo = (x * cpp) & ((1 << Tp) - 1)
  486. // Tx = (x * cpp) >> Tp
  487. // Tyo = y & ((1 << Th) - 1)
  488. // Ty = y >> Th
  489. // Tzo = z & ((1 << Td) - 1)
  490. // Tz = z >> Td
  491. //
  492. // off = (Tzo << Tp << Th) + (Tyo << Tp) + Txo
  493. // off += ((Tz * nTy * nTx) + (Ty * nTx) + Tx) << Td << Th << Tp;
  494. //
  495. // Inputs:
  496. // $r4: hw command (0x104800)
  497. // $r5: ctx offset adjustment for src/dst selection
  498. // $p2: set if dst surface
  499. //
  // All context fields are addressed through the ctx_src_* labels plus $r5,
  // so $r5 = 0 selects the source and $r5 = (ctx_dst_address_high -
  // ctx_src_address_high) selects the destination.
  //
  // Register use:
  //   $r4   command word, bit (17 + $p2) is set here when Tp = 6
  //   $r6   I/O base for this surface, (0x208 + $p2) << 8
  //   $r7   Tp, later Tp + Th, later Ts = Tp + Th + Td
  //   $r8   Th, later Th - 2, later z / Tz
  //   $r9   Td, later nTx
  //   $r10  x * cpp, then Tx, then Ot
  //   $r11  scratch, finally the CFG_YZ_TILE_SIZE value
  //   $r12  Op (offset within the tile)
  //   $r13  y, then Ty
  //   $r14, $r15  scratch ($r15 holds nTy after the pops)
  // Clobbers $r6-$r15.  Uses two stack words temporarily.  $r3 is not
  // touched.
  500. cmd_exec_set_surface_tiled:
  501. // translate TILE_MODE into Tp, Th, Td shift values
  502. ld b32 $r7 D[$r5 + #ctx_src_tile_mode]
  // Td = TILE_MODE bits 8:11
  503. extr $r9 $r7 8:11
  // Th = TILE_MODE bits 4:7 plus a chipset-specific base
  504. extr $r8 $r7 4:7
  505. #ifdef GT215
  506. add b32 $r8 2
  507. #else
  508. add b32 $r8 3
  509. #endif
  // Tp = 4 when TILE_MODE bits 0:3 equal 0xe, otherwise 6
  510. extr $r7 $r7 0:3
  511. cmp b32 $r7 0xe
  512. bra ne #xtile64
  513. mov $r7 4
  514. bra #xtileok
  515. xtile64:
  // flag the Tp = 6 case in the command word: bit 17 for src, 18 for dst
  516. xbit $r7 $flags $p2
  517. add b32 $r7 17
  518. bset $r4 $r7
  519. mov $r7 6
  520. xtileok:
  521. // Op = (x * cpp) & ((1 << Tp) - 1)
  522. // Tx = (x * cpp) >> Tp
  523. ld b32 $r10 D[$r5 + #ctx_src_xoff]
  524. ld b32 $r11 D[$r5 + #ctx_src_cpp]
  525. mulu $r10 $r11
  526. mov $r11 1
  527. shl b32 $r11 $r7
  528. sub b32 $r11 1
  529. and $r12 $r10 $r11
  530. shr b32 $r10 $r7
  531. // Tyo = y & ((1 << Th) - 1)
  532. // Ty = y >> Th
  533. ld b32 $r13 D[$r5 + #ctx_src_yoff]
  534. mov $r14 1
  535. shl b32 $r14 $r8
  536. sub b32 $r14 1
  537. and $r11 $r13 $r14
  538. shr b32 $r13 $r8
  539. // YTILE = ((1 << Th) << 12) | ((1 << Th) - Tyo)
  // $r14 held (1 << Th) - 1, adding one restores 1 << Th
  540. add b32 $r14 1
  541. shl b32 $r15 $r14 12
  542. sub b32 $r14 $r11
  543. or $r15 $r14
  // $r6 = 0x20800 for src, 0x20900 for dst
  544. xbit $r6 $flags $p2
  545. add b32 $r6 0x208
  546. shl b32 $r6 8
  547. iowr I[$r6 + 0x000] $r15
  548. // Op += Tyo << Tp
  549. shl b32 $r11 $r7
  550. add b32 $r12 $r11
  551. // nTx = ((w * cpp) + ((1 << Tp) - 1) >> Tp)
  552. ld b32 $r15 D[$r5 + #ctx_src_xsize]
  553. ld b32 $r11 D[$r5 + #ctx_src_cpp]
  554. mulu $r15 $r11
  555. mov $r11 1
  556. shl b32 $r11 $r7
  557. sub b32 $r11 1
  558. add b32 $r15 $r11
  559. shr b32 $r15 $r7
  // park nTx on the stack, the registers are needed below
  560. push $r15
  561. // nTy = (h + ((1 << Th) - 1)) >> Th
  562. ld b32 $r15 D[$r5 + #ctx_src_ysize]
  563. mov $r11 1
  564. shl b32 $r11 $r8
  565. sub b32 $r11 1
  566. add b32 $r15 $r11
  567. shr b32 $r15 $r8
  // park nTy on top of nTx
  568. push $r15
  569. // Tys = Tp + Th
  570. // CFG_YZ_TILE_SIZE = ((1 << Th) >> 2) << Td
  571. add b32 $r7 $r8
  // Th is at least 2 here because of the +2 / +3 base added above
  572. sub b32 $r8 2
  573. mov $r11 1
  574. shl b32 $r11 $r8
  575. shl b32 $r11 $r9
  576. // Tzo = z & ((1 << Td) - 1)
  577. // Tz = z >> Td
  578. // Op += Tzo << Tys
  579. // Ts = Tys + Td
  580. ld b32 $r8 D[$r5 + #ctx_src_zoff]
  581. mov $r14 1
  582. shl b32 $r14 $r9
  583. sub b32 $r14 1
  584. and $r15 $r8 $r14
  585. shl b32 $r15 $r7
  586. add b32 $r12 $r15
  587. add b32 $r7 $r9
  588. shr b32 $r8 $r9
  589. // Ot = ((Tz * nTy * nTx) + (Ty * nTx) + Tx) << Ts
  // pops come back in reverse push order: $r15 = nTy, $r9 = nTx
  590. pop $r15
  591. pop $r9
  592. mulu $r13 $r9
  593. add b32 $r10 $r13
  594. mulu $r8 $r9
  595. mulu $r8 $r15
  596. add b32 $r10 $r8
  597. shl b32 $r10 $r7
  598. // PITCH = (nTx - 1) << Ts
  599. sub b32 $r9 1
  600. shl b32 $r9 $r7
  601. iowr I[$r6 + 0x200] $r9
  602. // SRC_ADDRESS_LOW = (Ot + Op) & 0xffffffff
  603. // CFG_ADDRESS_HIGH |= ((Ot + Op) >> 32) << 16
  604. ld b32 $r7 D[$r5 + #ctx_src_address_low]
  605. ld b32 $r8 D[$r5 + #ctx_src_address_high]
  606. add b32 $r10 $r12
  // 64-bit add: the carry out of the low word goes into the high word
  607. add b32 $r7 $r10
  608. adc b32 $r8 0
  609. shl b32 $r8 16
  // merge in the CFG_YZ_TILE_SIZE value computed earlier
  610. or $r8 $r11
  // step back to the address-low register, the same one the linear path
  // writes at (0x202 + $p2) << 8
  611. sub b32 $r6 0x600
  612. iowr I[$r6 + 0x000] $r7
  613. add b32 $r6 0x400
  614. iowr I[$r6 + 0x000] $r8
  615. ret
  616. // Setup to handle a linear surface
  617. //
  618. // Nothing to see here.. Sets ADDRESS and PITCH, pretty non-exciting
  619. //
  // Inputs:
  //   $r5: ctx offset adjustment for src/dst selection (0 for src)
  //   $p2: set if dst surface
  // Clobbers $r6 and $r7.
  620. cmd_exec_set_surface_linear:
  // $r6 = (0x202 + $p2) << 8: 0x20200 for src, 0x20300 for dst
  621. xbit $r6 $flags $p2
  622. add b32 $r6 0x202
  623. shl b32 $r6 8
  624. ld b32 $r7 D[$r5 + #ctx_src_address_low]
  625. iowr I[$r6 + 0x000] $r7
  // +0x400: address high, placed in bits 16 and up
  626. add b32 $r6 0x400
  627. ld b32 $r7 D[$r5 + #ctx_src_address_high]
  628. shl b32 $r7 16
  629. iowr I[$r6 + 0x000] $r7
  // +0x800: pitch
  630. add b32 $r6 0x400
  631. ld b32 $r7 D[$r5 + #ctx_src_pitch]
  632. iowr I[$r6 + 0x000] $r7
  633. ret
  634. // wait for regs to be available for use
  //
  // Spins until bit 0 of I[0x20000] reads as zero.
  // $r0 and $r1 are saved and restored, so $r0 is zero again on return and
  // no general register is clobbered.  The flags are modified.
  635. cmd_exec_wait:
  636. push $r0
  637. push $r1
  // $r0 = 0x800 << 6 = 0x20000
  638. mov $r0 0x800
  639. shl b32 $r0 6
  640. loop:
  641. iord $r1 I[$r0]
  642. and $r1 1
  643. bra ne #loop
  644. pop $r1
  645. pop $r0
  646. ret
  // Queue the QUERY write requested by an EXEC method.
  //
  // Inputs:
  //   $r3: EXEC method data (bit 13 = QUERY_SHORT)
  // Reads ctx_query_address_high/low and ctx_query_counter.
  // When QUERY_SHORT is clear, two operations are issued: a 12-byte write at
  // query address + 4, then the 4-byte counter write at the query address.
  // When it is set, only the counter write is issued.
  // Each operation waits for the engine first, then programs the address,
  // length, swizzle and constant registers and kicks it off by writing
  // 0x00012601 to I[0x20000].
  // Clobbers $r4 and $r5.
  647. cmd_exec_query:
  648. // if QUERY_SHORT not set, write out { -, 0, TIME_LO, TIME_HI }
  649. xbit $r4 $r3 13
  650. bra ne #query_counter
  651. call #cmd_exec_wait
  // $r4 = 0x80c << 6 = 0x20300
  652. mov $r4 0x80c
  653. shl b32 $r4 6
  654. ld b32 $r5 D[$r0 + #ctx_query_address_low]
  // skip the first dword of the query record
  655. add b32 $r5 4
  656. iowr I[$r4 + 0x000] $r5
  657. iowr I[$r4 + 0x100] $r0
  // 0xc = 12 bytes
  658. mov $r5 0xc
  659. iowr I[$r4 + 0x200] $r5
  660. add b32 $r4 0x400
  661. ld b32 $r5 D[$r0 + #ctx_query_address_high]
  662. shl b32 $r5 16
  663. iowr I[$r4 + 0x000] $r5
  // $r4 = 0x20c00, the register cmd_exec_set_format writes cpp into
  664. add b32 $r4 0x500
  // $r5 = 0x00010b00
  665. mov $r5 0x00000b00
  666. sethi $r5 0x00010000
  667. iowr I[$r4 + 0x000] $r5
  // $r5 = 0x80808080, built as 0x4040 << 1 with the high half set after
  668. mov $r5 0x00004040
  669. shl b32 $r5 1
  670. sethi $r5 0x80800000
  671. iowr I[$r4 + 0x100] $r5
  // $r5 = 0x13121110
  672. mov $r5 0x00001110
  673. sethi $r5 0x13120000
  674. iowr I[$r4 + 0x200] $r5
  // $r5 = 0x17161514
  675. mov $r5 0x00001514
  676. sethi $r5 0x17160000
  677. iowr I[$r4 + 0x300] $r5
  // kick: write 0x00012601 to I[0x20000]
  678. mov $r5 0x00002601
  679. sethi $r5 0x00010000
  680. mov $r4 0x800
  681. shl b32 $r4 6
  682. iowr I[$r4 + 0x000] $r5
  683. // write COUNTER
  684. query_counter:
  685. call #cmd_exec_wait
  686. mov $r4 0x80c
  687. shl b32 $r4 6
  688. ld b32 $r5 D[$r0 + #ctx_query_address_low]
  689. iowr I[$r4 + 0x000] $r5
  690. iowr I[$r4 + 0x100] $r0
  // 4 bytes
  691. mov $r5 0x4
  692. iowr I[$r4 + 0x200] $r5
  693. add b32 $r4 0x400
  694. ld b32 $r5 D[$r0 + #ctx_query_address_high]
  695. shl b32 $r5 16
  696. iowr I[$r4 + 0x000] $r5
  // $r4 = 0x20c00
  697. add b32 $r4 0x500
  698. mov $r5 0x00000300
  699. iowr I[$r4 + 0x000] $r5
  // $r5 = 0x13121110
  700. mov $r5 0x00001110
  701. sethi $r5 0x13120000
  702. iowr I[$r4 + 0x100] $r5
  // $r4 = 0x21100, the register cmd_exec_set_format loads swz_const0 into
  703. ld b32 $r5 D[$r0 + #ctx_query_counter]
  704. add b32 $r4 0x500
  705. iowr I[$r4 + 0x000] $r5
  // kick: write 0x00012601 to I[0x20000]
  706. mov $r5 0x00002601
  707. sethi $r5 0x00010000
  708. mov $r4 0x800
  709. shl b32 $r4 6
  710. iowr I[$r4 + 0x000] $r5
  711. ret
  712. // Execute a copy operation
  713. //
  // Waits for the engine, programs format/xcnt, source surface and
  // destination surface, then writes the command word to I[0x20000].
  // $r4 accumulates the command word: 0x200 when FORMAT is used, bit 7 for a
  // tiled source, bit 8 for a tiled destination, plus whatever
  // cmd_exec_set_surface_tiled sets, or-ed with 0x44000041 at the end.
  // Never sets $p1.  Clobbers $r4-$r15 and $p2.
  //
  714. // Inputs:
  715. // $r1: irqh state
  716. // $r2: hostirq state
  717. // $r3: data
  718. // 000002000 QUERY_SHORT
  719. // 000001000 QUERY
  720. // 000000100 DST_LINEAR
  721. // 000000010 SRC_LINEAR
  722. // 000000001 FORMAT
  723. // $r4: dispatch table entry
  724. // Outputs:
  725. // $r1: irqh state
  726. // $p1: set on error
  727. // $r2: hostirq state
  728. // $r3: data
  729. cmd_exec:
  730. call #cmd_exec_wait
  731. // if format requested, call function to calculate it, otherwise
  732. // fill in cpp/xcnt for both surfaces as if (cpp == 1)
  733. xbit $r15 $r3 0
  734. bra e #cmd_exec_no_format
  735. call #cmd_exec_set_format
  // cmd_exec_set_format clobbers $r4, so the command word starts here
  736. mov $r4 0x200
  737. bra #cmd_exec_init_src_surface
  738. cmd_exec_no_format:
  // $r6 = 0x810 << 6 = 0x20400, the xcnt registers
  739. mov $r6 0x810
  740. shl b32 $r6 6
  741. mov $r7 1
  742. st b32 D[$r0 + #ctx_src_cpp] $r7
  743. st b32 D[$r0 + #ctx_dst_cpp] $r7
  744. ld b32 $r7 D[$r0 + #ctx_xcnt]
  745. iowr I[$r6 + 0x000] $r7
  746. iowr I[$r6 + 0x100] $r7
  747. clear b32 $r4
  748. cmd_exec_init_src_surface:
  // $p2 = 0 and $r5 = 0 select the source surface in the helpers
  749. bclr $flags $p2
  750. clear b32 $r5
  // bit 4 set means SRC_LINEAR
  751. xbit $r15 $r3 4
  752. bra e #src_tiled
  753. call #cmd_exec_set_surface_linear
  754. bra #cmd_exec_init_dst_surface
  755. src_tiled:
  756. call #cmd_exec_set_surface_tiled
  757. bset $r4 7
  758. cmd_exec_init_dst_surface:
  // $p2 = 1 and $r5 = distance from the src fields to the dst fields select
  // the destination surface in the helpers
  759. bset $flags $p2
  760. mov $r5 #ctx_dst_address_high - #ctx_src_address_high
  // bit 8 set means DST_LINEAR
  761. xbit $r15 $r3 8
  762. bra e #dst_tiled
  763. call #cmd_exec_set_surface_linear
  764. bra #cmd_exec_kick
  765. dst_tiled:
  766. call #cmd_exec_set_surface_tiled
  767. bset $r4 8
  768. cmd_exec_kick:
  // $r5 = 0x20000: ycnt goes to +0x100, the command word to +0x000
  769. mov $r5 0x800
  770. shl b32 $r5 6
  771. ld b32 $r6 D[$r0 + #ctx_ycnt]
  772. iowr I[$r5 + 0x100] $r6
  // $r6 = 0x44000041
  773. mov $r6 0x0041
  774. // SRC_TARGET = 1, DST_TARGET = 2
  775. sethi $r6 0x44000000
  776. or $r4 $r6
  777. iowr I[$r5] $r4
  778. // if requested, queue up a QUERY write after the copy has completed
  779. xbit $r15 $r3 12
  780. bra e #cmd_exec_done
  781. call #cmd_exec_query
  782. cmd_exec_done:
  783. ret
  784. // Flush write cache
  785. //
  // Writes 0x00010000 to I[0x2200], the same register cmd_pm_trigger
  // writes 0x00020000 to.  Never sets $p1.
  //
  // NOTE(review): the list below names $r2 and $r3 as outputs, but this
  // routine overwrites both.  dispatch only reads them after a handler
  // returns with $p1 set, which cannot happen here.
  //
  786. // Inputs:
  787. // $r1: irqh state
  788. // $r2: hostirq state
  789. // $r3: data
  790. // $r4: dispatch table entry
  791. // Outputs:
  792. // $r1: irqh state
  793. // $p1: set on error
  794. // $r2: hostirq state
  795. // $r3: data
  796. cmd_wrcache_flush:
  797. mov $r2 0x2200
  // $r3 = 0x00010000
  798. clear b32 $r3
  799. sethi $r3 0x10000
  800. iowr I[$r2] $r3
  801. ret
  // pad the code segment to a 256-byte multiple
  802. .align 0x100