ctxgp100.c 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. /*
  2. * Copyright 2016 Red Hat Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. * Authors: Ben Skeggs <bskeggs@redhat.com>
  23. */
  24. #include "ctxgf100.h"
  25. #include <subdev/fb.h>
  26. /*******************************************************************************
  27. * PGRAPH context implementation
  28. ******************************************************************************/
  29. static void
  30. gp100_grctx_generate_pagepool(struct gf100_grctx *info)
  31. {
  32. const struct gf100_grctx_func *grctx = info->gr->func->grctx;
  33. const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS;
  34. const int s = 8;
  35. const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access);
  36. mmio_refn(info, 0x40800c, 0x00000000, s, b);
  37. mmio_wr32(info, 0x408010, 0x80000000);
  38. mmio_refn(info, 0x419004, 0x00000000, s, b);
  39. mmio_wr32(info, 0x419008, 0x00000000);
  40. }
  41. static void
  42. gp100_grctx_generate_attrib(struct gf100_grctx *info)
  43. {
  44. struct gf100_gr *gr = info->gr;
  45. const struct gf100_grctx_func *grctx = gr->func->grctx;
  46. const u32 alpha = grctx->alpha_nr;
  47. const u32 attrib = grctx->attrib_nr;
  48. const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
  49. const u32 size = roundup(gr->tpc_total * pertpc, 0x80);
  50. const u32 access = NV_MEM_ACCESS_RW;
  51. const int s = 12;
  52. const int b = mmio_vram(info, size, (1 << s), access);
  53. const int max_batches = 0xffff;
  54. u32 ao = 0;
  55. u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total;
  56. int gpc, ppc, n = 0;
  57. mmio_refn(info, 0x418810, 0x80000000, s, b);
  58. mmio_refn(info, 0x419848, 0x10000000, s, b);
  59. mmio_refn(info, 0x419c2c, 0x10000000, s, b);
  60. mmio_refn(info, 0x419b00, 0x00000000, s, b);
  61. mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
  62. mmio_wr32(info, 0x405830, attrib);
  63. mmio_wr32(info, 0x40585c, alpha);
  64. mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
  65. for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
  66. for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
  67. const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc];
  68. const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
  69. const u32 u = 0x418ea0 + (n * 0x04);
  70. const u32 o = PPC_UNIT(gpc, ppc, 0);
  71. if (!(gr->ppc_mask[gpc] & (1 << ppc)))
  72. continue;
  73. mmio_wr32(info, o + 0xc0, bs);
  74. mmio_wr32(info, o + 0xf4, bo);
  75. mmio_wr32(info, o + 0xf0, bs);
  76. bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
  77. mmio_wr32(info, o + 0xe4, as);
  78. mmio_wr32(info, o + 0xf8, ao);
  79. ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
  80. mmio_wr32(info, u, bs);
  81. }
  82. }
  83. mmio_wr32(info, 0x418eec, 0x00000000);
  84. mmio_wr32(info, 0x41befc, 0x00000000);
  85. }
  86. static void
  87. gp100_grctx_generate_405b60(struct gf100_gr *gr)
  88. {
  89. struct nvkm_device *device = gr->base.engine.subdev.device;
  90. const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
  91. u32 dist[TPC_MAX / 4] = {};
  92. u32 gpcs[GPC_MAX * 2] = {};
  93. u8 tpcnr[GPC_MAX];
  94. int tpc, gpc, i;
  95. memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
  96. /* won't result in the same distribution as the binary driver where
  97. * some of the gpcs have more tpcs than others, but this shall do
  98. * for the moment. the code for earlier gpus has this issue too.
  99. */
  100. for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
  101. do {
  102. gpc = (gpc + 1) % gr->gpc_nr;
  103. } while(!tpcnr[gpc]);
  104. tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
  105. dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
  106. gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
  107. }
  108. for (i = 0; i < dist_nr; i++)
  109. nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
  110. for (i = 0; i < gr->gpc_nr * 2; i++)
  111. nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
  112. }
  113. static void
  114. gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
  115. {
  116. struct nvkm_device *device = gr->base.engine.subdev.device;
  117. const struct gf100_grctx_func *grctx = gr->func->grctx;
  118. u32 idle_timeout, tmp;
  119. int i;
  120. gf100_gr_mmio(gr, gr->fuc_sw_ctx);
  121. idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
  122. grctx->pagepool(info);
  123. grctx->bundle(info);
  124. grctx->attrib(info);
  125. grctx->unkn(gr);
  126. gm200_grctx_generate_tpcid(gr);
  127. gf100_grctx_generate_r406028(gr);
  128. gk104_grctx_generate_r418bb8(gr);
  129. for (i = 0; i < 8; i++)
  130. nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
  131. nvkm_wr32(device, 0x406500, 0x00000000);
  132. nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
  133. for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
  134. tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5);
  135. nvkm_wr32(device, 0x4041c4, tmp);
  136. gp100_grctx_generate_405b60(gr);
  137. gf100_gr_icmd(gr, gr->fuc_bundle);
  138. nvkm_wr32(device, 0x404154, idle_timeout);
  139. gf100_gr_mthd(gr, gr->fuc_method);
  140. }
  141. const struct gf100_grctx_func
  142. gp100_grctx = {
  143. .main = gp100_grctx_generate_main,
  144. .unkn = gk104_grctx_generate_unkn,
  145. .bundle = gm107_grctx_generate_bundle,
  146. .bundle_size = 0x3000,
  147. .bundle_min_gpm_fifo_depth = 0x180,
  148. .bundle_token_limit = 0x1080,
  149. .pagepool = gp100_grctx_generate_pagepool,
  150. .pagepool_size = 0x20000,
  151. .attrib = gp100_grctx_generate_attrib,
  152. .attrib_nr_max = 0x660,
  153. .attrib_nr = 0x440,
  154. .alpha_nr_max = 0xc00,
  155. .alpha_nr = 0x800,
  156. };