/*
 * Copyright 2007 Dave Airlied
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
/*
 * Authors: Dave Airlied <airlied@linux.ie>
 *          Ben Skeggs <darktama@iinet.net.au>
 *          Jeremy Kolb <jkolb@brandeis.edu>
 */

#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>

#include "nouveau_drv.h"
#include "nouveau_dma.h"
#include "nouveau_fence.h"

#include "nouveau_bo.h"
#include "nouveau_ttm.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_vmm.h"

#include <nvif/class.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

/*
 * NV10-NV40 tiling helpers
 */

static void
nv10_bo_update_tile_region(struct drm_device *dev, struct nouveau_drm_tile *reg,
			   u32 addr, u32 size, u32 pitch, u32 flags)
{
	struct nouveau_drm *drm = nouveau_drm(dev);
	int i = reg - drm->tile.reg;
	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
	struct nvkm_fb_tile *tile = &fb->tile.region[i];

	nouveau_fence_unref(&reg->fence);

	if (tile->pitch)
		nvkm_fb_tile_fini(fb, i, tile);

	if (pitch)
		nvkm_fb_tile_init(fb, i, addr, size, pitch, flags, tile);

	nvkm_fb_tile_prog(fb, i, tile);
}

static struct nouveau_drm_tile *
nv10_bo_get_tile_region(struct drm_device *dev, int i)
{
	struct nouveau_drm *drm = nouveau_drm(dev);
	struct nouveau_drm_tile *tile = &drm->tile.reg[i];

	spin_lock(&drm->tile.lock);

	if (!tile->used &&
	    (!tile->fence || nouveau_fence_done(tile->fence)))
		tile->used = true;
	else
		tile = NULL;

	spin_unlock(&drm->tile.lock);
	return tile;
}

static void
nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile,
			struct dma_fence *fence)
{
	struct nouveau_drm *drm = nouveau_drm(dev);

	if (tile) {
		spin_lock(&drm->tile.lock);
		tile->fence = (struct nouveau_fence *)dma_fence_get(fence);
		tile->used = false;
		spin_unlock(&drm->tile.lock);
	}
}

static struct nouveau_drm_tile *
nv10_bo_set_tiling(struct drm_device *dev, u32 addr,
		   u32 size, u32 pitch, u32 zeta)
{
	struct nouveau_drm *drm = nouveau_drm(dev);
	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
	struct nouveau_drm_tile *tile, *found = NULL;
	int i;

	for (i = 0; i < fb->tile.regions; i++) {
		tile = nv10_bo_get_tile_region(dev, i);

		if (pitch && !found) {
			found = tile;
			continue;

		} else if (tile && fb->tile.region[i].pitch) {
			/* Kill an unused tile region. */
			nv10_bo_update_tile_region(dev, tile, 0, 0, 0, 0);
		}

		nv10_bo_put_tile_region(dev, tile, NULL);
	}

	if (found)
		nv10_bo_update_tile_region(dev, found, addr, size, pitch, zeta);
	return found;
}

static void
nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
{
	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
	struct drm_device *dev = drm->dev;
	struct nouveau_bo *nvbo = nouveau_bo(bo);

	if (unlikely(nvbo->gem.filp))
		DRM_ERROR("bo %p still attached to GEM object\n", bo);
	WARN_ON(nvbo->pin_refcnt > 0);
	nv10_bo_put_tile_region(dev, nvbo->tile, NULL);
	kfree(nvbo);
}

static inline u64
roundup_64(u64 x, u32 y)
{
	x += y - 1;
	do_div(x, y);
	return x * y;
}

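/* Round the requested size and alignment up to the hardware's tiling
 * requirements on pre-Tesla chips, or to the chosen GPU page size on
 * Tesla and newer, and finally to the CPU page size.
 */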
static void
nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags,
		       int *align, u64 *size)
{
	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
	struct nvif_device *device = &drm->client.device;

	if (device->info.family < NV_DEVICE_INFO_V0_TESLA) {
		if (nvbo->mode) {
			if (device->info.chipset >= 0x40) {
				*align = 65536;
				*size = roundup_64(*size, 64 * nvbo->mode);

			} else if (device->info.chipset >= 0x30) {
				*align = 32768;
				*size = roundup_64(*size, 64 * nvbo->mode);

			} else if (device->info.chipset >= 0x20) {
				*align = 16384;
				*size = roundup_64(*size, 64 * nvbo->mode);

			} else if (device->info.chipset >= 0x10) {
				*align = 16384;
				*size = roundup_64(*size, 32 * nvbo->mode);
			}
		}
	} else {
		*size = roundup_64(*size, (1 << nvbo->page));
		*align = max((1 << nvbo->page), *align);
	}

	*size = roundup_64(*size, PAGE_SIZE);
}

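/* Allocate and initialise a new buffer object, choosing a GPU page size
 * and initial placement that suit the requested domains, tiling and
 * compression settings.
 */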
int
nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
	       uint32_t flags, uint32_t tile_mode, uint32_t tile_flags,
	       struct sg_table *sg, struct reservation_object *robj,
	       struct nouveau_bo **pnvbo)
{
	struct nouveau_drm *drm = cli->drm;
	struct nouveau_bo *nvbo;
	struct nvif_mmu *mmu = &cli->mmu;
	struct nvif_vmm *vmm = &cli->vmm.vmm;
	size_t acc_size;
	int type = ttm_bo_type_device;
	int ret, i, pi = -1;

	if (!size) {
		NV_WARN(drm, "skipped size %016llx\n", size);
		return -EINVAL;
	}

	if (sg)
		type = ttm_bo_type_sg;

	nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL);
	if (!nvbo)
		return -ENOMEM;
	INIT_LIST_HEAD(&nvbo->head);
	INIT_LIST_HEAD(&nvbo->entry);
	INIT_LIST_HEAD(&nvbo->vma_list);
	nvbo->bo.bdev = &drm->ttm.bdev;

	/* This is confusing, and doesn't actually mean we want an uncached
	 * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated
	 * into in nouveau_gem_new().
	 */
	if (flags & TTM_PL_FLAG_UNCACHED) {
		/* Determine if we can get a cache-coherent map, forcing
		 * uncached mapping if we can't.
		 */
		if (!nouveau_drm_use_coherent_gpu_mapping(drm))
			nvbo->force_coherent = true;
	}

	if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
		nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
		if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
			kfree(nvbo);
			return -EINVAL;
		}

		nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
	} else
	if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
		nvbo->kind = (tile_flags & 0x00007f00) >> 8;
		nvbo->comp = (tile_flags & 0x00030000) >> 16;
		if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
			kfree(nvbo);
			return -EINVAL;
		}
	} else {
		nvbo->zeta = (tile_flags & 0x00000007);
	}
	nvbo->mode = tile_mode;
	nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);

	/* Determine the desirable target GPU page size for the buffer. */
	for (i = 0; i < vmm->page_nr; i++) {
		/* Because we cannot currently allow VMM maps to fail
		 * during buffer migration, we need to determine page
		 * size for the buffer up-front, and pre-allocate its
		 * page tables.
		 *
		 * Skip page sizes that can't support needed domains.
		 */
		if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
		    (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram)
			continue;
		if ((flags & TTM_PL_FLAG_TT) &&
		    (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
			continue;

		/* Select this page size if it's the first that supports
		 * the potential memory domains, or when it's compatible
		 * with the requested compression settings.
		 */
		if (pi < 0 || !nvbo->comp || vmm->page[i].comp)
			pi = i;

		/* Stop once the buffer is larger than the current page size. */
		if (size >= 1ULL << vmm->page[i].shift)
			break;
	}

	if (WARN_ON(pi < 0)) {
		/* Not yet registered with TTM, so free the bo here to
		 * avoid leaking it.
		 */
		kfree(nvbo);
		return -EINVAL;
	}

	/* Disable compression if suitable settings couldn't be found. */
	if (nvbo->comp && !vmm->page[pi].comp) {
		if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100)
			nvbo->kind = mmu->kind[nvbo->kind];
		nvbo->comp = 0;
	}
	nvbo->page = vmm->page[pi].shift;

	nouveau_bo_fixup_align(nvbo, flags, &align, &size);
	nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
	nouveau_bo_placement_set(nvbo, flags, 0);

	acc_size = ttm_bo_dma_acc_size(&drm->ttm.bdev, size,
				       sizeof(struct nouveau_bo));

	ret = ttm_bo_init(&drm->ttm.bdev, &nvbo->bo, size,
			  type, &nvbo->placement,
			  align >> PAGE_SHIFT, false, acc_size, sg,
			  robj, nouveau_bo_del_ttm);
	if (ret) {
		/* ttm will call nouveau_bo_del_ttm if it fails.. */
		return ret;
	}

	*pnvbo = nvbo;
	return 0;
}

static void
set_placement_list(struct ttm_place *pl, unsigned *n, uint32_t type, uint32_t flags)
{
	*n = 0;

	if (type & TTM_PL_FLAG_VRAM)
		pl[(*n)++].flags = TTM_PL_FLAG_VRAM | flags;
	if (type & TTM_PL_FLAG_TT)
		pl[(*n)++].flags = TTM_PL_FLAG_TT | flags;
	if (type & TTM_PL_FLAG_SYSTEM)
		pl[(*n)++].flags = TTM_PL_FLAG_SYSTEM | flags;
}

static void
set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
{
	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
	u32 vram_pages = drm->client.device.info.ram_size >> PAGE_SHIFT;
	unsigned i, fpfn, lpfn;

	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
	    nvbo->mode && (type & TTM_PL_FLAG_VRAM) &&
	    nvbo->bo.mem.num_pages < vram_pages / 4) {
		/*
		 * Make sure that the color and depth buffers are handled
		 * by independent memory controller units. Up to a 9x
		 * speed up when alpha-blending and depth-test are enabled
		 * at the same time.
		 */
		if (nvbo->zeta) {
			fpfn = vram_pages / 2;
			lpfn = ~0;
		} else {
			fpfn = 0;
			lpfn = vram_pages / 2;
		}
		for (i = 0; i < nvbo->placement.num_placement; ++i) {
			nvbo->placements[i].fpfn = fpfn;
			nvbo->placements[i].lpfn = lpfn;
		}
		for (i = 0; i < nvbo->placement.num_busy_placement; ++i) {
			nvbo->busy_placements[i].fpfn = fpfn;
			nvbo->busy_placements[i].lpfn = lpfn;
		}
	}
}

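/* Rebuild the TTM placement and busy_placement lists for the given
 * memory domains.
 */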
void
nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
{
	struct ttm_placement *pl = &nvbo->placement;
	uint32_t flags = (nvbo->force_coherent ? TTM_PL_FLAG_UNCACHED :
						 TTM_PL_MASK_CACHING) |
			 (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);

	pl->placement = nvbo->placements;
	set_placement_list(nvbo->placements, &pl->num_placement,
			   type, flags);

	pl->busy_placement = nvbo->busy_placements;
	set_placement_list(nvbo->busy_placements, &pl->num_busy_placement,
			   type | busy, flags);

	set_placement_range(nvbo, type);
}

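/* Pin a buffer into the given memory type, evicting it to GART first if
 * it now needs to be re-validated as contiguous VRAM.
 */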
int
nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig)
{
	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
	struct ttm_buffer_object *bo = &nvbo->bo;
	bool force = false, evict = false;
	int ret;

	ret = ttm_bo_reserve(bo, false, false, NULL);
	if (ret)
		return ret;

	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
	    memtype == TTM_PL_FLAG_VRAM && contig) {
		if (!nvbo->contig) {
			nvbo->contig = true;
			force = true;
			evict = true;
		}
	}

	if (nvbo->pin_refcnt) {
		if (!(memtype & (1 << bo->mem.mem_type)) || evict) {
			NV_ERROR(drm, "bo %p pinned elsewhere: "
				      "0x%08x vs 0x%08x\n", bo,
				 1 << bo->mem.mem_type, memtype);
			ret = -EBUSY;
		}
		nvbo->pin_refcnt++;
		goto out;
	}

	if (evict) {
		nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_TT, 0);
		ret = nouveau_bo_validate(nvbo, false, false);
		if (ret)
			goto out;
	}

	nvbo->pin_refcnt++;
	nouveau_bo_placement_set(nvbo, memtype, 0);

	/* drop pin_refcnt temporarily, so we don't trip the assertion
	 * in nouveau_bo_move() that makes sure we're not trying to
	 * move a pinned buffer
	 */
	nvbo->pin_refcnt--;
	ret = nouveau_bo_validate(nvbo, false, false);
	if (ret)
		goto out;
	nvbo->pin_refcnt++;

	switch (bo->mem.mem_type) {
	case TTM_PL_VRAM:
		drm->gem.vram_available -= bo->mem.size;
		break;
	case TTM_PL_TT:
		drm->gem.gart_available -= bo->mem.size;
		break;
	default:
		break;
	}

out:
	if (force && ret)
		nvbo->contig = false;
	ttm_bo_unreserve(bo);
	return ret;
}

int
nouveau_bo_unpin(struct nouveau_bo *nvbo)
{
	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
	struct ttm_buffer_object *bo = &nvbo->bo;
	int ret, ref;

	ret = ttm_bo_reserve(bo, false, false, NULL);
	if (ret)
		return ret;

	ref = --nvbo->pin_refcnt;
	WARN_ON_ONCE(ref < 0);
	if (ref)
		goto out;

	nouveau_bo_placement_set(nvbo, bo->mem.placement, 0);

	ret = nouveau_bo_validate(nvbo, false, false);
	if (ret == 0) {
		switch (bo->mem.mem_type) {
		case TTM_PL_VRAM:
			drm->gem.vram_available += bo->mem.size;
			break;
		case TTM_PL_TT:
			drm->gem.gart_available += bo->mem.size;
			break;
		default:
			break;
		}
	}

out:
	ttm_bo_unreserve(bo);
	return ret;
}

int
nouveau_bo_map(struct nouveau_bo *nvbo)
{
	int ret;

	ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
	if (ret)
		return ret;

	ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages, &nvbo->kmap);

	ttm_bo_unreserve(&nvbo->bo);
	return ret;
}

void
nouveau_bo_unmap(struct nouveau_bo *nvbo)
{
	if (!nvbo)
		return;

	ttm_bo_kunmap(&nvbo->kmap);
}

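/* Flush CPU-side writes to a non-coherent buffer so the GPU sees them. */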
void
nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
{
	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
	int i;

	if (!ttm_dma)
		return;

	/* Don't waste time looping if the object is coherent */
	if (nvbo->force_coherent)
		return;

	for (i = 0; i < ttm_dma->ttm.num_pages; i++)
		dma_sync_single_for_device(drm->dev->dev,
					   ttm_dma->dma_address[i],
					   PAGE_SIZE, DMA_TO_DEVICE);
}

void
nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
{
	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
	int i;

	if (!ttm_dma)
		return;

	/* Don't waste time looping if the object is coherent */
	if (nvbo->force_coherent)
		return;

	for (i = 0; i < ttm_dma->ttm.num_pages; i++)
		dma_sync_single_for_cpu(drm->dev->dev, ttm_dma->dma_address[i],
					PAGE_SIZE, DMA_FROM_DEVICE);
}

int
nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
		    bool no_wait_gpu)
{
	struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu };
	int ret;

	ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement, &ctx);
	if (ret)
		return ret;

	nouveau_bo_sync_for_device(nvbo);

	return 0;
}

void
nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val)
{
	bool is_iomem;
	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);

	mem += index;

	if (is_iomem)
		iowrite16_native(val, (void __force __iomem *)mem);
	else
		*mem = val;
}

u32
nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index)
{
	bool is_iomem;
	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);

	mem += index;

	if (is_iomem)
		return ioread32_native((void __force __iomem *)mem);
	else
		return *mem;
}

void
nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
{
	bool is_iomem;
	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);

	mem += index;

	if (is_iomem)
		iowrite32_native(val, (void __force __iomem *)mem);
	else
		*mem = val;
}

static struct ttm_tt *
nouveau_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags)
{
#if IS_ENABLED(CONFIG_AGP)
	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);

	if (drm->agp.bridge) {
		return ttm_agp_tt_create(bo, drm->agp.bridge, page_flags);
	}
#endif

	return nouveau_sgdma_create_ttm(bo, page_flags);
}

static int
nouveau_bo_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
{
	/* We'll do this from user space. */
	return 0;
}

static int
nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
			 struct ttm_mem_type_manager *man)
{
	struct nouveau_drm *drm = nouveau_bdev(bdev);
	struct nvif_mmu *mmu = &drm->client.mmu;

	switch (type) {
	case TTM_PL_SYSTEM:
		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_MASK_CACHING;
		man->default_caching = TTM_PL_FLAG_CACHED;
		break;
	case TTM_PL_VRAM:
		man->flags = TTM_MEMTYPE_FLAG_FIXED |
			     TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_FLAG_UNCACHED |
					 TTM_PL_FLAG_WC;
		man->default_caching = TTM_PL_FLAG_WC;

		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
			/* Some BARs do not support being ioremapped WC */
			const u8 type = mmu->type[drm->ttm.type_vram].type;
			if (type & NVIF_MEM_UNCACHED) {
				man->available_caching = TTM_PL_FLAG_UNCACHED;
				man->default_caching = TTM_PL_FLAG_UNCACHED;
			}

			man->func = &nouveau_vram_manager;
			man->io_reserve_fastpath = false;
			man->use_io_reserve_lru = true;
		} else {
			man->func = &ttm_bo_manager_func;
		}
		break;
	case TTM_PL_TT:
		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
			man->func = &nouveau_gart_manager;
		else
		if (!drm->agp.bridge)
			man->func = &nv04_gart_manager;
		else
			man->func = &ttm_bo_manager_func;

		if (drm->agp.bridge) {
			man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
			man->available_caching = TTM_PL_FLAG_UNCACHED |
						 TTM_PL_FLAG_WC;
			man->default_caching = TTM_PL_FLAG_WC;
		} else {
			man->flags = TTM_MEMTYPE_FLAG_MAPPABLE |
				     TTM_MEMTYPE_FLAG_CMA;
			man->available_caching = TTM_PL_MASK_CACHING;
			man->default_caching = TTM_PL_FLAG_CACHED;
		}

		break;
	default:
		return -EINVAL;
	}
	return 0;
}

static void
nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
{
	struct nouveau_bo *nvbo = nouveau_bo(bo);

	switch (bo->mem.mem_type) {
	case TTM_PL_VRAM:
		nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_TT,
					 TTM_PL_FLAG_SYSTEM);
		break;
	default:
		nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_SYSTEM, 0);
		break;
	}

	*pl = nvbo->placement;
}

static int
nve0_bo_move_init(struct nouveau_channel *chan, u32 handle)
{
	int ret = RING_SPACE(chan, 2);
	if (ret == 0) {
		BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1);
		OUT_RING (chan, handle & 0x0000ffff);
		FIRE_RING (chan);
	}
	return ret;
}

static int
nve0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
{
	struct nouveau_mem *mem = nouveau_mem(old_reg);
	int ret = RING_SPACE(chan, 10);
	if (ret == 0) {
		BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
		OUT_RING (chan, upper_32_bits(mem->vma[0].addr));
		OUT_RING (chan, lower_32_bits(mem->vma[0].addr));
		OUT_RING (chan, upper_32_bits(mem->vma[1].addr));
		OUT_RING (chan, lower_32_bits(mem->vma[1].addr));
		OUT_RING (chan, PAGE_SIZE);
		OUT_RING (chan, PAGE_SIZE);
		OUT_RING (chan, PAGE_SIZE);
		OUT_RING (chan, new_reg->num_pages);
		BEGIN_IMC0(chan, NvSubCopy, 0x0300, 0x0386);
	}
	return ret;
}

static int
nvc0_bo_move_init(struct nouveau_channel *chan, u32 handle)
{
	int ret = RING_SPACE(chan, 2);
	if (ret == 0) {
		BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1);
		OUT_RING (chan, handle);
	}
	return ret;
}

static int
nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
{
	struct nouveau_mem *mem = nouveau_mem(old_reg);
	u64 src_offset = mem->vma[0].addr;
	u64 dst_offset = mem->vma[1].addr;
	u32 page_count = new_reg->num_pages;
	int ret;

	page_count = new_reg->num_pages;
	while (page_count) {
		int line_count = (page_count > 8191) ? 8191 : page_count;

		ret = RING_SPACE(chan, 11);
		if (ret)
			return ret;

		BEGIN_NVC0(chan, NvSubCopy, 0x030c, 8);
		OUT_RING (chan, upper_32_bits(src_offset));
		OUT_RING (chan, lower_32_bits(src_offset));
		OUT_RING (chan, upper_32_bits(dst_offset));
		OUT_RING (chan, lower_32_bits(dst_offset));
		OUT_RING (chan, PAGE_SIZE);
		OUT_RING (chan, PAGE_SIZE);
		OUT_RING (chan, PAGE_SIZE);
		OUT_RING (chan, line_count);
		BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
		OUT_RING (chan, 0x00000110);

		page_count -= line_count;
		src_offset += (PAGE_SIZE * line_count);
		dst_offset += (PAGE_SIZE * line_count);
	}

	return 0;
}

static int
nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
{
	struct nouveau_mem *mem = nouveau_mem(old_reg);
	u64 src_offset = mem->vma[0].addr;
	u64 dst_offset = mem->vma[1].addr;
	u32 page_count = new_reg->num_pages;
	int ret;

	page_count = new_reg->num_pages;
	while (page_count) {
		int line_count = (page_count > 2047) ? 2047 : page_count;

		ret = RING_SPACE(chan, 12);
		if (ret)
			return ret;

		BEGIN_NVC0(chan, NvSubCopy, 0x0238, 2);
		OUT_RING (chan, upper_32_bits(dst_offset));
		OUT_RING (chan, lower_32_bits(dst_offset));
		BEGIN_NVC0(chan, NvSubCopy, 0x030c, 6);
		OUT_RING (chan, upper_32_bits(src_offset));
		OUT_RING (chan, lower_32_bits(src_offset));
		OUT_RING (chan, PAGE_SIZE); /* src_pitch */
		OUT_RING (chan, PAGE_SIZE); /* dst_pitch */
		OUT_RING (chan, PAGE_SIZE); /* line_length */
		OUT_RING (chan, line_count);
		BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
		OUT_RING (chan, 0x00100110);

		page_count -= line_count;
		src_offset += (PAGE_SIZE * line_count);
		dst_offset += (PAGE_SIZE * line_count);
	}

	return 0;
}

static int
nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
{
	struct nouveau_mem *mem = nouveau_mem(old_reg);
	u64 src_offset = mem->vma[0].addr;
	u64 dst_offset = mem->vma[1].addr;
	u32 page_count = new_reg->num_pages;
	int ret;

	page_count = new_reg->num_pages;
	while (page_count) {
		int line_count = (page_count > 8191) ? 8191 : page_count;

		ret = RING_SPACE(chan, 11);
		if (ret)
			return ret;

		BEGIN_NV04(chan, NvSubCopy, 0x030c, 8);
		OUT_RING (chan, upper_32_bits(src_offset));
		OUT_RING (chan, lower_32_bits(src_offset));
		OUT_RING (chan, upper_32_bits(dst_offset));
		OUT_RING (chan, lower_32_bits(dst_offset));
		OUT_RING (chan, PAGE_SIZE);
		OUT_RING (chan, PAGE_SIZE);
		OUT_RING (chan, PAGE_SIZE);
		OUT_RING (chan, line_count);
		BEGIN_NV04(chan, NvSubCopy, 0x0300, 1);
		OUT_RING (chan, 0x00000110);

		page_count -= line_count;
		src_offset += (PAGE_SIZE * line_count);
		dst_offset += (PAGE_SIZE * line_count);
	}

	return 0;
}

static int
nv98_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
{
	struct nouveau_mem *mem = nouveau_mem(old_reg);
	int ret = RING_SPACE(chan, 7);
	if (ret == 0) {
		BEGIN_NV04(chan, NvSubCopy, 0x0320, 6);
		OUT_RING (chan, upper_32_bits(mem->vma[0].addr));
		OUT_RING (chan, lower_32_bits(mem->vma[0].addr));
		OUT_RING (chan, upper_32_bits(mem->vma[1].addr));
		OUT_RING (chan, lower_32_bits(mem->vma[1].addr));
		OUT_RING (chan, 0x00000000 /* COPY */);
		OUT_RING (chan, new_reg->num_pages << PAGE_SHIFT);
	}
	return ret;
}

static int
nv84_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
{
	struct nouveau_mem *mem = nouveau_mem(old_reg);
	int ret = RING_SPACE(chan, 7);
	if (ret == 0) {
		BEGIN_NV04(chan, NvSubCopy, 0x0304, 6);
		OUT_RING (chan, new_reg->num_pages << PAGE_SHIFT);
		OUT_RING (chan, upper_32_bits(mem->vma[0].addr));
		OUT_RING (chan, lower_32_bits(mem->vma[0].addr));
		OUT_RING (chan, upper_32_bits(mem->vma[1].addr));
		OUT_RING (chan, lower_32_bits(mem->vma[1].addr));
		OUT_RING (chan, 0x00000000 /* MODE_COPY, QUERY_NONE */);
	}
	return ret;
}

static int
nv50_bo_move_init(struct nouveau_channel *chan, u32 handle)
{
	int ret = RING_SPACE(chan, 6);
	if (ret == 0) {
		BEGIN_NV04(chan, NvSubCopy, 0x0000, 1);
		OUT_RING (chan, handle);
		BEGIN_NV04(chan, NvSubCopy, 0x0180, 3);
		OUT_RING (chan, chan->drm->ntfy.handle);
		OUT_RING (chan, chan->vram.handle);
		OUT_RING (chan, chan->vram.handle);
	}

	return ret;
}

static int
nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
{
	struct nouveau_mem *mem = nouveau_mem(old_reg);
	u64 length = (new_reg->num_pages << PAGE_SHIFT);
	u64 src_offset = mem->vma[0].addr;
	u64 dst_offset = mem->vma[1].addr;
	int src_tiled = !!mem->kind;
	int dst_tiled = !!nouveau_mem(new_reg)->kind;
	int ret;

	while (length) {
		u32 amount, stride, height;

		ret = RING_SPACE(chan, 18 + 6 * (src_tiled + dst_tiled));
		if (ret)
			return ret;

		amount = min(length, (u64)(4 * 1024 * 1024));
		stride = 16 * 4;
		height = amount / stride;

		if (src_tiled) {
			BEGIN_NV04(chan, NvSubCopy, 0x0200, 7);
			OUT_RING (chan, 0);
			OUT_RING (chan, 0);
			OUT_RING (chan, stride);
			OUT_RING (chan, height);
			OUT_RING (chan, 1);
			OUT_RING (chan, 0);
			OUT_RING (chan, 0);
		} else {
			BEGIN_NV04(chan, NvSubCopy, 0x0200, 1);
			OUT_RING (chan, 1);
		}
		if (dst_tiled) {
			BEGIN_NV04(chan, NvSubCopy, 0x021c, 7);
			OUT_RING (chan, 0);
			OUT_RING (chan, 0);
			OUT_RING (chan, stride);
			OUT_RING (chan, height);
			OUT_RING (chan, 1);
			OUT_RING (chan, 0);
			OUT_RING (chan, 0);
		} else {
			BEGIN_NV04(chan, NvSubCopy, 0x021c, 1);
			OUT_RING (chan, 1);
		}

		BEGIN_NV04(chan, NvSubCopy, 0x0238, 2);
		OUT_RING (chan, upper_32_bits(src_offset));
		OUT_RING (chan, upper_32_bits(dst_offset));
		BEGIN_NV04(chan, NvSubCopy, 0x030c, 8);
		OUT_RING (chan, lower_32_bits(src_offset));
		OUT_RING (chan, lower_32_bits(dst_offset));
		OUT_RING (chan, stride);
		OUT_RING (chan, stride);
		OUT_RING (chan, stride);
		OUT_RING (chan, height);
		OUT_RING (chan, 0x00000101);
		OUT_RING (chan, 0x00000000);
		BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
		OUT_RING (chan, 0);

		length -= amount;
		src_offset += amount;
		dst_offset += amount;
	}

	return 0;
}

static int
nv04_bo_move_init(struct nouveau_channel *chan, u32 handle)
{
	int ret = RING_SPACE(chan, 4);
	if (ret == 0) {
		BEGIN_NV04(chan, NvSubCopy, 0x0000, 1);
		OUT_RING (chan, handle);
		BEGIN_NV04(chan, NvSubCopy, 0x0180, 1);
		OUT_RING (chan, chan->drm->ntfy.handle);
	}

	return ret;
}

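/* Select the DMA context object that covers the given memory type. */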
static inline uint32_t
nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
		      struct nouveau_channel *chan, struct ttm_mem_reg *reg)
{
	if (reg->mem_type == TTM_PL_TT)
		return NvDmaTT;
	return chan->vram.handle;
}

static int
nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
{
	u32 src_offset = old_reg->start << PAGE_SHIFT;
	u32 dst_offset = new_reg->start << PAGE_SHIFT;
	u32 page_count = new_reg->num_pages;
	int ret;

	ret = RING_SPACE(chan, 3);
	if (ret)
		return ret;

	BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
	OUT_RING (chan, nouveau_bo_mem_ctxdma(bo, chan, old_reg));
	OUT_RING (chan, nouveau_bo_mem_ctxdma(bo, chan, new_reg));

	page_count = new_reg->num_pages;
	while (page_count) {
		int line_count = (page_count > 2047) ? 2047 : page_count;

		ret = RING_SPACE(chan, 11);
		if (ret)
			return ret;

		BEGIN_NV04(chan, NvSubCopy,
			   NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
		OUT_RING (chan, src_offset);
		OUT_RING (chan, dst_offset);
		OUT_RING (chan, PAGE_SIZE); /* src_pitch */
		OUT_RING (chan, PAGE_SIZE); /* dst_pitch */
		OUT_RING (chan, PAGE_SIZE); /* line_length */
		OUT_RING (chan, line_count);
		OUT_RING (chan, 0x00000101);
		OUT_RING (chan, 0x00000000);
		BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
		OUT_RING (chan, 0);

		page_count -= line_count;
		src_offset += (PAGE_SIZE * line_count);
		dst_offset += (PAGE_SIZE * line_count);
	}

	return 0;
}

static int
nouveau_bo_move_prep(struct nouveau_drm *drm, struct ttm_buffer_object *bo,
		     struct ttm_mem_reg *reg)
{
	struct nouveau_mem *old_mem = nouveau_mem(&bo->mem);
	struct nouveau_mem *new_mem = nouveau_mem(reg);
	struct nvif_vmm *vmm = &drm->client.vmm.vmm;
	int ret;

	ret = nvif_vmm_get(vmm, LAZY, false, old_mem->mem.page, 0,
			   old_mem->mem.size, &old_mem->vma[0]);
	if (ret)
		return ret;

	ret = nvif_vmm_get(vmm, LAZY, false, new_mem->mem.page, 0,
			   new_mem->mem.size, &old_mem->vma[1]);
	if (ret)
		goto done;

	ret = nouveau_mem_map(old_mem, vmm, &old_mem->vma[0]);
	if (ret)
		goto done;

	ret = nouveau_mem_map(new_mem, vmm, &old_mem->vma[1]);
done:
	if (ret) {
		nvif_vmm_put(vmm, &old_mem->vma[1]);
		nvif_vmm_put(vmm, &old_mem->vma[0]);
	}
	return ret;
}

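/* Copy a buffer between placements on the channel chosen at init time,
 * fencing the copy so TTM can clean up the old node once it completes.
 */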
static int
nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
		     bool no_wait_gpu, struct ttm_mem_reg *new_reg)
{
	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
	struct nouveau_channel *chan = drm->ttm.chan;
	struct nouveau_cli *cli = (void *)chan->user.client;
	struct nouveau_fence *fence;
	int ret;

	/* create temporary vmas for the transfer and attach them to the
	 * old nvkm_mem node, these will get cleaned up after ttm has
	 * destroyed the ttm_mem_reg
	 */
	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
		ret = nouveau_bo_move_prep(drm, bo, new_reg);
		if (ret)
			return ret;
	}

	mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
	ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, intr);
	if (ret == 0) {
		ret = drm->ttm.move(chan, bo, &bo->mem, new_reg);
		if (ret == 0) {
			ret = nouveau_fence_new(chan, false, &fence);
			if (ret == 0) {
				ret = ttm_bo_move_accel_cleanup(bo,
								&fence->base,
								evict,
								new_reg);
				nouveau_fence_unref(&fence);
			}
		}
	}
	mutex_unlock(&cli->mutex);
	return ret;
}

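/* Probe for the most capable copy method the device exposes and hook it
 * up as the accelerated buffer-move path; otherwise fall back to the CPU.
 */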
void
nouveau_bo_move_init(struct nouveau_drm *drm)
{
	static const struct {
		const char *name;
		int engine;
		s32 oclass;
		int (*exec)(struct nouveau_channel *,
			    struct ttm_buffer_object *,
			    struct ttm_mem_reg *, struct ttm_mem_reg *);
		int (*init)(struct nouveau_channel *, u32 handle);
	} _methods[] = {
		{ "COPY", 4, 0xc3b5, nve0_bo_move_copy, nve0_bo_move_init },
		{ "GRCE", 0, 0xc3b5, nve0_bo_move_copy, nvc0_bo_move_init },
		{ "COPY", 4, 0xc1b5, nve0_bo_move_copy, nve0_bo_move_init },
		{ "GRCE", 0, 0xc1b5, nve0_bo_move_copy, nvc0_bo_move_init },
		{ "COPY", 4, 0xc0b5, nve0_bo_move_copy, nve0_bo_move_init },
		{ "GRCE", 0, 0xc0b5, nve0_bo_move_copy, nvc0_bo_move_init },
		{ "COPY", 4, 0xb0b5, nve0_bo_move_copy, nve0_bo_move_init },
		{ "GRCE", 0, 0xb0b5, nve0_bo_move_copy, nvc0_bo_move_init },
		{ "COPY", 4, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
		{ "GRCE", 0, 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init },
		{ "COPY1", 5, 0x90b8, nvc0_bo_move_copy, nvc0_bo_move_init },
		{ "COPY0", 4, 0x90b5, nvc0_bo_move_copy, nvc0_bo_move_init },
		{ "COPY", 0, 0x85b5, nva3_bo_move_copy, nv50_bo_move_init },
		{ "CRYPT", 0, 0x74c1, nv84_bo_move_exec, nv50_bo_move_init },
		{ "M2MF", 0, 0x9039, nvc0_bo_move_m2mf, nvc0_bo_move_init },
		{ "M2MF", 0, 0x5039, nv50_bo_move_m2mf, nv50_bo_move_init },
		{ "M2MF", 0, 0x0039, nv04_bo_move_m2mf, nv04_bo_move_init },
		{},
		{ "CRYPT", 0, 0x88b4, nv98_bo_move_exec, nv50_bo_move_init },
	}, *mthd = _methods;
	const char *name = "CPU";
	int ret;

	do {
		struct nouveau_channel *chan;

		if (mthd->engine)
			chan = drm->cechan;
		else
			chan = drm->channel;
		if (chan == NULL)
			continue;

		ret = nvif_object_init(&chan->user,
				       mthd->oclass | (mthd->engine << 16),
				       mthd->oclass, NULL, 0,
				       &drm->ttm.copy);
		if (ret == 0) {
			ret = mthd->init(chan, drm->ttm.copy.handle);
			if (ret) {
				nvif_object_fini(&drm->ttm.copy);
				continue;
			}

			drm->ttm.move = mthd->exec;
			drm->ttm.chan = chan;
			name = mthd->name;
			break;
		}
	} while ((++mthd)->exec);

	NV_INFO(drm, "MM: using %s for buffer copies\n", name);
}

static int
nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
		      bool no_wait_gpu, struct ttm_mem_reg *new_reg)
{
	struct ttm_operation_ctx ctx = { intr, no_wait_gpu };
	struct ttm_place placement_memtype = {
		.fpfn = 0,
		.lpfn = 0,
		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
	};
	struct ttm_placement placement;
	struct ttm_mem_reg tmp_reg;
	int ret;

	placement.num_placement = placement.num_busy_placement = 1;
	placement.placement = placement.busy_placement = &placement_memtype;

	tmp_reg = *new_reg;
	tmp_reg.mm_node = NULL;
	ret = ttm_bo_mem_space(bo, &placement, &tmp_reg, &ctx);
	if (ret)
		return ret;

	ret = ttm_tt_bind(bo->ttm, &tmp_reg, &ctx);
	if (ret)
		goto out;

	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, &tmp_reg);
	if (ret)
		goto out;

	ret = ttm_bo_move_ttm(bo, &ctx, new_reg);
out:
	ttm_bo_mem_put(bo, &tmp_reg);
	return ret;
}

static int
nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
		      bool no_wait_gpu, struct ttm_mem_reg *new_reg)
{
	struct ttm_operation_ctx ctx = { intr, no_wait_gpu };
	struct ttm_place placement_memtype = {
		.fpfn = 0,
		.lpfn = 0,
		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
	};
	struct ttm_placement placement;
	struct ttm_mem_reg tmp_reg;
	int ret;

	placement.num_placement = placement.num_busy_placement = 1;
	placement.placement = placement.busy_placement = &placement_memtype;

	tmp_reg = *new_reg;
	tmp_reg.mm_node = NULL;
	ret = ttm_bo_mem_space(bo, &placement, &tmp_reg, &ctx);
	if (ret)
		return ret;

	ret = ttm_bo_move_ttm(bo, &ctx, &tmp_reg);
	if (ret)
		goto out;

	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, new_reg);
	if (ret)
		goto out;

out:
	ttm_bo_mem_put(bo, &tmp_reg);
	return ret;
}

static void
nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
		     struct ttm_mem_reg *new_reg)
{
	struct nouveau_mem *mem = new_reg ? nouveau_mem(new_reg) : NULL;
	struct nouveau_bo *nvbo = nouveau_bo(bo);
	struct nouveau_vma *vma;

	/* ttm can now (stupidly) pass the driver bos it didn't create... */
	if (bo->destroy != nouveau_bo_del_ttm)
		return;

	if (mem && new_reg->mem_type != TTM_PL_SYSTEM &&
	    mem->mem.page == nvbo->page) {
		list_for_each_entry(vma, &nvbo->vma_list, head) {
			nouveau_vma_map(vma, mem);
		}
	} else {
		list_for_each_entry(vma, &nvbo->vma_list, head) {
			WARN_ON(ttm_bo_wait(bo, false, false));
			nouveau_vma_unmap(vma);
		}
	}
}

static int
nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_reg,
		   struct nouveau_drm_tile **new_tile)
{
	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
	struct drm_device *dev = drm->dev;
	struct nouveau_bo *nvbo = nouveau_bo(bo);
	u64 offset = new_reg->start << PAGE_SHIFT;

	*new_tile = NULL;
	if (new_reg->mem_type != TTM_PL_VRAM)
		return 0;

	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
		*new_tile = nv10_bo_set_tiling(dev, offset, new_reg->size,
					       nvbo->mode, nvbo->zeta);
	}

	return 0;
}

static void
nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
		      struct nouveau_drm_tile *new_tile,
		      struct nouveau_drm_tile **old_tile)
{
	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
	struct drm_device *dev = drm->dev;
	struct dma_fence *fence = reservation_object_get_excl(bo->resv);

	nv10_bo_put_tile_region(dev, *old_tile, fence);
	*old_tile = new_tile;
}

static int
nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
		struct ttm_operation_ctx *ctx,
		struct ttm_mem_reg *new_reg)
{
	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
	struct nouveau_bo *nvbo = nouveau_bo(bo);
	struct ttm_mem_reg *old_reg = &bo->mem;
	struct nouveau_drm_tile *new_tile = NULL;
	int ret = 0;

	ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu);
	if (ret)
		return ret;

	if (nvbo->pin_refcnt)
		NV_WARN(drm, "Moving pinned object %p!\n", nvbo);

	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
		ret = nouveau_bo_vm_bind(bo, new_reg, &new_tile);
		if (ret)
			return ret;
	}

	/* Fake bo copy. */
	if (old_reg->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
		BUG_ON(bo->mem.mm_node != NULL);
		bo->mem = *new_reg;
		new_reg->mm_node = NULL;
		goto out;
	}

	/* Hardware assisted copy. */
	if (drm->ttm.move) {
		if (new_reg->mem_type == TTM_PL_SYSTEM)
			ret = nouveau_bo_move_flipd(bo, evict,
						    ctx->interruptible,
						    ctx->no_wait_gpu, new_reg);
		else if (old_reg->mem_type == TTM_PL_SYSTEM)
			ret = nouveau_bo_move_flips(bo, evict,
						    ctx->interruptible,
						    ctx->no_wait_gpu, new_reg);
		else
			ret = nouveau_bo_move_m2mf(bo, evict,
						   ctx->interruptible,
						   ctx->no_wait_gpu, new_reg);
		if (!ret)
			goto out;
	}

	/* Fallback to software copy. */
	ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu);
	if (ret == 0)
		ret = ttm_bo_move_memcpy(bo, ctx, new_reg);

out:
	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
		if (ret)
			nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
		else
			nouveau_bo_vm_cleanup(bo, new_tile, &nvbo->tile);
	}

	return ret;
}

static int
nouveau_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
	struct nouveau_bo *nvbo = nouveau_bo(bo);

	return drm_vma_node_verify_access(&nvbo->gem.vma_node,
					  filp->private_data);
}

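/* Fill in the bus address information TTM needs to CPU-map a buffer. */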
static int
nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
{
	struct ttm_mem_type_manager *man = &bdev->man[reg->mem_type];
	struct nouveau_drm *drm = nouveau_bdev(bdev);
	struct nvkm_device *device = nvxx_device(&drm->client.device);
	struct nouveau_mem *mem = nouveau_mem(reg);

	reg->bus.addr = NULL;
	reg->bus.offset = 0;
	reg->bus.size = reg->num_pages << PAGE_SHIFT;
	reg->bus.base = 0;
	reg->bus.is_iomem = false;
	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
		return -EINVAL;
	switch (reg->mem_type) {
	case TTM_PL_SYSTEM:
		/* System memory */
		return 0;
	case TTM_PL_TT:
#if IS_ENABLED(CONFIG_AGP)
		if (drm->agp.bridge) {
			reg->bus.offset = reg->start << PAGE_SHIFT;
			reg->bus.base = drm->agp.base;
			reg->bus.is_iomem = !drm->agp.cma;
		}
#endif
		if (drm->client.mem->oclass < NVIF_CLASS_MEM_NV50 || !mem->kind)
			/* untiled */
			break;
		/* fallthrough, tiled memory */
	case TTM_PL_VRAM:
		reg->bus.offset = reg->start << PAGE_SHIFT;
		reg->bus.base = device->func->resource_addr(device, 1);
		reg->bus.is_iomem = true;
		if (drm->client.mem->oclass >= NVIF_CLASS_MEM_NV50) {
			union {
				struct nv50_mem_map_v0 nv50;
				struct gf100_mem_map_v0 gf100;
			} args;
			u64 handle, length;
			u32 argc = 0;
			int ret;

			switch (mem->mem.object.oclass) {
			case NVIF_CLASS_MEM_NV50:
				args.nv50.version = 0;
				args.nv50.ro = 0;
				args.nv50.kind = mem->kind;
				args.nv50.comp = mem->comp;
				argc = sizeof(args.nv50);
				break;
			case NVIF_CLASS_MEM_GF100:
				args.gf100.version = 0;
				args.gf100.ro = 0;
				args.gf100.kind = mem->kind;
				argc = sizeof(args.gf100);
				break;
			default:
				WARN_ON(1);
				break;
			}

			ret = nvif_object_map_handle(&mem->mem.object,
						     &args, argc,
						     &handle, &length);
			if (ret != 1)
				return ret ? ret : -EINVAL;

			reg->bus.base = 0;
			reg->bus.offset = handle;
		}
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

static void
nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
{
	struct nouveau_drm *drm = nouveau_bdev(bdev);
	struct nouveau_mem *mem = nouveau_mem(reg);

	if (drm->client.mem->oclass >= NVIF_CLASS_MEM_NV50) {
		switch (reg->mem_type) {
		case TTM_PL_TT:
			if (mem->kind)
				nvif_object_unmap_handle(&mem->mem.object);
			break;
		case TTM_PL_VRAM:
			nvif_object_unmap_handle(&mem->mem.object);
			break;
		default:
			break;
		}
	}
}

static int
nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
	struct nouveau_bo *nvbo = nouveau_bo(bo);
	struct nvkm_device *device = nvxx_device(&drm->client.device);
	u32 mappable = device->func->resource_size(device, 1) >> PAGE_SHIFT;
	int i, ret;

	/* as long as the bo isn't in vram, and isn't tiled, we've got
	 * nothing to do here.
	 */
	if (bo->mem.mem_type != TTM_PL_VRAM) {
		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA ||
		    !nvbo->kind)
			return 0;

		if (bo->mem.mem_type == TTM_PL_SYSTEM) {
			nouveau_bo_placement_set(nvbo, TTM_PL_TT, 0);

			ret = nouveau_bo_validate(nvbo, false, false);
			if (ret)
				return ret;
		}
		return 0;
	}

	/* make sure bo is in mappable vram */
	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA ||
	    bo->mem.start + bo->mem.num_pages < mappable)
		return 0;

	for (i = 0; i < nvbo->placement.num_placement; ++i) {
		nvbo->placements[i].fpfn = 0;
		nvbo->placements[i].lpfn = mappable;
	}

	for (i = 0; i < nvbo->placement.num_busy_placement; ++i) {
		nvbo->busy_placements[i].fpfn = 0;
		nvbo->busy_placements[i].lpfn = mappable;
	}

	nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_VRAM, 0);
	return nouveau_bo_validate(nvbo, false, false);
}

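/* Allocate backing pages for a ttm_tt and set up their DMA mappings. */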
static int
nouveau_ttm_tt_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
{
	struct ttm_dma_tt *ttm_dma = (void *)ttm;
	struct nouveau_drm *drm;
	struct device *dev;
	unsigned i;
	int r;
	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

	if (ttm->state != tt_unpopulated)
		return 0;

	if (slave && ttm->sg) {
		/* make userspace faulting work */
		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
						 ttm_dma->dma_address, ttm->num_pages);
		ttm->state = tt_unbound;
		return 0;
	}

	drm = nouveau_bdev(ttm->bdev);
	dev = drm->dev->dev;

#if IS_ENABLED(CONFIG_AGP)
	if (drm->agp.bridge) {
		return ttm_agp_tt_populate(ttm, ctx);
	}
#endif

#if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
	if (swiotlb_nr_tbl()) {
		return ttm_dma_populate((void *)ttm, dev, ctx);
	}
#endif

	r = ttm_pool_populate(ttm, ctx);
	if (r) {
		return r;
	}

	for (i = 0; i < ttm->num_pages; i++) {
		dma_addr_t addr;

		addr = dma_map_page(dev, ttm->pages[i], 0, PAGE_SIZE,
				    DMA_BIDIRECTIONAL);

		if (dma_mapping_error(dev, addr)) {
			while (i--) {
				dma_unmap_page(dev, ttm_dma->dma_address[i],
					       PAGE_SIZE, DMA_BIDIRECTIONAL);
				ttm_dma->dma_address[i] = 0;
			}
			ttm_pool_unpopulate(ttm);
			return -EFAULT;
		}

		ttm_dma->dma_address[i] = addr;
	}
	return 0;
}

static void
nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
	struct ttm_dma_tt *ttm_dma = (void *)ttm;
	struct nouveau_drm *drm;
	struct device *dev;
	unsigned i;
	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

	if (slave)
		return;

	drm = nouveau_bdev(ttm->bdev);
	dev = drm->dev->dev;

#if IS_ENABLED(CONFIG_AGP)
	if (drm->agp.bridge) {
		ttm_agp_tt_unpopulate(ttm);
		return;
	}
#endif

#if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
	if (swiotlb_nr_tbl()) {
		ttm_dma_unpopulate((void *)ttm, dev);
		return;
	}
#endif

	for (i = 0; i < ttm->num_pages; i++) {
		if (ttm_dma->dma_address[i]) {
			dma_unmap_page(dev, ttm_dma->dma_address[i], PAGE_SIZE,
				       DMA_BIDIRECTIONAL);
		}
	}

	ttm_pool_unpopulate(ttm);
}

void
nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool exclusive)
{
	struct reservation_object *resv = nvbo->bo.resv;

	if (exclusive)
		reservation_object_add_excl_fence(resv, &fence->base);
	else if (fence)
		reservation_object_add_shared_fence(resv, &fence->base);
}

struct ttm_bo_driver nouveau_bo_driver = {
	.ttm_tt_create = &nouveau_ttm_tt_create,
	.ttm_tt_populate = &nouveau_ttm_tt_populate,
	.ttm_tt_unpopulate = &nouveau_ttm_tt_unpopulate,
	.invalidate_caches = nouveau_bo_invalidate_caches,
	.init_mem_type = nouveau_bo_init_mem_type,
	.eviction_valuable = ttm_bo_eviction_valuable,
	.evict_flags = nouveau_bo_evict_flags,
	.move_notify = nouveau_bo_move_ntfy,
	.move = nouveau_bo_move,
	.verify_access = nouveau_bo_verify_access,
	.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
	.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
	.io_mem_free = &nouveau_ttm_io_mem_free,
};