i915_gem_gtt.c

/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

#define GEN6_PPGTT_PD_ENTRIES 512
#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
typedef uint64_t gen8_gtt_pte_t;
typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))

#define GEN6_PDE_VALID (1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID (1 << 0)
#define GEN6_PTE_UNCACHED (1 << 1)
#define HSW_PTE_UNCACHED (0)
#define GEN6_PTE_CACHE_LLC (2 << 1)
#define GEN7_PTE_CACHE_L3_LLC (3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
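
/* Illustration (added, not part of the original source): for a 40-bit
 * physical address such as 0x12_3456_7000, GEN6_GTT_ADDR_ENCODE() folds
 * address bits 39:32 (0x12) into PTE bits 11:4 via the (addr >> 28) & 0xff0
 * term, leaving the low PTE bits free for the valid/cache flags above. The
 * HSW variant masks with 0x7f0 instead, i.e. only bits 38:32, which keeps
 * PTE bit 11 free for the Haswell cacheability field defined below.
 */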

/* Cacheability Control is a 4-bit value. The low three bits are stored in
 * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
 */
#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \
					(((bits) & 0x8) << (11 - 3)))
#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2)
#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3)
#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8)
#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7)
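
/* Worked example (added for clarity, not in the original): the 4-bit value
 * 0xb used by HSW_WB_ELLC_LLC_AGE0 expands to
 * ((0xb & 0x7) << 1) | ((0xb & 0x8) << 8) = 0x006 | 0x800 = 0x806,
 * i.e. the low three bits land in PTE bits 3:1 and the top bit in PTE
 * bit 11, matching the comment above.
 */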

#define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
#define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
#define GEN8_LEGACY_PDPS 4

#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD)
#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */
#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */
#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */

static void ppgtt_bind_vma(struct i915_vma *vma,
			   enum i915_cache_level cache_level,
			   u32 flags);
static void ppgtt_unbind_vma(struct i915_vma *vma);
static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);

static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     bool valid)
{
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;
	if (level != I915_CACHE_NONE)
		pte |= PPAT_CACHED_INDEX;
	else
		pte |= PPAT_UNCACHED_INDEX;
	return pte;
}

static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
					       dma_addr_t addr,
					       enum i915_cache_level level)
{
	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

#define BYT_PTE_WRITEABLE (1 << 1)
#define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2)

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	/* Mark the page as writeable. Other platforms don't have a
	 * setting for read-only/writable, so this matches that behavior.
	 */
	pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
				      enum i915_cache_level level,
				      bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}
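
/* Note (added): the *_pte_encode() helpers above are per-platform variants
 * (SNB, IVB, BYT, HSW, Iris/eLLC). They are presumably installed as the
 * address space's ->pte_encode hook during GTT setup elsewhere in this file;
 * the PPGTT code below only calls them through that hook or, for gen8,
 * directly via gen8_pte_encode().
 */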

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
			  uint64_t val, bool synchronous)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int ret;

	BUG_ON(entry >= 4);

	if (synchronous) {
		I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
		I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
		return 0;
	}

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, (u32)(val >> 32));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, (u32)(val));
	intel_ring_advance(ring);

	return 0;
}

static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_ring_buffer *ring,
			  bool synchronous)
{
	int i, ret;

	/* bit of a hack to find the actual last used pd */
	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;

	for (i = used_pd - 1; i >= 0; i--) {
		dma_addr_t addr = ppgtt->pd_dma_addr[i];
		ret = gen8_write_pdp(ring, i, addr, synchronous);
		if (ret)
			return ret;
	}

	return 0;
}
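
/* Derived observation (added): gen8_ppgtt_init() sets num_pd_entries to
 * max_pdp * GEN8_PDES_PER_PAGE, so used_pd above works out to the number of
 * page-directory pages actually allocated; the loop then loads each PDPn
 * register with the DMA address of page directory n, highest index first.
 */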

static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   unsigned first_entry,
				   unsigned num_entries,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
	unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
	unsigned last_pte, i;

	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
				      I915_CACHE_LLC, use_scratch);

	while (num_entries) {
		struct page *page_table = &ppgtt->gen8_pt_pages[act_pt];

		last_pte = first_pte + num_entries;
		if (last_pte > GEN8_PTES_PER_PAGE)
			last_pte = GEN8_PTES_PER_PAGE;

		pt_vaddr = kmap_atomic(page_table);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      unsigned first_entry,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
	unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
					cache_level, true);
		if (++act_pte == GEN8_PTES_PER_PAGE) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	int i, j;

	list_del(&vm->global_link);
	drm_mm_takedown(&vm->mm);

	for (i = 0; i < ppgtt->num_pd_pages ; i++) {
		if (ppgtt->pd_dma_addr[i]) {
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pd_dma_addr[i],
				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);

			for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
				dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
				if (addr)
					pci_unmap_page(ppgtt->base.dev->pdev,
						       addr,
						       PAGE_SIZE,
						       PCI_DMA_BIDIRECTIONAL);
			}
		}
		kfree(ppgtt->gen8_pt_dma_addr[i]);
	}

	__free_pages(ppgtt->gen8_pt_pages, get_order(ppgtt->num_pt_pages << PAGE_SHIFT));
	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
}

/**
 * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a
 * net effect resembling a 2-level page table in normal x86 terms. Each PDP
 * represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
 *
 * TODO: Do something with the size parameter
 **/
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
{
	struct page *pt_pages;
	int i, j, ret = -ENOMEM;
	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
	const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;

	if (size % (1<<30))
		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);

	/* FIXME: split allocation into smaller pieces. For now we only ever do
	 * this once, but with full PPGTT, the multiple contiguous allocations
	 * will be bad.
	 */
	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
	if (!ppgtt->pd_pages)
		return -ENOMEM;

	pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT));
	if (!pt_pages) {
		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
		return -ENOMEM;
	}

	ppgtt->gen8_pt_pages = pt_pages;
	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
	ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT);
	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
	ppgtt->enable = gen8_ppgtt_enable;
	ppgtt->switch_mm = gen8_mm_switch;
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE;

	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);

	/*
	 * - Create a mapping for the page directories.
	 * - For each page directory:
	 *	allocate space for page table mappings.
	 *	map each page table
	 */
	for (i = 0; i < max_pdp; i++) {
		dma_addr_t temp;
		temp = pci_map_page(ppgtt->base.dev->pdev,
				    &ppgtt->pd_pages[i], 0,
				    PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
		if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
			goto err_out;

		ppgtt->pd_dma_addr[i] = temp;

		ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL);
		if (!ppgtt->gen8_pt_dma_addr[i])
			goto err_out;

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j];
			temp = pci_map_page(ppgtt->base.dev->pdev,
					    p, 0, PAGE_SIZE,
					    PCI_DMA_BIDIRECTIONAL);

			if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
				goto err_out;

			ppgtt->gen8_pt_dma_addr[i][j] = temp;
		}
	}

	/* For now, the PPGTT helper functions all require that the PDEs are
	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
	 * will never need to touch the PDEs again */
	for (i = 0; i < max_pdp; i++) {
		gen8_ppgtt_pde_t *pd_vaddr;
		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
						      I915_CACHE_LLC);
		}
		kunmap_atomic(pd_vaddr);
	}

	ppgtt->base.clear_range(&ppgtt->base, 0,
				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
				true);

	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
			 ppgtt->num_pt_pages,
			 (ppgtt->num_pt_pages - num_pt_pages) +
			 size % (1<<30));

	return 0;

err_out:
	ppgtt->base.cleanup(&ppgtt->base);
	return ret;
}
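
/* Sizing example (added for illustration): for a 2GB address space,
 * max_pdp = 2 and num_pt_pages = 2 * 512 = 1024 page tables; each page
 * table maps 512 * 4KB = 2MB, so ppgtt->base.total works out to
 * 1024 * 512 * 4096 = 2GB, matching the requested size whenever it is a
 * multiple of 1GB.
 */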

static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	struct i915_address_space *vm = &ppgtt->base;
	gen6_gtt_pte_t __iomem *pd_addr;
	gen6_gtt_pte_t scratch_pte;
	uint32_t pd_entry;
	int pte, pde;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);

	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);

	seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
		   ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
		u32 expected;
		gen6_gtt_pte_t *pt_vaddr;
		dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
		pd_entry = readl(pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
			unsigned long va =
				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;
			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_puts(m, "  SCRATCH ");
			}
			seq_puts(m, "\n");
		}
		kunmap_atomic(pt_vaddr);
	}
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	gen6_gtt_pte_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	WARN_ON(ppgtt->pd_offset & 0x3f);
	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = ppgtt->pt_dma_addr[i];
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);
}

static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
	BUG_ON(ppgtt->pd_offset & 0x3f);

	return (ppgtt->pd_offset / 64) << 16;
}
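
/* Derived observation (added): pd_offset is the byte offset of the PDEs
 * inside the GGTT PTE array (one 4-byte PTE per 4KB page), so for a page
 * directory placed at GGTT address X, pd_offset = X / 1024 and
 * (pd_offset / 64) << 16 equals X again (the node is 64KB-aligned via
 * GEN6_PD_ALIGN below); the value written to PP_DIR_BASE is thus the GGTT
 * address of the page directory.
 */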

static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
			 struct intel_ring_buffer *ring,
			 bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_ring_buffer *ring,
			  bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	/* XXX: RCS is the only one to auto invalidate the TLBs? */
	if (ring->id != RCS) {
		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_ring_buffer *ring,
			  bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!synchronous)
		return 0;

	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));

	POSTING_READ(RING_PP_DIR_DCLV(ring));

	return 0;
}

static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int j, ret;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		/* We promise to do a switch later with FULL PPGTT. If this is
		 * aliasing, this is the one and only switch we'll do */
		if (USES_FULL_PPGTT(dev))
			continue;

		ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			goto err_out;
	}

	return 0;

err_out:
	for_each_ring(ring, dev_priv, j)
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
	return ret;
}

static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	uint32_t ecochk, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_ring(ring, dev_priv, i) {
		int ret;
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		/* We promise to do a switch later with FULL PPGTT. If this is
		 * aliasing, this is the one and only switch we'll do */
		if (USES_FULL_PPGTT(dev))
			continue;

		ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	uint32_t ecochk, gab_ctl, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

	for_each_ring(ring, dev_priv, i) {
		int ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			return ret;
	}

	return 0;
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   unsigned first_entry,
				   unsigned num_entries,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}
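
/* Index arithmetic example (added): with 4KB pages and 4-byte PTEs,
 * I915_PPGTT_PT_ENTRIES is 1024, so e.g. first_entry = 1500 starts in page
 * table 1 at PTE 476; the loop above scrubs to the end of each page table
 * and then continues from PTE 0 of the next one.
 */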

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      unsigned first_entry,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
				       cache_level, true);
		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	int i;

	list_del(&vm->global_link);
	drm_mm_takedown(&ppgtt->base.mm);
	drm_mm_remove_node(&ppgtt->node);

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
	kfree(ppgtt);
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
#define GEN6_PD_ALIGN (PAGE_SIZE * 16)
#define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool retried = false;
	int i, ret;

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
alloc:
	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
						  &ppgtt->node, GEN6_PD_SIZE,
						  GEN6_PD_ALIGN, 0,
						  0, dev_priv->gtt.base.total,
						  DRM_MM_SEARCH_DEFAULT);
	if (ret == -ENOSPC && !retried) {
		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
					       I915_CACHE_NONE, false, true);
		if (ret)
			return ret;

		retried = true;
		goto alloc;
	}

	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
		DRM_DEBUG("Forced to use aperture for PDEs\n");

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
	if (IS_GEN6(dev)) {
		ppgtt->enable = gen6_ppgtt_enable;
		ppgtt->switch_mm = gen6_mm_switch;
	} else if (IS_HASWELL(dev)) {
		ppgtt->enable = gen7_ppgtt_enable;
		ppgtt->switch_mm = hsw_mm_switch;
	} else if (IS_GEN7(dev)) {
		ppgtt->enable = gen7_ppgtt_enable;
		ppgtt->switch_mm = gen7_mm_switch;
	} else
		BUG();
	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
	ppgtt->base.start = 0;
	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
				  GFP_KERNEL);
	if (!ppgtt->pt_pages) {
		drm_mm_remove_node(&ppgtt->node);
		return -ENOMEM;
	}

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!ppgtt->pt_pages[i])
			goto err_pt_alloc;
	}

	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
				     GFP_KERNEL);
	if (!ppgtt->pt_dma_addr)
		goto err_pt_alloc;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
				       PCI_DMA_BIDIRECTIONAL);

		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
			ret = -EIO;
			goto err_pd_pin;
		}

		ppgtt->pt_dma_addr[i] = pt_addr;
	}

	ppgtt->base.clear_range(&ppgtt->base, 0,
				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);
	ppgtt->debug_dump = gen6_dump_ppgtt;

	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
			 ppgtt->node.size >> 20,
			 ppgtt->node.start / PAGE_SIZE);
	ppgtt->pd_offset =
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);

	return 0;

err_pd_pin:
	if (ppgtt->pt_dma_addr) {
		for (i--; i >= 0; i--)
			pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}
err_pt_alloc:
	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		if (ppgtt->pt_pages[i])
			__free_page(ppgtt->pt_pages[i]);
	}
	kfree(ppgtt->pt_pages);
	drm_mm_remove_node(&ppgtt->node);

	return ret;
}
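
/* Layout note (added): since every 4KB of GGTT address space is described
 * by one 4-byte PTE in the gsm, placing the 512 PDEs at GGTT offset
 * node.start corresponds to byte offset
 * node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t) inside that PTE array,
 * which is exactly what pd_offset records above and what gen6_write_pdes()
 * writes through.
 */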

int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	ppgtt->base.dev = dev;

	if (INTEL_INFO(dev)->gen < 8)
		ret = gen6_ppgtt_init(ppgtt);
	else if (IS_GEN8(dev))
		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
	else
		BUG();

	if (!ret) {
		struct drm_i915_private *dev_priv = dev->dev_private;
		kref_init(&ppgtt->ref);
		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
			    ppgtt->base.total);
		i915_init_vm(dev_priv, &ppgtt->base);
		if (INTEL_INFO(dev)->gen < 8) {
			gen6_write_pdes(ppgtt);
			DRM_DEBUG("Adding PPGTT at offset %x\n",
				  ppgtt->pd_offset << 10);
		}
	}

	return ret;
}

static void
ppgtt_bind_vma(struct i915_vma *vma,
	       enum i915_cache_level cache_level,
	       u32 flags)
{
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;

	WARN_ON(flags);

	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;

	vma->vm->clear_range(vma->vm,
			     entry,
			     vma->obj->base.size >> PAGE_SHIFT,
			     true);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_check_and_clear_faults(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	if (INTEL_INFO(dev)->gen < 6)
		return;

	for_each_ring(ring, dev_priv, i) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(ring));
		if (fault_reg & RING_FAULT_VALID) {
			DRM_DEBUG_DRIVER("Unexpected fault\n"
  897. "\tAddr: 0x%08lx\\n"
  898. "\tAddress space: %s\n"
  899. "\tSource ID: %d\n"
  900. "\tType: %d\n",
  901. fault_reg & PAGE_MASK,
  902. fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
  903. RING_FAULT_SRCID(fault_reg),
  904. RING_FAULT_FAULT_TYPE(fault_reg));
  905. I915_WRITE(RING_FAULT_REG(ring),
  906. fault_reg & ~RING_FAULT_VALID);
  907. }
  908. }
  909. POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
  910. }
  911. void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
  912. {
  913. struct drm_i915_private *dev_priv = dev->dev_private;
  914. /* Don't bother messing with faults pre GEN6 as we have little
  915. * documentation supporting that it's a good idea.
  916. */
  917. if (INTEL_INFO(dev)->gen < 6)
  918. return;
  919. i915_check_and_clear_faults(dev);
  920. dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
  921. dev_priv->gtt.base.start / PAGE_SIZE,
  922. dev_priv->gtt.base.total / PAGE_SIZE,
  923. false);
  924. }
  925. void i915_gem_restore_gtt_mappings(struct drm_device *dev)
  926. {
  927. struct drm_i915_private *dev_priv = dev->dev_private;
  928. struct drm_i915_gem_object *obj;
  929. struct i915_address_space *vm;
  930. i915_check_and_clear_faults(dev);
  931. /* First fill our portion of the GTT with scratch pages */
  932. dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
  933. dev_priv->gtt.base.start / PAGE_SIZE,
  934. dev_priv->gtt.base.total / PAGE_SIZE,
  935. true);
  936. list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
  937. struct i915_vma *vma = i915_gem_obj_to_vma(obj,
  938. &dev_priv->gtt.base);
  939. if (!vma)
  940. continue;
  941. i915_gem_clflush_object(obj, obj->pin_display);
  942. /* The bind_vma code tries to be smart about tracking mappings.
  943. * Unfortunately above, we've just wiped out the mappings
  944. * without telling our object about it. So we need to fake it.
  945. */
  946. obj->has_global_gtt_mapping = 0;
  947. vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
  948. }
  949. if (INTEL_INFO(dev)->gen >= 8)
  950. return;
  951. list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
  952. /* TODO: Perhaps it shouldn't be gen6 specific */
  953. if (i915_is_ggtt(vm)) {
  954. if (dev_priv->mm.aliasing_ppgtt)
  955. gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
  956. continue;
  957. }
  958. gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
  959. }
  960. i915_gem_chipset_flush(dev);
  961. }
  962. int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
  963. {
  964. if (obj->has_dma_mapping)
  965. return 0;
  966. if (!dma_map_sg(&obj->base.dev->pdev->dev,
  967. obj->pages->sgl, obj->pages->nents,
  968. PCI_DMA_BIDIRECTIONAL))
  969. return -ENOSPC;
  970. return 0;
  971. }
  972. static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
  973. {
  974. #ifdef writeq
  975. writeq(pte, addr);
  976. #else
  977. iowrite32((u32)pte, addr);
  978. iowrite32(pte >> 32, addr + 4);
  979. #endif
  980. }
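
/* Note (added): writeq() is only available on platforms with native 64-bit
 * MMIO writes; elsewhere the 64-bit PTE is emitted as two 32-bit writes,
 * low dword first, which is why this helper exists at all.
 */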

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     unsigned int first_entry,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen8_gtt_pte_t __iomem *gtt_entries =
		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_dma_address(sg_iter.sg) +
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     unsigned int first_entry,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) !=
			vm->pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(vm->scratch.addr,
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

static void i915_ggtt_bind_vma(struct i915_vma *vma,
			       enum i915_cache_level cache_level,
			       u32 unused)
{
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	BUG_ON(!i915_is_ggtt(vma->vm));
	intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
	vma->obj->has_global_gtt_mapping = 1;
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool unused)
{
	intel_gtt_clear_range(first_entry, num_entries);
}

static void i915_ggtt_unbind_vma(struct i915_vma *vma)
{
	const unsigned int first = vma->node.start >> PAGE_SHIFT;
	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;

	BUG_ON(!i915_is_ggtt(vma->vm));
	vma->obj->has_global_gtt_mapping = 0;
	intel_gtt_clear_range(first, size);
}

static void ggtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;

	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
	 * or we have a global mapping already but the cacheability flags have
	 * changed, set the global PTEs.
	 *
	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
	 * instead if none of the above hold true.
	 *
	 * NB: A global mapping should only be needed for special regions like
	 * "gtt mappable", SNB errata, or if specified via special execbuf
	 * flags. At all other times, the GPU will use the aliasing PPGTT.
	 */
	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
		if (!obj->has_global_gtt_mapping ||
		    (cache_level != obj->cache_level)) {
			vma->vm->insert_entries(vma->vm, obj->pages, entry,
						cache_level);
			obj->has_global_gtt_mapping = 1;
		}
	}

	if (dev_priv->mm.aliasing_ppgtt &&
	    (!obj->has_aliasing_ppgtt_mapping ||
	     (cache_level != obj->cache_level))) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.insert_entries(&appgtt->base,
					    vma->obj->pages, entry, cache_level);
		vma->obj->has_aliasing_ppgtt_mapping = 1;
	}
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;

	if (obj->has_global_gtt_mapping) {
		vma->vm->clear_range(vma->vm, entry,
				     vma->obj->base.size >> PAGE_SHIFT,
				     true);
		obj->has_global_gtt_mapping = 0;
	}

	if (obj->has_aliasing_ppgtt_mapping) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.clear_range(&appgtt->base,
					 entry,
					 obj->base.size >> PAGE_SHIFT,
					 true);
		obj->has_aliasing_ppgtt_mapping = 0;
	}
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);

	undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}
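
/* Note (added): the color adjustment keeps a one-page (4096 byte) gap
 * between neighbouring GTT nodes of different cache "colors"; it is only
 * installed on !HAS_LLC() platforms in i915_gem_setup_global_gtt() below.
 */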

void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;

	BUG_ON(mappable_end > end);

	/* Subtract the guard page ... */
	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
		int ret;
		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret)
			DRM_DEBUG_KMS("Reservation failed\n");
		obj->has_global_gtt_mapping = 1;
	}

	dev_priv->gtt.base.start = start;
	dev_priv->gtt.base.total = end - start;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
	}

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
}

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
	get_page(page);
	set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->gtt.base.scratch.page = page;
	dev_priv->gtt.base.scratch.addr = dma_addr;

	return 0;
}

static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page = dev_priv->gtt.base.scratch.page;

	set_pages_wb(page, 1);
	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	put_page(page);
	__free_page(page);
}

static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}
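
/* Decode example (added for illustration): a GGMS field value of n yields
 * n << 20 bytes of PTE space; at 4 bytes per PTE that is n * 256K entries,
 * each mapping a 4KB page, i.e. roughly n GB of GGTT address space (2MB of
 * PTEs for a 2GB GGTT, for example).
 */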
static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
	if (bdw_gmch_ctl > 4) {
		WARN_ON(!i915.preliminary_hw_support);
		return 4<<20;
	}

	return bdw_gmch_ctl << 20;
}

static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return bdw_gmch_ctl << 25; /* 32 MB units */
}

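/*
 * Note that gtt_size here is the size in bytes of the GTT (the PTE array
 * itself), not of the address space it maps; the probe callbacks derive
 * the latter from the number of PTEs that fit in it.
 */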
static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(dev_priv->gtt.gsm);
	}

	return ret;
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
{
#define GEN8_PPAT_UC		(0<<0)
#define GEN8_PPAT_WC		(1<<0)
#define GEN8_PPAT_WT		(2<<0)
#define GEN8_PPAT_WB		(3<<0)
#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
/* FIXME(BDW): Bspec is completely confused about cache control bits. */
#define GEN8_PPAT_LLC		(1<<2)
#define GEN8_PPAT_LLCELLC	(2<<2)
#define GEN8_PPAT_LLCeLLC	(3<<2)
#define GEN8_PPAT_AGE(x)	(x<<4)
#define GEN8_PPAT(i, x)		((uint64_t) (x) << ((i) * 8))
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

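/*
 * The per-generation probe callbacks report the total GGTT address space,
 * the stolen memory size and the mappable aperture (GMADR, PCI BAR 2), and
 * install the matching PTE insert/clear hooks.
 */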
static int gen8_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen8_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;

	gen8_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;

	return ret;
}

static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	drm_mm_takedown(&vm->mm);
	iounmap(gtt->gsm);
	teardown_scratch_page(vm->dev);
}

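/*
 * Gen5 and older are handled by the shared intel-gtt helper:
 * intel_gmch_probe() sets up the hardware and intel_gtt_get() reports the
 * sizes back to us.
 */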
static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

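/*
 * Pick the GTT probe/cleanup callbacks (and, on gen6+, the PTE encoding
 * function) for this platform, then run the probe to discover the GGTT,
 * stolen and aperture sizes.
 */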
int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);

	return 0;
}

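/*
 * A VMA describes one binding of an object into an address space (the
 * global GTT or a PPGTT); the bind/unbind callbacks chosen below are what
 * actually write or clear the PTEs for that binding.
 */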
static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
					      struct i915_address_space *vm)
{
	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;

	switch (INTEL_INFO(vm->dev)->gen) {
	case 8:
	case 7:
	case 6:
		if (i915_is_ggtt(vm)) {
			vma->unbind_vma = ggtt_unbind_vma;
			vma->bind_vma = ggtt_bind_vma;
		} else {
			vma->unbind_vma = ppgtt_unbind_vma;
			vma->bind_vma = ppgtt_bind_vma;
		}
		break;
	case 5:
	case 4:
	case 3:
	case 2:
		BUG_ON(!i915_is_ggtt(vm));
		vma->unbind_vma = i915_ggtt_unbind_vma;
		vma->bind_vma = i915_ggtt_bind_vma;
		break;
	default:
		BUG();
	}

	/* Keep GGTT vmas first to make debug easier */
	if (i915_is_ggtt(vm))
		list_add(&vma->vma_link, &obj->vma_list);
	else
		list_add_tail(&vma->vma_link, &obj->vma_list);

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		vma = __i915_gem_vma_create(obj, vm);

	return vma;
}