i915_gem_gtt.c 107 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115
  1. /*
  2. * Copyright © 2010 Daniel Vetter
  3. * Copyright © 2011-2014 Intel Corporation
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice (including the next
  13. * paragraph) shall be included in all copies or substantial portions of the
  14. * Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22. * IN THE SOFTWARE.
  23. *
  24. */
  25. #include <linux/slab.h> /* fault-inject.h is not standalone! */
  26. #include <linux/fault-inject.h>
  27. #include <linux/log2.h>
  28. #include <linux/random.h>
  29. #include <linux/seq_file.h>
  30. #include <linux/stop_machine.h>
  31. #include <asm/set_memory.h>
  32. #include <drm/drmP.h>
  33. #include <drm/i915_drm.h>
  34. #include "i915_drv.h"
  35. #include "i915_vgpu.h"
  36. #include "i915_trace.h"
  37. #include "intel_drv.h"
  38. #include "intel_frontbuffer.h"
  39. #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
  40. /**
  41. * DOC: Global GTT views
  42. *
  43. * Background and previous state
  44. *
  45. * Historically objects could exist (be bound) in global GTT space only as
  46. * singular instances with a view representing all of the object's backing pages
  47. * in a linear fashion. This view will be called a normal view.
  48. *
  49. * To support multiple views of the same object, where the number of mapped
  50. * pages is not equal to the backing store, or where the layout of the pages
  51. * is not linear, the concept of a GGTT view was added.
  52. *
  53. * One example of an alternative view is a stereo display driven by a single
  54. * image. In this case we would have a framebuffer looking like this
  55. * (2x2 pages):
  56. *
  57. * 12
  58. * 34
  59. *
  60. * Above would represent a normal GGTT view as normally mapped for GPU or CPU
  61. * rendering. In contrast, fed to the display engine would be an alternative
  62. * view which could look something like this:
  63. *
  64. * 1212
  65. * 3434
  66. *
  67. * In this example both the size and layout of pages in the alternative view is
  68. * different from the normal view.
  69. *
  70. * Implementation and usage
  71. *
  72. * GGTT views are implemented using VMAs and are distinguished via enum
  73. * i915_ggtt_view_type and struct i915_ggtt_view.
  74. *
  75. * A new flavour of core GEM functions which work with GGTT bound objects was
  76. * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
  77. * renaming in large amounts of code. They take the struct i915_ggtt_view
  78. * parameter encapsulating all metadata required to implement a view.
  79. *
  80. * As a helper for callers which are only interested in the normal view,
  81. * globally const i915_ggtt_view_normal singleton instance exists. All old core
  82. * GEM API functions, the ones not taking the view parameter, are operating on,
  83. * or with the normal GGTT view.
  84. *
  85. * Code wanting to add or use a new GGTT view needs to:
  86. *
  87. * 1. Add a new enum with a suitable name.
  88. * 2. Extend the metadata in the i915_ggtt_view structure if required.
  89. * 3. Add support to i915_get_vma_pages().
  90. *
  91. * New views are required to build a scatter-gather table from within the
  92. * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
  93. * exists for the lifetime of a VMA.
  94. *
  95. * Core API is designed to have copy semantics which means that passed in
  96. * struct i915_ggtt_view does not need to be persistent (left around after
  97. * calling the core API functions).
  98. *
  99. */
  100. static int
  101. i915_get_ggtt_vma_pages(struct i915_vma *vma);
  102. static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
  103. {
  104. /*
  105. * Note that as an uncached mmio write, this will flush the
  106. * WCB of the writes into the GGTT before it triggers the invalidate.
  107. */
  108. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  109. }
  110. static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
  111. {
  112. gen6_ggtt_invalidate(dev_priv);
  113. I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
  114. }
  115. static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
  116. {
  117. intel_gtt_chipset_flush();
  118. }
  119. static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
  120. {
  121. i915->ggtt.invalidate(i915);
  122. }
  123. int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
  124. int enable_ppgtt)
  125. {
  126. bool has_full_ppgtt;
  127. bool has_full_48bit_ppgtt;
  128. if (!dev_priv->info.has_aliasing_ppgtt)
  129. return 0;
  130. has_full_ppgtt = dev_priv->info.has_full_ppgtt;
  131. has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;
  132. if (intel_vgpu_active(dev_priv)) {
  133. /* GVT-g has no support for 32bit ppgtt */
  134. has_full_ppgtt = false;
  135. has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
  136. }
  137. /*
  138. * We don't allow disabling PPGTT for gen9+ as it's a requirement for
  139. * execlists, the sole mechanism available to submit work.
  140. */
  141. if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
  142. return 0;
  143. if (enable_ppgtt == 1)
  144. return 1;
  145. if (enable_ppgtt == 2 && has_full_ppgtt)
  146. return 2;
  147. if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
  148. return 3;
  149. /* Disable ppgtt on SNB if VT-d is on. */
  150. if (IS_GEN6(dev_priv) && intel_vtd_active()) {
  151. DRM_INFO("Disabling PPGTT because VT-d is on\n");
  152. return 0;
  153. }
  154. if (has_full_48bit_ppgtt)
  155. return 3;
  156. if (has_full_ppgtt)
  157. return 2;
  158. return 1;
  159. }
  160. static int ppgtt_bind_vma(struct i915_vma *vma,
  161. enum i915_cache_level cache_level,
  162. u32 unused)
  163. {
  164. u32 pte_flags;
  165. int err;
  166. if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
  167. err = vma->vm->allocate_va_range(vma->vm,
  168. vma->node.start, vma->size);
  169. if (err)
  170. return err;
  171. }
  172. /* Applicable to VLV, and gen8+ */
  173. pte_flags = 0;
  174. if (i915_gem_object_is_readonly(vma->obj))
  175. pte_flags |= PTE_READ_ONLY;
  176. vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
  177. return 0;
  178. }
  179. static void ppgtt_unbind_vma(struct i915_vma *vma)
  180. {
  181. vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
  182. }
  183. static int ppgtt_set_pages(struct i915_vma *vma)
  184. {
  185. GEM_BUG_ON(vma->pages);
  186. vma->pages = vma->obj->mm.pages;
  187. vma->page_sizes = vma->obj->mm.page_sizes;
  188. return 0;
  189. }
  190. static void clear_pages(struct i915_vma *vma)
  191. {
  192. GEM_BUG_ON(!vma->pages);
  193. if (vma->pages != vma->obj->mm.pages) {
  194. sg_free_table(vma->pages);
  195. kfree(vma->pages);
  196. }
  197. vma->pages = NULL;
  198. memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
  199. }
  200. static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
  201. enum i915_cache_level level,
  202. u32 flags)
  203. {
  204. gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
  205. if (unlikely(flags & PTE_READ_ONLY))
  206. pte &= ~_PAGE_RW;
  207. switch (level) {
  208. case I915_CACHE_NONE:
  209. pte |= PPAT_UNCACHED;
  210. break;
  211. case I915_CACHE_WT:
  212. pte |= PPAT_DISPLAY_ELLC;
  213. break;
  214. default:
  215. pte |= PPAT_CACHED;
  216. break;
  217. }
  218. return pte;
  219. }
  220. static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
  221. const enum i915_cache_level level)
  222. {
  223. gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
  224. pde |= addr;
  225. if (level != I915_CACHE_NONE)
  226. pde |= PPAT_CACHED_PDE;
  227. else
  228. pde |= PPAT_UNCACHED;
  229. return pde;
  230. }
  231. #define gen8_pdpe_encode gen8_pde_encode
  232. #define gen8_pml4e_encode gen8_pde_encode
  233. static gen6_pte_t snb_pte_encode(dma_addr_t addr,
  234. enum i915_cache_level level,
  235. u32 unused)
  236. {
  237. gen6_pte_t pte = GEN6_PTE_VALID;
  238. pte |= GEN6_PTE_ADDR_ENCODE(addr);
  239. switch (level) {
  240. case I915_CACHE_L3_LLC:
  241. case I915_CACHE_LLC:
  242. pte |= GEN6_PTE_CACHE_LLC;
  243. break;
  244. case I915_CACHE_NONE:
  245. pte |= GEN6_PTE_UNCACHED;
  246. break;
  247. default:
  248. MISSING_CASE(level);
  249. }
  250. return pte;
  251. }
  252. static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
  253. enum i915_cache_level level,
  254. u32 unused)
  255. {
  256. gen6_pte_t pte = GEN6_PTE_VALID;
  257. pte |= GEN6_PTE_ADDR_ENCODE(addr);
  258. switch (level) {
  259. case I915_CACHE_L3_LLC:
  260. pte |= GEN7_PTE_CACHE_L3_LLC;
  261. break;
  262. case I915_CACHE_LLC:
  263. pte |= GEN6_PTE_CACHE_LLC;
  264. break;
  265. case I915_CACHE_NONE:
  266. pte |= GEN6_PTE_UNCACHED;
  267. break;
  268. default:
  269. MISSING_CASE(level);
  270. }
  271. return pte;
  272. }
  273. static gen6_pte_t byt_pte_encode(dma_addr_t addr,
  274. enum i915_cache_level level,
  275. u32 flags)
  276. {
  277. gen6_pte_t pte = GEN6_PTE_VALID;
  278. pte |= GEN6_PTE_ADDR_ENCODE(addr);
  279. if (!(flags & PTE_READ_ONLY))
  280. pte |= BYT_PTE_WRITEABLE;
  281. if (level != I915_CACHE_NONE)
  282. pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
  283. return pte;
  284. }
  285. static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
  286. enum i915_cache_level level,
  287. u32 unused)
  288. {
  289. gen6_pte_t pte = GEN6_PTE_VALID;
  290. pte |= HSW_PTE_ADDR_ENCODE(addr);
  291. if (level != I915_CACHE_NONE)
  292. pte |= HSW_WB_LLC_AGE3;
  293. return pte;
  294. }
  295. static gen6_pte_t iris_pte_encode(dma_addr_t addr,
  296. enum i915_cache_level level,
  297. u32 unused)
  298. {
  299. gen6_pte_t pte = GEN6_PTE_VALID;
  300. pte |= HSW_PTE_ADDR_ENCODE(addr);
  301. switch (level) {
  302. case I915_CACHE_NONE:
  303. break;
  304. case I915_CACHE_WT:
  305. pte |= HSW_WT_ELLC_LLC_AGE3;
  306. break;
  307. default:
  308. pte |= HSW_WB_ELLC_LLC_AGE3;
  309. break;
  310. }
  311. return pte;
  312. }
  313. static void stash_init(struct pagestash *stash)
  314. {
  315. pagevec_init(&stash->pvec);
  316. spin_lock_init(&stash->lock);
  317. }
  318. static struct page *stash_pop_page(struct pagestash *stash)
  319. {
  320. struct page *page = NULL;
  321. spin_lock(&stash->lock);
  322. if (likely(stash->pvec.nr))
  323. page = stash->pvec.pages[--stash->pvec.nr];
  324. spin_unlock(&stash->lock);
  325. return page;
  326. }
  327. static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
  328. {
  329. int nr;
  330. spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
  331. nr = min_t(int, pvec->nr, pagevec_space(&stash->pvec));
  332. memcpy(stash->pvec.pages + stash->pvec.nr,
  333. pvec->pages + pvec->nr - nr,
  334. sizeof(pvec->pages[0]) * nr);
  335. stash->pvec.nr += nr;
  336. spin_unlock(&stash->lock);
  337. pvec->nr -= nr;
  338. }
  339. static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
  340. {
  341. struct pagevec stack;
  342. struct page *page;
  343. if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
  344. i915_gem_shrink_all(vm->i915);
  345. page = stash_pop_page(&vm->free_pages);
  346. if (page)
  347. return page;
  348. if (!vm->pt_kmap_wc)
  349. return alloc_page(gfp);
  350. /* Look in our global stash of WC pages... */
  351. page = stash_pop_page(&vm->i915->mm.wc_stash);
  352. if (page)
  353. return page;
  354. /*
  355. * Otherwise batch allocate pages to amortize cost of set_pages_wc.
  356. *
  357. * We have to be careful as page allocation may trigger the shrinker
  358. * (via direct reclaim) which will fill up the WC stash underneath us.
  359. * So we add our WB pages into a temporary pvec on the stack and merge
  360. * them into the WC stash after all the allocations are complete.
  361. */
  362. pagevec_init(&stack);
  363. do {
  364. struct page *page;
  365. page = alloc_page(gfp);
  366. if (unlikely(!page))
  367. break;
  368. stack.pages[stack.nr++] = page;
  369. } while (pagevec_space(&stack));
  370. if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
  371. page = stack.pages[--stack.nr];
  372. /* Merge spare WC pages to the global stash */
  373. stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
  374. /* Push any surplus WC pages onto the local VM stash */
  375. if (stack.nr)
  376. stash_push_pagevec(&vm->free_pages, &stack);
  377. }
  378. /* Return unwanted leftovers */
  379. if (unlikely(stack.nr)) {
  380. WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
  381. __pagevec_release(&stack);
  382. }
  383. return page;
  384. }
  385. static void vm_free_pages_release(struct i915_address_space *vm,
  386. bool immediate)
  387. {
  388. struct pagevec *pvec = &vm->free_pages.pvec;
  389. struct pagevec stack;
  390. lockdep_assert_held(&vm->free_pages.lock);
  391. GEM_BUG_ON(!pagevec_count(pvec));
  392. if (vm->pt_kmap_wc) {
  393. /*
  394. * When we use WC, first fill up the global stash and then
  395. * only if full immediately free the overflow.
  396. */
  397. stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
  398. /*
  399. * As we have made some room in the VM's free_pages,
  400. * we can wait for it to fill again. Unless we are
  401. * inside i915_address_space_fini() and must
  402. * immediately release the pages!
  403. */
  404. if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
  405. return;
  406. /*
  407. * We have to drop the lock to allow ourselves to sleep,
  408. * so take a copy of the pvec and clear the stash for
  409. * others to use it as we sleep.
  410. */
  411. stack = *pvec;
  412. pagevec_reinit(pvec);
  413. spin_unlock(&vm->free_pages.lock);
  414. pvec = &stack;
  415. set_pages_array_wb(pvec->pages, pvec->nr);
  416. spin_lock(&vm->free_pages.lock);
  417. }
  418. __pagevec_release(pvec);
  419. }
  420. static void vm_free_page(struct i915_address_space *vm, struct page *page)
  421. {
  422. /*
  423. * On !llc, we need to change the pages back to WB. We only do so
  424. * in bulk, so we rarely need to change the page attributes here,
  425. * but doing so requires a stop_machine() from deep inside arch/x86/mm.
  426. * To make detection of the possible sleep more likely, use an
  427. * unconditional might_sleep() for everybody.
  428. */
  429. might_sleep();
  430. spin_lock(&vm->free_pages.lock);
  431. if (!pagevec_add(&vm->free_pages.pvec, page))
  432. vm_free_pages_release(vm, false);
  433. spin_unlock(&vm->free_pages.lock);
  434. }
  435. static void i915_address_space_init(struct i915_address_space *vm,
  436. struct drm_i915_private *dev_priv)
  437. {
  438. /*
  439. * The vm->mutex must be reclaim safe (for use in the shrinker).
  440. * Do a dummy acquire now under fs_reclaim so that any allocation
  441. * attempt holding the lock is immediately reported by lockdep.
  442. */
  443. mutex_init(&vm->mutex);
  444. i915_gem_shrinker_taints_mutex(&vm->mutex);
  445. GEM_BUG_ON(!vm->total);
  446. drm_mm_init(&vm->mm, 0, vm->total);
  447. vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
  448. stash_init(&vm->free_pages);
  449. INIT_LIST_HEAD(&vm->active_list);
  450. INIT_LIST_HEAD(&vm->inactive_list);
  451. INIT_LIST_HEAD(&vm->unbound_list);
  452. }
  453. static void i915_address_space_fini(struct i915_address_space *vm)
  454. {
  455. spin_lock(&vm->free_pages.lock);
  456. if (pagevec_count(&vm->free_pages.pvec))
  457. vm_free_pages_release(vm, true);
  458. GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
  459. spin_unlock(&vm->free_pages.lock);
  460. drm_mm_takedown(&vm->mm);
  461. mutex_destroy(&vm->mutex);
  462. }
  463. static int __setup_page_dma(struct i915_address_space *vm,
  464. struct i915_page_dma *p,
  465. gfp_t gfp)
  466. {
  467. p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
  468. if (unlikely(!p->page))
  469. return -ENOMEM;
  470. p->daddr = dma_map_page_attrs(vm->dma,
  471. p->page, 0, PAGE_SIZE,
  472. PCI_DMA_BIDIRECTIONAL,
  473. DMA_ATTR_SKIP_CPU_SYNC |
  474. DMA_ATTR_NO_WARN);
  475. if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
  476. vm_free_page(vm, p->page);
  477. return -ENOMEM;
  478. }
  479. return 0;
  480. }
  481. static int setup_page_dma(struct i915_address_space *vm,
  482. struct i915_page_dma *p)
  483. {
  484. return __setup_page_dma(vm, p, __GFP_HIGHMEM);
  485. }
  486. static void cleanup_page_dma(struct i915_address_space *vm,
  487. struct i915_page_dma *p)
  488. {
  489. dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
  490. vm_free_page(vm, p->page);
  491. }
/*
 * Convenience wrappers for paging structures ("px"): each one resolves the
 * structure with px_base() and forwards to the matching page/page_dma
 * helper.
 *
 *  kmap_atomic_px - kmap_atomic() the page behind px
 *  setup_px       - setup_page_dma() for px
 *  cleanup_px     - cleanup_page_dma() for px
 *  fill_px        - fill_page_dma() with a 64-bit value
 *  fill32_px      - fill_page_dma_32() with a 32-bit value
 */
#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
  497. static void fill_page_dma(struct i915_address_space *vm,
  498. struct i915_page_dma *p,
  499. const u64 val)
  500. {
  501. u64 * const vaddr = kmap_atomic(p->page);
  502. memset64(vaddr, val, PAGE_SIZE / sizeof(val));
  503. kunmap_atomic(vaddr);
  504. }
  505. static void fill_page_dma_32(struct i915_address_space *vm,
  506. struct i915_page_dma *p,
  507. const u32 v)
  508. {
  509. fill_page_dma(vm, p, (u64)v << 32 | v);
  510. }
  511. static int
  512. setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
  513. {
  514. unsigned long size;
  515. /*
  516. * In order to utilize 64K pages for an object with a size < 2M, we will
  517. * need to support a 64K scratch page, given that every 16th entry for a
  518. * page-table operating in 64K mode must point to a properly aligned 64K
  519. * region, including any PTEs which happen to point to scratch.
  520. *
  521. * This is only relevant for the 48b PPGTT where we support
  522. * huge-gtt-pages, see also i915_vma_insert().
  523. *
  524. * TODO: we should really consider write-protecting the scratch-page and
  525. * sharing between ppgtt
  526. */
  527. size = I915_GTT_PAGE_SIZE_4K;
  528. if (i915_vm_is_48bit(vm) &&
  529. HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
  530. size = I915_GTT_PAGE_SIZE_64K;
  531. gfp |= __GFP_NOWARN;
  532. }
  533. gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
  534. do {
  535. int order = get_order(size);
  536. struct page *page;
  537. dma_addr_t addr;
  538. page = alloc_pages(gfp, order);
  539. if (unlikely(!page))
  540. goto skip;
  541. addr = dma_map_page_attrs(vm->dma,
  542. page, 0, size,
  543. PCI_DMA_BIDIRECTIONAL,
  544. DMA_ATTR_SKIP_CPU_SYNC |
  545. DMA_ATTR_NO_WARN);
  546. if (unlikely(dma_mapping_error(vm->dma, addr)))
  547. goto free_page;
  548. if (unlikely(!IS_ALIGNED(addr, size)))
  549. goto unmap_page;
  550. vm->scratch_page.page = page;
  551. vm->scratch_page.daddr = addr;
  552. vm->scratch_page.order = order;
  553. return 0;
  554. unmap_page:
  555. dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
  556. free_page:
  557. __free_pages(page, order);
  558. skip:
  559. if (size == I915_GTT_PAGE_SIZE_4K)
  560. return -ENOMEM;
  561. size = I915_GTT_PAGE_SIZE_4K;
  562. gfp &= ~__GFP_NOWARN;
  563. } while (1);
  564. }
  565. static void cleanup_scratch_page(struct i915_address_space *vm)
  566. {
  567. struct i915_page_dma *p = &vm->scratch_page;
  568. dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
  569. PCI_DMA_BIDIRECTIONAL);
  570. __free_pages(p->page, p->order);
  571. }
  572. static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
  573. {
  574. struct i915_page_table *pt;
  575. pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
  576. if (unlikely(!pt))
  577. return ERR_PTR(-ENOMEM);
  578. if (unlikely(setup_px(vm, pt))) {
  579. kfree(pt);
  580. return ERR_PTR(-ENOMEM);
  581. }
  582. pt->used_ptes = 0;
  583. return pt;
  584. }
  585. static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
  586. {
  587. cleanup_px(vm, pt);
  588. kfree(pt);
  589. }
  590. static void gen8_initialize_pt(struct i915_address_space *vm,
  591. struct i915_page_table *pt)
  592. {
  593. fill_px(vm, pt,
  594. gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
  595. }
  596. static void gen6_initialize_pt(struct gen6_hw_ppgtt *ppgtt,
  597. struct i915_page_table *pt)
  598. {
  599. fill32_px(&ppgtt->base.vm, pt, ppgtt->scratch_pte);
  600. }
  601. static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
  602. {
  603. struct i915_page_directory *pd;
  604. pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
  605. if (unlikely(!pd))
  606. return ERR_PTR(-ENOMEM);
  607. if (unlikely(setup_px(vm, pd))) {
  608. kfree(pd);
  609. return ERR_PTR(-ENOMEM);
  610. }
  611. pd->used_pdes = 0;
  612. return pd;
  613. }
  614. static void free_pd(struct i915_address_space *vm,
  615. struct i915_page_directory *pd)
  616. {
  617. cleanup_px(vm, pd);
  618. kfree(pd);
  619. }
  620. static void gen8_initialize_pd(struct i915_address_space *vm,
  621. struct i915_page_directory *pd)
  622. {
  623. fill_px(vm, pd,
  624. gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
  625. memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES);
  626. }
  627. static int __pdp_init(struct i915_address_space *vm,
  628. struct i915_page_directory_pointer *pdp)
  629. {
  630. const unsigned int pdpes = i915_pdpes_per_pdp(vm);
  631. pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
  632. I915_GFP_ALLOW_FAIL);
  633. if (unlikely(!pdp->page_directory))
  634. return -ENOMEM;
  635. memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes);
  636. return 0;
  637. }
  638. static void __pdp_fini(struct i915_page_directory_pointer *pdp)
  639. {
  640. kfree(pdp->page_directory);
  641. pdp->page_directory = NULL;
  642. }
  643. static inline bool use_4lvl(const struct i915_address_space *vm)
  644. {
  645. return i915_vm_is_48bit(vm);
  646. }
  647. static struct i915_page_directory_pointer *
  648. alloc_pdp(struct i915_address_space *vm)
  649. {
  650. struct i915_page_directory_pointer *pdp;
  651. int ret = -ENOMEM;
  652. GEM_BUG_ON(!use_4lvl(vm));
  653. pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
  654. if (!pdp)
  655. return ERR_PTR(-ENOMEM);
  656. ret = __pdp_init(vm, pdp);
  657. if (ret)
  658. goto fail_bitmap;
  659. ret = setup_px(vm, pdp);
  660. if (ret)
  661. goto fail_page_m;
  662. return pdp;
  663. fail_page_m:
  664. __pdp_fini(pdp);
  665. fail_bitmap:
  666. kfree(pdp);
  667. return ERR_PTR(ret);
  668. }
  669. static void free_pdp(struct i915_address_space *vm,
  670. struct i915_page_directory_pointer *pdp)
  671. {
  672. __pdp_fini(pdp);
  673. if (!use_4lvl(vm))
  674. return;
  675. cleanup_px(vm, pdp);
  676. kfree(pdp);
  677. }
  678. static void gen8_initialize_pdp(struct i915_address_space *vm,
  679. struct i915_page_directory_pointer *pdp)
  680. {
  681. gen8_ppgtt_pdpe_t scratch_pdpe;
  682. scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
  683. fill_px(vm, pdp, scratch_pdpe);
  684. }
  685. static void gen8_initialize_pml4(struct i915_address_space *vm,
  686. struct i915_pml4 *pml4)
  687. {
  688. fill_px(vm, pml4,
  689. gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
  690. memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
  691. }
  692. /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
  693. * the page table structures, we mark them dirty so that
  694. * context switching/execlist queuing code takes extra steps
  695. * to ensure that tlbs are flushed.
  696. */
  697. static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
  698. {
  699. ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->vm.i915)->ring_mask;
  700. }
  701. /* Removes entries from a single page table, releasing it if it's empty.
  702. * Caller can use the return value to update higher-level entries.
  703. */
  704. static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
  705. struct i915_page_table *pt,
  706. u64 start, u64 length)
  707. {
  708. unsigned int num_entries = gen8_pte_count(start, length);
  709. unsigned int pte = gen8_pte_index(start);
  710. unsigned int pte_end = pte + num_entries;
  711. const gen8_pte_t scratch_pte =
  712. gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
  713. gen8_pte_t *vaddr;
  714. GEM_BUG_ON(num_entries > pt->used_ptes);
  715. pt->used_ptes -= num_entries;
  716. if (!pt->used_ptes)
  717. return true;
  718. vaddr = kmap_atomic_px(pt);
  719. while (pte < pte_end)
  720. vaddr[pte++] = scratch_pte;
  721. kunmap_atomic(vaddr);
  722. return false;
  723. }
  724. static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
  725. struct i915_page_directory *pd,
  726. struct i915_page_table *pt,
  727. unsigned int pde)
  728. {
  729. gen8_pde_t *vaddr;
  730. pd->page_table[pde] = pt;
  731. vaddr = kmap_atomic_px(pd);
  732. vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
  733. kunmap_atomic(vaddr);
  734. }
  735. static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
  736. struct i915_page_directory *pd,
  737. u64 start, u64 length)
  738. {
  739. struct i915_page_table *pt;
  740. u32 pde;
  741. gen8_for_each_pde(pt, pd, start, length, pde) {
  742. GEM_BUG_ON(pt == vm->scratch_pt);
  743. if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
  744. continue;
  745. gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
  746. GEM_BUG_ON(!pd->used_pdes);
  747. pd->used_pdes--;
  748. free_pt(vm, pt);
  749. }
  750. return !pd->used_pdes;
  751. }
  752. static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
  753. struct i915_page_directory_pointer *pdp,
  754. struct i915_page_directory *pd,
  755. unsigned int pdpe)
  756. {
  757. gen8_ppgtt_pdpe_t *vaddr;
  758. pdp->page_directory[pdpe] = pd;
  759. if (!use_4lvl(vm))
  760. return;
  761. vaddr = kmap_atomic_px(pdp);
  762. vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
  763. kunmap_atomic(vaddr);
  764. }
  765. /* Removes entries from a single page dir pointer, releasing it if it's empty.
  766. * Caller can use the return value to update higher-level entries
  767. */
  768. static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
  769. struct i915_page_directory_pointer *pdp,
  770. u64 start, u64 length)
  771. {
  772. struct i915_page_directory *pd;
  773. unsigned int pdpe;
  774. gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
  775. GEM_BUG_ON(pd == vm->scratch_pd);
  776. if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
  777. continue;
  778. gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
  779. GEM_BUG_ON(!pdp->used_pdpes);
  780. pdp->used_pdpes--;
  781. free_pd(vm, pd);
  782. }
  783. return !pdp->used_pdpes;
  784. }
  785. static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
  786. u64 start, u64 length)
  787. {
  788. gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
  789. }
  790. static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
  791. struct i915_page_directory_pointer *pdp,
  792. unsigned int pml4e)
  793. {
  794. gen8_ppgtt_pml4e_t *vaddr;
  795. pml4->pdps[pml4e] = pdp;
  796. vaddr = kmap_atomic_px(pml4);
  797. vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
  798. kunmap_atomic(vaddr);
  799. }
  800. /* Removes entries from a single pml4.
  801. * This is the top-level structure in 4-level page tables used on gen8+.
  802. * Empty entries are always scratch pml4e.
  803. */
  804. static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
  805. u64 start, u64 length)
  806. {
  807. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  808. struct i915_pml4 *pml4 = &ppgtt->pml4;
  809. struct i915_page_directory_pointer *pdp;
  810. unsigned int pml4e;
  811. GEM_BUG_ON(!use_4lvl(vm));
  812. gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
  813. GEM_BUG_ON(pdp == vm->scratch_pdp);
  814. if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
  815. continue;
  816. gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
  817. free_pdp(vm, pdp);
  818. }
  819. }
  820. static inline struct sgt_dma {
  821. struct scatterlist *sg;
  822. dma_addr_t dma, max;
  823. } sgt_dma(struct i915_vma *vma) {
  824. struct scatterlist *sg = vma->pages->sgl;
  825. dma_addr_t addr = sg_dma_address(sg);
  826. return (struct sgt_dma) { sg, addr, addr + sg->length };
  827. }
/*
 * Position inside the gen8 page-table hierarchy, one index per level.
 * The field order matters: gen8_insert_pte() fills this structure with a
 * positional initialiser.
 */
struct gen8_insert_pte {
	u16 pml4e;	/* index into the pml4 */
	u16 pdpe;	/* index into the page directory pointer */
	u16 pde;	/* index into the page directory */
	u16 pte;	/* index into the page table */
};
  834. static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
  835. {
  836. return (struct gen8_insert_pte) {
  837. gen8_pml4e_index(start),
  838. gen8_pdpe_index(start),
  839. gen8_pde_index(start),
  840. gen8_pte_index(start),
  841. };
  842. }
/*
 * Write one PTE per I915_GTT_PAGE_SIZE of the DMA range walked by @iter
 * into the page tables reachable from @pdp, starting at the position
 * described by @idx. Both @iter and @idx are advanced in place.
 *
 * Returns false once the scatterlist is exhausted (everything has been
 * inserted). Returns true when idx->pdpe reaches GEN8_PML4ES_PER_PML4 and
 * wraps to 0, i.e. this @pdp is full and the caller must carry on with the
 * next one.
 */
static __always_inline bool
gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
			      struct i915_page_directory_pointer *pdp,
			      struct sgt_dma *iter,
			      struct gen8_insert_pte *idx,
			      enum i915_cache_level cache_level,
			      u32 flags)
{
	struct i915_page_directory *pd;
	/* PTE bits without an address; the DMA address is OR-ed in per entry. */
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
	gen8_pte_t *vaddr;
	bool ret;

	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
	pd = pdp->page_directory[idx->pdpe];
	vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
	do {
		vaddr[idx->pte] = pte_encode | iter->dma;

		iter->dma += I915_GTT_PAGE_SIZE;
		if (iter->dma >= iter->max) {
			/* Current sg entry consumed: move to the next one. */
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg) {
				ret = false;
				break;
			}

			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + iter->sg->length;
		}

		if (++idx->pte == GEN8_PTES) {
			/* End of this page table: step to the next PDE. */
			idx->pte = 0;

			if (++idx->pde == I915_PDES) {
				/* End of this page directory: step to the next PDPE. */
				idx->pde = 0;

				/* Limited by sg length for 3lvl */
				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
					idx->pdpe = 0;
					ret = true;
					break;
				}

				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
				pd = pdp->page_directory[idx->pdpe];
			}

			/* Swap the atomic mapping over to the new page table. */
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
		}
	} while (1);
	/* Both exits above break with a mapping still held. */
	kunmap_atomic(vaddr);

	return ret;
}
  890. static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
  891. struct i915_vma *vma,
  892. enum i915_cache_level cache_level,
  893. u32 flags)
  894. {
  895. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  896. struct sgt_dma iter = sgt_dma(vma);
  897. struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
  898. gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
  899. cache_level, flags);
  900. vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
  901. }
/*
 * Insert the pages of @vma using the largest GTT page size each chunk
 * allows. Every pass of the outer loop handles one page directory (for 2M
 * entries) or one page table (for 4K entries, possibly promoted to 64K),
 * and ORs the page size that was used into vma->page_sizes.gtt.
 *
 * @pdps is the pml4's array of page directory pointers and @iter the DMA
 * cursor over the vma's scatterlist; @iter is consumed until its sg
 * becomes NULL.
 */
static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
					   struct i915_page_directory_pointer **pdps,
					   struct sgt_dma *iter,
					   enum i915_cache_level cache_level,
					   u32 flags)
{
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
	u64 start = vma->node.start;
	/* Bytes left in the current sg entry. */
	dma_addr_t rem = iter->sg->length;

	do {
		struct gen8_insert_pte idx = gen8_insert_pte(start);
		struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
		struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
		unsigned int page_size;
		bool maybe_64K = false;
		gen8_pte_t encode = pte_encode;
		gen8_pte_t *vaddr;
		u16 index, max;

		/*
		 * 2M entries are written straight into the page directory.
		 * That requires the sg to contain 2M pages, a 2M-aligned DMA
		 * address, at least 2M remaining and a GTT offset at the
		 * start of a page table (pte index 0).
		 */
		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
			index = idx.pde;
			max = I915_PDES;
			page_size = I915_GTT_PAGE_SIZE_2M;

			encode |= GEN8_PDE_PS_2M;

			vaddr = kmap_atomic_px(pd);
		} else {
			struct i915_page_table *pt = pd->page_table[idx.pde];

			index = idx.pte;
			max = GEN8_PTES;
			page_size = I915_GTT_PAGE_SIZE;

			/*
			 * Writing 4K PTEs. Note whether this page table might
			 * later be switched to 64K mode: we must start at
			 * entry 0 with a 64K-aligned DMA address, and the
			 * current sg entry must either be a multiple of 64K
			 * or cover the rest of the table.
			 */
			if (!index &&
			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
			     rem >= (max - index) * I915_GTT_PAGE_SIZE))
				maybe_64K = true;

			vaddr = kmap_atomic_px(pt);
		}

		do {
			GEM_BUG_ON(iter->sg->length < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				/* Current sg entry consumed: move to the next. */
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = iter->sg->length;
				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				/* Re-check the 64K conditions for the new sg entry. */
				if (maybe_64K && index < max &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
				       rem >= (max - index) * I915_GTT_PAGE_SIZE)))
					maybe_64K = false;

				/*
				 * The new entry is not aligned for the page
				 * size in use: leave the inner loop so the
				 * outer loop can pick a page size again.
				 */
				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		kunmap_atomic(vaddr);

		/*
		 * Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled whole page-table with 64K entries, or filled part of
		 * it and have reached the end of the sg table and we have
		 * enough padding.
		 */
		if (maybe_64K &&
		    (index == max ||
		     (i915_vm_has_scratch_64K(vma->vm) &&
		      !iter->sg && IS_ALIGNED(vma->node.start +
					      vma->node.size,
					      I915_GTT_PAGE_SIZE_2M)))) {
			vaddr = kmap_atomic_px(pd);
			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
			kunmap_atomic(vaddr);
			page_size = I915_GTT_PAGE_SIZE_64K;

			/*
			 * We write all 4K page entries, even when using 64K
			 * pages. In order to verify that the HW isn't cheating
			 * by using the 4K PTE instead of the 64K PTE, we want
			 * to remove all the surplus entries. If the HW skipped
			 * the 64K PTE, it will read/write into the scratch page
			 * instead - which we detect as missing results during
			 * selftests.
			 */
			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
				u16 i;

				encode = pte_encode | vma->vm->scratch_page.daddr;
				vaddr = kmap_atomic_px(pd->page_table[idx.pde]);

				/* Keep every 16th entry, scrub the 15 after it. */
				for (i = 1; i < index; i += 16)
					memset64(vaddr + i, encode, 15);

				kunmap_atomic(vaddr);
			}
		}

		vma->page_sizes.gtt |= page_size;
	} while (iter->sg);
}
  1001. static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
  1002. struct i915_vma *vma,
  1003. enum i915_cache_level cache_level,
  1004. u32 flags)
  1005. {
  1006. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1007. struct sgt_dma iter = sgt_dma(vma);
  1008. struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
  1009. if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
  1010. gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level,
  1011. flags);
  1012. } else {
  1013. struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
  1014. while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
  1015. &iter, &idx, cache_level,
  1016. flags))
  1017. GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
  1018. vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
  1019. }
  1020. }
  1021. static void gen8_free_page_tables(struct i915_address_space *vm,
  1022. struct i915_page_directory *pd)
  1023. {
  1024. int i;
  1025. for (i = 0; i < I915_PDES; i++) {
  1026. if (pd->page_table[i] != vm->scratch_pt)
  1027. free_pt(vm, pd->page_table[i]);
  1028. }
  1029. }
  1030. static int gen8_init_scratch(struct i915_address_space *vm)
  1031. {
  1032. int ret;
  1033. ret = setup_scratch_page(vm, __GFP_HIGHMEM);
  1034. if (ret)
  1035. return ret;
  1036. vm->scratch_pt = alloc_pt(vm);
  1037. if (IS_ERR(vm->scratch_pt)) {
  1038. ret = PTR_ERR(vm->scratch_pt);
  1039. goto free_scratch_page;
  1040. }
  1041. vm->scratch_pd = alloc_pd(vm);
  1042. if (IS_ERR(vm->scratch_pd)) {
  1043. ret = PTR_ERR(vm->scratch_pd);
  1044. goto free_pt;
  1045. }
  1046. if (use_4lvl(vm)) {
  1047. vm->scratch_pdp = alloc_pdp(vm);
  1048. if (IS_ERR(vm->scratch_pdp)) {
  1049. ret = PTR_ERR(vm->scratch_pdp);
  1050. goto free_pd;
  1051. }
  1052. }
  1053. gen8_initialize_pt(vm, vm->scratch_pt);
  1054. gen8_initialize_pd(vm, vm->scratch_pd);
  1055. if (use_4lvl(vm))
  1056. gen8_initialize_pdp(vm, vm->scratch_pdp);
  1057. return 0;
  1058. free_pd:
  1059. free_pd(vm, vm->scratch_pd);
  1060. free_pt:
  1061. free_pt(vm, vm->scratch_pt);
  1062. free_scratch_page:
  1063. cleanup_scratch_page(vm);
  1064. return ret;
  1065. }
  1066. static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
  1067. {
  1068. struct i915_address_space *vm = &ppgtt->vm;
  1069. struct drm_i915_private *dev_priv = vm->i915;
  1070. enum vgt_g2v_type msg;
  1071. int i;
  1072. if (use_4lvl(vm)) {
  1073. const u64 daddr = px_dma(&ppgtt->pml4);
  1074. I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
  1075. I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
  1076. msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
  1077. VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
  1078. } else {
  1079. for (i = 0; i < GEN8_3LVL_PDPES; i++) {
  1080. const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
  1081. I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
  1082. I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
  1083. }
  1084. msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
  1085. VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
  1086. }
  1087. I915_WRITE(vgtif_reg(g2v_notify), msg);
  1088. return 0;
  1089. }
  1090. static void gen8_free_scratch(struct i915_address_space *vm)
  1091. {
  1092. if (use_4lvl(vm))
  1093. free_pdp(vm, vm->scratch_pdp);
  1094. free_pd(vm, vm->scratch_pd);
  1095. free_pt(vm, vm->scratch_pt);
  1096. cleanup_scratch_page(vm);
  1097. }
  1098. static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
  1099. struct i915_page_directory_pointer *pdp)
  1100. {
  1101. const unsigned int pdpes = i915_pdpes_per_pdp(vm);
  1102. int i;
  1103. for (i = 0; i < pdpes; i++) {
  1104. if (pdp->page_directory[i] == vm->scratch_pd)
  1105. continue;
  1106. gen8_free_page_tables(vm, pdp->page_directory[i]);
  1107. free_pd(vm, pdp->page_directory[i]);
  1108. }
  1109. free_pdp(vm, pdp);
  1110. }
  1111. static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
  1112. {
  1113. int i;
  1114. for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
  1115. if (ppgtt->pml4.pdps[i] == ppgtt->vm.scratch_pdp)
  1116. continue;
  1117. gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pml4.pdps[i]);
  1118. }
  1119. cleanup_px(&ppgtt->vm, &ppgtt->pml4);
  1120. }
  1121. static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  1122. {
  1123. struct drm_i915_private *dev_priv = vm->i915;
  1124. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1125. if (intel_vgpu_active(dev_priv))
  1126. gen8_ppgtt_notify_vgt(ppgtt, false);
  1127. if (use_4lvl(vm))
  1128. gen8_ppgtt_cleanup_4lvl(ppgtt);
  1129. else
  1130. gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp);
  1131. gen8_free_scratch(vm);
  1132. }
/*
 * Make sure every page table of @pd covering [start, start + length) is a
 * real allocation rather than the scratch page table, and add the number
 * of PTEs in the range to each table's used_ptes.
 *
 * Returns 0 on success or -ENOMEM if a page table cannot be allocated; in
 * that case the part of the range handled so far is cleared again.
 */
static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd,
			       u64 start, u64 length)
{
	struct i915_page_table *pt;
	/* Remember the original start; the iterator below advances 'start'. */
	u64 from = start;
	unsigned int pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		int count = gen8_pte_count(start, length);

		if (pt == vm->scratch_pt) {
			/* Counted up front, and backed out if allocation fails. */
			pd->used_pdes++;

			pt = alloc_pt(vm);
			if (IS_ERR(pt)) {
				pd->used_pdes--;
				goto unwind;
			}

			/*
			 * Scratch-fill the new table unless the whole table
			 * is covered by this range (vGPU always fills).
			 * NOTE(review): presumably a fully covered table gets
			 * every PTE overwritten by the subsequent insert -
			 * confirm.
			 */
			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
				gen8_initialize_pt(vm, pt);

			gen8_ppgtt_set_pde(vm, pd, pt, pde);
			GEM_BUG_ON(pd->used_pdes > I915_PDES);
		}

		pt->used_ptes += count;
	}
	return 0;

unwind:
	/* 'start - from' is the length processed before the failure. */
	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
	return -ENOMEM;
}
/*
 * Make sure every page directory of @pdp covering [start, start + length)
 * is a real allocation rather than the scratch page directory, then
 * allocate the page tables beneath it.
 *
 * Returns 0 on success or -ENOMEM on any failure (the value of 'ret' is
 * not propagated); the part of the range handled so far is cleared again.
 */
static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp,
				u64 start, u64 length)
{
	struct i915_page_directory *pd;
	/* Remember the original start; the iterator below advances 'start'. */
	u64 from = start;
	unsigned int pdpe;
	int ret;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		if (pd == vm->scratch_pd) {
			/* Counted up front, and backed out if allocation fails. */
			pdp->used_pdpes++;

			pd = alloc_pd(vm);
			if (IS_ERR(pd)) {
				pdp->used_pdpes--;
				goto unwind;
			}

			gen8_initialize_pd(vm, pd);
			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
			GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));

			/* A new page directory was installed. */
			mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
		}

		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
		if (unlikely(ret))
			goto unwind_pd;
	}

	return 0;

unwind_pd:
	/* Drop the directory we were working on if it ended up empty. */
	if (!pd->used_pdes) {
		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
		GEM_BUG_ON(!pdp->used_pdpes);
		pdp->used_pdpes--;
		free_pd(vm, pd);
	}
unwind:
	/* 'start - from' is the length processed before the failure. */
	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
	return -ENOMEM;
}
  1198. static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
  1199. u64 start, u64 length)
  1200. {
  1201. return gen8_ppgtt_alloc_pdp(vm,
  1202. &i915_vm_to_ppgtt(vm)->pdp, start, length);
  1203. }
/*
 * Allocate the paging structures backing [start, start + length) on a
 * 4-level ppgtt: a real pdp replaces the scratch pdp wherever needed, then
 * the lower levels are allocated through gen8_ppgtt_alloc_pdp().
 *
 * Returns 0 on success or -ENOMEM on any failure (the value of 'ret' is
 * not propagated); the part of the range handled so far is cleared again.
 */
static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
				 u64 start, u64 length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_pml4 *pml4 = &ppgtt->pml4;
	struct i915_page_directory_pointer *pdp;
	/* Remember the original start; the iterator below advances 'start'. */
	u64 from = start;
	u32 pml4e;
	int ret;

	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
		if (pml4->pdps[pml4e] == vm->scratch_pdp) {
			pdp = alloc_pdp(vm);
			if (IS_ERR(pdp))
				goto unwind;

			gen8_initialize_pdp(vm, pdp);
			gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
		}

		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
		if (unlikely(ret))
			goto unwind_pdp;
	}

	return 0;

unwind_pdp:
	/* Drop the pdp we were working on if it ended up empty. */
	if (!pdp->used_pdpes) {
		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
		free_pdp(vm, pdp);
	}
unwind:
	/* 'start - from' is the length processed before the failure. */
	gen8_ppgtt_clear_4lvl(vm, from, start - from);
	return -ENOMEM;
}
  1235. static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
  1236. struct i915_page_directory_pointer *pdp,
  1237. u64 start, u64 length,
  1238. gen8_pte_t scratch_pte,
  1239. struct seq_file *m)
  1240. {
  1241. struct i915_address_space *vm = &ppgtt->vm;
  1242. struct i915_page_directory *pd;
  1243. u32 pdpe;
  1244. gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
  1245. struct i915_page_table *pt;
  1246. u64 pd_len = length;
  1247. u64 pd_start = start;
  1248. u32 pde;
  1249. if (pdp->page_directory[pdpe] == ppgtt->vm.scratch_pd)
  1250. continue;
  1251. seq_printf(m, "\tPDPE #%d\n", pdpe);
  1252. gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
  1253. u32 pte;
  1254. gen8_pte_t *pt_vaddr;
  1255. if (pd->page_table[pde] == ppgtt->vm.scratch_pt)
  1256. continue;
  1257. pt_vaddr = kmap_atomic_px(pt);
  1258. for (pte = 0; pte < GEN8_PTES; pte += 4) {
  1259. u64 va = (pdpe << GEN8_PDPE_SHIFT |
  1260. pde << GEN8_PDE_SHIFT |
  1261. pte << GEN8_PTE_SHIFT);
  1262. int i;
  1263. bool found = false;
  1264. for (i = 0; i < 4; i++)
  1265. if (pt_vaddr[pte + i] != scratch_pte)
  1266. found = true;
  1267. if (!found)
  1268. continue;
  1269. seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
  1270. for (i = 0; i < 4; i++) {
  1271. if (pt_vaddr[pte + i] != scratch_pte)
  1272. seq_printf(m, " %llx", pt_vaddr[pte + i]);
  1273. else
  1274. seq_puts(m, " SCRATCH ");
  1275. }
  1276. seq_puts(m, "\n");
  1277. }
  1278. kunmap_atomic(pt_vaddr);
  1279. }
  1280. }
  1281. }
  1282. static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
  1283. {
  1284. struct i915_address_space *vm = &ppgtt->vm;
  1285. const gen8_pte_t scratch_pte =
  1286. gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
  1287. u64 start = 0, length = ppgtt->vm.total;
  1288. if (use_4lvl(vm)) {
  1289. u64 pml4e;
  1290. struct i915_pml4 *pml4 = &ppgtt->pml4;
  1291. struct i915_page_directory_pointer *pdp;
  1292. gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
  1293. if (pml4->pdps[pml4e] == ppgtt->vm.scratch_pdp)
  1294. continue;
  1295. seq_printf(m, " PML4E #%llu\n", pml4e);
  1296. gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
  1297. }
  1298. } else {
  1299. gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
  1300. }
  1301. }
/*
 * Allocate and install a real page directory for every PDPE of the
 * ppgtt's top-level pdp, covering the whole address space. In the code
 * visible here this is only called from gen8_ppgtt_create(), for a 3-level
 * ppgtt when a vGPU is active.
 *
 * Returns 0 on success. On allocation failure every directory installed so
 * far is replaced by scratch and freed, used_pdpes is reset to 0 and
 * -ENOMEM is returned.
 */
static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->vm;
	struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
	struct i915_page_directory *pd;
	u64 start = 0, length = ppgtt->vm.total;
	/* Remember the original start; the iterator below advances 'start'. */
	u64 from = start;
	unsigned int pdpe;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		pd = alloc_pd(vm);
		if (IS_ERR(pd))
			goto unwind;

		gen8_initialize_pd(vm, pd);
		gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
		pdp->used_pdpes++;
	}

	/*
	 * One extra reference on top of the per-directory counts.
	 * NOTE(review): presumably this keeps used_pdpes from ever reaching
	 * zero in the clear paths - confirm.
	 */
	pdp->used_pdpes++; /* never remove */
	return 0;

unwind:
	/* Turn 'start' into the length covered before the failure. */
	start -= from;
	gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
		free_pd(vm, pd);
	}
	pdp->used_pdpes = 0;
	return -ENOMEM;
}
  1329. /*
  1330. * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  1331. * with a net effect resembling a 2-level page table in normal x86 terms. Each
  1332. * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
  1333. * space.
  1334. *
  1335. */
  1336. static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
  1337. {
  1338. struct i915_hw_ppgtt *ppgtt;
  1339. int err;
  1340. ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
  1341. if (!ppgtt)
  1342. return ERR_PTR(-ENOMEM);
  1343. kref_init(&ppgtt->ref);
  1344. ppgtt->vm.i915 = i915;
  1345. ppgtt->vm.dma = &i915->drm.pdev->dev;
  1346. ppgtt->vm.total = USES_FULL_48BIT_PPGTT(i915) ?
  1347. 1ULL << 48 :
  1348. 1ULL << 32;
  1349. /*
  1350. * From bdw, there is support for read-only pages in the PPGTT.
  1351. *
  1352. * XXX GVT is not honouring the lack of RW in the PTE bits.
  1353. */
  1354. ppgtt->vm.has_read_only = !intel_vgpu_active(i915);
  1355. i915_address_space_init(&ppgtt->vm, i915);
  1356. /* There are only few exceptions for gen >=6. chv and bxt.
  1357. * And we are not sure about the latter so play safe for now.
  1358. */
  1359. if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
  1360. ppgtt->vm.pt_kmap_wc = true;
  1361. err = gen8_init_scratch(&ppgtt->vm);
  1362. if (err)
  1363. goto err_free;
  1364. if (use_4lvl(&ppgtt->vm)) {
  1365. err = setup_px(&ppgtt->vm, &ppgtt->pml4);
  1366. if (err)
  1367. goto err_scratch;
  1368. gen8_initialize_pml4(&ppgtt->vm, &ppgtt->pml4);
  1369. ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
  1370. ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
  1371. ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
  1372. } else {
  1373. err = __pdp_init(&ppgtt->vm, &ppgtt->pdp);
  1374. if (err)
  1375. goto err_scratch;
  1376. if (intel_vgpu_active(i915)) {
  1377. err = gen8_preallocate_top_level_pdp(ppgtt);
  1378. if (err) {
  1379. __pdp_fini(&ppgtt->pdp);
  1380. goto err_scratch;
  1381. }
  1382. }
  1383. ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
  1384. ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
  1385. ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
  1386. }
  1387. if (intel_vgpu_active(i915))
  1388. gen8_ppgtt_notify_vgt(ppgtt, true);
  1389. ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
  1390. ppgtt->debug_dump = gen8_dump_ppgtt;
  1391. ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
  1392. ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
  1393. ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
  1394. ppgtt->vm.vma_ops.clear_pages = clear_pages;
  1395. return ppgtt;
  1396. err_scratch:
  1397. gen8_free_scratch(&ppgtt->vm);
  1398. err_free:
  1399. kfree(ppgtt);
  1400. return ERR_PTR(err);
  1401. }
  1402. static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m)
  1403. {
  1404. struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
  1405. const gen6_pte_t scratch_pte = ppgtt->scratch_pte;
  1406. struct i915_page_table *pt;
  1407. u32 pte, pde;
  1408. gen6_for_all_pdes(pt, &base->pd, pde) {
  1409. gen6_pte_t *vaddr;
  1410. if (pt == base->vm.scratch_pt)
  1411. continue;
  1412. if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
  1413. u32 expected =
  1414. GEN6_PDE_ADDR_ENCODE(px_dma(pt)) |
  1415. GEN6_PDE_VALID;
  1416. u32 pd_entry = readl(ppgtt->pd_addr + pde);
  1417. if (pd_entry != expected)
  1418. seq_printf(m,
  1419. "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
  1420. pde,
  1421. pd_entry,
  1422. expected);
  1423. seq_printf(m, "\tPDE: %x\n", pd_entry);
  1424. }
  1425. vaddr = kmap_atomic_px(base->pd.page_table[pde]);
  1426. for (pte = 0; pte < GEN6_PTES; pte += 4) {
  1427. int i;
  1428. for (i = 0; i < 4; i++)
  1429. if (vaddr[pte + i] != scratch_pte)
  1430. break;
  1431. if (i == 4)
  1432. continue;
  1433. seq_printf(m, "\t\t(%03d, %04d) %08llx: ",
  1434. pde, pte,
  1435. (pde * GEN6_PTES + pte) * I915_GTT_PAGE_SIZE);
  1436. for (i = 0; i < 4; i++) {
  1437. if (vaddr[pte + i] != scratch_pte)
  1438. seq_printf(m, " %08x", vaddr[pte + i]);
  1439. else
  1440. seq_puts(m, " SCRATCH");
  1441. }
  1442. seq_puts(m, "\n");
  1443. }
  1444. kunmap_atomic(vaddr);
  1445. }
  1446. }
  1447. /* Write pde (index) from the page directory @pd to the page table @pt */
  1448. static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
  1449. const unsigned int pde,
  1450. const struct i915_page_table *pt)
  1451. {
  1452. /* Caller needs to make sure the write completes if necessary */
  1453. iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
  1454. ppgtt->pd_addr + pde);
  1455. }
  1456. static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
  1457. {
  1458. struct intel_engine_cs *engine;
  1459. enum intel_engine_id id;
  1460. for_each_engine(engine, dev_priv, id) {
  1461. u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
  1462. GEN8_GFX_PPGTT_48B : 0;
  1463. I915_WRITE(RING_MODE_GEN7(engine),
  1464. _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
  1465. }
  1466. }
  1467. static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
  1468. {
  1469. struct intel_engine_cs *engine;
  1470. u32 ecochk, ecobits;
  1471. enum intel_engine_id id;
  1472. ecobits = I915_READ(GAC_ECO_BITS);
  1473. I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
  1474. ecochk = I915_READ(GAM_ECOCHK);
  1475. if (IS_HASWELL(dev_priv)) {
  1476. ecochk |= ECOCHK_PPGTT_WB_HSW;
  1477. } else {
  1478. ecochk |= ECOCHK_PPGTT_LLC_IVB;
  1479. ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
  1480. }
  1481. I915_WRITE(GAM_ECOCHK, ecochk);
  1482. for_each_engine(engine, dev_priv, id) {
  1483. /* GFX_MODE is per-ring on gen7+ */
  1484. I915_WRITE(RING_MODE_GEN7(engine),
  1485. _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  1486. }
  1487. }
  1488. static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
  1489. {
  1490. u32 ecochk, gab_ctl, ecobits;
  1491. ecobits = I915_READ(GAC_ECO_BITS);
  1492. I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
  1493. ECOBITS_PPGTT_CACHE64B);
  1494. gab_ctl = I915_READ(GAB_CTL);
  1495. I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
  1496. ecochk = I915_READ(GAM_ECOCHK);
  1497. I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
  1498. I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  1499. }
  1500. /* PPGTT support for Sandybdrige/Gen6 and later */
  1501. static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
  1502. u64 start, u64 length)
  1503. {
  1504. struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
  1505. unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
  1506. unsigned int pde = first_entry / GEN6_PTES;
  1507. unsigned int pte = first_entry % GEN6_PTES;
  1508. unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
  1509. const gen6_pte_t scratch_pte = ppgtt->scratch_pte;
  1510. while (num_entries) {
  1511. struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++];
  1512. const unsigned int end = min(pte + num_entries, GEN6_PTES);
  1513. const unsigned int count = end - pte;
  1514. gen6_pte_t *vaddr;
  1515. GEM_BUG_ON(pt == vm->scratch_pt);
  1516. num_entries -= count;
  1517. GEM_BUG_ON(count > pt->used_ptes);
  1518. pt->used_ptes -= count;
  1519. if (!pt->used_ptes)
  1520. ppgtt->scan_for_unused_pt = true;
  1521. /*
  1522. * Note that the hw doesn't support removing PDE on the fly
  1523. * (they are cached inside the context with no means to
  1524. * invalidate the cache), so we can only reset the PTE
  1525. * entries back to scratch.
  1526. */
  1527. vaddr = kmap_atomic_px(pt);
  1528. do {
  1529. vaddr[pte++] = scratch_pte;
  1530. } while (pte < end);
  1531. kunmap_atomic(vaddr);
  1532. pte = 0;
  1533. }
  1534. }
  1535. static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
  1536. struct i915_vma *vma,
  1537. enum i915_cache_level cache_level,
  1538. u32 flags)
  1539. {
  1540. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1541. unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
  1542. unsigned act_pt = first_entry / GEN6_PTES;
  1543. unsigned act_pte = first_entry % GEN6_PTES;
  1544. const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
  1545. struct sgt_dma iter = sgt_dma(vma);
  1546. gen6_pte_t *vaddr;
  1547. GEM_BUG_ON(ppgtt->pd.page_table[act_pt] == vm->scratch_pt);
  1548. vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
  1549. do {
  1550. vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
  1551. iter.dma += I915_GTT_PAGE_SIZE;
  1552. if (iter.dma == iter.max) {
  1553. iter.sg = __sg_next(iter.sg);
  1554. if (!iter.sg)
  1555. break;
  1556. iter.dma = sg_dma_address(iter.sg);
  1557. iter.max = iter.dma + iter.sg->length;
  1558. }
  1559. if (++act_pte == GEN6_PTES) {
  1560. kunmap_atomic(vaddr);
  1561. vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
  1562. act_pte = 0;
  1563. }
  1564. } while (1);
  1565. kunmap_atomic(vaddr);
  1566. vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
  1567. }
  1568. static int gen6_alloc_va_range(struct i915_address_space *vm,
  1569. u64 start, u64 length)
  1570. {
  1571. struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
  1572. struct i915_page_table *pt;
  1573. u64 from = start;
  1574. unsigned int pde;
  1575. bool flush = false;
  1576. gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) {
  1577. const unsigned int count = gen6_pte_count(start, length);
  1578. if (pt == vm->scratch_pt) {
  1579. pt = alloc_pt(vm);
  1580. if (IS_ERR(pt))
  1581. goto unwind_out;
  1582. gen6_initialize_pt(ppgtt, pt);
  1583. ppgtt->base.pd.page_table[pde] = pt;
  1584. if (i915_vma_is_bound(ppgtt->vma,
  1585. I915_VMA_GLOBAL_BIND)) {
  1586. gen6_write_pde(ppgtt, pde, pt);
  1587. flush = true;
  1588. }
  1589. GEM_BUG_ON(pt->used_ptes);
  1590. }
  1591. pt->used_ptes += count;
  1592. }
  1593. if (flush) {
  1594. mark_tlbs_dirty(&ppgtt->base);
  1595. gen6_ggtt_invalidate(ppgtt->base.vm.i915);
  1596. }
  1597. return 0;
  1598. unwind_out:
  1599. gen6_ppgtt_clear_range(vm, from, start - from);
  1600. return -ENOMEM;
  1601. }
  1602. static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
  1603. {
  1604. struct i915_address_space * const vm = &ppgtt->base.vm;
  1605. struct i915_page_table *unused;
  1606. u32 pde;
  1607. int ret;
  1608. ret = setup_scratch_page(vm, __GFP_HIGHMEM);
  1609. if (ret)
  1610. return ret;
  1611. ppgtt->scratch_pte =
  1612. vm->pte_encode(vm->scratch_page.daddr,
  1613. I915_CACHE_NONE, PTE_READ_ONLY);
  1614. vm->scratch_pt = alloc_pt(vm);
  1615. if (IS_ERR(vm->scratch_pt)) {
  1616. cleanup_scratch_page(vm);
  1617. return PTR_ERR(vm->scratch_pt);
  1618. }
  1619. gen6_initialize_pt(ppgtt, vm->scratch_pt);
  1620. gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
  1621. ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
  1622. return 0;
  1623. }
  1624. static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
  1625. {
  1626. free_pt(vm, vm->scratch_pt);
  1627. cleanup_scratch_page(vm);
  1628. }
  1629. static void gen6_ppgtt_free_pd(struct gen6_hw_ppgtt *ppgtt)
  1630. {
  1631. struct i915_page_table *pt;
  1632. u32 pde;
  1633. gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
  1634. if (pt != ppgtt->base.vm.scratch_pt)
  1635. free_pt(&ppgtt->base.vm, pt);
  1636. }
  1637. static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
  1638. {
  1639. struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
  1640. i915_vma_destroy(ppgtt->vma);
  1641. gen6_ppgtt_free_pd(ppgtt);
  1642. gen6_ppgtt_free_scratch(vm);
  1643. }
  1644. static int pd_vma_set_pages(struct i915_vma *vma)
  1645. {
  1646. vma->pages = ERR_PTR(-ENODEV);
  1647. return 0;
  1648. }
  1649. static void pd_vma_clear_pages(struct i915_vma *vma)
  1650. {
  1651. GEM_BUG_ON(!vma->pages);
  1652. vma->pages = NULL;
  1653. }
  1654. static int pd_vma_bind(struct i915_vma *vma,
  1655. enum i915_cache_level cache_level,
  1656. u32 unused)
  1657. {
  1658. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
  1659. struct gen6_hw_ppgtt *ppgtt = vma->private;
  1660. u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
  1661. struct i915_page_table *pt;
  1662. unsigned int pde;
  1663. ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
  1664. ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
  1665. gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
  1666. gen6_write_pde(ppgtt, pde, pt);
  1667. mark_tlbs_dirty(&ppgtt->base);
  1668. gen6_ggtt_invalidate(ppgtt->base.vm.i915);
  1669. return 0;
  1670. }
  1671. static void pd_vma_unbind(struct i915_vma *vma)
  1672. {
  1673. struct gen6_hw_ppgtt *ppgtt = vma->private;
  1674. struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
  1675. struct i915_page_table *pt;
  1676. unsigned int pde;
  1677. if (!ppgtt->scan_for_unused_pt)
  1678. return;
  1679. /* Free all no longer used page tables */
  1680. gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) {
  1681. if (pt->used_ptes || pt == scratch_pt)
  1682. continue;
  1683. free_pt(&ppgtt->base.vm, pt);
  1684. ppgtt->base.pd.page_table[pde] = scratch_pt;
  1685. }
  1686. ppgtt->scan_for_unused_pt = false;
  1687. }
  1688. static const struct i915_vma_ops pd_vma_ops = {
  1689. .set_pages = pd_vma_set_pages,
  1690. .clear_pages = pd_vma_clear_pages,
  1691. .bind_vma = pd_vma_bind,
  1692. .unbind_vma = pd_vma_unbind,
  1693. };
  1694. static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
  1695. {
  1696. struct drm_i915_private *i915 = ppgtt->base.vm.i915;
  1697. struct i915_ggtt *ggtt = &i915->ggtt;
  1698. struct i915_vma *vma;
  1699. GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
  1700. GEM_BUG_ON(size > ggtt->vm.total);
  1701. vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
  1702. if (!vma)
  1703. return ERR_PTR(-ENOMEM);
  1704. init_request_active(&vma->last_fence, NULL);
  1705. vma->vm = &ggtt->vm;
  1706. vma->ops = &pd_vma_ops;
  1707. vma->private = ppgtt;
  1708. vma->active = RB_ROOT;
  1709. vma->size = size;
  1710. vma->fence_size = size;
  1711. vma->flags = I915_VMA_GGTT;
  1712. vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
  1713. INIT_LIST_HEAD(&vma->obj_link);
  1714. list_add(&vma->vm_link, &vma->vm->unbound_list);
  1715. return vma;
  1716. }
  1717. int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
  1718. {
  1719. struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
  1720. /*
  1721. * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
  1722. * which will be pinned into every active context.
  1723. * (When vma->pin_count becomes atomic, I expect we will naturally
  1724. * need a larger, unpacked, type and kill this redundancy.)
  1725. */
  1726. if (ppgtt->pin_count++)
  1727. return 0;
  1728. /*
  1729. * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
  1730. * allocator works in address space sizes, so it's multiplied by page
  1731. * size. We allocate at the top of the GTT to avoid fragmentation.
  1732. */
  1733. return i915_vma_pin(ppgtt->vma,
  1734. 0, GEN6_PD_ALIGN,
  1735. PIN_GLOBAL | PIN_HIGH);
  1736. }
  1737. void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
  1738. {
  1739. struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
  1740. GEM_BUG_ON(!ppgtt->pin_count);
  1741. if (--ppgtt->pin_count)
  1742. return;
  1743. i915_vma_unpin(ppgtt->vma);
  1744. }
  1745. static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
  1746. {
  1747. struct i915_ggtt * const ggtt = &i915->ggtt;
  1748. struct gen6_hw_ppgtt *ppgtt;
  1749. int err;
  1750. ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
  1751. if (!ppgtt)
  1752. return ERR_PTR(-ENOMEM);
  1753. kref_init(&ppgtt->base.ref);
  1754. ppgtt->base.vm.i915 = i915;
  1755. ppgtt->base.vm.dma = &i915->drm.pdev->dev;
  1756. ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE;
  1757. i915_address_space_init(&ppgtt->base.vm, i915);
  1758. ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
  1759. ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
  1760. ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
  1761. ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
  1762. ppgtt->base.debug_dump = gen6_dump_ppgtt;
  1763. ppgtt->base.vm.vma_ops.bind_vma = ppgtt_bind_vma;
  1764. ppgtt->base.vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
  1765. ppgtt->base.vm.vma_ops.set_pages = ppgtt_set_pages;
  1766. ppgtt->base.vm.vma_ops.clear_pages = clear_pages;
  1767. ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
  1768. err = gen6_ppgtt_init_scratch(ppgtt);
  1769. if (err)
  1770. goto err_free;
  1771. ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
  1772. if (IS_ERR(ppgtt->vma)) {
  1773. err = PTR_ERR(ppgtt->vma);
  1774. goto err_scratch;
  1775. }
  1776. return &ppgtt->base;
  1777. err_scratch:
  1778. gen6_ppgtt_free_scratch(&ppgtt->base.vm);
  1779. err_free:
  1780. kfree(ppgtt);
  1781. return ERR_PTR(err);
  1782. }
  1783. static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
  1784. {
  1785. /* This function is for gtt related workarounds. This function is
  1786. * called on driver load and after a GPU reset, so you can place
  1787. * workarounds here even if they get overwritten by GPU reset.
  1788. */
  1789. /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
  1790. if (IS_BROADWELL(dev_priv))
  1791. I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
  1792. else if (IS_CHERRYVIEW(dev_priv))
  1793. I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
  1794. else if (IS_GEN9_LP(dev_priv))
  1795. I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
  1796. else if (INTEL_GEN(dev_priv) >= 9)
  1797. I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
  1798. /*
  1799. * To support 64K PTEs we need to first enable the use of the
  1800. * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
  1801. * mmio, otherwise the page-walker will simply ignore the IPS bit. This
  1802. * shouldn't be needed after GEN10.
  1803. *
  1804. * 64K pages were first introduced from BDW+, although technically they
  1805. * only *work* from gen9+. For pre-BDW we instead have the option for
  1806. * 32K pages, but we don't currently have any support for it in our
  1807. * driver.
  1808. */
  1809. if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
  1810. INTEL_GEN(dev_priv) <= 10)
  1811. I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
  1812. I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
  1813. GAMW_ECO_ENABLE_64K_IPS_FIELD);
  1814. }
  /*
   * Apply the GTT workarounds and, where the driver has to do it by hand,
   * enable PPGTT in the hardware. Always returns 0.
   */
  int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
  {
  	gtt_write_workarounds(dev_priv);

  	/*
  	 * In the case of execlists, PPGTT is enabled by the context descriptor
  	 * and the PDPs are contained within the context itself. We don't
  	 * need to do anything here. The same holds when PPGTT is not used at
  	 * all.
  	 */
  	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) || !USES_PPGTT(dev_priv))
  		return 0;

  	if (IS_GEN6(dev_priv)) {
  		gen6_ppgtt_enable(dev_priv);
  	} else if (IS_GEN7(dev_priv)) {
  		gen7_ppgtt_enable(dev_priv);
  	} else if (INTEL_GEN(dev_priv) >= 8) {
  		gen8_ppgtt_enable(dev_priv);
  	} else {
  		MISSING_CASE(INTEL_GEN(dev_priv));
  	}

  	return 0;
  }
  /*
   * Create a ppgtt with the implementation matching the device generation:
   * the gen8 one for gen8 and later, the gen6 one for anything older.
   */
  static struct i915_hw_ppgtt *
  __hw_ppgtt_create(struct drm_i915_private *i915)
  {
  	if (INTEL_GEN(i915) >= 8)
  		return gen8_ppgtt_create(i915);

  	return gen6_ppgtt_create(i915);
  }
  1843. struct i915_hw_ppgtt *
  1844. i915_ppgtt_create(struct drm_i915_private *i915,
  1845. struct drm_i915_file_private *fpriv)
  1846. {
  1847. struct i915_hw_ppgtt *ppgtt;
  1848. ppgtt = __hw_ppgtt_create(i915);
  1849. if (IS_ERR(ppgtt))
  1850. return ppgtt;
  1851. ppgtt->vm.file = fpriv;
  1852. trace_i915_ppgtt_create(&ppgtt->vm);
  1853. return ppgtt;
  1854. }
  1855. void i915_ppgtt_close(struct i915_address_space *vm)
  1856. {
  1857. GEM_BUG_ON(vm->closed);
  1858. vm->closed = true;
  1859. }
  1860. static void ppgtt_destroy_vma(struct i915_address_space *vm)
  1861. {
  1862. struct list_head *phases[] = {
  1863. &vm->active_list,
  1864. &vm->inactive_list,
  1865. &vm->unbound_list,
  1866. NULL,
  1867. }, **phase;
  1868. vm->closed = true;
  1869. for (phase = phases; *phase; phase++) {
  1870. struct i915_vma *vma, *vn;
  1871. list_for_each_entry_safe(vma, vn, *phase, vm_link)
  1872. i915_vma_destroy(vma);
  1873. }
  1874. }
  1875. void i915_ppgtt_release(struct kref *kref)
  1876. {
  1877. struct i915_hw_ppgtt *ppgtt =
  1878. container_of(kref, struct i915_hw_ppgtt, ref);
  1879. trace_i915_ppgtt_release(&ppgtt->vm);
  1880. ppgtt_destroy_vma(&ppgtt->vm);
  1881. GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list));
  1882. GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list));
  1883. GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
  1884. ppgtt->vm.cleanup(&ppgtt->vm);
  1885. i915_address_space_fini(&ppgtt->vm);
  1886. kfree(ppgtt);
  1887. }
  1888. /* Certain Gen5 chipsets require require idling the GPU before
  1889. * unmapping anything from the GTT when VT-d is enabled.
  1890. */
  1891. static bool needs_idle_maps(struct drm_i915_private *dev_priv)
  1892. {
  1893. /* Query intel_iommu to see if we need the workaround. Presumably that
  1894. * was loaded first.
  1895. */
  1896. return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
  1897. }
  1898. static void gen6_check_faults(struct drm_i915_private *dev_priv)
  1899. {
  1900. struct intel_engine_cs *engine;
  1901. enum intel_engine_id id;
  1902. u32 fault;
  1903. for_each_engine(engine, dev_priv, id) {
  1904. fault = I915_READ(RING_FAULT_REG(engine));
  1905. if (fault & RING_FAULT_VALID) {
  1906. DRM_DEBUG_DRIVER("Unexpected fault\n"
  1907. "\tAddr: 0x%08lx\n"
  1908. "\tAddress space: %s\n"
  1909. "\tSource ID: %d\n"
  1910. "\tType: %d\n",
  1911. fault & PAGE_MASK,
  1912. fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
  1913. RING_FAULT_SRCID(fault),
  1914. RING_FAULT_FAULT_TYPE(fault));
  1915. }
  1916. }
  1917. }
  1918. static void gen8_check_faults(struct drm_i915_private *dev_priv)
  1919. {
  1920. u32 fault = I915_READ(GEN8_RING_FAULT_REG);
  1921. if (fault & RING_FAULT_VALID) {
  1922. u32 fault_data0, fault_data1;
  1923. u64 fault_addr;
  1924. fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
  1925. fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
  1926. fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
  1927. ((u64)fault_data0 << 12);
  1928. DRM_DEBUG_DRIVER("Unexpected fault\n"
  1929. "\tAddr: 0x%08x_%08x\n"
  1930. "\tAddress space: %s\n"
  1931. "\tEngine ID: %d\n"
  1932. "\tSource ID: %d\n"
  1933. "\tType: %d\n",
  1934. upper_32_bits(fault_addr),
  1935. lower_32_bits(fault_addr),
  1936. fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
  1937. GEN8_RING_FAULT_ENGINE_ID(fault),
  1938. RING_FAULT_SRCID(fault),
  1939. RING_FAULT_FAULT_TYPE(fault));
  1940. }
  1941. }
  /*
   * Report any pending GPU faults and then clear the error registers.
   * Nothing at all is done before gen6.
   */
  void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
  {
  	if (INTEL_GEN(dev_priv) < 6)
  		return;

  	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
  	if (INTEL_GEN(dev_priv) >= 8)
  		gen8_check_faults(dev_priv);
  	else
  		gen6_check_faults(dev_priv);

  	i915_clear_error_registers(dev_priv);
  }
  1953. void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
  1954. {
  1955. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  1956. /* Don't bother messing with faults pre GEN6 as we have little
  1957. * documentation supporting that it's a good idea.
  1958. */
  1959. if (INTEL_GEN(dev_priv) < 6)
  1960. return;
  1961. i915_check_and_clear_faults(dev_priv);
  1962. ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
  1963. i915_ggtt_invalidate(dev_priv);
  1964. }
  1965. int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
  1966. struct sg_table *pages)
  1967. {
  1968. do {
  1969. if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
  1970. pages->sgl, pages->nents,
  1971. PCI_DMA_BIDIRECTIONAL,
  1972. DMA_ATTR_NO_WARN))
  1973. return 0;
  1974. /* If the DMA remap fails, one cause can be that we have
  1975. * too many objects pinned in a small remapping table,
  1976. * such as swiotlb. Incrementally purge all other objects and
  1977. * try again - if there are no more pages to remove from
  1978. * the DMA remapper, i915_gem_shrink will return 0.
  1979. */
  1980. GEM_BUG_ON(obj->mm.pages == pages);
  1981. } while (i915_gem_shrink(to_i915(obj->base.dev),
  1982. obj->base.size >> PAGE_SHIFT, NULL,
  1983. I915_SHRINK_BOUND |
  1984. I915_SHRINK_UNBOUND |
  1985. I915_SHRINK_ACTIVE));
  1986. return -ENOSPC;
  1987. }
  1988. static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
  1989. {
  1990. writeq(pte, addr);
  1991. }
  1992. static void gen8_ggtt_insert_page(struct i915_address_space *vm,
  1993. dma_addr_t addr,
  1994. u64 offset,
  1995. enum i915_cache_level level,
  1996. u32 unused)
  1997. {
  1998. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  1999. gen8_pte_t __iomem *pte =
  2000. (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
  2001. gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
  2002. ggtt->invalidate(vm->i915);
  2003. }
  2004. static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
  2005. struct i915_vma *vma,
  2006. enum i915_cache_level level,
  2007. u32 flags)
  2008. {
  2009. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  2010. struct sgt_iter sgt_iter;
  2011. gen8_pte_t __iomem *gtt_entries;
  2012. const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
  2013. dma_addr_t addr;
  2014. /*
  2015. * Note that we ignore PTE_READ_ONLY here. The caller must be careful
  2016. * not to allow the user to override access to a read only page.
  2017. */
  2018. gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
  2019. gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
  2020. for_each_sgt_dma(addr, sgt_iter, vma->pages)
  2021. gen8_set_pte(gtt_entries++, pte_encode | addr);
  2022. /*
  2023. * We want to flush the TLBs only after we're certain all the PTE
  2024. * updates have finished.
  2025. */
  2026. ggtt->invalidate(vm->i915);
  2027. }
  2028. static void gen6_ggtt_insert_page(struct i915_address_space *vm,
  2029. dma_addr_t addr,
  2030. u64 offset,
  2031. enum i915_cache_level level,
  2032. u32 flags)
  2033. {
  2034. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  2035. gen6_pte_t __iomem *pte =
  2036. (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
  2037. iowrite32(vm->pte_encode(addr, level, flags), pte);
  2038. ggtt->invalidate(vm->i915);
  2039. }
  2040. /*
  2041. * Binds an object into the global gtt with the specified cache level. The object
  2042. * will be accessible to the GPU via commands whose operands reference offsets
  2043. * within the global GTT as well as accessible by the GPU through the GMADR
  2044. * mapped BAR (dev_priv->mm.gtt->gtt).
  2045. */
  2046. static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
  2047. struct i915_vma *vma,
  2048. enum i915_cache_level level,
  2049. u32 flags)
  2050. {
  2051. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  2052. gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
  2053. unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
  2054. struct sgt_iter iter;
  2055. dma_addr_t addr;
  2056. for_each_sgt_dma(addr, iter, vma->pages)
  2057. iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
  2058. /*
  2059. * We want to flush the TLBs only after we're certain all the PTE
  2060. * updates have finished.
  2061. */
  2062. ggtt->invalidate(vm->i915);
  2063. }
  2064. static void nop_clear_range(struct i915_address_space *vm,
  2065. u64 start, u64 length)
  2066. {
  2067. }
  2068. static void gen8_ggtt_clear_range(struct i915_address_space *vm,
  2069. u64 start, u64 length)
  2070. {
  2071. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  2072. unsigned first_entry = start / I915_GTT_PAGE_SIZE;
  2073. unsigned num_entries = length / I915_GTT_PAGE_SIZE;
  2074. const gen8_pte_t scratch_pte =
  2075. gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
  2076. gen8_pte_t __iomem *gtt_base =
  2077. (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
  2078. const int max_entries = ggtt_total_entries(ggtt) - first_entry;
  2079. int i;
  2080. if (WARN(num_entries > max_entries,
  2081. "First entry = %d; Num entries = %d (max=%d)\n",
  2082. first_entry, num_entries, max_entries))
  2083. num_entries = max_entries;
  2084. for (i = 0; i < num_entries; i++)
  2085. gen8_set_pte(&gtt_base[i], scratch_pte);
  2086. }
  2087. static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
  2088. {
  2089. struct drm_i915_private *dev_priv = vm->i915;
  2090. /*
  2091. * Make sure the internal GAM fifo has been cleared of all GTT
  2092. * writes before exiting stop_machine(). This guarantees that
  2093. * any aperture accesses waiting to start in another process
  2094. * cannot back up behind the GTT writes causing a hang.
  2095. * The register can be any arbitrary GAM register.
  2096. */
  2097. POSTING_READ(GFX_FLSH_CNTL_GEN6);
  2098. }
  2099. struct insert_page {
  2100. struct i915_address_space *vm;
  2101. dma_addr_t addr;
  2102. u64 offset;
  2103. enum i915_cache_level level;
  2104. };
  2105. static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
  2106. {
  2107. struct insert_page *arg = _arg;
  2108. gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
  2109. bxt_vtd_ggtt_wa(arg->vm);
  2110. return 0;
  2111. }
  2112. static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
  2113. dma_addr_t addr,
  2114. u64 offset,
  2115. enum i915_cache_level level,
  2116. u32 unused)
  2117. {
  2118. struct insert_page arg = { vm, addr, offset, level };
  2119. stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
  2120. }
  2121. struct insert_entries {
  2122. struct i915_address_space *vm;
  2123. struct i915_vma *vma;
  2124. enum i915_cache_level level;
  2125. u32 flags;
  2126. };
  2127. static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
  2128. {
  2129. struct insert_entries *arg = _arg;
  2130. gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
  2131. bxt_vtd_ggtt_wa(arg->vm);
  2132. return 0;
  2133. }
  2134. static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
  2135. struct i915_vma *vma,
  2136. enum i915_cache_level level,
  2137. u32 flags)
  2138. {
  2139. struct insert_entries arg = { vm, vma, level, flags };
  2140. stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
  2141. }
  2142. struct clear_range {
  2143. struct i915_address_space *vm;
  2144. u64 start;
  2145. u64 length;
  2146. };
  2147. static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
  2148. {
  2149. struct clear_range *arg = _arg;
  2150. gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
  2151. bxt_vtd_ggtt_wa(arg->vm);
  2152. return 0;
  2153. }
  2154. static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
  2155. u64 start,
  2156. u64 length)
  2157. {
  2158. struct clear_range arg = { vm, start, length };
  2159. stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
  2160. }
  2161. static void gen6_ggtt_clear_range(struct i915_address_space *vm,
  2162. u64 start, u64 length)
  2163. {
  2164. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  2165. unsigned first_entry = start / I915_GTT_PAGE_SIZE;
  2166. unsigned num_entries = length / I915_GTT_PAGE_SIZE;
  2167. gen6_pte_t scratch_pte, __iomem *gtt_base =
  2168. (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
  2169. const int max_entries = ggtt_total_entries(ggtt) - first_entry;
  2170. int i;
  2171. if (WARN(num_entries > max_entries,
  2172. "First entry = %d; Num entries = %d (max=%d)\n",
  2173. first_entry, num_entries, max_entries))
  2174. num_entries = max_entries;
  2175. scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
  2176. I915_CACHE_LLC, 0);
  2177. for (i = 0; i < num_entries; i++)
  2178. iowrite32(scratch_pte, &gtt_base[i]);
  2179. }
  2180. static void i915_ggtt_insert_page(struct i915_address_space *vm,
  2181. dma_addr_t addr,
  2182. u64 offset,
  2183. enum i915_cache_level cache_level,
  2184. u32 unused)
  2185. {
  2186. unsigned int flags = (cache_level == I915_CACHE_NONE) ?
  2187. AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
  2188. intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
  2189. }
  2190. static void i915_ggtt_insert_entries(struct i915_address_space *vm,
  2191. struct i915_vma *vma,
  2192. enum i915_cache_level cache_level,
  2193. u32 unused)
  2194. {
  2195. unsigned int flags = (cache_level == I915_CACHE_NONE) ?
  2196. AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
  2197. intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
  2198. flags);
  2199. }
  2200. static void i915_ggtt_clear_range(struct i915_address_space *vm,
  2201. u64 start, u64 length)
  2202. {
  2203. intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
  2204. }
  2205. static int ggtt_bind_vma(struct i915_vma *vma,
  2206. enum i915_cache_level cache_level,
  2207. u32 flags)
  2208. {
  2209. struct drm_i915_private *i915 = vma->vm->i915;
  2210. struct drm_i915_gem_object *obj = vma->obj;
  2211. u32 pte_flags;
  2212. /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
  2213. pte_flags = 0;
  2214. if (i915_gem_object_is_readonly(obj))
  2215. pte_flags |= PTE_READ_ONLY;
  2216. intel_runtime_pm_get(i915);
  2217. vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
  2218. intel_runtime_pm_put(i915);
  2219. vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
  2220. /*
  2221. * Without aliasing PPGTT there's no difference between
  2222. * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
  2223. * upgrade to both bound if we bind either to avoid double-binding.
  2224. */
  2225. vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
  2226. return 0;
  2227. }
  2228. static void ggtt_unbind_vma(struct i915_vma *vma)
  2229. {
  2230. struct drm_i915_private *i915 = vma->vm->i915;
  2231. intel_runtime_pm_get(i915);
  2232. vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
  2233. intel_runtime_pm_put(i915);
  2234. }
  2235. static int aliasing_gtt_bind_vma(struct i915_vma *vma,
  2236. enum i915_cache_level cache_level,
  2237. u32 flags)
  2238. {
  2239. struct drm_i915_private *i915 = vma->vm->i915;
  2240. u32 pte_flags;
  2241. int ret;
  2242. /* Currently applicable only to VLV */
  2243. pte_flags = 0;
  2244. if (i915_gem_object_is_readonly(vma->obj))
  2245. pte_flags |= PTE_READ_ONLY;
  2246. if (flags & I915_VMA_LOCAL_BIND) {
  2247. struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
  2248. if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
  2249. ret = appgtt->vm.allocate_va_range(&appgtt->vm,
  2250. vma->node.start,
  2251. vma->size);
  2252. if (ret)
  2253. return ret;
  2254. }
  2255. appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level,
  2256. pte_flags);
  2257. }
  2258. if (flags & I915_VMA_GLOBAL_BIND) {
  2259. intel_runtime_pm_get(i915);
  2260. vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
  2261. intel_runtime_pm_put(i915);
  2262. }
  2263. return 0;
  2264. }
  2265. static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
  2266. {
  2267. struct drm_i915_private *i915 = vma->vm->i915;
  2268. if (vma->flags & I915_VMA_GLOBAL_BIND) {
  2269. intel_runtime_pm_get(i915);
  2270. vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
  2271. intel_runtime_pm_put(i915);
  2272. }
  2273. if (vma->flags & I915_VMA_LOCAL_BIND) {
  2274. struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm;
  2275. vm->clear_range(vm, vma->node.start, vma->size);
  2276. }
  2277. }
  2278. void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
  2279. struct sg_table *pages)
  2280. {
  2281. struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
  2282. struct device *kdev = &dev_priv->drm.pdev->dev;
  2283. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2284. if (unlikely(ggtt->do_idle_maps)) {
  2285. if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
  2286. DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
  2287. /* Wait a bit, in hopes it avoids the hang */
  2288. udelay(10);
  2289. }
  2290. }
  2291. dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
  2292. }
  2293. static int ggtt_set_pages(struct i915_vma *vma)
  2294. {
  2295. int ret;
  2296. GEM_BUG_ON(vma->pages);
  2297. ret = i915_get_ggtt_vma_pages(vma);
  2298. if (ret)
  2299. return ret;
  2300. vma->page_sizes = vma->obj->mm.page_sizes;
  2301. return 0;
  2302. }
  2303. static void i915_gtt_color_adjust(const struct drm_mm_node *node,
  2304. unsigned long color,
  2305. u64 *start,
  2306. u64 *end)
  2307. {
  2308. if (node->allocated && node->color != color)
  2309. *start += I915_GTT_PAGE_SIZE;
  2310. /* Also leave a space between the unallocated reserved node after the
  2311. * GTT and any objects within the GTT, i.e. we use the color adjustment
  2312. * to insert a guard page to prevent prefetches crossing over the
  2313. * GTT boundary.
  2314. */
  2315. node = list_next_entry(node, node_list);
  2316. if (node->color != color)
  2317. *end -= I915_GTT_PAGE_SIZE;
  2318. }
  2319. int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
  2320. {
  2321. struct i915_ggtt *ggtt = &i915->ggtt;
  2322. struct i915_hw_ppgtt *ppgtt;
  2323. int err;
  2324. ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM));
  2325. if (IS_ERR(ppgtt))
  2326. return PTR_ERR(ppgtt);
  2327. if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
  2328. err = -ENODEV;
  2329. goto err_ppgtt;
  2330. }
  2331. /*
  2332. * Note we only pre-allocate as far as the end of the global
  2333. * GTT. On 48b / 4-level page-tables, the difference is very,
  2334. * very significant! We have to preallocate as GVT/vgpu does
  2335. * not like the page directory disappearing.
  2336. */
  2337. err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
  2338. if (err)
  2339. goto err_ppgtt;
  2340. i915->mm.aliasing_ppgtt = ppgtt;
  2341. GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
  2342. ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
  2343. GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
  2344. ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
  2345. return 0;
  2346. err_ppgtt:
  2347. i915_ppgtt_put(ppgtt);
  2348. return err;
  2349. }
  2350. void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
  2351. {
  2352. struct i915_ggtt *ggtt = &i915->ggtt;
  2353. struct i915_hw_ppgtt *ppgtt;
  2354. ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
  2355. if (!ppgtt)
  2356. return;
  2357. i915_ppgtt_put(ppgtt);
  2358. ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
  2359. ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
  2360. }
  2361. int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
  2362. {
  2363. /* Let GEM Manage all of the aperture.
  2364. *
  2365. * However, leave one page at the end still bound to the scratch page.
  2366. * There are a number of places where the hardware apparently prefetches
  2367. * past the end of the object, and we've seen multiple hangs with the
  2368. * GPU head pointer stuck in a batchbuffer bound at the last page of the
  2369. * aperture. One page should be enough to keep any prefetching inside
  2370. * of the aperture.
  2371. */
  2372. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2373. unsigned long hole_start, hole_end;
  2374. struct drm_mm_node *entry;
  2375. int ret;
  2376. /*
  2377. * GuC requires all resources that we're sharing with it to be placed in
  2378. * non-WOPCM memory. If GuC is not present or not in use we still need a
  2379. * small bias as ring wraparound at offset 0 sometimes hangs. No idea
  2380. * why.
  2381. */
  2382. ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
  2383. intel_guc_reserved_gtt_size(&dev_priv->guc));
  2384. ret = intel_vgt_balloon(dev_priv);
  2385. if (ret)
  2386. return ret;
  2387. /* Reserve a mappable slot for our lockless error capture */
  2388. ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
  2389. PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
  2390. 0, ggtt->mappable_end,
  2391. DRM_MM_INSERT_LOW);
  2392. if (ret)
  2393. return ret;
  2394. /* Clear any non-preallocated blocks */
  2395. drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
  2396. DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
  2397. hole_start, hole_end);
  2398. ggtt->vm.clear_range(&ggtt->vm, hole_start,
  2399. hole_end - hole_start);
  2400. }
  2401. /* And finally clear the reserved guard page */
  2402. ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
  2403. if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
  2404. ret = i915_gem_init_aliasing_ppgtt(dev_priv);
  2405. if (ret)
  2406. goto err;
  2407. }
  2408. return 0;
  2409. err:
  2410. drm_mm_remove_node(&ggtt->error_capture);
  2411. return ret;
  2412. }
  2413. /**
  2414. * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
  2415. * @dev_priv: i915 device
  2416. */
  2417. void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
  2418. {
  2419. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2420. struct i915_vma *vma, *vn;
  2421. struct pagevec *pvec;
  2422. ggtt->vm.closed = true;
  2423. mutex_lock(&dev_priv->drm.struct_mutex);
  2424. i915_gem_fini_aliasing_ppgtt(dev_priv);
  2425. GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
  2426. list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link)
  2427. WARN_ON(i915_vma_unbind(vma));
  2428. if (drm_mm_node_allocated(&ggtt->error_capture))
  2429. drm_mm_remove_node(&ggtt->error_capture);
  2430. if (drm_mm_initialized(&ggtt->vm.mm)) {
  2431. intel_vgt_deballoon(dev_priv);
  2432. i915_address_space_fini(&ggtt->vm);
  2433. }
  2434. ggtt->vm.cleanup(&ggtt->vm);
  2435. pvec = &dev_priv->mm.wc_stash.pvec;
  2436. if (pvec->nr) {
  2437. set_pages_array_wb(pvec->pages, pvec->nr);
  2438. __pagevec_release(pvec);
  2439. }
  2440. mutex_unlock(&dev_priv->drm.struct_mutex);
  2441. arch_phys_wc_del(ggtt->mtrr);
  2442. io_mapping_fini(&ggtt->iomap);
  2443. i915_gem_cleanup_stolen(dev_priv);
  2444. }
  2445. static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
  2446. {
  2447. snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
  2448. snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
  2449. return snb_gmch_ctl << 20;
  2450. }
  2451. static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
  2452. {
  2453. bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
  2454. bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
  2455. if (bdw_gmch_ctl)
  2456. bdw_gmch_ctl = 1 << bdw_gmch_ctl;
  2457. #ifdef CONFIG_X86_32
  2458. /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
  2459. if (bdw_gmch_ctl > 4)
  2460. bdw_gmch_ctl = 4;
  2461. #endif
  2462. return bdw_gmch_ctl << 20;
  2463. }
  2464. static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
  2465. {
  2466. gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
  2467. gmch_ctrl &= SNB_GMCH_GGMS_MASK;
  2468. if (gmch_ctrl)
  2469. return 1 << (20 + gmch_ctrl);
  2470. return 0;
  2471. }
  2472. static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
  2473. {
  2474. struct drm_i915_private *dev_priv = ggtt->vm.i915;
  2475. struct pci_dev *pdev = dev_priv->drm.pdev;
  2476. phys_addr_t phys_addr;
  2477. int ret;
  2478. /* For Modern GENs the PTEs and register space are split in the BAR */
  2479. phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
  2480. /*
  2481. * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
  2482. * will be dropped. For WC mappings in general we have 64 byte burst
  2483. * writes when the WC buffer is flushed, so we can't use it, but have to
  2484. * resort to an uncached mapping. The WC issue is easily caught by the
  2485. * readback check when writing GTT PTE entries.
  2486. */
  2487. if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
  2488. ggtt->gsm = ioremap_nocache(phys_addr, size);
  2489. else
  2490. ggtt->gsm = ioremap_wc(phys_addr, size);
  2491. if (!ggtt->gsm) {
  2492. DRM_ERROR("Failed to map the ggtt page table\n");
  2493. return -ENOMEM;
  2494. }
  2495. ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
  2496. if (ret) {
  2497. DRM_ERROR("Scratch setup failed\n");
  2498. /* iounmap will also get called at remove, but meh */
  2499. iounmap(ggtt->gsm);
  2500. return ret;
  2501. }
  2502. return 0;
  2503. }
  2504. static struct intel_ppat_entry *
  2505. __alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
  2506. {
  2507. struct intel_ppat_entry *entry = &ppat->entries[index];
  2508. GEM_BUG_ON(index >= ppat->max_entries);
  2509. GEM_BUG_ON(test_bit(index, ppat->used));
  2510. entry->ppat = ppat;
  2511. entry->value = value;
  2512. kref_init(&entry->ref);
  2513. set_bit(index, ppat->used);
  2514. set_bit(index, ppat->dirty);
  2515. return entry;
  2516. }
  2517. static void __free_ppat_entry(struct intel_ppat_entry *entry)
  2518. {
  2519. struct intel_ppat *ppat = entry->ppat;
  2520. unsigned int index = entry - ppat->entries;
  2521. GEM_BUG_ON(index >= ppat->max_entries);
  2522. GEM_BUG_ON(!test_bit(index, ppat->used));
  2523. entry->value = ppat->clear_value;
  2524. clear_bit(index, ppat->used);
  2525. set_bit(index, ppat->dirty);
  2526. }
  2527. /**
  2528. * intel_ppat_get - get a usable PPAT entry
  2529. * @i915: i915 device instance
  2530. * @value: the PPAT value required by the caller
  2531. *
  2532. * The function tries to search if there is an existing PPAT entry which
  2533. * matches with the required value. If perfectly matched, the existing PPAT
  2534. * entry will be used. If only partially matched, it will try to check if
  2535. * there is any available PPAT index. If yes, it will allocate a new PPAT
  2536. * index for the required entry and update the HW. If not, the partially
  2537. * matched entry will be used.
  2538. */
  2539. const struct intel_ppat_entry *
  2540. intel_ppat_get(struct drm_i915_private *i915, u8 value)
  2541. {
  2542. struct intel_ppat *ppat = &i915->ppat;
  2543. struct intel_ppat_entry *entry = NULL;
  2544. unsigned int scanned, best_score;
  2545. int i;
  2546. GEM_BUG_ON(!ppat->max_entries);
  2547. scanned = best_score = 0;
  2548. for_each_set_bit(i, ppat->used, ppat->max_entries) {
  2549. unsigned int score;
  2550. score = ppat->match(ppat->entries[i].value, value);
  2551. if (score > best_score) {
  2552. entry = &ppat->entries[i];
  2553. if (score == INTEL_PPAT_PERFECT_MATCH) {
  2554. kref_get(&entry->ref);
  2555. return entry;
  2556. }
  2557. best_score = score;
  2558. }
  2559. scanned++;
  2560. }
  2561. if (scanned == ppat->max_entries) {
  2562. if (!entry)
  2563. return ERR_PTR(-ENOSPC);
  2564. kref_get(&entry->ref);
  2565. return entry;
  2566. }
  2567. i = find_first_zero_bit(ppat->used, ppat->max_entries);
  2568. entry = __alloc_ppat_entry(ppat, i, value);
  2569. ppat->update_hw(i915);
  2570. return entry;
  2571. }
  2572. static void release_ppat(struct kref *kref)
  2573. {
  2574. struct intel_ppat_entry *entry =
  2575. container_of(kref, struct intel_ppat_entry, ref);
  2576. struct drm_i915_private *i915 = entry->ppat->i915;
  2577. __free_ppat_entry(entry);
  2578. entry->ppat->update_hw(i915);
  2579. }
  2580. /**
  2581. * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
  2582. * @entry: an intel PPAT entry
  2583. *
  2584. * Put back the PPAT entry got from intel_ppat_get(). If the PPAT index of the
  2585. * entry is dynamically allocated, its reference count will be decreased. Once
  2586. * the reference count becomes into zero, the PPAT index becomes free again.
  2587. */
  2588. void intel_ppat_put(const struct intel_ppat_entry *entry)
  2589. {
  2590. struct intel_ppat *ppat = entry->ppat;
  2591. unsigned int index = entry - ppat->entries;
  2592. GEM_BUG_ON(!ppat->max_entries);
  2593. kref_put(&ppat->entries[index].ref, release_ppat);
  2594. }
  2595. static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
  2596. {
  2597. struct intel_ppat *ppat = &dev_priv->ppat;
  2598. int i;
  2599. for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
  2600. I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
  2601. clear_bit(i, ppat->dirty);
  2602. }
  2603. }
  2604. static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
  2605. {
  2606. struct intel_ppat *ppat = &dev_priv->ppat;
  2607. u64 pat = 0;
  2608. int i;
  2609. for (i = 0; i < ppat->max_entries; i++)
  2610. pat |= GEN8_PPAT(i, ppat->entries[i].value);
  2611. bitmap_clear(ppat->dirty, 0, ppat->max_entries);
  2612. I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
  2613. I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
  2614. }
  2615. static unsigned int bdw_private_pat_match(u8 src, u8 dst)
  2616. {
  2617. unsigned int score = 0;
  2618. enum {
  2619. AGE_MATCH = BIT(0),
  2620. TC_MATCH = BIT(1),
  2621. CA_MATCH = BIT(2),
  2622. };
  2623. /* Cache attribute has to be matched. */
  2624. if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
  2625. return 0;
  2626. score |= CA_MATCH;
  2627. if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
  2628. score |= TC_MATCH;
  2629. if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
  2630. score |= AGE_MATCH;
  2631. if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
  2632. return INTEL_PPAT_PERFECT_MATCH;
  2633. return score;
  2634. }
  2635. static unsigned int chv_private_pat_match(u8 src, u8 dst)
  2636. {
  2637. return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
  2638. INTEL_PPAT_PERFECT_MATCH : 0;
  2639. }
  2640. static void cnl_setup_private_ppat(struct intel_ppat *ppat)
  2641. {
  2642. ppat->max_entries = 8;
  2643. ppat->update_hw = cnl_private_pat_update_hw;
  2644. ppat->match = bdw_private_pat_match;
  2645. ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
  2646. __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
  2647. __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
  2648. __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
  2649. __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
  2650. __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
  2651. __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
  2652. __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
  2653. __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
  2654. }
  2655. /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
  2656. * bits. When using advanced contexts each context stores its own PAT, but
  2657. * writing this data shouldn't be harmful even in those cases. */
  2658. static void bdw_setup_private_ppat(struct intel_ppat *ppat)
  2659. {
  2660. ppat->max_entries = 8;
  2661. ppat->update_hw = bdw_private_pat_update_hw;
  2662. ppat->match = bdw_private_pat_match;
  2663. ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
  2664. if (!USES_PPGTT(ppat->i915)) {
  2665. /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
  2666. * so RTL will always use the value corresponding to
  2667. * pat_sel = 000".
  2668. * So let's disable cache for GGTT to avoid screen corruptions.
  2669. * MOCS still can be used though.
  2670. * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
  2671. * before this patch, i.e. the same uncached + snooping access
  2672. * like on gen6/7 seems to be in effect.
  2673. * - So this just fixes blitter/render access. Again it looks
  2674. * like it's not just uncached access, but uncached + snooping.
  2675. * So we can still hold onto all our assumptions wrt cpu
  2676. * clflushing on LLC machines.
  2677. */
  2678. __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
  2679. return;
  2680. }
  2681. __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */
  2682. __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? */
  2683. __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */
  2684. __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */
  2685. __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
  2686. __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
  2687. __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
  2688. __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
  2689. }
  2690. static void chv_setup_private_ppat(struct intel_ppat *ppat)
  2691. {
  2692. ppat->max_entries = 8;
  2693. ppat->update_hw = bdw_private_pat_update_hw;
  2694. ppat->match = chv_private_pat_match;
  2695. ppat->clear_value = CHV_PPAT_SNOOP;
  2696. /*
  2697. * Map WB on BDW to snooped on CHV.
  2698. *
  2699. * Only the snoop bit has meaning for CHV, the rest is
  2700. * ignored.
  2701. *
  2702. * The hardware will never snoop for certain types of accesses:
  2703. * - CPU GTT (GMADR->GGTT->no snoop->memory)
  2704. * - PPGTT page tables
  2705. * - some other special cycles
  2706. *
  2707. * As with BDW, we also need to consider the following for GT accesses:
  2708. * "For GGTT, there is NO pat_sel[2:0] from the entry,
  2709. * so RTL will always use the value corresponding to
  2710. * pat_sel = 000".
  2711. * Which means we must set the snoop bit in PAT entry 0
  2712. * in order to keep the global status page working.
  2713. */
  2714. __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
  2715. __alloc_ppat_entry(ppat, 1, 0);
  2716. __alloc_ppat_entry(ppat, 2, 0);
  2717. __alloc_ppat_entry(ppat, 3, 0);
  2718. __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
  2719. __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
  2720. __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
  2721. __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
  2722. }
  2723. static void gen6_gmch_remove(struct i915_address_space *vm)
  2724. {
  2725. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  2726. iounmap(ggtt->gsm);
  2727. cleanup_scratch_page(vm);
  2728. }
  2729. static void setup_private_pat(struct drm_i915_private *dev_priv)
  2730. {
  2731. struct intel_ppat *ppat = &dev_priv->ppat;
  2732. int i;
  2733. ppat->i915 = dev_priv;
  2734. if (INTEL_GEN(dev_priv) >= 10)
  2735. cnl_setup_private_ppat(ppat);
  2736. else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
  2737. chv_setup_private_ppat(ppat);
  2738. else
  2739. bdw_setup_private_ppat(ppat);
  2740. GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
  2741. for_each_clear_bit(i, ppat->used, ppat->max_entries) {
  2742. ppat->entries[i].value = ppat->clear_value;
  2743. ppat->entries[i].ppat = ppat;
  2744. set_bit(i, ppat->dirty);
  2745. }
  2746. ppat->update_hw(dev_priv);
  2747. }
  2748. static int gen8_gmch_probe(struct i915_ggtt *ggtt)
  2749. {
  2750. struct drm_i915_private *dev_priv = ggtt->vm.i915;
  2751. struct pci_dev *pdev = dev_priv->drm.pdev;
  2752. unsigned int size;
  2753. u16 snb_gmch_ctl;
  2754. int err;
  2755. /* TODO: We're not aware of mappable constraints on gen8 yet */
  2756. ggtt->gmadr =
  2757. (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
  2758. pci_resource_len(pdev, 2));
  2759. ggtt->mappable_end = resource_size(&ggtt->gmadr);
  2760. err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
  2761. if (!err)
  2762. err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
  2763. if (err)
  2764. DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
  2765. pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
  2766. if (IS_CHERRYVIEW(dev_priv))
  2767. size = chv_get_total_gtt_size(snb_gmch_ctl);
  2768. else
  2769. size = gen8_get_total_gtt_size(snb_gmch_ctl);
  2770. ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
  2771. ggtt->vm.cleanup = gen6_gmch_remove;
  2772. ggtt->vm.insert_page = gen8_ggtt_insert_page;
  2773. ggtt->vm.clear_range = nop_clear_range;
  2774. if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
  2775. ggtt->vm.clear_range = gen8_ggtt_clear_range;
  2776. ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
  2777. /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
  2778. if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
  2779. ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
  2780. ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
  2781. if (ggtt->vm.clear_range != nop_clear_range)
  2782. ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
  2783. /* Prevent recursively calling stop_machine() and deadlocks. */
  2784. dev_info(dev_priv->drm.dev,
  2785. "Disabling error capture for VT-d workaround\n");
  2786. i915_disable_error_state(dev_priv, -ENODEV);
  2787. }
  2788. ggtt->invalidate = gen6_ggtt_invalidate;
  2789. ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
  2790. ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
  2791. ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
  2792. ggtt->vm.vma_ops.clear_pages = clear_pages;
  2793. setup_private_pat(dev_priv);
  2794. return ggtt_probe_common(ggtt, size);
  2795. }
  2796. static int gen6_gmch_probe(struct i915_ggtt *ggtt)
  2797. {
  2798. struct drm_i915_private *dev_priv = ggtt->vm.i915;
  2799. struct pci_dev *pdev = dev_priv->drm.pdev;
  2800. unsigned int size;
  2801. u16 snb_gmch_ctl;
  2802. int err;
  2803. ggtt->gmadr =
  2804. (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
  2805. pci_resource_len(pdev, 2));
  2806. ggtt->mappable_end = resource_size(&ggtt->gmadr);
  2807. /* 64/512MB is the current min/max we actually know of, but this is just
  2808. * a coarse sanity check.
  2809. */
  2810. if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
  2811. DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
  2812. return -ENXIO;
  2813. }
  2814. err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
  2815. if (!err)
  2816. err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
  2817. if (err)
  2818. DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
  2819. pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
  2820. size = gen6_get_total_gtt_size(snb_gmch_ctl);
  2821. ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
  2822. ggtt->vm.clear_range = gen6_ggtt_clear_range;
  2823. ggtt->vm.insert_page = gen6_ggtt_insert_page;
  2824. ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
  2825. ggtt->vm.cleanup = gen6_gmch_remove;
  2826. ggtt->invalidate = gen6_ggtt_invalidate;
  2827. if (HAS_EDRAM(dev_priv))
  2828. ggtt->vm.pte_encode = iris_pte_encode;
  2829. else if (IS_HASWELL(dev_priv))
  2830. ggtt->vm.pte_encode = hsw_pte_encode;
  2831. else if (IS_VALLEYVIEW(dev_priv))
  2832. ggtt->vm.pte_encode = byt_pte_encode;
  2833. else if (INTEL_GEN(dev_priv) >= 7)
  2834. ggtt->vm.pte_encode = ivb_pte_encode;
  2835. else
  2836. ggtt->vm.pte_encode = snb_pte_encode;
  2837. ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
  2838. ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
  2839. ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
  2840. ggtt->vm.vma_ops.clear_pages = clear_pages;
  2841. return ggtt_probe_common(ggtt, size);
  2842. }
  2843. static void i915_gmch_remove(struct i915_address_space *vm)
  2844. {
  2845. intel_gmch_remove();
  2846. }
  2847. static int i915_gmch_probe(struct i915_ggtt *ggtt)
  2848. {
  2849. struct drm_i915_private *dev_priv = ggtt->vm.i915;
  2850. phys_addr_t gmadr_base;
  2851. int ret;
  2852. ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
  2853. if (!ret) {
  2854. DRM_ERROR("failed to set up gmch\n");
  2855. return -EIO;
  2856. }
  2857. intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
  2858. ggtt->gmadr =
  2859. (struct resource) DEFINE_RES_MEM(gmadr_base,
  2860. ggtt->mappable_end);
  2861. ggtt->do_idle_maps = needs_idle_maps(dev_priv);
  2862. ggtt->vm.insert_page = i915_ggtt_insert_page;
  2863. ggtt->vm.insert_entries = i915_ggtt_insert_entries;
  2864. ggtt->vm.clear_range = i915_ggtt_clear_range;
  2865. ggtt->vm.cleanup = i915_gmch_remove;
  2866. ggtt->invalidate = gmch_ggtt_invalidate;
  2867. ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
  2868. ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
  2869. ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
  2870. ggtt->vm.vma_ops.clear_pages = clear_pages;
  2871. if (unlikely(ggtt->do_idle_maps))
  2872. DRM_INFO("applying Ironlake quirks for intel_iommu\n");
  2873. return 0;
  2874. }
  2875. /**
  2876. * i915_ggtt_probe_hw - Probe GGTT hardware location
  2877. * @dev_priv: i915 device
  2878. */
  2879. int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
  2880. {
  2881. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2882. int ret;
  2883. ggtt->vm.i915 = dev_priv;
  2884. ggtt->vm.dma = &dev_priv->drm.pdev->dev;
  2885. if (INTEL_GEN(dev_priv) <= 5)
  2886. ret = i915_gmch_probe(ggtt);
  2887. else if (INTEL_GEN(dev_priv) < 8)
  2888. ret = gen6_gmch_probe(ggtt);
  2889. else
  2890. ret = gen8_gmch_probe(ggtt);
  2891. if (ret)
  2892. return ret;
  2893. /* Trim the GGTT to fit the GuC mappable upper range (when enabled).
  2894. * This is easier than doing range restriction on the fly, as we
  2895. * currently don't have any bits spare to pass in this upper
  2896. * restriction!
  2897. */
  2898. if (USES_GUC(dev_priv)) {
  2899. ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP);
  2900. ggtt->mappable_end =
  2901. min_t(u64, ggtt->mappable_end, ggtt->vm.total);
  2902. }
  2903. if ((ggtt->vm.total - 1) >> 32) {
  2904. DRM_ERROR("We never expected a Global GTT with more than 32bits"
  2905. " of address space! Found %lldM!\n",
  2906. ggtt->vm.total >> 20);
  2907. ggtt->vm.total = 1ULL << 32;
  2908. ggtt->mappable_end =
  2909. min_t(u64, ggtt->mappable_end, ggtt->vm.total);
  2910. }
  2911. if (ggtt->mappable_end > ggtt->vm.total) {
  2912. DRM_ERROR("mappable aperture extends past end of GGTT,"
  2913. " aperture=%pa, total=%llx\n",
  2914. &ggtt->mappable_end, ggtt->vm.total);
  2915. ggtt->mappable_end = ggtt->vm.total;
  2916. }
  2917. /* GMADR is the PCI mmio aperture into the global GTT. */
  2918. DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
  2919. DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
  2920. DRM_DEBUG_DRIVER("DSM size = %lluM\n",
  2921. (u64)resource_size(&intel_graphics_stolen_res) >> 20);
  2922. if (intel_vtd_active())
  2923. DRM_INFO("VT-d active for gfx access\n");
  2924. return 0;
  2925. }
  2926. /**
  2927. * i915_ggtt_init_hw - Initialize GGTT hardware
  2928. * @dev_priv: i915 device
  2929. */
  2930. int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
  2931. {
  2932. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2933. int ret;
  2934. stash_init(&dev_priv->mm.wc_stash);
  2935. /* Note that we use page colouring to enforce a guard page at the
  2936. * end of the address space. This is required as the CS may prefetch
  2937. * beyond the end of the batch buffer, across the page boundary,
  2938. * and beyond the end of the GTT if we do not provide a guard.
  2939. */
  2940. mutex_lock(&dev_priv->drm.struct_mutex);
  2941. i915_address_space_init(&ggtt->vm, dev_priv);
  2942. ggtt->vm.is_ggtt = true;
  2943. /* Only VLV supports read-only GGTT mappings */
  2944. ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv);
  2945. if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
  2946. ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
  2947. mutex_unlock(&dev_priv->drm.struct_mutex);
  2948. if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
  2949. dev_priv->ggtt.gmadr.start,
  2950. dev_priv->ggtt.mappable_end)) {
  2951. ret = -EIO;
  2952. goto out_gtt_cleanup;
  2953. }
  2954. ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
  2955. /*
  2956. * Initialise stolen early so that we may reserve preallocated
  2957. * objects for the BIOS to KMS transition.
  2958. */
  2959. ret = i915_gem_init_stolen(dev_priv);
  2960. if (ret)
  2961. goto out_gtt_cleanup;
  2962. return 0;
  2963. out_gtt_cleanup:
  2964. ggtt->vm.cleanup(&ggtt->vm);
  2965. return ret;
  2966. }
  2967. int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
  2968. {
  2969. if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
  2970. return -EIO;
  2971. return 0;
  2972. }
  2973. void i915_ggtt_enable_guc(struct drm_i915_private *i915)
  2974. {
  2975. GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
  2976. i915->ggtt.invalidate = guc_ggtt_invalidate;
  2977. i915_ggtt_invalidate(i915);
  2978. }
  2979. void i915_ggtt_disable_guc(struct drm_i915_private *i915)
  2980. {
  2981. /* XXX Temporary pardon for error unload */
  2982. if (i915->ggtt.invalidate == gen6_ggtt_invalidate)
  2983. return;
  2984. /* We should only be called after i915_ggtt_enable_guc() */
  2985. GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
  2986. i915->ggtt.invalidate = gen6_ggtt_invalidate;
  2987. i915_ggtt_invalidate(i915);
  2988. }
  2989. void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
  2990. {
  2991. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2992. struct i915_vma *vma, *vn;
  2993. i915_check_and_clear_faults(dev_priv);
  2994. /* First fill our portion of the GTT with scratch pages */
  2995. ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
  2996. ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
  2997. /* clflush objects bound into the GGTT and rebind them. */
  2998. GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
  2999. list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) {
  3000. struct drm_i915_gem_object *obj = vma->obj;
  3001. if (!(vma->flags & I915_VMA_GLOBAL_BIND))
  3002. continue;
  3003. if (!i915_vma_unbind(vma))
  3004. continue;
  3005. WARN_ON(i915_vma_bind(vma,
  3006. obj ? obj->cache_level : 0,
  3007. PIN_UPDATE));
  3008. if (obj)
  3009. WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
  3010. }
  3011. ggtt->vm.closed = false;
  3012. i915_ggtt_invalidate(dev_priv);
  3013. if (INTEL_GEN(dev_priv) >= 8) {
  3014. struct intel_ppat *ppat = &dev_priv->ppat;
  3015. bitmap_set(ppat->dirty, 0, ppat->max_entries);
  3016. dev_priv->ppat.update_hw(dev_priv);
  3017. return;
  3018. }
  3019. }
  3020. static struct scatterlist *
  3021. rotate_pages(const dma_addr_t *in, unsigned int offset,
  3022. unsigned int width, unsigned int height,
  3023. unsigned int stride,
  3024. struct sg_table *st, struct scatterlist *sg)
  3025. {
  3026. unsigned int column, row;
  3027. unsigned int src_idx;
  3028. for (column = 0; column < width; column++) {
  3029. src_idx = stride * (height - 1) + column;
  3030. for (row = 0; row < height; row++) {
  3031. st->nents++;
  3032. /* We don't need the pages, but need to initialize
  3033. * the entries so the sg list can be happily traversed.
  3034. * The only thing we need are DMA addresses.
  3035. */
  3036. sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
  3037. sg_dma_address(sg) = in[offset + src_idx];
  3038. sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
  3039. sg = sg_next(sg);
  3040. src_idx -= stride;
  3041. }
  3042. }
  3043. return sg;
  3044. }
  3045. static noinline struct sg_table *
  3046. intel_rotate_pages(struct intel_rotation_info *rot_info,
  3047. struct drm_i915_gem_object *obj)
  3048. {
  3049. const unsigned long n_pages = obj->base.size / I915_GTT_PAGE_SIZE;
  3050. unsigned int size = intel_rotation_info_size(rot_info);
  3051. struct sgt_iter sgt_iter;
  3052. dma_addr_t dma_addr;
  3053. unsigned long i;
  3054. dma_addr_t *page_addr_list;
  3055. struct sg_table *st;
  3056. struct scatterlist *sg;
  3057. int ret = -ENOMEM;
  3058. /* Allocate a temporary list of source pages for random access. */
  3059. page_addr_list = kvmalloc_array(n_pages,
  3060. sizeof(dma_addr_t),
  3061. GFP_KERNEL);
  3062. if (!page_addr_list)
  3063. return ERR_PTR(ret);
  3064. /* Allocate target SG list. */
  3065. st = kmalloc(sizeof(*st), GFP_KERNEL);
  3066. if (!st)
  3067. goto err_st_alloc;
  3068. ret = sg_alloc_table(st, size, GFP_KERNEL);
  3069. if (ret)
  3070. goto err_sg_alloc;
  3071. /* Populate source page list from the object. */
  3072. i = 0;
  3073. for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
  3074. page_addr_list[i++] = dma_addr;
  3075. GEM_BUG_ON(i != n_pages);
  3076. st->nents = 0;
  3077. sg = st->sgl;
  3078. for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
  3079. sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
  3080. rot_info->plane[i].width, rot_info->plane[i].height,
  3081. rot_info->plane[i].stride, st, sg);
  3082. }
  3083. kvfree(page_addr_list);
  3084. return st;
  3085. err_sg_alloc:
  3086. kfree(st);
  3087. err_st_alloc:
  3088. kvfree(page_addr_list);
  3089. DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
  3090. obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
  3091. return ERR_PTR(ret);
  3092. }
  3093. static noinline struct sg_table *
  3094. intel_partial_pages(const struct i915_ggtt_view *view,
  3095. struct drm_i915_gem_object *obj)
  3096. {
  3097. struct sg_table *st;
  3098. struct scatterlist *sg, *iter;
  3099. unsigned int count = view->partial.size;
  3100. unsigned int offset;
  3101. int ret = -ENOMEM;
  3102. st = kmalloc(sizeof(*st), GFP_KERNEL);
  3103. if (!st)
  3104. goto err_st_alloc;
  3105. ret = sg_alloc_table(st, count, GFP_KERNEL);
  3106. if (ret)
  3107. goto err_sg_alloc;
  3108. iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
  3109. GEM_BUG_ON(!iter);
  3110. sg = st->sgl;
  3111. st->nents = 0;
  3112. do {
  3113. unsigned int len;
  3114. len = min(iter->length - (offset << PAGE_SHIFT),
  3115. count << PAGE_SHIFT);
  3116. sg_set_page(sg, NULL, len, 0);
  3117. sg_dma_address(sg) =
  3118. sg_dma_address(iter) + (offset << PAGE_SHIFT);
  3119. sg_dma_len(sg) = len;
  3120. st->nents++;
  3121. count -= len >> PAGE_SHIFT;
  3122. if (count == 0) {
  3123. sg_mark_end(sg);
  3124. return st;
  3125. }
  3126. sg = __sg_next(sg);
  3127. iter = __sg_next(iter);
  3128. offset = 0;
  3129. } while (1);
  3130. err_sg_alloc:
  3131. kfree(st);
  3132. err_st_alloc:
  3133. return ERR_PTR(ret);
  3134. }
  3135. static int
  3136. i915_get_ggtt_vma_pages(struct i915_vma *vma)
  3137. {
  3138. int ret;
  3139. /* The vma->pages are only valid within the lifespan of the borrowed
  3140. * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
  3141. * must be the vma->pages. A simple rule is that vma->pages must only
  3142. * be accessed when the obj->mm.pages are pinned.
  3143. */
  3144. GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
  3145. switch (vma->ggtt_view.type) {
  3146. default:
  3147. GEM_BUG_ON(vma->ggtt_view.type);
  3148. /* fall through */
  3149. case I915_GGTT_VIEW_NORMAL:
  3150. vma->pages = vma->obj->mm.pages;
  3151. return 0;
  3152. case I915_GGTT_VIEW_ROTATED:
  3153. vma->pages =
  3154. intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
  3155. break;
  3156. case I915_GGTT_VIEW_PARTIAL:
  3157. vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
  3158. break;
  3159. }
  3160. ret = 0;
  3161. if (unlikely(IS_ERR(vma->pages))) {
  3162. ret = PTR_ERR(vma->pages);
  3163. vma->pages = NULL;
  3164. DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
  3165. vma->ggtt_view.type, ret);
  3166. }
  3167. return ret;
  3168. }
  3169. /**
  3170. * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
  3171. * @vm: the &struct i915_address_space
  3172. * @node: the &struct drm_mm_node (typically i915_vma.mode)
  3173. * @size: how much space to allocate inside the GTT,
  3174. * must be #I915_GTT_PAGE_SIZE aligned
  3175. * @offset: where to insert inside the GTT,
  3176. * must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
  3177. * (@offset + @size) must fit within the address space
  3178. * @color: color to apply to node, if this node is not from a VMA,
  3179. * color must be #I915_COLOR_UNEVICTABLE
  3180. * @flags: control search and eviction behaviour
  3181. *
  3182. * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
  3183. * the address space (using @size and @color). If the @node does not fit, it
  3184. * tries to evict any overlapping nodes from the GTT, including any
  3185. * neighbouring nodes if the colors do not match (to ensure guard pages between
  3186. * differing domains). See i915_gem_evict_for_node() for the gory details
  3187. * on the eviction algorithm. #PIN_NONBLOCK may used to prevent waiting on
  3188. * evicting active overlapping objects, and any overlapping node that is pinned
  3189. * or marked as unevictable will also result in failure.
  3190. *
  3191. * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
  3192. * asked to wait for eviction and interrupted.
  3193. */
  3194. int i915_gem_gtt_reserve(struct i915_address_space *vm,
  3195. struct drm_mm_node *node,
  3196. u64 size, u64 offset, unsigned long color,
  3197. unsigned int flags)
  3198. {
  3199. int err;
  3200. GEM_BUG_ON(!size);
  3201. GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
  3202. GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
  3203. GEM_BUG_ON(range_overflows(offset, size, vm->total));
  3204. GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
  3205. GEM_BUG_ON(drm_mm_node_allocated(node));
  3206. node->size = size;
  3207. node->start = offset;
  3208. node->color = color;
  3209. err = drm_mm_reserve_node(&vm->mm, node);
  3210. if (err != -ENOSPC)
  3211. return err;
  3212. if (flags & PIN_NOEVICT)
  3213. return -ENOSPC;
  3214. err = i915_gem_evict_for_node(vm, node, flags);
  3215. if (err == 0)
  3216. err = drm_mm_reserve_node(&vm->mm, node);
  3217. return err;
  3218. }
  3219. static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
  3220. {
  3221. u64 range, addr;
  3222. GEM_BUG_ON(range_overflows(start, len, end));
  3223. GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
  3224. range = round_down(end - len, align) - round_up(start, align);
  3225. if (range) {
  3226. if (sizeof(unsigned long) == sizeof(u64)) {
  3227. addr = get_random_long();
  3228. } else {
  3229. addr = get_random_int();
  3230. if (range > U32_MAX) {
  3231. addr <<= 32;
  3232. addr |= get_random_int();
  3233. }
  3234. }
  3235. div64_u64_rem(addr, range, &addr);
  3236. start += addr;
  3237. }
  3238. return round_up(start, align);
  3239. }
  3240. /**
  3241. * i915_gem_gtt_insert - insert a node into an address_space (GTT)
  3242. * @vm: the &struct i915_address_space
  3243. * @node: the &struct drm_mm_node (typically i915_vma.node)
  3244. * @size: how much space to allocate inside the GTT,
  3245. * must be #I915_GTT_PAGE_SIZE aligned
  3246. * @alignment: required alignment of starting offset, may be 0 but
  3247. * if specified, this must be a power-of-two and at least
  3248. * #I915_GTT_MIN_ALIGNMENT
  3249. * @color: color to apply to node
  3250. * @start: start of any range restriction inside GTT (0 for all),
  3251. * must be #I915_GTT_PAGE_SIZE aligned
  3252. * @end: end of any range restriction inside GTT (U64_MAX for all),
  3253. * must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
  3254. * @flags: control search and eviction behaviour
  3255. *
  3256. * i915_gem_gtt_insert() first searches for an available hole into which
  3257. * is can insert the node. The hole address is aligned to @alignment and
  3258. * its @size must then fit entirely within the [@start, @end] bounds. The
  3259. * nodes on either side of the hole must match @color, or else a guard page
  3260. * will be inserted between the two nodes (or the node evicted). If no
  3261. * suitable hole is found, first a victim is randomly selected and tested
  3262. * for eviction, otherwise then the LRU list of objects within the GTT
  3263. * is scanned to find the first set of replacement nodes to create the hole.
  3264. * Those old overlapping nodes are evicted from the GTT (and so must be
  3265. * rebound before any future use). Any node that is currently pinned cannot
  3266. * be evicted (see i915_vma_pin()). Similar if the node's VMA is currently
  3267. * active and #PIN_NONBLOCK is specified, that node is also skipped when
  3268. * searching for an eviction candidate. See i915_gem_evict_something() for
  3269. * the gory details on the eviction algorithm.
  3270. *
  3271. * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
  3272. * asked to wait for eviction and interrupted.
  3273. */
  3274. int i915_gem_gtt_insert(struct i915_address_space *vm,
  3275. struct drm_mm_node *node,
  3276. u64 size, u64 alignment, unsigned long color,
  3277. u64 start, u64 end, unsigned int flags)
  3278. {
  3279. enum drm_mm_insert_mode mode;
  3280. u64 offset;
  3281. int err;
  3282. lockdep_assert_held(&vm->i915->drm.struct_mutex);
  3283. GEM_BUG_ON(!size);
  3284. GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
  3285. GEM_BUG_ON(alignment && !is_power_of_2(alignment));
  3286. GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
  3287. GEM_BUG_ON(start >= end);
  3288. GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
  3289. GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
  3290. GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
  3291. GEM_BUG_ON(drm_mm_node_allocated(node));
  3292. if (unlikely(range_overflows(start, size, end)))
  3293. return -ENOSPC;
  3294. if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
  3295. return -ENOSPC;
  3296. mode = DRM_MM_INSERT_BEST;
  3297. if (flags & PIN_HIGH)
  3298. mode = DRM_MM_INSERT_HIGHEST;
  3299. if (flags & PIN_MAPPABLE)
  3300. mode = DRM_MM_INSERT_LOW;
  3301. /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
  3302. * so we know that we always have a minimum alignment of 4096.
  3303. * The drm_mm range manager is optimised to return results
  3304. * with zero alignment, so where possible use the optimal
  3305. * path.
  3306. */
  3307. BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
  3308. if (alignment <= I915_GTT_MIN_ALIGNMENT)
  3309. alignment = 0;
  3310. err = drm_mm_insert_node_in_range(&vm->mm, node,
  3311. size, alignment, color,
  3312. start, end, mode);
  3313. if (err != -ENOSPC)
  3314. return err;
  3315. if (mode & DRM_MM_INSERT_ONCE) {
  3316. err = drm_mm_insert_node_in_range(&vm->mm, node,
  3317. size, alignment, color,
  3318. start, end,
  3319. DRM_MM_INSERT_BEST);
  3320. if (err != -ENOSPC)
  3321. return err;
  3322. }
  3323. if (flags & PIN_NOEVICT)
  3324. return -ENOSPC;
  3325. /* No free space, pick a slot at random.
  3326. *
  3327. * There is a pathological case here using a GTT shared between
  3328. * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
  3329. *
  3330. * |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
  3331. * (64k objects) (448k objects)
  3332. *
  3333. * Now imagine that the eviction LRU is ordered top-down (just because
  3334. * pathology meets real life), and that we need to evict an object to
  3335. * make room inside the aperture. The eviction scan then has to walk
  3336. * the 448k list before it finds one within range. And now imagine that
  3337. * it has to search for a new hole between every byte inside the memcpy,
  3338. * for several simultaneous clients.
  3339. *
  3340. * On a full-ppgtt system, if we have run out of available space, there
  3341. * will be lots and lots of objects in the eviction list! Again,
  3342. * searching that LRU list may be slow if we are also applying any
  3343. * range restrictions (e.g. restriction to low 4GiB) and so, for
  3344. * simplicity and similarilty between different GTT, try the single
  3345. * random replacement first.
  3346. */
  3347. offset = random_offset(start, end,
  3348. size, alignment ?: I915_GTT_MIN_ALIGNMENT);
  3349. err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
  3350. if (err != -ENOSPC)
  3351. return err;
  3352. /* Randomly selected placement is pinned, do a search */
  3353. err = i915_gem_evict_something(vm, size, alignment, color,
  3354. start, end, flags);
  3355. if (err)
  3356. return err;
  3357. return drm_mm_insert_node_in_range(&vm->mm, node,
  3358. size, alignment, color,
  3359. start, end, DRM_MM_INSERT_EVICT);
  3360. }
  3361. #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  3362. #include "selftests/mock_gtt.c"
  3363. #include "selftests/i915_gem_gtt.c"
  3364. #endif