gtt.c

  1. /*
  2. * GTT virtualization
  3. *
  4. * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a
  7. * copy of this software and associated documentation files (the "Software"),
  8. * to deal in the Software without restriction, including without limitation
  9. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10. * and/or sell copies of the Software, and to permit persons to whom the
  11. * Software is furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice (including the next
  14. * paragraph) shall be included in all copies or substantial portions of the
  15. * Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23. * SOFTWARE.
  24. *
  25. * Authors:
  26. * Zhi Wang <zhi.a.wang@intel.com>
  27. * Zhenyu Wang <zhenyuw@linux.intel.com>
  28. * Xiao Zheng <xiao.zheng@intel.com>
  29. *
  30. * Contributors:
  31. * Min He <min.he@intel.com>
  32. * Bing Niu <bing.niu@intel.com>
  33. *
  34. */
  35. #include "i915_drv.h"
  36. #include "gvt.h"
  37. #include "i915_pvinfo.h"
  38. #include "trace.h"
  39. #if defined(VERBOSE_DEBUG)
  40. #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
  41. #else
  42. #define gvt_vdbg_mm(fmt, args...)
  43. #endif
  44. static bool enable_out_of_sync = false;
  45. static int preallocated_oos_pages = 8192;
  46. /*
  47. * Validate a gm address and the related range size against the
  48. * vGPU's aperture and hidden gm space.
  49. */
  50. bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
  51. {
  52. if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
  53. && !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
  54. gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n",
  55. addr, size);
  56. return false;
  57. }
  58. return true;
  59. }
  60. /* translate a guest gmadr to host gmadr */
  61. int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
  62. {
  63. if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
  64. "invalid guest gmadr %llx\n", g_addr))
  65. return -EACCES;
  66. if (vgpu_gmadr_is_aperture(vgpu, g_addr))
  67. *h_addr = vgpu_aperture_gmadr_base(vgpu)
  68. + (g_addr - vgpu_aperture_offset(vgpu));
  69. else
  70. *h_addr = vgpu_hidden_gmadr_base(vgpu)
  71. + (g_addr - vgpu_hidden_offset(vgpu));
  72. return 0;
  73. }
  74. /* translate a host gmadr to guest gmadr */
  75. int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
  76. {
  77. if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
  78. "invalid host gmadr %llx\n", h_addr))
  79. return -EACCES;
  80. if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
  81. *g_addr = vgpu_aperture_gmadr_base(vgpu)
  82. + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
  83. else
  84. *g_addr = vgpu_hidden_gmadr_base(vgpu)
  85. + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
  86. return 0;
  87. }
  88. int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
  89. unsigned long *h_index)
  90. {
  91. u64 h_addr;
  92. int ret;
  93. ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
  94. &h_addr);
  95. if (ret)
  96. return ret;
  97. *h_index = h_addr >> I915_GTT_PAGE_SHIFT;
  98. return 0;
  99. }
  100. int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
  101. unsigned long *g_index)
  102. {
  103. u64 g_addr;
  104. int ret;
  105. ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
  106. &g_addr);
  107. if (ret)
  108. return ret;
  109. *g_index = g_addr >> I915_GTT_PAGE_SHIFT;
  110. return 0;
  111. }
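/*
 * Worked example (illustrative, assuming I915_GTT_PAGE_SHIFT == 12):
 * guest GGTT index 0x100 names guest gmadr 0x100 << 12 = 0x100000;
 * intel_gvt_ggtt_gmadr_g2h() rebases that address into the host aperture
 * or hidden range, and shifting the result back down by 12 gives the host
 * GGTT index that intel_gvt_ggtt_index_g2h() returns.
 */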
  112. #define gtt_type_is_entry(type) \
  113. (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
  114. && type != GTT_TYPE_PPGTT_PTE_ENTRY \
  115. && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
  116. #define gtt_type_is_pt(type) \
  117. (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
  118. #define gtt_type_is_pte_pt(type) \
  119. (type == GTT_TYPE_PPGTT_PTE_PT)
  120. #define gtt_type_is_root_pointer(type) \
  121. (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
  122. #define gtt_init_entry(e, t, p, v) do { \
  123. (e)->type = t; \
  124. (e)->pdev = p; \
  125. memcpy(&(e)->val64, &v, sizeof(v)); \
  126. } while (0)
  127. /*
  128. * Mappings between GTT_TYPE* enumerations.
  129. * For a given type, the following information can be looked up:
  130. * - the type of the next-level page table
  131. * - the type of an entry inside this level of page table
  132. * - the type of the entry when the PSE bit is set
  133. *
  134. * If the given type doesn't carry that piece of information,
  135. * GTT_TYPE_INVALID is returned instead. For example, an L4 root entry
  136. * has no PSE bit, so asking for its PSE type yields GTT_TYPE_INVALID,
  137. * and a PTE page table has no next-level page table, so asking for
  138. * its next-level type yields GTT_TYPE_INVALID as well.
  139. * This is useful when traversing a page table, since hitting
  140. * GTT_TYPE_INVALID marks the end of the walk.
  141. */
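/*
 * Reading the table below as an example: for GTT_TYPE_PPGTT_PDE_ENTRY,
 * the next-level page table type is GTT_TYPE_PPGTT_PTE_PT and the PSE
 * entry type is GTT_TYPE_PPGTT_PTE_2M_ENTRY, while GTT_TYPE_PPGTT_PTE_PT
 * has neither a next-level page table type nor a PSE type, so the
 * accessors defined further below return GTT_TYPE_INVALID for it.
 */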
  142. struct gtt_type_table_entry {
  143. int entry_type;
  144. int pt_type;
  145. int next_pt_type;
  146. int pse_entry_type;
  147. };
  148. #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
  149. [type] = { \
  150. .entry_type = e_type, \
  151. .pt_type = cpt_type, \
  152. .next_pt_type = npt_type, \
  153. .pse_entry_type = pse_type, \
  154. }
  155. static struct gtt_type_table_entry gtt_type_table[] = {
  156. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
  157. GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
  158. GTT_TYPE_INVALID,
  159. GTT_TYPE_PPGTT_PML4_PT,
  160. GTT_TYPE_INVALID),
  161. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
  162. GTT_TYPE_PPGTT_PML4_ENTRY,
  163. GTT_TYPE_PPGTT_PML4_PT,
  164. GTT_TYPE_PPGTT_PDP_PT,
  165. GTT_TYPE_INVALID),
  166. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
  167. GTT_TYPE_PPGTT_PML4_ENTRY,
  168. GTT_TYPE_PPGTT_PML4_PT,
  169. GTT_TYPE_PPGTT_PDP_PT,
  170. GTT_TYPE_INVALID),
  171. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
  172. GTT_TYPE_PPGTT_PDP_ENTRY,
  173. GTT_TYPE_PPGTT_PDP_PT,
  174. GTT_TYPE_PPGTT_PDE_PT,
  175. GTT_TYPE_PPGTT_PTE_1G_ENTRY),
  176. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
  177. GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
  178. GTT_TYPE_INVALID,
  179. GTT_TYPE_PPGTT_PDE_PT,
  180. GTT_TYPE_PPGTT_PTE_1G_ENTRY),
  181. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
  182. GTT_TYPE_PPGTT_PDP_ENTRY,
  183. GTT_TYPE_PPGTT_PDP_PT,
  184. GTT_TYPE_PPGTT_PDE_PT,
  185. GTT_TYPE_PPGTT_PTE_1G_ENTRY),
  186. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
  187. GTT_TYPE_PPGTT_PDE_ENTRY,
  188. GTT_TYPE_PPGTT_PDE_PT,
  189. GTT_TYPE_PPGTT_PTE_PT,
  190. GTT_TYPE_PPGTT_PTE_2M_ENTRY),
  191. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
  192. GTT_TYPE_PPGTT_PDE_ENTRY,
  193. GTT_TYPE_PPGTT_PDE_PT,
  194. GTT_TYPE_PPGTT_PTE_PT,
  195. GTT_TYPE_PPGTT_PTE_2M_ENTRY),
  196. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
  197. GTT_TYPE_PPGTT_PTE_4K_ENTRY,
  198. GTT_TYPE_PPGTT_PTE_PT,
  199. GTT_TYPE_INVALID,
  200. GTT_TYPE_INVALID),
  201. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
  202. GTT_TYPE_PPGTT_PTE_4K_ENTRY,
  203. GTT_TYPE_PPGTT_PTE_PT,
  204. GTT_TYPE_INVALID,
  205. GTT_TYPE_INVALID),
  206. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
  207. GTT_TYPE_PPGTT_PDE_ENTRY,
  208. GTT_TYPE_PPGTT_PDE_PT,
  209. GTT_TYPE_INVALID,
  210. GTT_TYPE_PPGTT_PTE_2M_ENTRY),
  211. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
  212. GTT_TYPE_PPGTT_PDP_ENTRY,
  213. GTT_TYPE_PPGTT_PDP_PT,
  214. GTT_TYPE_INVALID,
  215. GTT_TYPE_PPGTT_PTE_1G_ENTRY),
  216. GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
  217. GTT_TYPE_GGTT_PTE,
  218. GTT_TYPE_INVALID,
  219. GTT_TYPE_INVALID,
  220. GTT_TYPE_INVALID),
  221. };
  222. static inline int get_next_pt_type(int type)
  223. {
  224. return gtt_type_table[type].next_pt_type;
  225. }
  226. static inline int get_pt_type(int type)
  227. {
  228. return gtt_type_table[type].pt_type;
  229. }
  230. static inline int get_entry_type(int type)
  231. {
  232. return gtt_type_table[type].entry_type;
  233. }
  234. static inline int get_pse_type(int type)
  235. {
  236. return gtt_type_table[type].pse_entry_type;
  237. }
  238. static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
  239. {
  240. void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
  241. return readq(addr);
  242. }
  243. static void ggtt_invalidate(struct drm_i915_private *dev_priv)
  244. {
  245. mmio_hw_access_pre(dev_priv);
  246. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  247. mmio_hw_access_post(dev_priv);
  248. }
  249. static void write_pte64(struct drm_i915_private *dev_priv,
  250. unsigned long index, u64 pte)
  251. {
  252. void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
  253. writeq(pte, addr);
  254. }
  255. static inline int gtt_get_entry64(void *pt,
  256. struct intel_gvt_gtt_entry *e,
  257. unsigned long index, bool hypervisor_access, unsigned long gpa,
  258. struct intel_vgpu *vgpu)
  259. {
  260. const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
  261. int ret;
  262. if (WARN_ON(info->gtt_entry_size != 8))
  263. return -EINVAL;
  264. if (hypervisor_access) {
  265. ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
  266. (index << info->gtt_entry_size_shift),
  267. &e->val64, 8);
  268. if (WARN_ON(ret))
  269. return ret;
  270. } else if (!pt) {
  271. e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
  272. } else {
  273. e->val64 = *((u64 *)pt + index);
  274. }
  275. return 0;
  276. }
  277. static inline int gtt_set_entry64(void *pt,
  278. struct intel_gvt_gtt_entry *e,
  279. unsigned long index, bool hypervisor_access, unsigned long gpa,
  280. struct intel_vgpu *vgpu)
  281. {
  282. const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
  283. int ret;
  284. if (WARN_ON(info->gtt_entry_size != 8))
  285. return -EINVAL;
  286. if (hypervisor_access) {
  287. ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
  288. (index << info->gtt_entry_size_shift),
  289. &e->val64, 8);
  290. if (WARN_ON(ret))
  291. return ret;
  292. } else if (!pt) {
  293. write_pte64(vgpu->gvt->dev_priv, index, e->val64);
  294. } else {
  295. *((u64 *)pt + index) = e->val64;
  296. }
  297. return 0;
  298. }
  299. #define GTT_HAW 46
  300. #define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30)
  301. #define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21)
  302. #define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12)
  303. static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
  304. {
  305. unsigned long pfn;
  306. if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
  307. pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
  308. else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
  309. pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
  310. else
  311. pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
  312. return pfn;
  313. }
  314. static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
  315. {
  316. if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
  317. e->val64 &= ~ADDR_1G_MASK;
  318. pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
  319. } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
  320. e->val64 &= ~ADDR_2M_MASK;
  321. pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
  322. } else {
  323. e->val64 &= ~ADDR_4K_MASK;
  324. pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
  325. }
  326. e->val64 |= (pfn << PAGE_SHIFT);
  327. }
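/*
 * Illustrative example: with GTT_HAW == 46, ADDR_4K_MASK covers bits
 * 45:12, so a 4K PTE whose val64 is 0x123456063 (the low bits holding
 * flags such as _PAGE_PRESENT) yields pfn 0x123456 from
 * gen8_gtt_get_pfn(); gen8_gtt_set_pfn() does the inverse, replacing
 * only the address field and leaving the flag bits untouched.
 */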
  328. static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
  329. {
  330. /* Entry doesn't have PSE bit. */
  331. if (get_pse_type(e->type) == GTT_TYPE_INVALID)
  332. return false;
  333. e->type = get_entry_type(e->type);
  334. if (!(e->val64 & _PAGE_PSE))
  335. return false;
  336. e->type = get_pse_type(e->type);
  337. return true;
  338. }
  339. static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
  340. {
  341. /*
  342. * i915 writes PDP root pointer registers without the present bit set,
  343. * and that still works, so root pointer entries have to be treated
  344. * specially here.
  345. */
  346. if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
  347. || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
  348. return (e->val64 != 0);
  349. else
  350. return (e->val64 & _PAGE_PRESENT);
  351. }
  352. static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
  353. {
  354. e->val64 &= ~_PAGE_PRESENT;
  355. }
  356. static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
  357. {
  358. e->val64 |= _PAGE_PRESENT;
  359. }
  360. /*
  361. * Per-platform GMA routines.
  362. */
  363. static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
  364. {
  365. unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);
  366. trace_gma_index(__func__, gma, x);
  367. return x;
  368. }
  369. #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
  370. static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
  371. { \
  372. unsigned long x = (exp); \
  373. trace_gma_index(__func__, gma, x); \
  374. return x; \
  375. }
  376. DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
  377. DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
  378. DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
  379. DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
  380. DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
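/*
 * These helpers just slice bit fields out of a graphics memory address:
 * in the gen8 4-level layout, bits 47:39 select the PML4 entry, bits
 * 38:30 the PDP entry, bits 29:21 the PDE and bits 20:12 the PTE, while
 * a legacy 3-level PPGTT uses only a 2-bit PDP index (bits 31:30),
 * mirroring the shift/mask pairs above.
 */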
  381. static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
  382. .get_entry = gtt_get_entry64,
  383. .set_entry = gtt_set_entry64,
  384. .clear_present = gtt_entry_clear_present,
  385. .set_present = gtt_entry_set_present,
  386. .test_present = gen8_gtt_test_present,
  387. .test_pse = gen8_gtt_test_pse,
  388. .get_pfn = gen8_gtt_get_pfn,
  389. .set_pfn = gen8_gtt_set_pfn,
  390. };
  391. static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
  392. .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
  393. .gma_to_pte_index = gen8_gma_to_pte_index,
  394. .gma_to_pde_index = gen8_gma_to_pde_index,
  395. .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
  396. .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
  397. .gma_to_pml4_index = gen8_gma_to_pml4_index,
  398. };
  399. /*
  400. * MM helpers.
  401. */
  402. static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
  403. struct intel_gvt_gtt_entry *entry, unsigned long index,
  404. bool guest)
  405. {
  406. struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
  407. GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);
  408. entry->type = mm->ppgtt_mm.root_entry_type;
  409. pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
  410. mm->ppgtt_mm.shadow_pdps,
  411. entry, index, false, 0, mm->vgpu);
  412. pte_ops->test_pse(entry);
  413. }
  414. static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
  415. struct intel_gvt_gtt_entry *entry, unsigned long index)
  416. {
  417. _ppgtt_get_root_entry(mm, entry, index, true);
  418. }
  419. static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
  420. struct intel_gvt_gtt_entry *entry, unsigned long index)
  421. {
  422. _ppgtt_get_root_entry(mm, entry, index, false);
  423. }
  424. static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
  425. struct intel_gvt_gtt_entry *entry, unsigned long index,
  426. bool guest)
  427. {
  428. struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
  429. pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
  430. mm->ppgtt_mm.shadow_pdps,
  431. entry, index, false, 0, mm->vgpu);
  432. }
  433. static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm,
  434. struct intel_gvt_gtt_entry *entry, unsigned long index)
  435. {
  436. _ppgtt_set_root_entry(mm, entry, index, true);
  437. }
  438. static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
  439. struct intel_gvt_gtt_entry *entry, unsigned long index)
  440. {
  441. _ppgtt_set_root_entry(mm, entry, index, false);
  442. }
  443. static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
  444. struct intel_gvt_gtt_entry *entry, unsigned long index)
  445. {
  446. struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
  447. GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
  448. entry->type = GTT_TYPE_GGTT_PTE;
  449. pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
  450. false, 0, mm->vgpu);
  451. }
  452. static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
  453. struct intel_gvt_gtt_entry *entry, unsigned long index)
  454. {
  455. struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
  456. GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
  457. pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
  458. false, 0, mm->vgpu);
  459. }
  460. static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
  461. struct intel_gvt_gtt_entry *entry, unsigned long index)
  462. {
  463. struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
  464. GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
  465. pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
  466. }
  467. static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
  468. struct intel_gvt_gtt_entry *entry, unsigned long index)
  469. {
  470. struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
  471. GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
  472. pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
  473. }
  474. /*
  475. * PPGTT shadow page table helpers.
  476. */
  477. static inline int ppgtt_spt_get_entry(
  478. struct intel_vgpu_ppgtt_spt *spt,
  479. void *page_table, int type,
  480. struct intel_gvt_gtt_entry *e, unsigned long index,
  481. bool guest)
  482. {
  483. struct intel_gvt *gvt = spt->vgpu->gvt;
  484. struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
  485. int ret;
  486. e->type = get_entry_type(type);
  487. if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
  488. return -EINVAL;
  489. ret = ops->get_entry(page_table, e, index, guest,
  490. spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
  491. spt->vgpu);
  492. if (ret)
  493. return ret;
  494. ops->test_pse(e);
  495. gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
  496. type, e->type, index, e->val64);
  497. return 0;
  498. }
  499. static inline int ppgtt_spt_set_entry(
  500. struct intel_vgpu_ppgtt_spt *spt,
  501. void *page_table, int type,
  502. struct intel_gvt_gtt_entry *e, unsigned long index,
  503. bool guest)
  504. {
  505. struct intel_gvt *gvt = spt->vgpu->gvt;
  506. struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
  507. if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
  508. return -EINVAL;
  509. gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
  510. type, e->type, index, e->val64);
  511. return ops->set_entry(page_table, e, index, guest,
  512. spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
  513. spt->vgpu);
  514. }
  515. #define ppgtt_get_guest_entry(spt, e, index) \
  516. ppgtt_spt_get_entry(spt, NULL, \
  517. spt->guest_page.type, e, index, true)
  518. #define ppgtt_set_guest_entry(spt, e, index) \
  519. ppgtt_spt_set_entry(spt, NULL, \
  520. spt->guest_page.type, e, index, true)
  521. #define ppgtt_get_shadow_entry(spt, e, index) \
  522. ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
  523. spt->shadow_page.type, e, index, false)
  524. #define ppgtt_set_shadow_entry(spt, e, index) \
  525. ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
  526. spt->shadow_page.type, e, index, false)
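/*
 * The guest variants above pass page_table == NULL with guest == true,
 * so ppgtt_spt_get_entry()/ppgtt_spt_set_entry() go through the
 * hypervisor using the guest page's gfn as the GPA base; the shadow
 * variants operate directly on the kernel mapping of the shadow page
 * (shadow_page.vaddr).
 */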
  527. static void *alloc_spt(gfp_t gfp_mask)
  528. {
  529. struct intel_vgpu_ppgtt_spt *spt;
  530. spt = kzalloc(sizeof(*spt), gfp_mask);
  531. if (!spt)
  532. return NULL;
  533. spt->shadow_page.page = alloc_page(gfp_mask);
  534. if (!spt->shadow_page.page) {
  535. kfree(spt);
  536. return NULL;
  537. }
  538. return spt;
  539. }
  540. static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
  541. {
  542. __free_page(spt->shadow_page.page);
  543. kfree(spt);
  544. }
  545. static int detach_oos_page(struct intel_vgpu *vgpu,
  546. struct intel_vgpu_oos_page *oos_page);
  547. static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
  548. {
  549. struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev;
  550. trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
  551. dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
  552. PCI_DMA_BIDIRECTIONAL);
  553. radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
  554. if (spt->guest_page.oos_page)
  555. detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
  556. intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
  557. list_del_init(&spt->post_shadow_list);
  558. free_spt(spt);
  559. }
  560. static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
  561. {
  562. struct intel_vgpu_ppgtt_spt *spt;
  563. struct radix_tree_iter iter;
  564. void **slot;
  565. radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
  566. spt = radix_tree_deref_slot(slot);
  567. ppgtt_free_spt(spt);
  568. }
  569. }
  570. static int ppgtt_handle_guest_write_page_table_bytes(
  571. struct intel_vgpu_ppgtt_spt *spt,
  572. u64 pa, void *p_data, int bytes);
  573. static int ppgtt_write_protection_handler(
  574. struct intel_vgpu_page_track *page_track,
  575. u64 gpa, void *data, int bytes)
  576. {
  577. struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;
  578. int ret;
  579. if (bytes != 4 && bytes != 8)
  580. return -EINVAL;
  581. ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
  582. if (ret)
  583. return ret;
  584. return ret;
  585. }
  586. /* Find a spt by guest gfn. */
  587. static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
  588. struct intel_vgpu *vgpu, unsigned long gfn)
  589. {
  590. struct intel_vgpu_page_track *track;
  591. track = intel_vgpu_find_page_track(vgpu, gfn);
  592. if (track && track->handler == ppgtt_write_protection_handler)
  593. return track->priv_data;
  594. return NULL;
  595. }
  596. /* Find the spt by shadow page mfn. */
  597. static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
  598. struct intel_vgpu *vgpu, unsigned long mfn)
  599. {
  600. return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
  601. }
  602. static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
  603. static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
  604. struct intel_vgpu *vgpu, int type, unsigned long gfn)
  605. {
  606. struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
  607. struct intel_vgpu_ppgtt_spt *spt = NULL;
  608. dma_addr_t daddr;
  609. int ret;
  610. retry:
  611. spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
  612. if (!spt) {
  613. if (reclaim_one_ppgtt_mm(vgpu->gvt))
  614. goto retry;
  615. gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
  616. return ERR_PTR(-ENOMEM);
  617. }
  618. spt->vgpu = vgpu;
  619. atomic_set(&spt->refcount, 1);
  620. INIT_LIST_HEAD(&spt->post_shadow_list);
  621. /*
  622. * Init shadow_page.
  623. */
  624. spt->shadow_page.type = type;
  625. daddr = dma_map_page(kdev, spt->shadow_page.page,
  626. 0, 4096, PCI_DMA_BIDIRECTIONAL);
  627. if (dma_mapping_error(kdev, daddr)) {
  628. gvt_vgpu_err("fail to map dma addr\n");
  629. ret = -EINVAL;
  630. goto err_free_spt;
  631. }
  632. spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
  633. spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
  634. /*
  635. * Init guest_page.
  636. */
  637. spt->guest_page.type = type;
  638. spt->guest_page.gfn = gfn;
  639. ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn,
  640. ppgtt_write_protection_handler, spt);
  641. if (ret)
  642. goto err_unmap_dma;
  643. ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
  644. if (ret)
  645. goto err_unreg_page_track;
  646. trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
  647. return spt;
  648. err_unreg_page_track:
  649. intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn);
  650. err_unmap_dma:
  651. dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
  652. err_free_spt:
  653. free_spt(spt);
  654. return ERR_PTR(ret);
  655. }
  656. #define pt_entry_size_shift(spt) \
  657. ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
  658. #define pt_entries(spt) \
  659. (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
  660. #define for_each_present_guest_entry(spt, e, i) \
  661. for (i = 0; i < pt_entries(spt); i++) \
  662. if (!ppgtt_get_guest_entry(spt, e, i) && \
  663. spt->vgpu->gvt->gtt.pte_ops->test_present(e))
  664. #define for_each_present_shadow_entry(spt, e, i) \
  665. for (i = 0; i < pt_entries(spt); i++) \
  666. if (!ppgtt_get_shadow_entry(spt, e, i) && \
  667. spt->vgpu->gvt->gtt.pte_ops->test_present(e))
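/*
 * With the 8-byte gen8 entries this file assumes (see the
 * WARN_ON(info->gtt_entry_size != 8) checks above), pt_entries(spt)
 * evaluates to 4096 >> 3 = 512, so the for_each_present_*_entry()
 * iterators walk at most 512 slots per page table.
 */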
  668. static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
  669. {
  670. int v = atomic_read(&spt->refcount);
  671. trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
  672. atomic_inc(&spt->refcount);
  673. }
  674. static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
  675. static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
  676. struct intel_gvt_gtt_entry *e)
  677. {
  678. struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
  679. struct intel_vgpu_ppgtt_spt *s;
  680. intel_gvt_gtt_type_t cur_pt_type;
  681. GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
  682. if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
  683. && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
  684. cur_pt_type = get_next_pt_type(e->type) + 1;
  685. if (ops->get_pfn(e) ==
  686. vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
  687. return 0;
  688. }
  689. s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
  690. if (!s) {
  691. gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
  692. ops->get_pfn(e));
  693. return -ENXIO;
  694. }
  695. return ppgtt_invalidate_spt(s);
  696. }
  697. static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
  698. struct intel_gvt_gtt_entry *entry)
  699. {
  700. struct intel_vgpu *vgpu = spt->vgpu;
  701. struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
  702. unsigned long pfn;
  703. int type;
  704. pfn = ops->get_pfn(entry);
  705. type = spt->shadow_page.type;
  706. if (pfn == vgpu->gtt.scratch_pt[type].page_mfn)
  707. return;
  708. intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
  709. }
  710. static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
  711. {
  712. struct intel_vgpu *vgpu = spt->vgpu;
  713. struct intel_gvt_gtt_entry e;
  714. unsigned long index;
  715. int ret;
  716. int v = atomic_read(&spt->refcount);
  717. trace_spt_change(spt->vgpu->id, "die", spt,
  718. spt->guest_page.gfn, spt->shadow_page.type);
  719. trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
  720. if (atomic_dec_return(&spt->refcount) > 0)
  721. return 0;
  722. for_each_present_shadow_entry(spt, &e, index) {
  723. switch (e.type) {
  724. case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
  725. gvt_vdbg_mm("invalidate 4K entry\n");
  726. ppgtt_invalidate_pte(spt, &e);
  727. break;
  728. case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
  729. case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
  730. WARN(1, "GVT doesn't support 2M/1GB page\n");
  731. continue;
  732. case GTT_TYPE_PPGTT_PML4_ENTRY:
  733. case GTT_TYPE_PPGTT_PDP_ENTRY:
  734. case GTT_TYPE_PPGTT_PDE_ENTRY:
  735. gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n");
  736. ret = ppgtt_invalidate_spt_by_shadow_entry(
  737. spt->vgpu, &e);
  738. if (ret)
  739. goto fail;
  740. break;
  741. default:
  742. GEM_BUG_ON(1);
  743. }
  744. }
  745. trace_spt_change(spt->vgpu->id, "release", spt,
  746. spt->guest_page.gfn, spt->shadow_page.type);
  747. ppgtt_free_spt(spt);
  748. return 0;
  749. fail:
  750. gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
  751. spt, e.val64, e.type);
  752. return ret;
  753. }
  754. static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
  755. static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
  756. struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
  757. {
  758. struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
  759. struct intel_vgpu_ppgtt_spt *spt = NULL;
  760. int ret;
  761. GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
  762. spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
  763. if (spt)
  764. ppgtt_get_spt(spt);
  765. else {
  766. int type = get_next_pt_type(we->type);
  767. spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we));
  768. if (IS_ERR(spt)) {
  769. ret = PTR_ERR(spt);
  770. goto fail;
  771. }
  772. ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
  773. if (ret)
  774. goto fail;
  775. ret = ppgtt_populate_spt(spt);
  776. if (ret)
  777. goto fail;
  778. trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
  779. spt->shadow_page.type);
  780. }
  781. return spt;
  782. fail:
  783. gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
  784. spt, we->val64, we->type);
  785. return ERR_PTR(ret);
  786. }
  787. static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
  788. struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
  789. {
  790. struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
  791. se->type = ge->type;
  792. se->val64 = ge->val64;
  793. ops->set_pfn(se, s->shadow_page.mfn);
  794. }
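/*
 * A shadow entry keeps the guest entry's type and flag bits but points
 * at the host-side shadow page (s->shadow_page.mfn) instead of the
 * guest page frame.
 */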
  795. static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
  796. struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
  797. struct intel_gvt_gtt_entry *ge)
  798. {
  799. struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
  800. struct intel_gvt_gtt_entry se = *ge;
  801. unsigned long gfn;
  802. dma_addr_t dma_addr;
  803. int ret;
  804. if (!pte_ops->test_present(ge))
  805. return 0;
  806. gfn = pte_ops->get_pfn(ge);
  807. switch (ge->type) {
  808. case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
  809. gvt_vdbg_mm("shadow 4K gtt entry\n");
  810. break;
  811. case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
  812. case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
  813. gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n");
  814. return -EINVAL;
  815. default:
  816. GEM_BUG_ON(1);
  817. };
  818. /* direct shadow */
  819. ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr);
  820. if (ret)
  821. return -ENXIO;
  822. pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
  823. ppgtt_set_shadow_entry(spt, &se, index);
  824. return 0;
  825. }
  826. static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
  827. {
  828. struct intel_vgpu *vgpu = spt->vgpu;
  829. struct intel_gvt *gvt = vgpu->gvt;
  830. struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
  831. struct intel_vgpu_ppgtt_spt *s;
  832. struct intel_gvt_gtt_entry se, ge;
  833. unsigned long gfn, i;
  834. int ret;
  835. trace_spt_change(spt->vgpu->id, "born", spt,
  836. spt->guest_page.gfn, spt->shadow_page.type);
  837. for_each_present_guest_entry(spt, &ge, i) {
  838. if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
  839. s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
  840. if (IS_ERR(s)) {
  841. ret = PTR_ERR(s);
  842. goto fail;
  843. }
  844. ppgtt_get_shadow_entry(spt, &se, i);
  845. ppgtt_generate_shadow_entry(&se, s, &ge);
  846. ppgtt_set_shadow_entry(spt, &se, i);
  847. } else {
  848. gfn = ops->get_pfn(&ge);
  849. if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
  850. ops->set_pfn(&se, gvt->gtt.scratch_mfn);
  851. ppgtt_set_shadow_entry(spt, &se, i);
  852. continue;
  853. }
  854. ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
  855. if (ret)
  856. goto fail;
  857. }
  858. }
  859. return 0;
  860. fail:
  861. gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
  862. spt, ge.val64, ge.type);
  863. return ret;
  864. }
  865. static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
  866. struct intel_gvt_gtt_entry *se, unsigned long index)
  867. {
  868. struct intel_vgpu *vgpu = spt->vgpu;
  869. struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
  870. int ret;
  871. trace_spt_guest_change(spt->vgpu->id, "remove", spt,
  872. spt->shadow_page.type, se->val64, index);
  873. gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
  874. se->type, index, se->val64);
  875. if (!ops->test_present(se))
  876. return 0;
  877. if (ops->get_pfn(se) ==
  878. vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
  879. return 0;
  880. if (gtt_type_is_pt(get_next_pt_type(se->type))) {
  881. struct intel_vgpu_ppgtt_spt *s =
  882. intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
  883. if (!s) {
  884. gvt_vgpu_err("fail to find guest page\n");
  885. ret = -ENXIO;
  886. goto fail;
  887. }
  888. ret = ppgtt_invalidate_spt(s);
  889. if (ret)
  890. goto fail;
  891. } else
  892. ppgtt_invalidate_pte(spt, se);
  893. return 0;
  894. fail:
  895. gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
  896. spt, se->val64, se->type);
  897. return ret;
  898. }
  899. static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
  900. struct intel_gvt_gtt_entry *we, unsigned long index)
  901. {
  902. struct intel_vgpu *vgpu = spt->vgpu;
  903. struct intel_gvt_gtt_entry m;
  904. struct intel_vgpu_ppgtt_spt *s;
  905. int ret;
  906. trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
  907. we->val64, index);
  908. gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
  909. we->type, index, we->val64);
  910. if (gtt_type_is_pt(get_next_pt_type(we->type))) {
  911. s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
  912. if (IS_ERR(s)) {
  913. ret = PTR_ERR(s);
  914. goto fail;
  915. }
  916. ppgtt_get_shadow_entry(spt, &m, index);
  917. ppgtt_generate_shadow_entry(&m, s, we);
  918. ppgtt_set_shadow_entry(spt, &m, index);
  919. } else {
  920. ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
  921. if (ret)
  922. goto fail;
  923. }
  924. return 0;
  925. fail:
  926. gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
  927. spt, we->val64, we->type);
  928. return ret;
  929. }
  930. static int sync_oos_page(struct intel_vgpu *vgpu,
  931. struct intel_vgpu_oos_page *oos_page)
  932. {
  933. const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
  934. struct intel_gvt *gvt = vgpu->gvt;
  935. struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
  936. struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
  937. struct intel_gvt_gtt_entry old, new;
  938. int index;
  939. int ret;
  940. trace_oos_change(vgpu->id, "sync", oos_page->id,
  941. spt, spt->guest_page.type);
  942. old.type = new.type = get_entry_type(spt->guest_page.type);
  943. old.val64 = new.val64 = 0;
  944. for (index = 0; index < (I915_GTT_PAGE_SIZE >>
  945. info->gtt_entry_size_shift); index++) {
  946. ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
  947. ops->get_entry(NULL, &new, index, true,
  948. spt->guest_page.gfn << PAGE_SHIFT, vgpu);
  949. if (old.val64 == new.val64
  950. && !test_and_clear_bit(index, spt->post_shadow_bitmap))
  951. continue;
  952. trace_oos_sync(vgpu->id, oos_page->id,
  953. spt, spt->guest_page.type,
  954. new.val64, index);
  955. ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
  956. if (ret)
  957. return ret;
  958. ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
  959. }
  960. spt->guest_page.write_cnt = 0;
  961. list_del_init(&spt->post_shadow_list);
  962. return 0;
  963. }
  964. static int detach_oos_page(struct intel_vgpu *vgpu,
  965. struct intel_vgpu_oos_page *oos_page)
  966. {
  967. struct intel_gvt *gvt = vgpu->gvt;
  968. struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
  969. trace_oos_change(vgpu->id, "detach", oos_page->id,
  970. spt, spt->guest_page.type);
  971. spt->guest_page.write_cnt = 0;
  972. spt->guest_page.oos_page = NULL;
  973. oos_page->spt = NULL;
  974. list_del_init(&oos_page->vm_list);
  975. list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
  976. return 0;
  977. }
  978. static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
  979. struct intel_vgpu_ppgtt_spt *spt)
  980. {
  981. struct intel_gvt *gvt = spt->vgpu->gvt;
  982. int ret;
  983. ret = intel_gvt_hypervisor_read_gpa(spt->vgpu,
  984. spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
  985. oos_page->mem, I915_GTT_PAGE_SIZE);
  986. if (ret)
  987. return ret;
  988. oos_page->spt = spt;
  989. spt->guest_page.oos_page = oos_page;
  990. list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
  991. trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
  992. spt, spt->guest_page.type);
  993. return 0;
  994. }
  995. static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
  996. {
  997. struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
  998. int ret;
  999. ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
  1000. if (ret)
  1001. return ret;
  1002. trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
  1003. spt, spt->guest_page.type);
  1004. list_del_init(&oos_page->vm_list);
  1005. return sync_oos_page(spt->vgpu, oos_page);
  1006. }
  1007. static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
  1008. {
  1009. struct intel_gvt *gvt = spt->vgpu->gvt;
  1010. struct intel_gvt_gtt *gtt = &gvt->gtt;
  1011. struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
  1012. int ret;
  1013. WARN(oos_page, "shadow PPGTT page already has an oos page\n");
  1014. if (list_empty(&gtt->oos_page_free_list_head)) {
  1015. oos_page = container_of(gtt->oos_page_use_list_head.next,
  1016. struct intel_vgpu_oos_page, list);
  1017. ret = ppgtt_set_guest_page_sync(oos_page->spt);
  1018. if (ret)
  1019. return ret;
  1020. ret = detach_oos_page(spt->vgpu, oos_page);
  1021. if (ret)
  1022. return ret;
  1023. } else
  1024. oos_page = container_of(gtt->oos_page_free_list_head.next,
  1025. struct intel_vgpu_oos_page, list);
  1026. return attach_oos_page(oos_page, spt);
  1027. }
  1028. static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
  1029. {
  1030. struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
  1031. if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
  1032. return -EINVAL;
  1033. trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
  1034. spt, spt->guest_page.type);
  1035. list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
  1036. return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
  1037. }
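/*
 * Out-of-sync flow in short: attach_oos_page() snapshots the guest page
 * table into oos_page->mem, ppgtt_set_guest_page_oos() then drops the
 * write protection so further guest PTE writes go untracked, and
 * ppgtt_set_guest_page_sync() later re-arms tracking and replays only
 * the entries that changed relative to the snapshot (see
 * sync_oos_page()).
 */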
  1038. /**
  1039. * intel_vgpu_sync_oos_pages - sync all out-of-sync shadow pages for a vGPU
  1040. * @vgpu: a vGPU
  1041. *
  1042. * This function is called before submitting a guest workload to the host,
  1043. * to sync all the out-of-sync shadow pages of the vGPU.
  1044. *
  1045. * Returns:
  1046. * Zero on success, negative error code if failed.
  1047. */
  1048. int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
  1049. {
  1050. struct list_head *pos, *n;
  1051. struct intel_vgpu_oos_page *oos_page;
  1052. int ret;
  1053. if (!enable_out_of_sync)
  1054. return 0;
  1055. list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
  1056. oos_page = container_of(pos,
  1057. struct intel_vgpu_oos_page, vm_list);
  1058. ret = ppgtt_set_guest_page_sync(oos_page->spt);
  1059. if (ret)
  1060. return ret;
  1061. }
  1062. return 0;
  1063. }
  1064. /*
  1065. * The heart of PPGTT shadow page table.
  1066. */
  1067. static int ppgtt_handle_guest_write_page_table(
  1068. struct intel_vgpu_ppgtt_spt *spt,
  1069. struct intel_gvt_gtt_entry *we, unsigned long index)
  1070. {
  1071. struct intel_vgpu *vgpu = spt->vgpu;
  1072. int type = spt->shadow_page.type;
  1073. struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
  1074. struct intel_gvt_gtt_entry old_se;
  1075. int new_present;
  1076. int ret;
  1077. new_present = ops->test_present(we);
  1078. /*
  1079. * Add the new entry first and remove the old one afterwards; this
  1080. * guarantees the ppgtt table stays valid during the window between
  1081. * the add and the removal.
  1082. */
  1083. ppgtt_get_shadow_entry(spt, &old_se, index);
  1084. if (new_present) {
  1085. ret = ppgtt_handle_guest_entry_add(spt, we, index);
  1086. if (ret)
  1087. goto fail;
  1088. }
  1089. ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
  1090. if (ret)
  1091. goto fail;
  1092. if (!new_present) {
  1093. ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn);
  1094. ppgtt_set_shadow_entry(spt, &old_se, index);
  1095. }
  1096. return 0;
  1097. fail:
  1098. gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
  1099. spt, we->val64, we->type);
  1100. return ret;
  1101. }
  1102. static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
  1103. {
  1104. return enable_out_of_sync
  1105. && gtt_type_is_pte_pt(spt->guest_page.type)
  1106. && spt->guest_page.write_cnt >= 2;
  1107. }
  1108. static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
  1109. unsigned long index)
  1110. {
  1111. set_bit(index, spt->post_shadow_bitmap);
  1112. if (!list_empty(&spt->post_shadow_list))
  1113. return;
  1114. list_add_tail(&spt->post_shadow_list,
  1115. &spt->vgpu->gtt.post_shadow_list_head);
  1116. }
  1117. /**
  1118. * intel_vgpu_flush_post_shadow - flush the post shadow transactions
  1119. * @vgpu: a vGPU
  1120. *
  1121. * This function is called before submitting a guest workload to the host,
  1122. * to flush all the pending post-shadow transactions for a vGPU.
  1123. *
  1124. * Returns:
  1125. * Zero on success, negative error code if failed.
  1126. */
  1127. int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
  1128. {
  1129. struct list_head *pos, *n;
  1130. struct intel_vgpu_ppgtt_spt *spt;
  1131. struct intel_gvt_gtt_entry ge;
  1132. unsigned long index;
  1133. int ret;
  1134. list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
  1135. spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
  1136. post_shadow_list);
  1137. for_each_set_bit(index, spt->post_shadow_bitmap,
  1138. GTT_ENTRY_NUM_IN_ONE_PAGE) {
  1139. ppgtt_get_guest_entry(spt, &ge, index);
  1140. ret = ppgtt_handle_guest_write_page_table(spt,
  1141. &ge, index);
  1142. if (ret)
  1143. return ret;
  1144. clear_bit(index, spt->post_shadow_bitmap);
  1145. }
  1146. list_del_init(&spt->post_shadow_list);
  1147. }
  1148. return 0;
  1149. }
  1150. static int ppgtt_handle_guest_write_page_table_bytes(
  1151. struct intel_vgpu_ppgtt_spt *spt,
  1152. u64 pa, void *p_data, int bytes)
  1153. {
  1154. struct intel_vgpu *vgpu = spt->vgpu;
  1155. struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
  1156. const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
  1157. struct intel_gvt_gtt_entry we, se;
  1158. unsigned long index;
  1159. int ret;
  1160. index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
  1161. ppgtt_get_guest_entry(spt, &we, index);
  1162. ops->test_pse(&we);
  1163. if (bytes == info->gtt_entry_size) {
  1164. ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
  1165. if (ret)
  1166. return ret;
  1167. } else {
  1168. if (!test_bit(index, spt->post_shadow_bitmap)) {
  1169. int type = spt->shadow_page.type;
  1170. ppgtt_get_shadow_entry(spt, &se, index);
  1171. ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
  1172. if (ret)
  1173. return ret;
  1174. ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
  1175. ppgtt_set_shadow_entry(spt, &se, index);
  1176. }
  1177. ppgtt_set_post_shadow(spt, index);
  1178. }
  1179. if (!enable_out_of_sync)
  1180. return 0;
  1181. spt->guest_page.write_cnt++;
  1182. if (spt->guest_page.oos_page)
  1183. ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
  1184. false, 0, vgpu);
  1185. if (can_do_out_of_sync(spt)) {
  1186. if (!spt->guest_page.oos_page)
  1187. ppgtt_allocate_oos_page(spt);
  1188. ret = ppgtt_set_guest_page_oos(spt);
  1189. if (ret < 0)
  1190. return ret;
  1191. }
  1192. return 0;
  1193. }
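/*
 * Illustrative example: a 4-byte guest write to offset 0x1c of a tracked
 * page table lands on entry index 0x1c >> 3 = 3 (8-byte entries). Since
 * bytes != gtt_entry_size, the old shadow entry at that index is torn
 * down and pointed at the scratch page, and index 3 is marked in
 * post_shadow_bitmap so the whole entry is re-shadowed by
 * intel_vgpu_flush_post_shadow() before the next workload is submitted.
 */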
  1194. static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
  1195. {
  1196. struct intel_vgpu *vgpu = mm->vgpu;
  1197. struct intel_gvt *gvt = vgpu->gvt;
  1198. struct intel_gvt_gtt *gtt = &gvt->gtt;
  1199. struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
  1200. struct intel_gvt_gtt_entry se;
  1201. int index;
  1202. if (!mm->ppgtt_mm.shadowed)
  1203. return;
  1204. for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
  1205. ppgtt_get_shadow_root_entry(mm, &se, index);
  1206. if (!ops->test_present(&se))
  1207. continue;
  1208. ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
  1209. se.val64 = 0;
  1210. ppgtt_set_shadow_root_entry(mm, &se, index);
  1211. trace_spt_guest_change(vgpu->id, "destroy root pointer",
  1212. NULL, se.type, se.val64, index);
  1213. }
  1214. mm->ppgtt_mm.shadowed = false;
  1215. }
  1216. static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
  1217. {
  1218. struct intel_vgpu *vgpu = mm->vgpu;
  1219. struct intel_gvt *gvt = vgpu->gvt;
  1220. struct intel_gvt_gtt *gtt = &gvt->gtt;
  1221. struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
  1222. struct intel_vgpu_ppgtt_spt *spt;
  1223. struct intel_gvt_gtt_entry ge, se;
  1224. int index, ret;
  1225. if (mm->ppgtt_mm.shadowed)
  1226. return 0;
  1227. mm->ppgtt_mm.shadowed = true;
  1228. for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
  1229. ppgtt_get_guest_root_entry(mm, &ge, index);
  1230. if (!ops->test_present(&ge))
  1231. continue;
  1232. trace_spt_guest_change(vgpu->id, __func__, NULL,
  1233. ge.type, ge.val64, index);
  1234. spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
  1235. if (IS_ERR(spt)) {
  1236. gvt_vgpu_err("fail to populate guest root pointer\n");
  1237. ret = PTR_ERR(spt);
  1238. goto fail;
  1239. }
  1240. ppgtt_generate_shadow_entry(&se, spt, &ge);
  1241. ppgtt_set_shadow_root_entry(mm, &se, index);
  1242. trace_spt_guest_change(vgpu->id, "populate root pointer",
  1243. NULL, se.type, se.val64, index);
  1244. }
  1245. return 0;
  1246. fail:
  1247. invalidate_ppgtt_mm(mm);
  1248. return ret;
  1249. }
  1250. static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
  1251. {
  1252. struct intel_vgpu_mm *mm;
  1253. mm = kzalloc(sizeof(*mm), GFP_KERNEL);
  1254. if (!mm)
  1255. return NULL;
  1256. mm->vgpu = vgpu;
  1257. kref_init(&mm->ref);
  1258. atomic_set(&mm->pincount, 0);
  1259. return mm;
  1260. }
  1261. static void vgpu_free_mm(struct intel_vgpu_mm *mm)
  1262. {
  1263. kfree(mm);
  1264. }
  1265. /**
  1266. * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
  1267. * @vgpu: a vGPU
  1268. * @root_entry_type: ppgtt root entry type
  1269. * @pdps: guest pdps.
  1270. *
  1271. * This function is used to create a ppgtt mm object for a vGPU.
  1272. *
  1273. * Returns:
  1274. * The created mm object on success, or an ERR_PTR-encoded error code if failed.
  1275. */
  1276. struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
  1277. intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
  1278. {
  1279. struct intel_gvt *gvt = vgpu->gvt;
  1280. struct intel_vgpu_mm *mm;
  1281. int ret;
  1282. mm = vgpu_alloc_mm(vgpu);
  1283. if (!mm)
  1284. return ERR_PTR(-ENOMEM);
  1285. mm->type = INTEL_GVT_MM_PPGTT;
  1286. GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
  1287. root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
  1288. mm->ppgtt_mm.root_entry_type = root_entry_type;
  1289. INIT_LIST_HEAD(&mm->ppgtt_mm.list);
  1290. INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
  1291. if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
  1292. mm->ppgtt_mm.guest_pdps[0] = pdps[0];
  1293. else
  1294. memcpy(mm->ppgtt_mm.guest_pdps, pdps,
  1295. sizeof(mm->ppgtt_mm.guest_pdps));
  1296. ret = shadow_ppgtt_mm(mm);
  1297. if (ret) {
  1298. gvt_vgpu_err("failed to shadow ppgtt mm\n");
  1299. vgpu_free_mm(mm);
  1300. return ERR_PTR(ret);
  1301. }
  1302. list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
  1303. list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
  1304. return mm;
  1305. }
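
/*
 * Illustrative usage sketch (editorial, not part of this file): a caller that
 * reacts to a guest advertising a new 4-level PPGTT might look roughly like
 * the snippet below. The array size and the surrounding handler are
 * assumptions for the example only; the called functions are the ones defined
 * in this file or in gtt.h.
 *
 *	u64 pdps[4];	// filled from the guest-provided root pointers
 *	struct intel_vgpu_mm *mm;
 *
 *	mm = intel_vgpu_create_ppgtt_mm(vgpu, GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
 *					pdps);
 *	if (IS_ERR(mm))
 *		return PTR_ERR(mm);
 *	// ... use mm, then drop the reference when done:
 *	intel_vgpu_mm_put(mm);
 */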

static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;
	unsigned long nr_entries;

	mm = vgpu_alloc_mm(vgpu);
	if (!mm)
		return ERR_PTR(-ENOMEM);

	mm->type = INTEL_GVT_MM_GGTT;

	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
	mm->ggtt_mm.virtual_ggtt = vzalloc(nr_entries *
					   vgpu->gvt->device_info.gtt_entry_size);
	if (!mm->ggtt_mm.virtual_ggtt) {
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}

	return mm;
}

/**
 * _intel_vgpu_mm_release - destroy a mm object
 * @mm_ref: a kref object
 *
 * This function is used to destroy a mm object for vGPU
 *
 */
void _intel_vgpu_mm_release(struct kref *mm_ref)
{
	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);

	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
		gvt_err("vgpu mm pin count bug detected\n");

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		list_del(&mm->ppgtt_mm.list);
		list_del(&mm->ppgtt_mm.lru_list);
		invalidate_ppgtt_mm(mm);
	} else {
		vfree(mm->ggtt_mm.virtual_ggtt);
	}

	vgpu_free_mm(mm);
}

/**
 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when a user no longer needs to use a vGPU mm object
 */
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
	atomic_dec(&mm->pincount);
}

/**
 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when a user wants to use a vGPU mm object. If this
 * mm object hasn't been shadowed yet, the shadow will be populated at this
 * time.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
{
	int ret;

	atomic_inc(&mm->pincount);

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		ret = shadow_ppgtt_mm(mm);
		if (ret)
			return ret;

		list_move_tail(&mm->ppgtt_mm.lru_list,
			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
	}

	return 0;
}
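
/*
 * Illustrative note (editorial): pin/unpin calls are expected to be balanced.
 * A typical dispatch path (the variable name below is only for illustration)
 * would pair them around use of the mm:
 *
 *	ret = intel_vgpu_pin_mm(workload_mm);
 *	if (ret)
 *		return ret;
 *	// ... translate addresses / submit work through workload_mm ...
 *	intel_vgpu_unpin_mm(workload_mm);
 *
 * While pinned, the mm is moved to the tail of the global LRU list so that
 * reclaim_one_ppgtt_mm() below prefers idle, unpinned mms as victims.
 */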

static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos, *n;

	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);

		if (atomic_read(&mm->pincount))
			continue;

		list_del_init(&mm->ppgtt_mm.lru_list);
		invalidate_ppgtt_mm(mm);
		return 1;
	}
	return 0;
}
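
/*
 * Illustrative note (editorial): reclaim_one_ppgtt_mm() only unshadows the
 * victim mm (invalidate_ppgtt_mm() clears its 'shadowed' flag and drops its
 * shadow root entries); the mm object itself stays on the vGPU's ppgtt mm
 * list and is re-shadowed by the next shadow_ppgtt_mm() call, e.g. from
 * intel_vgpu_pin_mm(). The return value of 1 means one mm was reclaimed,
 * 0 means nothing could be reclaimed.
 */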

/*
 * GMA translation APIs.
 */
static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;

	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s)
		return -ENXIO;

	if (!guest)
		ppgtt_get_shadow_entry(s, e, index);
	else
		ppgtt_get_guest_entry(s, e, index);

	return 0;
}

/**
 * intel_vgpu_gma_to_gpa - translate a gma to GPA
 * @mm: mm object. could be a PPGTT or GGTT mm object
 * @gma: graphics memory address in this mm object
 *
 * This function is used to translate a graphics memory address in specific
 * graphics memory space to guest physical address.
 *
 * Returns:
 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
 */
unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
	struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
	unsigned long gma_index[4];
	struct intel_gvt_gtt_entry e;
	int i, levels = 0;
	int ret;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
		   mm->type != INTEL_GVT_MM_PPGTT);

	if (mm->type == INTEL_GVT_MM_GGTT) {
		if (!vgpu_gmadr_is_valid(vgpu, gma))
			goto err;

		ggtt_get_guest_entry(mm, &e,
			gma_ops->gma_to_ggtt_pte_index(gma));

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
			+ (gma & ~I915_GTT_PAGE_MASK);

		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
	} else {
		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e, 0);

			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
			gma_index[2] = gma_ops->gma_to_pde_index(gma);
			gma_index[3] = gma_ops->gma_to_pte_index(gma);
			levels = 4;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e,
					gma_ops->gma_to_l3_pdp_index(gma));

			gma_index[0] = gma_ops->gma_to_pde_index(gma);
			gma_index[1] = gma_ops->gma_to_pte_index(gma);
			levels = 2;
			break;
		default:
			GEM_BUG_ON(1);
		}

		/* walk the shadow page table and get gpa from guest entry */
		for (i = 0; i < levels; i++) {
			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
				(i == levels - 1));
			if (ret)
				goto err;

			if (!pte_ops->test_present(&e)) {
				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
				goto err;
			}
		}

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
					(gma & ~I915_GTT_PAGE_MASK);
		trace_gma_translate(vgpu->id, "ppgtt", 0,
				    mm->ppgtt_mm.root_entry_type, gma, gpa);
	}

	return gpa;
err:
	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
	return INTEL_GVT_INVALID_ADDR;
}
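
/*
 * Illustrative example (editorial) of the PPGTT walk above, assuming the
 * usual gen8 4-level, 4KB-page layout (9 index bits per level, 12 offset
 * bits):
 *
 *	gma bits 47..39 -> PML4 index  (gma_to_pml4_index)
 *	gma bits 38..30 -> PDP index   (gma_to_l4_pdp_index)
 *	gma bits 29..21 -> PDE index   (gma_to_pde_index)
 *	gma bits 20..12 -> PTE index   (gma_to_pte_index)
 *	gma bits 11..0  -> page offset (gma & ~I915_GTT_PAGE_MASK)
 *
 * Each loop iteration resolves one level through the *shadow* tables, and
 * only the final (leaf) level is read from the guest entry, so the returned
 * address is a guest physical address rather than a host one.
 */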

static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
	unsigned int off, void *p_data, unsigned int bytes)
{
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	unsigned long index = off >> info->gtt_entry_size_shift;
	struct intel_gvt_gtt_entry e;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	ggtt_get_guest_entry(ggtt_mm, &e, index);
	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
			bytes);
	return 0;
}

/**
 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data will be returned to guest
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register read
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
	return ret;
}
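
/*
 * Illustrative note (editorial): callers pass the raw register offset, so the
 * wrapper above first subtracts gtt_start_offset (the offset at which GGTT
 * entries begin inside the device's MMIO space) to get an offset relative to
 * the virtual GGTT before delegating to emulate_ggtt_mmio_read(). The
 * write-side wrapper further below performs the same adjustment.
 */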

static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *entry)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;

	pfn = pte_ops->get_pfn(entry);
	if (pfn != vgpu->gvt->gtt.scratch_mfn)
		intel_gvt_hypervisor_dma_unmap_guest_page(vgpu,
						pfn << PAGE_SHIFT);
}

static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
	unsigned long gma, gfn;
	struct intel_gvt_gtt_entry e, m;
	dma_addr_t dma_addr;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;

	/* the VM may configure the whole GM space when ballooning is used */
	if (!vgpu_gmadr_is_valid(vgpu, gma))
		return 0;

	ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);

	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
			bytes);

	if (ops->test_present(&e)) {
		gfn = ops->get_pfn(&e);
		m = e;

		/* one PTE update may be issued in multiple writes and the
		 * first write may not construct a valid gfn
		 */
		if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
			goto out;
		}

		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
							      &dma_addr);
		if (ret) {
			gvt_vgpu_err("fail to populate guest ggtt entry\n");
			/* the guest driver may read/write the entry while it
			 * is only partially updated; in that situation the
			 * p2m mapping fails, so point the shadow entry to a
			 * scratch page instead.
			 */
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		} else
			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
	} else {
		ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index);
		ggtt_invalidate_pte(vgpu, &m);
		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		ops->clear_present(&m);
	}

out:
	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_invalidate(gvt->dev_priv);
	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
	return 0;
}
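
/*
 * Illustrative example (editorial) of the partial-update handling above: a
 * guest that writes a 64-bit GGTT PTE with two 32-bit MMIO writes reaches
 * this function twice for the same g_gtt_index. After the first 4-byte write
 * the assembled e.val64 may already have the present bit set but carry a
 * bogus gfn, so the shadow entry is temporarily pointed at the scratch page;
 * the second write completes the PTE and installs the real dma mapping.
 */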

/**
 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data from guest write
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register write
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
		unsigned int off, void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
	return ret;
}

static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t type)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int page_entry_num = I915_GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	void *scratch_pt;
	int i;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
	if (!scratch_pt) {
		gvt_vgpu_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
			4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_vgpu_err("fail to dmamap scratch_pt\n");
		__free_page(virt_to_page(scratch_pt));
		return -ENOMEM;
	}
	gtt->scratch_pt[type].page_mfn =
		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
			vgpu->id, type, gtt->scratch_pt[type].page_mfn);

	/* Build the tree by filling the scratch pt with entries that point
	 * to the next-level scratch pt or the scratch page. scratch_pt[type]
	 * is the scratch pt/scratch page used by page tables of level 'type'.
	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
	 * GTT_TYPE_PPGTT_PDE_PT level pt; that scratch_pt itself is of type
	 * GTT_TYPE_PPGTT_PTE_PT and is filled with the scratch page mfn.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/* The entry parameters like present/writeable/cache type
		 * are set to the same values as i915's scratch page tree.
		 */
		se.val64 |= _PAGE_PRESENT | _PAGE_RW;
		if (type == GTT_TYPE_PPGTT_PDE_PT)
			se.val64 |= PPAT_CACHED;

		for (i = 0; i < page_entry_num; i++)
			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
	}

	return 0;
}
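
/*
 * Illustrative picture (editorial) of the scratch hierarchy built by the
 * repeated alloc_scratch_pages() calls in create_scratch_page_tree() below.
 * Each level is one zeroed page whose entries point at the scratch page of
 * the level below, roughly:
 *
 *	scratch_pt[GTT_TYPE_PPGTT_PML4_PT] entries -> scratch_pt[PDP_PT]
 *	scratch_pt[GTT_TYPE_PPGTT_PDP_PT]  entries -> scratch_pt[PDE_PT]
 *	scratch_pt[GTT_TYPE_PPGTT_PDE_PT]  entries -> scratch_pt[PTE_PT]
 *	scratch_pt[GTT_TYPE_PPGTT_PTE_PT]  left zeroed (no entries filled)
 *
 * so a walk through a non-present guest entry always resolves to a harmless
 * scratch page instead of an unmapped address.
 */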

static int release_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		if (vgpu->gtt.scratch_pt[i].page != NULL) {
			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
					I915_GTT_PAGE_SHIFT);
			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
			__free_page(vgpu->gtt.scratch_pt[i].page);
			vgpu->gtt.scratch_pt[i].page = NULL;
			vgpu->gtt.scratch_pt[i].page_mfn = 0;
		}
	}

	return 0;
}

static int create_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i, ret;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		ret = alloc_scratch_pages(vgpu, i);
		if (ret)
			goto err;
	}

	return 0;

err:
	release_scratch_page_tree(vgpu);
	return ret;
}

/**
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to initialize per-vGPU graphics memory virtualization
 * components.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;

	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);

	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_list_head);
	INIT_LIST_HEAD(&gtt->post_shadow_list_head);

	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
	if (IS_ERR(gtt->ggtt_mm)) {
		gvt_vgpu_err("fail to create mm for ggtt.\n");
		return PTR_ERR(gtt->ggtt_mm);
	}

	intel_vgpu_reset_ggtt(vgpu, false);

	return create_scratch_page_tree(vgpu);
}

static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		intel_vgpu_destroy_mm(mm);
	}

	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
		gvt_err("vgpu ppgtt mm is not fully destroyed\n");

	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
		gvt_err("Why we still has spt not freed?\n");
		ppgtt_free_all_spt(vgpu);
	}
}

static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
{
	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
	vgpu->gtt.ggtt_mm = NULL;
}

/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_destroy_ggtt_mm(vgpu);
	release_scratch_page_tree(vgpu);
}

static void clean_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;

	WARN(!list_empty(&gtt->oos_page_use_list_head),
		"someone is still using oos page\n");

	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
		list_del(&oos_page->list);
		kfree(oos_page);
	}
}

static int setup_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page;
	int i;
	int ret;

	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);

	for (i = 0; i < preallocated_oos_pages; i++) {
		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
		if (!oos_page) {
			ret = -ENOMEM;
			goto fail;
		}

		INIT_LIST_HEAD(&oos_page->list);
		INIT_LIST_HEAD(&oos_page->vm_list);
		oos_page->id = i;
		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
	}

	gvt_dbg_mm("%d oos pages preallocated\n", i);

	return 0;
fail:
	clean_spt_oos(gvt);
	return ret;
}
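
/*
 * Illustrative note (editorial): the out-of-sync (OOS) machinery, used
 * earlier in this file when enable_out_of_sync is set, trades per-write
 * protection faults for batched re-synchronization. preallocated_oos_pages
 * bounds how many guest page tables may be out of sync at once; each
 * oos_page caches the contents of one guest page table so the guest page can
 * be un-write-protected while "out of sync", and the cached copy is synced
 * back into the shadow table before dependent work is submitted.
 */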

/**
 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
 * @vgpu: a vGPU
 * @pdps: guest PPGTT root (pdp) pointers
 *
 * This function is used to find a PPGTT mm object from mm object pool
 *
 * Returns:
 * pointer to mm object on success, NULL if failed.
 */
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
		u64 pdps[])
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos;

	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);

		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
				return mm;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
				    sizeof(mm->ppgtt_mm.guest_pdps)))
				return mm;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}

	return NULL;
}

/**
 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps
 *
 * This function is used to find or create a PPGTT mm object from a guest.
 *
 * Returns:
 * pointer to mm object on success, ERR_PTR() if failed.
 */
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (mm) {
		intel_vgpu_mm_get(mm);
	} else {
		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
		if (IS_ERR(mm))
			gvt_vgpu_err("fail to create mm\n");
	}
	return mm;
}
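
/*
 * Illustrative usage sketch (editorial): a guest-to-host notification handler
 * that advertises or retires a set of PDPs (the handler context and the pdps
 * variable here are assumptions for the example only) would typically pair
 * these two calls:
 *
 *	mm = intel_vgpu_get_ppgtt_mm(vgpu, GTT_TYPE_PPGTT_ROOT_L4_ENTRY, pdps);
 *	if (IS_ERR(mm))
 *		return PTR_ERR(mm);
 *	// ... later, when the guest tears this PPGTT down:
 *	intel_vgpu_put_ppgtt_mm(vgpu, pdps);
 *
 * intel_vgpu_put_ppgtt_mm() is defined just below.
 */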

/**
 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
 * @vgpu: a vGPU
 * @pdps: guest pdps
 *
 * This function is used to find a PPGTT mm object from a guest and drop the
 * reference taken on it; the object is destroyed once its last reference
 * goes away.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (!mm) {
		gvt_vgpu_err("fail to find ppgtt instance.\n");
		return -EINVAL;
	}
	intel_vgpu_mm_put(mm);
	return 0;
}

/**
 * intel_gvt_init_gtt - initialize mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the initialization stage, to initialize
 * the mm components of a GVT device.
 *
 * Returns:
 * zero on success, negative error code if failed.
 */
int intel_gvt_init_gtt(struct intel_gvt *gvt)
{
	int ret;
	void *page;
	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	gvt_dbg_core("init gtt\n");

	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
		|| IS_KABYLAKE(gvt->dev_priv)) {
		gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
		gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
	} else {
		return -ENODEV;
	}

	page = (void *)get_zeroed_page(GFP_KERNEL);
	if (!page) {
		gvt_err("fail to allocate scratch ggtt page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(page), 0,
			4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_err("fail to dmamap scratch ggtt page\n");
		__free_page(virt_to_page(page));
		return -ENOMEM;
	}

	gvt->gtt.scratch_page = virt_to_page(page);
	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);

	if (enable_out_of_sync) {
		ret = setup_spt_oos(gvt);
		if (ret) {
			gvt_err("fail to initialize SPT oos\n");
			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
			__free_page(gvt->gtt.scratch_page);
			return ret;
		}
	}
	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
	return 0;
}

/**
 * intel_gvt_clean_gtt - clean up mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the driver unloading stage, to clean up
 * the mm components of a GVT device.
 *
 */
void intel_gvt_clean_gtt(struct intel_gvt *gvt)
{
	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
					I915_GTT_PAGE_SHIFT);

	dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);

	__free_page(gvt->gtt.scratch_page);

	if (enable_out_of_sync)
		clean_spt_oos(gvt);
}

/**
 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
 * @vgpu: a vGPU
 *
 * This function is called to invalidate all PPGTT instances of a vGPU.
 *
 */
void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		if (mm->type == INTEL_GVT_MM_PPGTT) {
			list_del_init(&mm->ppgtt_mm.lru_list);
			if (mm->ppgtt_mm.shadowed)
				invalidate_ppgtt_mm(mm);
		}
	}
}

/**
 * intel_vgpu_reset_ggtt - reset the GGTT entry
 * @vgpu: a vGPU
 * @invalidate_old: invalidate old entries
 *
 * This function is called at the vGPU create stage
 * to reset all the GGTT entries.
 *
 */
void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct drm_i915_private *dev_priv = gvt->dev_priv;
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
	struct intel_gvt_gtt_entry old_entry;
	u32 index;
	u32 num_entries;

	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
	pte_ops->set_present(&entry);

	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
	while (num_entries--) {
		if (invalidate_old) {
			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
			ggtt_invalidate_pte(vgpu, &old_entry);
		}
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
	}

	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
	while (num_entries--) {
		if (invalidate_old) {
			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
			ggtt_invalidate_pte(vgpu, &old_entry);
		}
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
	}

	ggtt_invalidate(dev_priv);
}

/**
 * intel_vgpu_reset_gtt - reset all GTT related status
 * @vgpu: a vGPU
 *
 * This function is called from the vfio core to reset all
 * GTT related status, including GGTT, PPGTT, scratch page.
 *
 */
void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
{
	/* Shadow pages are only created when there is no page
	 * table tracking data, so remove page tracking data after
	 * removing the shadow pages.
	 */
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_reset_ggtt(vgpu, true);
}