gtt.c

/*
 * GTT virtualization
 *
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Xiao Zheng <xiao.zheng@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"

#if defined(VERBOSE_DEBUG)
#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
#else
#define gvt_vdbg_mm(fmt, args...)
#endif

static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

/*
 * validate a gm address and related range size,
 * translate it to host gm address
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
			&& !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
		gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n",
				addr, size);
		return false;
	}
	return true;
}

/* translate a guest gmadr to host gmadr */
int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
{
	if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
		 "invalid guest gmadr %llx\n", g_addr))
		return -EACCES;

	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
		*h_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (g_addr - vgpu_aperture_offset(vgpu));
	else
		*h_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (g_addr - vgpu_hidden_offset(vgpu));
	return 0;
}

/* translate a host gmadr to guest gmadr */
int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
{
	if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
		 "invalid host gmadr %llx\n", h_addr))
		return -EACCES;

	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
		*g_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
	else
		*g_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
	return 0;
}

int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
			     unsigned long *h_index)
{
	u64 h_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
				       &h_addr);
	if (ret)
		return ret;

	*h_index = h_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}

int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
			     unsigned long *g_index)
{
	u64 g_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
				       &g_addr);
	if (ret)
		return ret;

	*g_index = g_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}

#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)

/*
 * Mappings between GTT_TYPE* enumerations.
 * The following information can be looked up for a given type:
 * - type of the next-level page table
 * - type of an entry inside this level of page table
 * - type of the entry when its PSE bit is set
 *
 * If the given type does not carry that kind of information, e.g. asking
 * for the PSE type of an L4 root entry (an L4 root entry has no PSE bit),
 * or for the next-level page table type of a PTE page table (a PTE page
 * table is the last level), GTT_TYPE_INVALID is returned. This is useful
 * when traversing a page table.
 */
struct gtt_type_table_entry {
	int entry_type;
	int pt_type;
	int next_pt_type;
	int pse_entry_type;
};

#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.pt_type = cpt_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}

static struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};

static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}

static inline int get_pt_type(int type)
{
	return gtt_type_table[type].pt_type;
}

static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}

static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}

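/*
 * Example, reading the table above: walking a 4-level PPGTT starts from
 * get_next_pt_type(GTT_TYPE_PPGTT_ROOT_L4_ENTRY) == GTT_TYPE_PPGTT_PML4_PT,
 * whose entries are GTT_TYPE_PPGTT_PML4_ENTRY, and continues down through
 * the PDP and PDE page tables to GTT_TYPE_PPGTT_PTE_PT, whose next_pt_type
 * is GTT_TYPE_INVALID and therefore ends the walk. Similarly,
 * get_pse_type(GTT_TYPE_PPGTT_PDE_ENTRY) == GTT_TYPE_PPGTT_PTE_2M_ENTRY for
 * a PDE whose PSE bit is set.
 */
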
static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

	return readq(addr);
}

static void ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	mmio_hw_access_pre(dev_priv);
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	mmio_hw_access_post(dev_priv);
}

static void write_pte64(struct drm_i915_private *dev_priv,
		unsigned long index, u64 pte)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

	writeq(pte, addr);
}

static inline int gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
	} else {
		e->val64 = *((u64 *)pt + index);
	}
	return 0;
}

static inline int gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		write_pte64(vgpu->gvt->dev_priv, index, e->val64);
	} else {
		*((u64 *)pt + index) = e->val64;
	}
	return 0;
}

#define GTT_HAW 46

#define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30)
#define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21)
#define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12)

static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
{
	unsigned long pfn;

	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
	else
		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
	return pfn;
}

static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
{
	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
		e->val64 &= ~ADDR_1G_MASK;
		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
		e->val64 &= ~ADDR_2M_MASK;
		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
	} else {
		e->val64 &= ~ADDR_4K_MASK;
		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
	}

	e->val64 |= (pfn << PAGE_SHIFT);
}

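/*
 * Note: with GTT_HAW == 46 and PAGE_SHIFT == 12, gen8_gtt_get_pfn() always
 * returns a 4KB-granular frame number. A 2M entry keeps only address bits
 * [45:21], so the low 9 bits of the returned pfn are zero; a 1G entry keeps
 * bits [45:30], leaving the low 18 bits zero. gen8_gtt_set_pfn() applies the
 * same mask to the incoming pfn before merging it back into val64.
 */
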
static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
	/* Entry doesn't have PSE bit. */
	if (get_pse_type(e->type) == GTT_TYPE_INVALID)
		return false;

	e->type = get_entry_type(e->type);
	if (!(e->val64 & _PAGE_PSE))
		return false;

	e->type = get_pse_type(e->type);
	return true;
}

static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
{
	/*
	 * i915 writes the PDP root pointer registers without the present bit
	 * set, and that still works, so root pointer entries need to be
	 * treated specially.
	 */
	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		return (e->val64 != 0);
	else
		return (e->val64 & _PAGE_PRESENT);
}

static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~_PAGE_PRESENT;
}

static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= _PAGE_PRESENT;
}

/*
 * Per-platform GMA routines.
 */
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}

#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));

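/*
 * Example of the gen8 decomposition: for
 * gma = (1ULL << 39) | (2ULL << 30) | (3ULL << 21) | (4ULL << 12) | 0x5,
 * the helpers above yield pml4 index 1, l4 pdp index 2, pde index 3 and
 * pte index 4, with 0x5 left over as the byte offset inside the 4KB page.
 * The l3 variant keeps only two pdp index bits, since a 3-level PPGTT has
 * just four root (PDP) entries.
 */
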
static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.set_present = gtt_entry_set_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};

static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};

/*
 * MM helpers.
 */
static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);

	entry->type = mm->ppgtt_mm.root_entry_type;
	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);

	pte_ops->test_pse(entry);
}

static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, true);
}

static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, false);
}

static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
}

static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, true);
}

static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, false);
}

static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	entry->type = GTT_TYPE_GGTT_PTE;
	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
}

static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
}

/*
 * PPGTT shadow page table helpers.
 */
static inline int ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	int ret;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	ret = ops->get_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
	if (ret)
		return ret;

	ops->test_pse(e);

	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);
	return 0;
}

static inline int ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
}

#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

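/*
 * Each shadow page table (spt) pairs a guest page table page, identified by
 * gfn and kept write-protected via page tracking, with a shadow page used by
 * the hardware, identified by mfn. The ppgtt_get/set_guest_entry() macros go
 * through the hypervisor GPA interface to the guest copy, while
 * ppgtt_get/set_shadow_entry() access the shadow page directly through its
 * kernel mapping (shadow_page.vaddr).
 */
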
static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt;

	spt = kzalloc(sizeof(*spt), gfp_mask);
	if (!spt)
		return NULL;

	spt->shadow_page.page = alloc_page(gfp_mask);
	if (!spt->shadow_page.page) {
		kfree(spt);
		return NULL;
	}
	return spt;
}

static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page);

static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev;

	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);

	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
		       PCI_DMA_BIDIRECTIONAL);

	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);

	if (spt->guest_page.oos_page)
		detach_oos_page(spt->vgpu, spt->guest_page.oos_page);

	intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);

	list_del_init(&spt->post_shadow_list);
	free_spt(spt);
}

static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_ppgtt_spt *spt;
	struct radix_tree_iter iter;
	void **slot;

	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
		spt = radix_tree_deref_slot(slot);
		ppgtt_free_spt(spt);
	}
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes);

static int ppgtt_write_protection_handler(
		struct intel_vgpu_page_track *page_track,
		u64 gpa, void *data, int bytes)
{
	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
	if (ret)
		return ret;
	return ret;
}

/* Find a spt by guest gfn. */
static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
		struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct intel_vgpu_page_track *track;

	track = intel_vgpu_find_page_track(vgpu, gfn);
	if (track && track->handler == ppgtt_write_protection_handler)
		return track->priv_data;

	return NULL;
}

/* Find the spt by shadow page mfn. */
static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
}

static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);

static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
		struct intel_vgpu *vgpu, int type, unsigned long gfn)
{
	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	dma_addr_t daddr;
	int ret;

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
			goto retry;

		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * Init shadow_page.
	 */
	spt->shadow_page.type = type;
	daddr = dma_map_page(kdev, spt->shadow_page.page,
			     0, 4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kdev, daddr)) {
		gvt_vgpu_err("fail to map dma addr\n");
		ret = -EINVAL;
		goto err_free_spt;
	}
	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;

	/*
	 * Init guest_page.
	 */
	spt->guest_page.type = type;
	spt->guest_page.gfn = gfn;

	ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn,
					ppgtt_write_protection_handler, spt);
	if (ret)
		goto err_unmap_dma;

	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
	if (ret)
		goto err_unreg_page_track;

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
	return spt;

err_unreg_page_track:
	intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn);
err_unmap_dma:
	dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
err_free_spt:
	free_spt(spt);
	return ERR_PTR(ret);
}

#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

#define pt_entries(spt) \
	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));

	atomic_inc(&spt->refcount);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);

static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	intel_gvt_gtt_type_t cur_pt_type;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));

	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type) + 1;
		if (ops->get_pfn(e) ==
			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
			return 0;
	}
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
				ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_spt(s);
}

static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *entry)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;
	int type;

	pfn = ops->get_pfn(entry);
	type = spt->shadow_page.type;

	if (pfn == vgpu->gtt.scratch_pt[type].page_mfn)
		return;

	intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
}

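/*
 * Drop one reference on a shadow page. Only when the last reference goes
 * away is the page torn down: every present shadow entry is visited, leaf 4K
 * entries just release their guest page dma mapping (ppgtt_invalidate_pte),
 * while PML4/PDP/PDE entries recurse into the lower-level shadow page via
 * ppgtt_invalidate_spt_by_shadow_entry() before the page itself is freed.
 */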
static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;
	int v = atomic_read(&spt->refcount);

	trace_spt_change(spt->vgpu->id, "die", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));

	if (atomic_dec_return(&spt->refcount) > 0)
		return 0;

	for_each_present_shadow_entry(spt, &e, index) {
		switch (e.type) {
		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
			gvt_vdbg_mm("invalidate 4K entry\n");
			ppgtt_invalidate_pte(spt, &e);
			break;
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			WARN(1, "GVT doesn't support 2M/1GB page\n");
			continue;
		case GTT_TYPE_PPGTT_PML4_ENTRY:
		case GTT_TYPE_PPGTT_PDP_ENTRY:
		case GTT_TYPE_PPGTT_PDE_ENTRY:
			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
			if (ret)
				goto fail;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}

	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_spt(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt, e.val64, e.type);
	return ret;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);

static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	int ret;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));

	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
	if (spt)
		ppgtt_get_spt(spt);
	else {
		int type = get_next_pt_type(we->type);

		spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we));
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
			goto fail;
		}

		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
		if (ret)
			goto fail;

		ret = ppgtt_populate_spt(spt);
		if (ret)
			goto fail;

		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
	}
	return spt;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ERR_PTR(ret);
}

static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	ops->set_pfn(se, s->shadow_page.mfn);
}

static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry se = *ge;
	unsigned long gfn;
	dma_addr_t dma_addr;
	int ret;

	if (!pte_ops->test_present(ge))
		return 0;

	gfn = pte_ops->get_pfn(ge);

	switch (ge->type) {
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		gvt_vdbg_mm("shadow 4K gtt entry\n");
		break;
	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
		gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n");
		return -EINVAL;
	default:
		GEM_BUG_ON(1);
	}

	/* direct shadow */
	ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr);
	if (ret)
		return -ENXIO;

	pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
	ppgtt_set_shadow_entry(spt, &se, index);
	return 0;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long gfn, i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	for_each_present_guest_entry(spt, &ge, i) {
		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
			if (IS_ERR(s)) {
				ret = PTR_ERR(s);
				goto fail;
			}
			ppgtt_get_shadow_entry(spt, &se, i);
			ppgtt_generate_shadow_entry(&se, s, &ge);
			ppgtt_set_shadow_entry(spt, &se, i);
		} else {
			gfn = ops->get_pfn(&ge);
			if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
				ops->set_pfn(&se, gvt->gtt.scratch_mfn);
				ppgtt_set_shadow_entry(spt, &se, i);
				continue;
			}

			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, ge.val64, ge.type);
	return ret;
}

static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *se, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
			       spt->shadow_page.type, se->val64, index);

	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
		    se->type, index, se->val64);

	if (!ops->test_present(se))
		return 0;

	if (ops->get_pfn(se) ==
	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
		struct intel_vgpu_ppgtt_spt *s =
			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
		if (!s) {
			gvt_vgpu_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_spt(s);
		if (ret)
			goto fail;
	} else
		ppgtt_invalidate_pte(spt, se);

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, se->val64, se->type);
	return ret;
}

static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
			       we->val64, index);

	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
		    we->type, index, we->val64);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
		spt, we->val64, we->type);
	return ret;
}

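/*
 * Out-of-sync (OOS) page handling. When a guest keeps rewriting a PTE page
 * table (write_cnt >= 2, see can_do_out_of_sync()), keeping it
 * write-protected becomes expensive. Instead, a snapshot of the guest page
 * is copied into an oos_page buffer (attach_oos_page()), write protection is
 * dropped and the page is marked out of sync. Before the next workload is
 * submitted, intel_vgpu_sync_oos_pages() re-enables protection and
 * sync_oos_page() re-shadows only the entries that differ from the snapshot.
 */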
static int sync_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
	struct intel_gvt_gtt_entry old, new;
	int index;
	int ret;

	trace_oos_change(vgpu->id, "sync", oos_page->id,
			 spt, spt->guest_page.type);

	old.type = new.type = get_entry_type(spt->guest_page.type);
	old.val64 = new.val64 = 0;

	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
				info->gtt_entry_size_shift); index++) {
		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
		ops->get_entry(NULL, &new, index, true,
			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);

		if (old.val64 == new.val64
			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
			continue;

		trace_oos_sync(vgpu->id, oos_page->id,
				spt, spt->guest_page.type,
				new.val64, index);

		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
		if (ret)
			return ret;

		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
	}

	spt->guest_page.write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;

	trace_oos_change(vgpu->id, "detach", oos_page->id,
			 spt, spt->guest_page.type);

	spt->guest_page.write_cnt = 0;
	spt->guest_page.oos_page = NULL;
	oos_page->spt = NULL;

	list_del_init(&oos_page->vm_list);
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);

	return 0;
}

static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
		struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	int ret;

	ret = intel_gvt_hypervisor_read_gpa(spt->vgpu,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			oos_page->mem, I915_GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->spt = spt;
	spt->guest_page.oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
			 spt, spt->guest_page.type);
	return 0;
}

static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
	if (ret)
		return ret;

	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_del_init(&oos_page->vm_list);
	return sync_oos_page(spt->vgpu, oos_page);
}

static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page already has an oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		oos_page = container_of(gtt->oos_page_use_list_head.next,
					struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
		ret = detach_oos_page(spt->vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
					struct intel_vgpu_oos_page, list);
	return attach_oos_page(oos_page, spt);
}

static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
		return -EINVAL;

	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
}

/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to sync all the out-of-sync shadow page tables of the vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;
	int ret;

	if (!enable_out_of_sync)
		return 0;

	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(pos,
				struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The heart of PPGTT shadow page table.
 */
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	int type = spt->shadow_page.type;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry old_se;
	int new_present;
	int ret;

	new_present = ops->test_present(we);

	/*
	 * Add the new entry first and then remove the old one, so that the
	 * ppgtt table remains valid during the window between the add and
	 * the removal.
	 */
	ppgtt_get_shadow_entry(spt, &old_se, index);

	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(spt, we, index);
		if (ret)
			goto fail;
	}

	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
	if (ret)
		goto fail;

	if (!new_present) {
		ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn);
		ppgtt_set_shadow_entry(spt, &old_se, index);
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
			spt, we->val64, we->type);
	return ret;
}

static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	return enable_out_of_sync
		&& gtt_type_is_pte_pt(spt->guest_page.type)
		&& spt->guest_page.write_cnt >= 2;
}

static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
		unsigned long index)
{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
		      &spt->vgpu->gtt.post_shadow_list_head);
}

/**
 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to flush all pending post-shadow transactions of the vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge;
	unsigned long index;
	int ret;

	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
				post_shadow_list);

		for_each_set_bit(index, spt->post_shadow_bitmap,
				GTT_ENTRY_NUM_IN_ONE_PAGE) {
			ppgtt_get_guest_entry(spt, &ge, index);

			ret = ppgtt_handle_guest_write_page_table(spt,
							&ge, index);
			if (ret)
				return ret;
			clear_bit(index, spt->post_shadow_bitmap);
		}
		list_del_init(&spt->post_shadow_list);
	}
	return 0;
}

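/*
 * Guest writes that cover a whole GTT entry are shadowed immediately. A
 * partial write (e.g. 4 bytes of an 8-byte entry) cannot be shadowed until
 * the entry is complete, so the old shadow entry is torn down and pointed at
 * the scratch page, and the index is queued via ppgtt_set_post_shadow(); the
 * deferred entries are re-shadowed by intel_vgpu_flush_post_shadow() before
 * the next workload runs.
 */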
static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt_gtt_entry we, se;
	unsigned long index;
	int ret;

	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;

	ppgtt_get_guest_entry(spt, &we, index);

	ops->test_pse(&we);

	if (bytes == info->gtt_entry_size) {
		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
		if (ret)
			return ret;
	} else {
		if (!test_bit(index, spt->post_shadow_bitmap)) {
			int type = spt->shadow_page.type;

			ppgtt_get_shadow_entry(spt, &se, index);
			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
			if (ret)
				return ret;
			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &se, index);
		}
		ppgtt_set_post_shadow(spt, index);
	}

	if (!enable_out_of_sync)
		return 0;

	spt->guest_page.write_cnt++;

	if (spt->guest_page.oos_page)
		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
				false, 0, vgpu);

	if (can_do_out_of_sync(spt)) {
		if (!spt->guest_page.oos_page)
			ppgtt_allocate_oos_page(spt);

		ret = ppgtt_set_guest_page_oos(spt);
		if (ret < 0)
			return ret;
	}
	return 0;
}

static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_gvt_gtt_entry se;
	int index;

	if (!mm->ppgtt_mm.shadowed)
		return;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
		ppgtt_get_shadow_root_entry(mm, &se, index);

		if (!ops->test_present(&se))
			continue;

		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
		se.val64 = 0;
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "destroy root pointer",
				       NULL, se.type, se.val64, index);
	}

	mm->ppgtt_mm.shadowed = false;
}

static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge, se;
	int index, ret;

	if (mm->ppgtt_mm.shadowed)
		return 0;

	mm->ppgtt_mm.shadowed = true;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
		ppgtt_get_guest_root_entry(mm, &ge, index);

		if (!ops->test_present(&ge))
			continue;

		trace_spt_guest_change(vgpu->id, __func__, NULL,
				       ge.type, ge.val64, index);

		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
		if (IS_ERR(spt)) {
			gvt_vgpu_err("fail to populate guest root pointer\n");
			ret = PTR_ERR(spt);
			goto fail;
		}
		ppgtt_generate_shadow_entry(&se, spt, &ge);
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "populate root pointer",
				       NULL, se.type, se.val64, index);
	}

	return 0;
fail:
	invalidate_ppgtt_mm(mm);
	return ret;
}

static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;

	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
	if (!mm)
		return NULL;

	mm->vgpu = vgpu;
	kref_init(&mm->ref);
	atomic_set(&mm->pincount, 0);

	return mm;
}

static void vgpu_free_mm(struct intel_vgpu_mm *mm)
{
	kfree(mm);
}

/**
 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps.
 *
 * This function is used to create a ppgtt mm object for a vGPU.
 *
 * Returns:
 * A pointer to the new mm object on success, or an ERR_PTR-encoded negative
 * error code on failure.
 */
  1276. struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
  1277. intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
  1278. {
  1279. struct intel_gvt *gvt = vgpu->gvt;
  1280. struct intel_vgpu_mm *mm;
  1281. int ret;
  1282. mm = vgpu_alloc_mm(vgpu);
  1283. if (!mm)
  1284. return ERR_PTR(-ENOMEM);
  1285. mm->type = INTEL_GVT_MM_PPGTT;
  1286. GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
  1287. root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
  1288. mm->ppgtt_mm.root_entry_type = root_entry_type;
  1289. INIT_LIST_HEAD(&mm->ppgtt_mm.list);
  1290. INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
  1291. if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
  1292. mm->ppgtt_mm.guest_pdps[0] = pdps[0];
  1293. else
  1294. memcpy(mm->ppgtt_mm.guest_pdps, pdps,
  1295. sizeof(mm->ppgtt_mm.guest_pdps));
  1296. ret = shadow_ppgtt_mm(mm);
  1297. if (ret) {
  1298. gvt_vgpu_err("failed to shadow ppgtt mm\n");
  1299. vgpu_free_mm(mm);
  1300. return ERR_PTR(ret);
  1301. }
  1302. list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
  1303. list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
  1304. return mm;
  1305. }
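
/*
 * Illustrative sketch (not part of the driver): a typical caller of
 * intel_vgpu_create_ppgtt_mm() passes the root entry type and the guest
 * pdps it received from the guest, then checks for an ERR_PTR. The
 * function name below is an assumption made for the example; the real
 * callers live in the guest-to-host notification paths.
 */
#if 0
static int example_create_guest_ppgtt(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
	if (IS_ERR(mm))
		return PTR_ERR(mm);

	/* the creator holds the initial reference on the new mm */
	return 0;
}
#endif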
static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;
	unsigned long nr_entries;

	mm = vgpu_alloc_mm(vgpu);
	if (!mm)
		return ERR_PTR(-ENOMEM);

	mm->type = INTEL_GVT_MM_GGTT;

	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
	mm->ggtt_mm.virtual_ggtt =
		vzalloc(array_size(nr_entries,
				   vgpu->gvt->device_info.gtt_entry_size));
	if (!mm->ggtt_mm.virtual_ggtt) {
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}
	mm->ggtt_mm.last_partial_off = -1UL;

	return mm;
}
/**
 * _intel_vgpu_mm_release - destroy a mm object
 * @mm_ref: a kref object
 *
 * This function is used to destroy a mm object for a vGPU.
 *
 */
void _intel_vgpu_mm_release(struct kref *mm_ref)
{
	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);

	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
		gvt_err("vgpu mm pin count bug detected\n");

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		list_del(&mm->ppgtt_mm.list);
		list_del(&mm->ppgtt_mm.lru_list);
		invalidate_ppgtt_mm(mm);
	} else {
		vfree(mm->ggtt_mm.virtual_ggtt);
		mm->ggtt_mm.last_partial_off = -1UL;
	}

	vgpu_free_mm(mm);
}
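
/*
 * Illustrative sketch (not part of the driver): _intel_vgpu_mm_release() is
 * only meant to run as the kref release callback. Callers manage lifetime
 * through the get/put helpers used elsewhere in this file
 * (intel_vgpu_mm_get()/intel_vgpu_mm_put()); the example function name is
 * an assumption.
 */
#if 0
static void example_mm_reference(struct intel_vgpu_mm *mm)
{
	intel_vgpu_mm_get(mm);	/* take an extra reference */

	/* ... use the mm object ... */

	intel_vgpu_mm_put(mm);	/* last put ends up in _intel_vgpu_mm_release() */
}
#endif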
/**
 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when a user is done using a vGPU mm object.
 */
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
	atomic_dec(&mm->pincount);
}
/**
 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when a user wants to use a vGPU mm object. If this
 * mm object hasn't been shadowed yet, the shadow will be populated at this
 * time.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
{
	int ret;

	atomic_inc(&mm->pincount);

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		ret = shadow_ppgtt_mm(mm);
		if (ret)
			return ret;

		list_move_tail(&mm->ppgtt_mm.lru_list,
			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
	}

	return 0;
}
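
/*
 * Illustrative sketch (not part of the driver): pin a PPGTT mm around the
 * time its shadow page tables must stay valid (e.g. while a workload that
 * uses it is in flight), then unpin so reclaim_one_ppgtt_mm() may retire
 * it again. The example function name is an assumption.
 */
#if 0
static int example_use_shadowed_ppgtt(struct intel_vgpu_mm *mm)
{
	int ret;

	ret = intel_vgpu_pin_mm(mm);	/* shadows the PPGTT if not done yet */
	if (ret)
		return ret;

	/* ... submit work that relies on the shadow page tables ... */

	intel_vgpu_unpin_mm(mm);
	return 0;
}
#endif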
static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos, *n;

	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);

		if (atomic_read(&mm->pincount))
			continue;

		list_del_init(&mm->ppgtt_mm.lru_list);
		invalidate_ppgtt_mm(mm);
		return 1;
	}
	return 0;
}
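
/*
 * Illustrative sketch (not part of the driver): reclaim_one_ppgtt_mm() is
 * intended as a fallback on memory pressure - retire one unpinned shadow
 * PPGTT from the LRU list and retry the failed allocation. The helper name
 * and its parameters below are assumptions for the example.
 */
#if 0
static void *example_alloc_with_reclaim(struct intel_gvt *gvt, size_t size)
{
	void *obj = kzalloc(size, GFP_KERNEL);

	if (!obj && reclaim_one_ppgtt_mm(gvt))
		obj = kzalloc(size, GFP_KERNEL);	/* retry once after reclaim */

	return obj;
}
#endif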
/*
 * GMA translation APIs.
 */
static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;

	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s)
		return -ENXIO;

	if (!guest)
		ppgtt_get_shadow_entry(s, e, index);
	else
		ppgtt_get_guest_entry(s, e, index);

	return 0;
}
/**
 * intel_vgpu_gma_to_gpa - translate a gma to GPA
 * @mm: mm object. could be a PPGTT or GGTT mm object
 * @gma: graphics memory address in this mm object
 *
 * This function is used to translate a graphics memory address in a specific
 * graphics memory space to a guest physical address.
 *
 * Returns:
 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
 */
unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
	struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
	unsigned long gma_index[4];
	struct intel_gvt_gtt_entry e;
	int i, levels = 0;
	int ret;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
		   mm->type != INTEL_GVT_MM_PPGTT);

	if (mm->type == INTEL_GVT_MM_GGTT) {
		if (!vgpu_gmadr_is_valid(vgpu, gma))
			goto err;

		ggtt_get_guest_entry(mm, &e,
			gma_ops->gma_to_ggtt_pte_index(gma));

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
			+ (gma & ~I915_GTT_PAGE_MASK);

		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
	} else {
		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e, 0);

			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
			gma_index[2] = gma_ops->gma_to_pde_index(gma);
			gma_index[3] = gma_ops->gma_to_pte_index(gma);
			levels = 4;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e,
					gma_ops->gma_to_l3_pdp_index(gma));

			gma_index[0] = gma_ops->gma_to_pde_index(gma);
			gma_index[1] = gma_ops->gma_to_pte_index(gma);
			levels = 2;
			break;
		default:
			GEM_BUG_ON(1);
		}

		/* walk the shadow page table and get gpa from guest entry */
		for (i = 0; i < levels; i++) {
			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
				(i == levels - 1));
			if (ret)
				goto err;

			if (!pte_ops->test_present(&e)) {
				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
				goto err;
			}
		}

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
			(gma & ~I915_GTT_PAGE_MASK);
		trace_gma_translate(vgpu->id, "ppgtt", 0,
				    mm->ppgtt_mm.root_entry_type, gma, gpa);
	}

	return gpa;
err:
	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
	return INTEL_GVT_INVALID_ADDR;
}
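
/*
 * Illustrative sketch (not part of the driver): translating a graphics
 * memory address with intel_vgpu_gma_to_gpa() and mapping the failure
 * sentinel to an errno. The wrapper name and the choice of -EFAULT are
 * assumptions for the example.
 */
#if 0
static int example_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma,
		unsigned long *gpa)
{
	*gpa = intel_vgpu_gma_to_gpa(mm, gma);
	if (*gpa == INTEL_GVT_INVALID_ADDR)
		return -EFAULT;

	return 0;
}
#endif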
static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
	unsigned int off, void *p_data, unsigned int bytes)
{
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	unsigned long index = off >> info->gtt_entry_size_shift;
	struct intel_gvt_gtt_entry e;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	ggtt_get_guest_entry(ggtt_mm, &e, index);
	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
			bytes);
	return 0;
}
/**
 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data will be returned to guest
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register read
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
	return ret;
}
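
/*
 * Illustrative sketch (not part of the driver): reading one full GGTT PTE
 * for a given entry index through the MMIO read emulation path. The 8-byte
 * entry size (gen8 and later) and the helper name are assumptions for the
 * example; note the exported wrapper expects the raw MMIO offset, including
 * gtt_start_offset.
 */
#if 0
static int example_read_guest_ggtt_pte(struct intel_vgpu *vgpu,
		unsigned long index, u64 *pte)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	unsigned int off = info->gtt_start_offset +
			(index << info->gtt_entry_size_shift);

	return intel_vgpu_emulate_ggtt_mmio_read(vgpu, off, pte, 8);
}
#endif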
static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *entry)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;

	pfn = pte_ops->get_pfn(entry);
	if (pfn != vgpu->gvt->gtt.scratch_mfn)
		intel_gvt_hypervisor_dma_unmap_guest_page(vgpu,
						pfn << PAGE_SHIFT);
}
static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
	unsigned long gma, gfn;
	struct intel_gvt_gtt_entry e, m;
	dma_addr_t dma_addr;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;

	/* the VM may configure the whole GM space when ballooning is used */
	if (!vgpu_gmadr_is_valid(vgpu, gma))
		return 0;

	ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);

	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
			bytes);

	/* If the ggtt entry size is 8 bytes and it is split into two 4-byte
	 * writes, we assume the two 4-byte writes are consecutive.
	 * Otherwise, we abort and report an error.
	 */
	if (bytes < info->gtt_entry_size) {
		if (ggtt_mm->ggtt_mm.last_partial_off == -1UL) {
			/* the first partial part */
			ggtt_mm->ggtt_mm.last_partial_off = off;
			ggtt_mm->ggtt_mm.last_partial_data = e.val64;
			return 0;
		} else if ((g_gtt_index ==
				(ggtt_mm->ggtt_mm.last_partial_off >>
				info->gtt_entry_size_shift)) &&
			(off != ggtt_mm->ggtt_mm.last_partial_off)) {
			/* the second partial part */

			int last_off = ggtt_mm->ggtt_mm.last_partial_off &
				(info->gtt_entry_size - 1);

			memcpy((void *)&e.val64 + last_off,
				(void *)&ggtt_mm->ggtt_mm.last_partial_data +
				last_off, bytes);

			ggtt_mm->ggtt_mm.last_partial_off = -1UL;
		} else {
			int last_offset;

			gvt_vgpu_err("failed to populate guest ggtt entry: abnormal ggtt entry write sequence, last_partial_off=%lx, offset=%x, bytes=%d, ggtt entry size=%d\n",
					ggtt_mm->ggtt_mm.last_partial_off, off,
					bytes, info->gtt_entry_size);

			/* set the host ggtt entry to the scratch page and
			 * clear the virtual ggtt entry as not present for the
			 * last partially written offset
			 */
			last_offset = ggtt_mm->ggtt_mm.last_partial_off &
				(~(info->gtt_entry_size - 1));

			ggtt_get_host_entry(ggtt_mm, &m, last_offset);
			ggtt_invalidate_pte(vgpu, &m);
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
			ops->clear_present(&m);
			ggtt_set_host_entry(ggtt_mm, &m, last_offset);
			ggtt_invalidate(gvt->dev_priv);

			ggtt_get_guest_entry(ggtt_mm, &e, last_offset);
			ops->clear_present(&e);
			ggtt_set_guest_entry(ggtt_mm, &e, last_offset);

			ggtt_mm->ggtt_mm.last_partial_off = off;
			ggtt_mm->ggtt_mm.last_partial_data = e.val64;

			return 0;
		}
	}

	if (ops->test_present(&e)) {
		gfn = ops->get_pfn(&e);
		m = e;

		/* one PTE update may be issued in multiple writes and the
		 * first write may not construct a valid gfn
		 */
		if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
			goto out;
		}

		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
							      &dma_addr);
		if (ret) {
			gvt_vgpu_err("fail to populate guest ggtt entry\n");
			/* the guest driver may read/write the entry while it
			 * is partially updated; if the p2m mapping fails here,
			 * set the shadow entry to point to a scratch page
			 */
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		} else
			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
	} else {
		ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index);
		ggtt_invalidate_pte(vgpu, &m);
		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		ops->clear_present(&m);
	}

out:
	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_invalidate(gvt->dev_priv);
	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
	return 0;
}
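
/*
 * Illustrative sketch (not part of the driver): the partial-write handling
 * above assumes a guest updates an 8-byte PTE with two consecutive 4-byte
 * MMIO writes. From the emulation side that sequence looks like the
 * following; @pte_off is assumed to be the entry-aligned MMIO offset
 * (gtt_start_offset plus index times entry size), and the function name is
 * an assumption.
 */
#if 0
static int example_split_pte_write(struct intel_vgpu *vgpu,
		unsigned int pte_off, u64 pte)
{
	u32 lo = lower_32_bits(pte);
	u32 hi = upper_32_bits(pte);
	int ret;

	/* first half: cached in last_partial_off/last_partial_data */
	ret = intel_vgpu_emulate_ggtt_mmio_write(vgpu, pte_off, &lo, 4);
	if (ret)
		return ret;

	/* second half: the complete 64-bit entry is shadowed and committed */
	return intel_vgpu_emulate_ggtt_mmio_write(vgpu, pte_off + 4, &hi, 4);
}
#endif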
/**
 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data from guest write
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register write
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
		unsigned int off, void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
	return ret;
}
static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t type)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int page_entry_num = I915_GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	void *scratch_pt;
	int i;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
	if (!scratch_pt) {
		gvt_vgpu_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
			4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_vgpu_err("fail to dmamap scratch_pt\n");
		__free_page(virt_to_page(scratch_pt));
		return -ENOMEM;
	}
	gtt->scratch_pt[type].page_mfn =
		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
			vgpu->id, type, gtt->scratch_pt[type].page_mfn);

	/* Build the tree by filling the scratch pt with entries which point
	 * to the next level scratch pt or scratch page. scratch_pt[type]
	 * indicates the scratch pt/scratch page used by the 'type' pt.
	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by the
	 * GTT_TYPE_PPGTT_PDE_PT level pt; that means this scratch_pt itself
	 * is of type GTT_TYPE_PPGTT_PTE_PT and is filled with the scratch
	 * page mfn.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/* The entry parameters like present/writeable/cache type
		 * are set to the same values as i915's scratch page tree.
		 */
		se.val64 |= _PAGE_PRESENT | _PAGE_RW;
		if (type == GTT_TYPE_PPGTT_PDE_PT)
			se.val64 |= PPAT_CACHED;

		for (i = 0; i < page_entry_num; i++)
			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
	}

	return 0;
}
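
/*
 * Illustrative sketch (not part of the driver): each page-table level gets
 * its own scratch page, and every scratch entry at level N points to the
 * scratch page of level N-1, ending at the data scratch page. A debug-style
 * dump of that chain could look like this; the function name is an
 * assumption.
 */
#if 0
static void example_dump_scratch_chain(struct intel_vgpu *vgpu)
{
	int type;

	for (type = GTT_TYPE_PPGTT_PTE_PT; type < GTT_TYPE_MAX; type++)
		gvt_dbg_mm("vgpu%d scratch_pt[%d] mfn=0x%lx\n", vgpu->id,
			   type, vgpu->gtt.scratch_pt[type].page_mfn);
}
#endif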
static int release_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		if (vgpu->gtt.scratch_pt[i].page != NULL) {
			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
					I915_GTT_PAGE_SHIFT);
			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
			__free_page(vgpu->gtt.scratch_pt[i].page);
			vgpu->gtt.scratch_pt[i].page = NULL;
			vgpu->gtt.scratch_pt[i].page_mfn = 0;
		}
	}

	return 0;
}
static int create_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i, ret;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		ret = alloc_scratch_pages(vgpu, i);
		if (ret)
			goto err;
	}

	return 0;

err:
	release_scratch_page_tree(vgpu);
	return ret;
}
/**
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to initialize per-vGPU graphics memory virtualization
 * components.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;

	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);

	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_list_head);
	INIT_LIST_HEAD(&gtt->post_shadow_list_head);

	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
	if (IS_ERR(gtt->ggtt_mm)) {
		gvt_vgpu_err("fail to create mm for ggtt.\n");
		return PTR_ERR(gtt->ggtt_mm);
	}

	intel_vgpu_reset_ggtt(vgpu, false);

	return create_scratch_page_tree(vgpu);
}
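
/*
 * Illustrative sketch (not part of the driver): intel_vgpu_init_gtt() and
 * intel_vgpu_clean_gtt() are meant to be paired across the vGPU lifetime,
 * called at creation and destruction respectively. The example function
 * name is an assumption.
 */
#if 0
static int example_vgpu_gtt_lifecycle(struct intel_vgpu *vgpu)
{
	int ret;

	ret = intel_vgpu_init_gtt(vgpu);	/* at vGPU creation */
	if (ret)
		return ret;

	/* ... vGPU lifetime ... */

	intel_vgpu_clean_gtt(vgpu);		/* at vGPU destruction */
	return 0;
}
#endif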
static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		intel_vgpu_destroy_mm(mm);
	}

	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
		gvt_err("vgpu ppgtt mm is not fully destroyed\n");

	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
		gvt_err("Why we still has spt not freed?\n");
		ppgtt_free_all_spt(vgpu);
	}
}

static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
{
	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
	vgpu->gtt.ggtt_mm = NULL;
}
/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_destroy_ggtt_mm(vgpu);
	release_scratch_page_tree(vgpu);
}
static void clean_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;

	WARN(!list_empty(&gtt->oos_page_use_list_head),
		"someone is still using oos page\n");

	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
		list_del(&oos_page->list);
		kfree(oos_page);
	}
}

static int setup_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page;
	int i;
	int ret;

	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);

	for (i = 0; i < preallocated_oos_pages; i++) {
		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
		if (!oos_page) {
			ret = -ENOMEM;
			goto fail;
		}

		INIT_LIST_HEAD(&oos_page->list);
		INIT_LIST_HEAD(&oos_page->vm_list);
		oos_page->id = i;
		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
	}

	gvt_dbg_mm("%d oos pages preallocated\n", i);

	return 0;
fail:
	clean_spt_oos(gvt);
	return ret;
}
/**
 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
 * @vgpu: a vGPU
 * @pdps: guest PPGTT root pointers (pdps)
 *
 * This function is used to find a PPGTT mm object from the mm object pool.
 *
 * Returns:
 * pointer to mm object on success, NULL if failed.
 */
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
		u64 pdps[])
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos;

	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);

		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
				return mm;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
				    sizeof(mm->ppgtt_mm.guest_pdps)))
				return mm;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}

	return NULL;
}
/**
 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps
 *
 * This function is used to find or create a PPGTT mm object from a guest.
 *
 * Returns:
 * pointer to mm object on success, ERR_PTR() encoded error code if failed.
 */
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (mm) {
		intel_vgpu_mm_get(mm);
	} else {
		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
		if (IS_ERR(mm))
			gvt_vgpu_err("fail to create mm\n");
	}
	return mm;
}
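
/*
 * Illustrative sketch (not part of the driver): a guest notification about
 * PPGTT creation maps naturally onto intel_vgpu_get_ppgtt_mm(), and the
 * matching destruction notification onto intel_vgpu_put_ppgtt_mm(). The
 * handler name and its parameters are assumptions for the example.
 */
#if 0
static int example_handle_guest_ppgtt_notification(struct intel_vgpu *vgpu,
		bool create, intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	if (create) {
		mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps);
		return PTR_ERR_OR_ZERO(mm);
	}

	return intel_vgpu_put_ppgtt_mm(vgpu, pdps);
}
#endif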
/**
 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
 * @vgpu: a vGPU
 * @pdps: guest pdps
 *
 * This function is used to find a PPGTT mm object from a guest and drop the
 * reference held on it, destroying it once the last reference is gone.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (!mm) {
		gvt_vgpu_err("fail to find ppgtt instance.\n");
		return -EINVAL;
	}
	intel_vgpu_mm_put(mm);
	return 0;
}
/**
 * intel_gvt_init_gtt - initialize mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the initialization stage, to initialize
 * the mm components of a GVT device.
 *
 * Returns:
 * zero on success, negative error code if failed.
 */
int intel_gvt_init_gtt(struct intel_gvt *gvt)
{
	int ret;
	void *page;
	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	gvt_dbg_core("init gtt\n");

	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
		|| IS_KABYLAKE(gvt->dev_priv)) {
		gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
		gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
	} else {
		return -ENODEV;
	}

	page = (void *)get_zeroed_page(GFP_KERNEL);
	if (!page) {
		gvt_err("fail to allocate scratch ggtt page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(page), 0,
			4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_err("fail to dmamap scratch ggtt page\n");
		__free_page(virt_to_page(page));
		return -ENOMEM;
	}

	gvt->gtt.scratch_page = virt_to_page(page);
	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);

	if (enable_out_of_sync) {
		ret = setup_spt_oos(gvt);
		if (ret) {
			gvt_err("fail to initialize SPT oos\n");
			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
			__free_page(gvt->gtt.scratch_page);
			return ret;
		}
	}
	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
	return 0;
}
/**
 * intel_gvt_clean_gtt - clean up mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the driver unloading stage, to clean up the
 * mm components of a GVT device.
 *
 */
void intel_gvt_clean_gtt(struct intel_gvt *gvt)
{
	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
					I915_GTT_PAGE_SHIFT);

	dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);

	__free_page(gvt->gtt.scratch_page);

	if (enable_out_of_sync)
		clean_spt_oos(gvt);
}
/**
 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
 * @vgpu: a vGPU
 *
 * This function is called to invalidate all PPGTT instances of a vGPU.
 *
 */
void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		if (mm->type == INTEL_GVT_MM_PPGTT) {
			list_del_init(&mm->ppgtt_mm.lru_list);
			if (mm->ppgtt_mm.shadowed)
				invalidate_ppgtt_mm(mm);
		}
	}
}
/**
 * intel_vgpu_reset_ggtt - reset the GGTT entry
 * @vgpu: a vGPU
 * @invalidate_old: invalidate old entries
 *
 * This function is called at the vGPU create stage
 * to reset all the GGTT entries.
 *
 */
void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct drm_i915_private *dev_priv = gvt->dev_priv;
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
	struct intel_gvt_gtt_entry old_entry;
	u32 index;
	u32 num_entries;

	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
	pte_ops->set_present(&entry);

	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
	while (num_entries--) {
		if (invalidate_old) {
			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
			ggtt_invalidate_pte(vgpu, &old_entry);
		}
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
	}

	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
	while (num_entries--) {
		if (invalidate_old) {
			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
			ggtt_invalidate_pte(vgpu, &old_entry);
		}
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
	}

	ggtt_invalidate(dev_priv);
}
/**
 * intel_vgpu_reset_gtt - reset all GTT related status
 * @vgpu: a vGPU
 *
 * This function is called from the vfio core to reset all
 * GTT related status, including GGTT, PPGTT and scratch pages.
 *
 */
void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
{
	/* Shadow pages are only created when there is no page
	 * table tracking data, so remove page tracking data after
	 * removing the shadow pages.
	 */
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_reset_ggtt(vgpu, true);
}