/*
 * tools/testing/selftests/kvm/lib/x86.c
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include "test_util.h"
#include "kvm_util.h"
#include "kvm_util_internal.h"
#include "x86.h"

/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

/* Virtual translation table structure declarations */
struct pageMapL4Entry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryPointerEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageTableEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t dirty:1;
	uint64_t reserved_07:1;
	uint64_t global:1;
	uint64_t ignored_11_09:3;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};
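
/* A 48-bit virtual address is decoded in four 9-bit strides, one per
 * table level above, plus a 12-bit page offset.  As a worked example,
 * for vaddr 0x40000000 (1 GiB):
 *
 *   PML4 index = (vaddr >> 39) & 0x1ff = 0
 *   PDPT index = (vaddr >> 30) & 0x1ff = 1
 *   PD   index = (vaddr >> 21) & 0x1ff = 0
 *   PT   index = (vaddr >> 12) & 0x1ff = 0
 *   offset     =  vaddr & 0xfff        = 0
 *
 * virt_pg_map() and addr_gva2gpa() below compute exactly these indices.
 */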
/* Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   regs - Registers to dump
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the registers given by regs, to the FILE stream
 * given by stream.
 */
void regs_dump(FILE *stream, struct kvm_regs *regs,
	       uint8_t indent)
{
	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
		indent, "",
		regs->rax, regs->rbx, regs->rcx, regs->rdx);
	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
		indent, "",
		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
	fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
		"r10: 0x%.16llx r11: 0x%.16llx\n",
		indent, "",
		regs->r8, regs->r9, regs->r10, regs->r11);
	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
		"r14: 0x%.16llx r15: 0x%.16llx\n",
		indent, "",
		regs->r12, regs->r13, regs->r14, regs->r15);
	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
		indent, "",
		regs->rip, regs->rflags);
}
/* Segment Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   segment - KVM segment
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM segment given by segment, to the FILE stream
 * given by stream.
 */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
			 uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
		"selector: 0x%.4x type: 0x%.2x\n",
		indent, "", segment->base, segment->limit,
		segment->selector, segment->type);
	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
		indent, "", segment->present, segment->dpl,
		segment->db, segment->s, segment->l);
	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
		"unusable: 0x%.2x padding: 0x%.2x\n",
		indent, "", segment->g, segment->avl,
		segment->unusable, segment->padding);
}
/* dtable Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   dtable - KVM dtable
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM dtable given by dtable, to the FILE stream
 * given by stream.
 */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
			uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
		indent, "", dtable->base, dtable->limit,
		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}
/* System Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   sregs - System registers
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the system registers given by sregs, to the FILE stream
 * given by stream.
 */
void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
		uint8_t indent)
{
	unsigned int i;

	fprintf(stream, "%*scs:\n", indent, "");
	segment_dump(stream, &sregs->cs, indent + 2);
	fprintf(stream, "%*sds:\n", indent, "");
	segment_dump(stream, &sregs->ds, indent + 2);
	fprintf(stream, "%*ses:\n", indent, "");
	segment_dump(stream, &sregs->es, indent + 2);
	fprintf(stream, "%*sfs:\n", indent, "");
	segment_dump(stream, &sregs->fs, indent + 2);
	fprintf(stream, "%*sgs:\n", indent, "");
	segment_dump(stream, &sregs->gs, indent + 2);
	fprintf(stream, "%*sss:\n", indent, "");
	segment_dump(stream, &sregs->ss, indent + 2);
	fprintf(stream, "%*str:\n", indent, "");
	segment_dump(stream, &sregs->tr, indent + 2);
	fprintf(stream, "%*sldt:\n", indent, "");
	segment_dump(stream, &sregs->ldt, indent + 2);
	fprintf(stream, "%*sgdt:\n", indent, "");
	dtable_dump(stream, &sregs->gdt, indent + 2);
	fprintf(stream, "%*sidt:\n", indent, "");
	dtable_dump(stream, &sregs->idt, indent + 2);
	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
		indent, "",
		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
		"apic_base: 0x%.16llx\n",
		indent, "",
		sregs->cr8, sregs->efer, sregs->apic_base);
	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
			sregs->interrupt_bitmap[i]);
	}
}
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	/* If needed, create page map l4 table. */
	if (!vm->pgd_created) {
		vm_paddr_t paddr = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
		vm->pgd = paddr;
		vm->pgd_created = true;
	}
}
/* VM Virtual Page Map
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - VM Virtual Address
 *   paddr - VM Physical Address
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by vm, creates a virtual translation for the page
 * starting at vaddr to the page starting at paddr.
 */
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		 uint32_t pgd_memslot)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;

	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	TEST_ASSERT((vaddr % vm->page_size) == 0,
		"Virtual address not on page boundary,\n"
		"  vaddr: 0x%lx vm->page_size: 0x%x",
		vaddr, vm->page_size);
	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
		(vaddr >> vm->page_shift)),
		"Invalid virtual address, vaddr: 0x%lx",
		vaddr);
	TEST_ASSERT((paddr % vm->page_size) == 0,
		"Physical address not on page boundary,\n"
		"  paddr: 0x%lx vm->page_size: 0x%x",
		paddr, vm->page_size);
	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
		"Physical address beyond maximum supported,\n"
		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		paddr, vm->max_gfn, vm->page_size);

	index[0] = (vaddr >> 12) & 0x1ffu;
	index[1] = (vaddr >> 21) & 0x1ffu;
	index[2] = (vaddr >> 30) & 0x1ffu;
	index[3] = (vaddr >> 39) & 0x1ffu;

	/* Allocate page directory pointer table if not present. */
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present) {
		pml4e[index[3]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pml4e[index[3]].writable = true;
		pml4e[index[3]].present = true;
	}

	/* Allocate page directory table if not present. */
	struct pageDirectoryPointerEntry *pdpe;
	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present) {
		pdpe[index[2]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pdpe[index[2]].writable = true;
		pdpe[index[2]].present = true;
	}

	/* Allocate page table if not present. */
	struct pageDirectoryEntry *pde;
	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present) {
		pde[index[1]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pde[index[1]].writable = true;
		pde[index[1]].present = true;
	}

	/* Fill in page table entry. */
	struct pageTableEntry *pte;
	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	pte[index[0]].address = paddr >> vm->page_shift;
	pte[index[0]].writable = true;
	pte[index[0]].present = true;
}
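
/* Usage sketch (a hypothetical test body; the literal addresses are
 * illustrative, and the physical page must lie inside an existing
 * memslot):
 *
 *	vm_paddr_t data_gpa = vm_phy_page_alloc(vm, 0x200000, 0);
 *
 *	virt_pg_map(vm, 0x10000000, data_gpa, 0);
 *	TEST_ASSERT(addr_gva2gpa(vm, 0x10000000) == data_gpa,
 *		    "mapping should round-trip");
 */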
/* Virtual Translation Tables Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps to the FILE stream given by stream, the contents of all the
 * virtual translation tables for the VM given by vm.
 */
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	struct pageMapL4Entry *pml4e, *pml4e_start;
	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
	struct pageDirectoryEntry *pde, *pde_start;
	struct pageTableEntry *pte, *pte_start;

	if (!vm->pgd_created)
		return;

	fprintf(stream, "%*s                                          "
		"                no\n", indent, "");
	fprintf(stream, "%*s      index hvaddr         gpaddr         "
		"addr         w exec dirty\n",
		indent, "");
	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
		vm->pgd);
	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
		pml4e = &pml4e_start[n1];
		if (!pml4e->present)
			continue;
		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
			" %u\n",
			indent, "",
			pml4e - pml4e_start, pml4e,
			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
			pml4e->writable, pml4e->execute_disable);

		pdpe_start = addr_gpa2hva(vm, pml4e->address
			* vm->page_size);
		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
			pdpe = &pdpe_start[n2];
			if (!pdpe->present)
				continue;
			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
				"%u  %u\n",
				indent, "",
				pdpe - pdpe_start, pdpe,
				addr_hva2gpa(vm, pdpe),
				(uint64_t) pdpe->address, pdpe->writable,
				pdpe->execute_disable);

			pde_start = addr_gpa2hva(vm,
				pdpe->address * vm->page_size);
			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
				pde = &pde_start[n3];
				if (!pde->present)
					continue;
				fprintf(stream, "%*spde   0x%-3zx %p "
					"0x%-12lx 0x%-10lx %u  %u\n",
					indent, "", pde - pde_start, pde,
					addr_hva2gpa(vm, pde),
					(uint64_t) pde->address, pde->writable,
					pde->execute_disable);

				pte_start = addr_gpa2hva(vm,
					pde->address * vm->page_size);
				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
					pte = &pte_start[n4];
					if (!pte->present)
						continue;
					fprintf(stream, "%*spte   0x%-3zx %p "
						"0x%-12lx 0x%-10lx %u  %u "
						"    %u    0x%-10lx\n",
						indent, "",
						pte - pte_start, pte,
						addr_hva2gpa(vm, pte),
						(uint64_t) pte->address,
						pte->writable,
						pte->execute_disable,
						pte->dirty,
						((uint64_t) n1 << 27)
						| ((uint64_t) n2 << 18)
						| ((uint64_t) n3 << 9)
						| ((uint64_t) n4));
				}
			}
		}
	}
}
/* Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->unusable = true;
}
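
/* Fill GDT Entry From Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled
 *   segp - Pointer to KVM segment describing the entry
 *
 * Output Args: None
 *
 * Return: None
 *
 * Encodes the segment described by segp into the GDT slot indexed by
 * its selector (selector >> 3; each slot is 8 bytes).  The high base
 * word (base3) is only written for system segments (s == 0), whose
 * descriptors occupy 16 bytes in long mode.
 */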
static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
	void *gdt = addr_gva2hva(vm, vm->gdt);
	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;

	desc->limit0 = segp->limit & 0xFFFF;
	desc->base0 = segp->base & 0xFFFF;
	desc->base1 = segp->base >> 16;
	desc->s = segp->s;
	desc->type = segp->type;
	desc->dpl = segp->dpl;
	desc->p = segp->present;
	desc->limit1 = segp->limit >> 16;
	desc->l = segp->l;
	desc->db = segp->db;
	desc->g = segp->g;
	desc->base2 = segp->base >> 24;
	if (!segp->s)
		desc->base3 = segp->base >> 32;
}
/* Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a code segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
	struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
					  * | kFlagCodeReadable
					  */
	segp->g = true;
	segp->l = true;
	segp->present = 1;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}
/* Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a data segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
	struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
					  * | kFlagDataWritable
					  */
	segp->g = true;
	segp->present = true;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}
/* Address Guest Virtual to Guest Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Walks the virtual translation tables of the VM given by vm and
 * translates the VM virtual address given by gva to the equivalent
 * VM physical address.  A TEST_ASSERT failure occurs if no mapping
 * for the VM virtual address exists.
 */
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;
	struct pageDirectoryPointerEntry *pdpe;
	struct pageDirectoryEntry *pde;
	struct pageTableEntry *pte;

	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	index[0] = (gva >> 12) & 0x1ffu;
	index[1] = (gva >> 21) & 0x1ffu;
	index[2] = (gva >> 30) & 0x1ffu;
	index[3] = (gva >> 39) & 0x1ffu;

	if (!vm->pgd_created)
		goto unmapped_gva;
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present)
		goto unmapped_gva;

	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present)
		goto unmapped_gva;

	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present)
		goto unmapped_gva;

	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	if (!pte[index[0]].present)
		goto unmapped_gva;

	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);

unmapped_gva:
	TEST_ASSERT(false, "No mapping for vm virtual address, "
		"gva: 0x%lx", gva);
}
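
/* Usage sketch (guest_vaddr is a hypothetical name; assumes the address
 * was mapped earlier with virt_pg_map()):
 *
 *	vm_paddr_t gpa = addr_gva2gpa(vm, guest_vaddr);
 *	void *hva = addr_gpa2hva(vm, gpa);
 */

/* Set Up GDT
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gdt_memslot - Memory region slot for the GDT page
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args:
 *   dt - dtable describing the GDT base and limit
 *
 * Return: None
 *
 * On first use, allocates a guest page to hold the GDT, then points
 * dt at it.
 */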
static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
			  int pgd_memslot)
{
	if (!vm->gdt)
		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	dt->base = vm->gdt;
	dt->limit = getpagesize();
}
static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
				int selector, int gdt_memslot,
				int pgd_memslot)
{
	if (!vm->tss)
		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	memset(segp, 0, sizeof(*segp));
	segp->base = vm->tss;
	segp->limit = 0x67;
	segp->selector = selector;
	segp->type = 0xb;
	segp->present = 1;
	kvm_seg_fill_gdt_64bit(vm, segp);
}
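
/* VCPU System Register Setup
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *   gdt_memslot - Memory region slot for the GDT
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets mode specific system register values for the VCPU: control
 * registers, EFER, segment registers, GDT/IDT and CR3.  Only
 * VM_MODE_FLAT48PG (64-bit paged) is currently supported.
 */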
void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
	struct kvm_sregs sregs;

	/* Set mode specific system register values. */
	vcpu_sregs_get(vm, vcpuid, &sregs);

	sregs.idt.limit = 0;

	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);

	switch (vm->mode) {
	case VM_MODE_FLAT48PG:
		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
		sregs.cr4 |= X86_CR4_PAE;
		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

		kvm_seg_set_unusable(&sregs.ldt);
		kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
		break;

	default:
		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
	}

	sregs.cr3 = vm->pgd;
	vcpu_sregs_set(vm, vcpuid, &sregs);
}
/* Adds a vCPU with reasonable defaults (i.e. a stack)
 *
 * Input Args:
 *   vcpuid - The id of the VCPU to add to the VM.
 *   guest_code - The vCPU's entry point
 */
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	vm_vaddr_t stack_vaddr;
	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
				     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

	/* Create VCPU */
	vm_vcpu_add(vm, vcpuid, 0, 0);

	/* Setup guest general purpose registers */
	vcpu_regs_get(vm, vcpuid, &regs);
	regs.rflags = regs.rflags | 0x2;
	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
	regs.rip = (unsigned long) guest_code;
	vcpu_regs_set(vm, vcpuid, &regs);

	/* Setup the MP state */
	mp_state.mp_state = 0;
	vcpu_set_mp_state(vm, vcpuid, &mp_state);
}
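
/* Usage sketch (guest_code is a hypothetical guest function; tests
 * normally get this call for free via vm_create_default()):
 *
 *	static void guest_code(void) { ... }
 *
 *	vm_vcpu_add_default(vm, 0, guest_code);
 */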
/* VM VCPU CPUID Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   cpuid - The CPUID values to set.
 *
 * Output Args: None
 *
 * Return: void
 *
 * Set the VCPU's CPUID.
 */
void vcpu_set_cpuid(struct kvm_vm *vm,
		    uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int rc;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
		    rc, errno);
}
/* Create a VM with reasonable defaults
 *
 * Input Args:
 *   vcpuid - The id of the single VCPU to add to the VM.
 *   extra_mem_pages - Number of extra guest memory pages to add
 *                     (determines how much extra space is needed to
 *                     set up the page tables using mem slot 0)
 *   guest_code - The vCPU's entry point
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 */
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
				 void *guest_code)
{
	struct kvm_vm *vm;
	/*
	 * For x86 the maximum page table size for a memory region
	 * will be when only 4K pages are used.  In that case the
	 * total extra size for page tables (for extra N pages) will
	 * be: N/512+N/512^2+N/512^3+... which is definitely smaller
	 * than N/512*2.
	 */
	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;

	/* Create VM */
	vm = vm_create(VM_MODE_FLAT48PG,
		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
		       O_RDWR);

	/* Setup guest code */
	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);

	/* Setup IRQ Chip */
	vm_create_irqchip(vm);

	/* Add the first vCPU. */
	vm_vcpu_add_default(vm, vcpuid, guest_code);

	return vm;
}
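
/* Usage sketch of a typical test flow (guest_code is hypothetical;
 * vcpu_run() and kvm_vm_free() are provided by kvm_util):
 *
 *	struct kvm_vm *vm;
 *
 *	vm = vm_create_default(0, 0, guest_code);
 *	vcpu_run(vm, 0);
 *	kvm_vm_free(vm);
 */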
struct kvm_x86_state {
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	struct kvm_xsave xsave;
	struct kvm_xcrs xcrs;
	struct kvm_sregs sregs;
	struct kvm_debugregs debugregs;
	union {
		struct kvm_nested_state nested;
		char nested_[16384];
	};
	struct kvm_msrs msrs;
};
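
/* Queries how many MSR indices the kernel will report.  Calling
 * KVM_GET_MSR_INDEX_LIST with nmsrs = 0 is the standard probe: the
 * ioctl fails with E2BIG and writes the required count into nmsrs.
 */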
static int kvm_get_num_msrs(struct kvm_vm *vm)
{
	struct kvm_msr_list nmsrs;
	int r;

	nmsrs.nmsrs = 0;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
		    r);

	return nmsrs.nmsrs;
}
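
/* VCPU State Save
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to a malloc'd kvm_x86_state holding the VCPU's complete
 *   architectural state: events, MP state, registers, XSAVE area,
 *   XCRs, system registers, nested state, MSRs and debug registers.
 *
 * The caller owns the returned buffer and should free() it once the
 * state has been restored with vcpu_load_state().
 */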
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct kvm_msr_list *list;
	struct kvm_x86_state *state;
	int nmsrs, r, i;
	static int nested_size = -1;

	if (nested_size == -1) {
		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
		TEST_ASSERT(nested_size <= sizeof(state->nested_),
			    "Nested state size too big, %i > %zi",
			    nested_size, sizeof(state->nested_));
	}

	nmsrs = kvm_get_num_msrs(vm);
	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
	list->nmsrs = nmsrs;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
		    r);

	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
		    r);

	if (nested_size) {
		state->nested.size = sizeof(state->nested_);
		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
			    r);
		TEST_ASSERT(state->nested.size <= nested_size,
			    "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
			    state->nested.size, nested_size);
	} else
		state->nested.size = 0;

	state->msrs.nmsrs = nmsrs;
	for (i = 0; i < nmsrs; i++)
		state->msrs.entries[i].index = list->indices[i];
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
	TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
		    r, r == nmsrs ? -1 : list->indices[r]);

	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
		    r);

	free(list);
	return state;
}
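
/* VCPU State Load
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   state - State previously captured by vcpu_save_state()
 *
 * Output Args: None
 *
 * Return: None
 *
 * Replays the captured state into the VCPU: nested state first, then
 * XSAVE/XCRs, system registers, MSRs, events, MP state and debug
 * registers, with the general purpose registers set last.
 */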
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int r;

	if (state->nested.size) {
		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
			    r);
	}

	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
	TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
		    r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);

	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
		    r);
}
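
/* Usage sketch for a save/restore round trip (VCPU_ID is a hypothetical
 * macro; vcpu_run() and kvm_vm_release() are provided by kvm_util):
 *
 *	struct kvm_x86_state *state;
 *
 *	vcpu_run(vm, VCPU_ID);
 *	state = vcpu_save_state(vm, VCPU_ID);
 *	... tear down and recreate the VM and VCPU ...
 *	vcpu_load_state(vm, VCPU_ID, state);
 *	free(state);
 */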