setup.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875
  1. /*
  2. * Machine specific setup for xen
  3. *
  4. * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  5. */
  6. #include <linux/module.h>
  7. #include <linux/sched.h>
  8. #include <linux/mm.h>
  9. #include <linux/pm.h>
  10. #include <linux/memblock.h>
  11. #include <linux/cpuidle.h>
  12. #include <linux/cpufreq.h>
  13. #include <asm/elf.h>
  14. #include <asm/vdso.h>
  15. #include <asm/e820.h>
  16. #include <asm/setup.h>
  17. #include <asm/acpi.h>
  18. #include <asm/numa.h>
  19. #include <asm/xen/hypervisor.h>
  20. #include <asm/xen/hypercall.h>
  21. #include <xen/xen.h>
  22. #include <xen/page.h>
  23. #include <xen/interface/callback.h>
  24. #include <xen/interface/memory.h>
  25. #include <xen/interface/physdev.h>
  26. #include <xen/features.h>
  27. #include "xen-ops.h"
  28. #include "vdso.h"
  29. #include "p2m.h"
  30. #include "mmu.h"
  /*
   * Hypervisor callback entry points. These symbols are code, but not C
   * functions, so they are declared as char arrays. Defined in entry.S.
   */
  extern const char xen_hypervisor_callback[];
  extern const char xen_failsafe_callback[];

  #ifdef CONFIG_X86_64
  extern asmlinkage void nmi(void);
  #endif

  /* Entry targets registered with the hypervisor via register_callback(). */
  extern void xen_sysenter_target(void);
  extern void xen_syscall_target(void);
  extern void xen_syscall32_target(void);
  40. /* Amount of extra memory space we add to the e820 ranges */
  41. struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
  42. /* Number of pages released from the initial allocation. */
  43. unsigned long xen_released_pages;
  44. /*
  45. * Buffer used to remap identity mapped pages. We only need the virtual space.
  46. * The physical page behind this address is remapped as needed to different
  47. * buffer pages.
  48. */
  49. #define REMAP_SIZE (P2M_PER_PAGE - 3)
  50. static struct {
  51. unsigned long next_area_mfn;
  52. unsigned long target_pfn;
  53. unsigned long size;
  54. unsigned long mfns[REMAP_SIZE];
  55. } xen_remap_buf __initdata __aligned(PAGE_SIZE);
  56. static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
  /*
   * Upper bound for the amount of extra memory, expressed as a multiple of
   * the base size.
   *
   * The size of struct page is the main scaling factor: with an extreme
   * base:extra ratio the page structures describing the extra memory could
   * consume all of the base memory and leave no room for anything else.
   *
   * A factor of 10 seems like a reasonable balance between scaling
   * flexibility and leaving a practically usable system.
   */
  #define EXTRA_MEM_RATIO (10)
  68. static void __init xen_add_extra_mem(u64 start, u64 size)
  69. {
  70. int i;
  71. for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
  72. /* Add new region. */
  73. if (xen_extra_mem[i].size == 0) {
  74. xen_extra_mem[i].start = start;
  75. xen_extra_mem[i].size = size;
  76. break;
  77. }
  78. /* Append to existing region. */
  79. if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
  80. xen_extra_mem[i].size += size;
  81. break;
  82. }
  83. }
  84. if (i == XEN_EXTRA_MEM_MAX_REGIONS)
  85. printk(KERN_WARNING "Warning: not enough extra memory regions\n");
  86. memblock_reserve(start, size);
  87. }
  88. static void __init xen_del_extra_mem(u64 start, u64 size)
  89. {
  90. int i;
  91. u64 start_r, size_r;
  92. for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
  93. start_r = xen_extra_mem[i].start;
  94. size_r = xen_extra_mem[i].size;
  95. /* Start of region. */
  96. if (start_r == start) {
  97. BUG_ON(size > size_r);
  98. xen_extra_mem[i].start += size;
  99. xen_extra_mem[i].size -= size;
  100. break;
  101. }
  102. /* End of region. */
  103. if (start_r + size_r == start + size) {
  104. BUG_ON(size > size_r);
  105. xen_extra_mem[i].size -= size;
  106. break;
  107. }
  108. /* Mid of region. */
  109. if (start > start_r && start < start_r + size_r) {
  110. BUG_ON(start + size > start_r + size_r);
  111. xen_extra_mem[i].size = start - start_r;
  112. /* Calling memblock_reserve() again is okay. */
  113. xen_add_extra_mem(start + size, start_r + size_r -
  114. (start + size));
  115. break;
  116. }
  117. }
  118. memblock_free(start, size);
  119. }
  120. /*
  121. * Called during boot before the p2m list can take entries beyond the
  122. * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
  123. * invalid.
  124. */
  125. unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
  126. {
  127. int i;
  128. unsigned long addr = PFN_PHYS(pfn);
  129. for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
  130. if (addr >= xen_extra_mem[i].start &&
  131. addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
  132. return INVALID_P2M_ENTRY;
  133. }
  134. return IDENTITY_FRAME(pfn);
  135. }
  136. /*
  137. * Mark all pfns of extra mem as invalid in p2m list.
  138. */
  139. void __init xen_inv_extra_mem(void)
  140. {
  141. unsigned long pfn, pfn_s, pfn_e;
  142. int i;
  143. for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
  144. pfn_s = PFN_DOWN(xen_extra_mem[i].start);
  145. pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
  146. for (pfn = pfn_s; pfn < pfn_e; pfn++)
  147. set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
  148. }
  149. }
  150. /*
  151. * Finds the next RAM pfn available in the E820 map after min_pfn.
  152. * This function updates min_pfn with the pfn found and returns
  153. * the size of that range or zero if not found.
  154. */
  155. static unsigned long __init xen_find_pfn_range(
  156. const struct e820entry *list, size_t map_size,
  157. unsigned long *min_pfn)
  158. {
  159. const struct e820entry *entry;
  160. unsigned int i;
  161. unsigned long done = 0;
  162. for (i = 0, entry = list; i < map_size; i++, entry++) {
  163. unsigned long s_pfn;
  164. unsigned long e_pfn;
  165. if (entry->type != E820_RAM)
  166. continue;
  167. e_pfn = PFN_DOWN(entry->addr + entry->size);
  168. /* We only care about E820 after this */
  169. if (e_pfn < *min_pfn)
  170. continue;
  171. s_pfn = PFN_UP(entry->addr);
  172. /* If min_pfn falls within the E820 entry, we want to start
  173. * at the min_pfn PFN.
  174. */
  175. if (s_pfn <= *min_pfn) {
  176. done = e_pfn - *min_pfn;
  177. } else {
  178. done = e_pfn - s_pfn;
  179. *min_pfn = s_pfn;
  180. }
  181. break;
  182. }
  183. return done;
  184. }
  185. static int __init xen_free_mfn(unsigned long mfn)
  186. {
  187. struct xen_memory_reservation reservation = {
  188. .address_bits = 0,
  189. .extent_order = 0,
  190. .domid = DOMID_SELF
  191. };
  192. set_xen_guest_handle(reservation.extent_start, &mfn);
  193. reservation.nr_extents = 1;
  194. return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
  195. }
  196. /*
  197. * This releases a chunk of memory and then does the identity map. It's used
  198. * as a fallback if the remapping fails.
  199. */
  200. static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
  201. unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
  202. unsigned long *released)
  203. {
  204. unsigned long len = 0;
  205. unsigned long pfn, end;
  206. int ret;
  207. WARN_ON(start_pfn > end_pfn);
  208. end = min(end_pfn, nr_pages);
  209. for (pfn = start_pfn; pfn < end; pfn++) {
  210. unsigned long mfn = pfn_to_mfn(pfn);
  211. /* Make sure pfn exists to start with */
  212. if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
  213. continue;
  214. ret = xen_free_mfn(mfn);
  215. WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
  216. if (ret == 1) {
  217. if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
  218. break;
  219. len++;
  220. } else
  221. break;
  222. }
  223. /* Need to release pages first */
  224. *released += len;
  225. *identity += set_phys_range_identity(start_pfn, end_pfn);
  226. }
  227. /*
  228. * Helper function to update the p2m and m2p tables and kernel mapping.
  229. */
  230. static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
  231. {
  232. struct mmu_update update = {
  233. .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
  234. .val = pfn
  235. };
  236. /* Update p2m */
  237. if (!set_phys_to_machine(pfn, mfn)) {
  238. WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
  239. pfn, mfn);
  240. BUG();
  241. }
  242. /* Update m2p */
  243. if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
  244. WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
  245. mfn, pfn);
  246. BUG();
  247. }
  248. /* Update kernel mapping, but not for highmem. */
  249. if ((pfn << PAGE_SHIFT) >= __pa(high_memory))
  250. return;
  251. if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
  252. mfn_pte(mfn, PAGE_KERNEL), 0)) {
  253. WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
  254. mfn, pfn);
  255. BUG();
  256. }
  257. }
  258. /*
  259. * This function updates the p2m and m2p tables with an identity map from
  260. * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
  261. * original allocation at remap_pfn. The information needed for remapping is
  262. * saved in the memory itself to avoid the need for allocating buffers. The
  263. * complete remap information is contained in a list of MFNs each containing
  264. * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
  265. * This enables us to preserve the original mfn sequence while doing the
  266. * remapping at a time when the memory management is capable of allocating
  267. * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
  268. * its callers.
  269. */
  270. static void __init xen_do_set_identity_and_remap_chunk(
  271. unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
  272. {
  273. unsigned long buf = (unsigned long)&xen_remap_buf;
  274. unsigned long mfn_save, mfn;
  275. unsigned long ident_pfn_iter, remap_pfn_iter;
  276. unsigned long ident_end_pfn = start_pfn + size;
  277. unsigned long left = size;
  278. unsigned long ident_cnt = 0;
  279. unsigned int i, chunk;
  280. WARN_ON(size == 0);
  281. BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
  282. mfn_save = virt_to_mfn(buf);
  283. for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
  284. ident_pfn_iter < ident_end_pfn;
  285. ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
  286. chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;
  287. /* Map first pfn to xen_remap_buf */
  288. mfn = pfn_to_mfn(ident_pfn_iter);
  289. set_pte_mfn(buf, mfn, PAGE_KERNEL);
  290. /* Save mapping information in page */
  291. xen_remap_buf.next_area_mfn = xen_remap_mfn;
  292. xen_remap_buf.target_pfn = remap_pfn_iter;
  293. xen_remap_buf.size = chunk;
  294. for (i = 0; i < chunk; i++)
  295. xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);
  296. /* Put remap buf into list. */
  297. xen_remap_mfn = mfn;
  298. /* Set identity map */
  299. ident_cnt += set_phys_range_identity(ident_pfn_iter,
  300. ident_pfn_iter + chunk);
  301. left -= chunk;
  302. }
  303. /* Restore old xen_remap_buf mapping */
  304. set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
  305. }
  306. /*
  307. * This function takes a contiguous pfn range that needs to be identity mapped
  308. * and:
  309. *
  310. * 1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
  311. * 2) Calls the do_ function to actually do the mapping/remapping work.
  312. *
  313. * The goal is to not allocate additional memory but to remap the existing
  314. * pages. In the case of an error the underlying memory is simply released back
  315. * to Xen and not remapped.
  316. */
  317. static unsigned long __init xen_set_identity_and_remap_chunk(
  318. const struct e820entry *list, size_t map_size, unsigned long start_pfn,
  319. unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
  320. unsigned long *identity, unsigned long *released)
  321. {
  322. unsigned long pfn;
  323. unsigned long i = 0;
  324. unsigned long n = end_pfn - start_pfn;
  325. while (i < n) {
  326. unsigned long cur_pfn = start_pfn + i;
  327. unsigned long left = n - i;
  328. unsigned long size = left;
  329. unsigned long remap_range_size;
  330. /* Do not remap pages beyond the current allocation */
  331. if (cur_pfn >= nr_pages) {
  332. /* Identity map remaining pages */
  333. *identity += set_phys_range_identity(cur_pfn,
  334. cur_pfn + size);
  335. break;
  336. }
  337. if (cur_pfn + size > nr_pages)
  338. size = nr_pages - cur_pfn;
  339. remap_range_size = xen_find_pfn_range(list, map_size,
  340. &remap_pfn);
  341. if (!remap_range_size) {
  342. pr_warning("Unable to find available pfn range, not remapping identity pages\n");
  343. xen_set_identity_and_release_chunk(cur_pfn,
  344. cur_pfn + left, nr_pages, identity, released);
  345. break;
  346. }
  347. /* Adjust size to fit in current e820 RAM region */
  348. if (size > remap_range_size)
  349. size = remap_range_size;
  350. xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);
  351. /* Update variables to reflect new mappings. */
  352. i += size;
  353. remap_pfn += size;
  354. *identity += size;
  355. }
  356. /*
  357. * If the PFNs are currently mapped, the VA mapping also needs
  358. * to be updated to be 1:1.
  359. */
  360. for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
  361. (void)HYPERVISOR_update_va_mapping(
  362. (unsigned long)__va(pfn << PAGE_SHIFT),
  363. mfn_pte(pfn, PAGE_KERNEL_IO), 0);
  364. return remap_pfn;
  365. }
  366. static void __init xen_set_identity_and_remap(
  367. const struct e820entry *list, size_t map_size, unsigned long nr_pages,
  368. unsigned long *released)
  369. {
  370. phys_addr_t start = 0;
  371. unsigned long identity = 0;
  372. unsigned long last_pfn = nr_pages;
  373. const struct e820entry *entry;
  374. unsigned long num_released = 0;
  375. int i;
  376. /*
  377. * Combine non-RAM regions and gaps until a RAM region (or the
  378. * end of the map) is reached, then set the 1:1 map and
  379. * remap the memory in those non-RAM regions.
  380. *
  381. * The combined non-RAM regions are rounded to a whole number
  382. * of pages so any partial pages are accessible via the 1:1
  383. * mapping. This is needed for some BIOSes that put (for
  384. * example) the DMI tables in a reserved region that begins on
  385. * a non-page boundary.
  386. */
  387. for (i = 0, entry = list; i < map_size; i++, entry++) {
  388. phys_addr_t end = entry->addr + entry->size;
  389. if (entry->type == E820_RAM || i == map_size - 1) {
  390. unsigned long start_pfn = PFN_DOWN(start);
  391. unsigned long end_pfn = PFN_UP(end);
  392. if (entry->type == E820_RAM)
  393. end_pfn = PFN_UP(entry->addr);
  394. if (start_pfn < end_pfn)
  395. last_pfn = xen_set_identity_and_remap_chunk(
  396. list, map_size, start_pfn,
  397. end_pfn, nr_pages, last_pfn,
  398. &identity, &num_released);
  399. start = end;
  400. }
  401. }
  402. *released = num_released;
  403. pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
  404. pr_info("Released %ld page(s)\n", num_released);
  405. }
  406. /*
  407. * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
  408. * The remap information (which mfn remap to which pfn) is contained in the
  409. * to be remapped memory itself in a linked list anchored at xen_remap_mfn.
  410. * This scheme allows to remap the different chunks in arbitrary order while
  411. * the resulting mapping will be independant from the order.
  412. */
  413. void __init xen_remap_memory(void)
  414. {
  415. unsigned long buf = (unsigned long)&xen_remap_buf;
  416. unsigned long mfn_save, mfn, pfn;
  417. unsigned long remapped = 0;
  418. unsigned int i;
  419. unsigned long pfn_s = ~0UL;
  420. unsigned long len = 0;
  421. mfn_save = virt_to_mfn(buf);
  422. while (xen_remap_mfn != INVALID_P2M_ENTRY) {
  423. /* Map the remap information */
  424. set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);
  425. BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);
  426. pfn = xen_remap_buf.target_pfn;
  427. for (i = 0; i < xen_remap_buf.size; i++) {
  428. mfn = xen_remap_buf.mfns[i];
  429. xen_update_mem_tables(pfn, mfn);
  430. remapped++;
  431. pfn++;
  432. }
  433. if (pfn_s == ~0UL || pfn == pfn_s) {
  434. pfn_s = xen_remap_buf.target_pfn;
  435. len += xen_remap_buf.size;
  436. } else if (pfn_s + len == xen_remap_buf.target_pfn) {
  437. len += xen_remap_buf.size;
  438. } else {
  439. xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
  440. pfn_s = xen_remap_buf.target_pfn;
  441. len = xen_remap_buf.size;
  442. }
  443. mfn = xen_remap_mfn;
  444. xen_remap_mfn = xen_remap_buf.next_area_mfn;
  445. }
  446. if (pfn_s != ~0UL && len)
  447. xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
  448. set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
  449. pr_info("Remapped %ld page(s)\n", remapped);
  450. }
  451. static unsigned long __init xen_get_max_pages(void)
  452. {
  453. unsigned long max_pages = MAX_DOMAIN_PAGES;
  454. domid_t domid = DOMID_SELF;
  455. int ret;
  456. /*
  457. * For the initial domain we use the maximum reservation as
  458. * the maximum page.
  459. *
  460. * For guest domains the current maximum reservation reflects
  461. * the current maximum rather than the static maximum. In this
  462. * case the e820 map provided to us will cover the static
  463. * maximum region.
  464. */
  465. if (xen_initial_domain()) {
  466. ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
  467. if (ret > 0)
  468. max_pages = ret;
  469. }
  470. return min(max_pages, MAX_DOMAIN_PAGES);
  471. }
  472. static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
  473. {
  474. u64 end = start + size;
  475. /* Align RAM regions to page boundaries. */
  476. if (type == E820_RAM) {
  477. start = PAGE_ALIGN(start);
  478. end &= ~((u64)PAGE_SIZE - 1);
  479. }
  480. e820_add_region(start, end - start, type);
  481. }
  482. void xen_ignore_unusable(struct e820entry *list, size_t map_size)
  483. {
  484. struct e820entry *entry;
  485. unsigned int i;
  486. for (i = 0, entry = list; i < map_size; i++, entry++) {
  487. if (entry->type == E820_UNUSABLE)
  488. entry->type = E820_RAM;
  489. }
  490. }
  491. /**
  492. * machine_specific_memory_setup - Hook for machine specific memory setup.
  493. **/
  494. char * __init xen_memory_setup(void)
  495. {
  496. static struct e820entry map[E820MAX] __initdata;
  497. unsigned long max_pfn = xen_start_info->nr_pages;
  498. unsigned long long mem_end;
  499. int rc;
  500. struct xen_memory_map memmap;
  501. unsigned long max_pages;
  502. unsigned long extra_pages = 0;
  503. int i;
  504. int op;
  505. max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
  506. mem_end = PFN_PHYS(max_pfn);
  507. memmap.nr_entries = E820MAX;
  508. set_xen_guest_handle(memmap.buffer, map);
  509. op = xen_initial_domain() ?
  510. XENMEM_machine_memory_map :
  511. XENMEM_memory_map;
  512. rc = HYPERVISOR_memory_op(op, &memmap);
  513. if (rc == -ENOSYS) {
  514. BUG_ON(xen_initial_domain());
  515. memmap.nr_entries = 1;
  516. map[0].addr = 0ULL;
  517. map[0].size = mem_end;
  518. /* 8MB slack (to balance backend allocations). */
  519. map[0].size += 8ULL << 20;
  520. map[0].type = E820_RAM;
  521. rc = 0;
  522. }
  523. BUG_ON(rc);
  524. BUG_ON(memmap.nr_entries == 0);
  525. /*
  526. * Xen won't allow a 1:1 mapping to be created to UNUSABLE
  527. * regions, so if we're using the machine memory map leave the
  528. * region as RAM as it is in the pseudo-physical map.
  529. *
  530. * UNUSABLE regions in domUs are not handled and will need
  531. * a patch in the future.
  532. */
  533. if (xen_initial_domain())
  534. xen_ignore_unusable(map, memmap.nr_entries);
  535. /* Make sure the Xen-supplied memory map is well-ordered. */
  536. sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
  537. max_pages = xen_get_max_pages();
  538. if (max_pages > max_pfn)
  539. extra_pages += max_pages - max_pfn;
  540. /*
  541. * Set identity map on non-RAM pages and prepare remapping the
  542. * underlying RAM.
  543. */
  544. xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
  545. &xen_released_pages);
  546. extra_pages += xen_released_pages;
  547. /*
  548. * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
  549. * factor the base size. On non-highmem systems, the base
  550. * size is the full initial memory allocation; on highmem it
  551. * is limited to the max size of lowmem, so that it doesn't
  552. * get completely filled.
  553. *
  554. * In principle there could be a problem in lowmem systems if
  555. * the initial memory is also very large with respect to
  556. * lowmem, but we won't try to deal with that here.
  557. */
  558. extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
  559. extra_pages);
  560. i = 0;
  561. while (i < memmap.nr_entries) {
  562. u64 addr = map[i].addr;
  563. u64 size = map[i].size;
  564. u32 type = map[i].type;
  565. if (type == E820_RAM) {
  566. if (addr < mem_end) {
  567. size = min(size, mem_end - addr);
  568. } else if (extra_pages) {
  569. size = min(size, (u64)extra_pages * PAGE_SIZE);
  570. extra_pages -= size / PAGE_SIZE;
  571. xen_add_extra_mem(addr, size);
  572. xen_max_p2m_pfn = PFN_DOWN(addr + size);
  573. } else
  574. type = E820_UNUSABLE;
  575. }
  576. xen_align_and_add_e820_region(addr, size, type);
  577. map[i].addr += size;
  578. map[i].size -= size;
  579. if (map[i].size == 0)
  580. i++;
  581. }
  582. /*
  583. * Set the rest as identity mapped, in case PCI BARs are
  584. * located here.
  585. *
  586. * PFNs above MAX_P2M_PFN are considered identity mapped as
  587. * well.
  588. */
  589. set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);
  590. /*
  591. * In domU, the ISA region is normal, usable memory, but we
  592. * reserve ISA memory anyway because too many things poke
  593. * about in there.
  594. */
  595. e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
  596. E820_RESERVED);
  597. /*
  598. * Reserve Xen bits:
  599. * - mfn_list
  600. * - xen_start_info
  601. * See comment above "struct start_info" in <xen/interface/xen.h>
  602. * We tried to make the the memblock_reserve more selective so
  603. * that it would be clear what region is reserved. Sadly we ran
  604. * in the problem wherein on a 64-bit hypervisor with a 32-bit
  605. * initial domain, the pt_base has the cr3 value which is not
  606. * neccessarily where the pagetable starts! As Jan put it: "
  607. * Actually, the adjustment turns out to be correct: The page
  608. * tables for a 32-on-64 dom0 get allocated in the order "first L1",
  609. * "first L2", "first L3", so the offset to the page table base is
  610. * indeed 2. When reading xen/include/public/xen.h's comment
  611. * very strictly, this is not a violation (since there nothing is said
  612. * that the first thing in the page table space is pointed to by
  613. * pt_base; I admit that this seems to be implied though, namely
  614. * do I think that it is implied that the page table space is the
  615. * range [pt_base, pt_base + nt_pt_frames), whereas that
  616. * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
  617. * which - without a priori knowledge - the kernel would have
  618. * difficulty to figure out)." - so lets just fall back to the
  619. * easy way and reserve the whole region.
  620. */
  621. memblock_reserve(__pa(xen_start_info->mfn_list),
  622. xen_start_info->pt_base - xen_start_info->mfn_list);
  623. sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
  624. return "Xen";
  625. }
  626. /*
  627. * Machine specific memory setup for auto-translated guests.
  628. */
  629. char * __init xen_auto_xlated_memory_setup(void)
  630. {
  631. static struct e820entry map[E820MAX] __initdata;
  632. struct xen_memory_map memmap;
  633. int i;
  634. int rc;
  635. memmap.nr_entries = E820MAX;
  636. set_xen_guest_handle(memmap.buffer, map);
  637. rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
  638. if (rc < 0)
  639. panic("No memory map (%d)\n", rc);
  640. sanitize_e820_map(map, ARRAY_SIZE(map), &memmap.nr_entries);
  641. for (i = 0; i < memmap.nr_entries; i++)
  642. e820_add_region(map[i].addr, map[i].size, map[i].type);
  643. memblock_reserve(__pa(xen_start_info->mfn_list),
  644. xen_start_info->pt_base - xen_start_info->mfn_list);
  645. return "Xen";
  646. }
  647. /*
  648. * Set the bit indicating "nosegneg" library variants should be used.
  649. * We only need to bother in pure 32-bit mode; compat 32-bit processes
  650. * can have un-truncated segments, so wrapping around is allowed.
  651. */
  652. static void __init fiddle_vdso(void)
  653. {
  654. #ifdef CONFIG_X86_32
  655. /*
  656. * This could be called before selected_vdso32 is initialized, so
  657. * just fiddle with both possible images. vdso_image_32_syscall
  658. * can't be selected, since it only exists on 64-bit systems.
  659. */
  660. u32 *mask;
  661. mask = vdso_image_32_int80.data +
  662. vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
  663. *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
  664. mask = vdso_image_32_sysenter.data +
  665. vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
  666. *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
  667. #endif
  668. }
  669. static int register_callback(unsigned type, const void *func)
  670. {
  671. struct callback_register callback = {
  672. .type = type,
  673. .address = XEN_CALLBACK(__KERNEL_CS, func),
  674. .flags = CALLBACKF_mask_events,
  675. };
  676. return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
  677. }
  678. void xen_enable_sysenter(void)
  679. {
  680. int ret;
  681. unsigned sysenter_feature;
  682. #ifdef CONFIG_X86_32
  683. sysenter_feature = X86_FEATURE_SEP;
  684. #else
  685. sysenter_feature = X86_FEATURE_SYSENTER32;
  686. #endif
  687. if (!boot_cpu_has(sysenter_feature))
  688. return;
  689. ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
  690. if(ret != 0)
  691. setup_clear_cpu_cap(sysenter_feature);
  692. }
  /*
   * Register the syscall entry points with the hypervisor. Only does
   * something on 64-bit kernels.
   */
  void xen_enable_syscall(void)
  {
  #ifdef CONFIG_X86_64
  	int err;

  	err = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
  	if (err != 0) {
  		printk(KERN_ERR "Failed to set syscall callback: %d\n", err);
  		/* Pretty fatal; 64-bit userspace has no other
  		   mechanism for syscalls. */
  	}

  	/* The 32-bit syscall entry is optional. */
  	if (!boot_cpu_has(X86_FEATURE_SYSCALL32))
  		return;

  	err = register_callback(CALLBACKTYPE_syscall32, xen_syscall32_target);
  	if (err != 0)
  		setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
  #endif /* CONFIG_X86_64 */
  }
  711. void __init xen_pvmmu_arch_setup(void)
  712. {
  713. HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
  714. HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
  715. HYPERVISOR_vm_assist(VMASST_CMD_enable,
  716. VMASST_TYPE_pae_extended_cr3);
  717. if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
  718. register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
  719. BUG();
  720. xen_enable_sysenter();
  721. xen_enable_syscall();
  722. }
  723. /* This function is not called for HVM domains */
  724. void __init xen_arch_setup(void)
  725. {
  726. xen_panic_handler_init();
  727. if (!xen_feature(XENFEAT_auto_translated_physmap))
  728. xen_pvmmu_arch_setup();
  729. #ifdef CONFIG_ACPI
  730. if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
  731. printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
  732. disable_acpi();
  733. }
  734. #endif
  735. memcpy(boot_command_line, xen_start_info->cmd_line,
  736. MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
  737. COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
  738. /* Set up idle, making sure it calls safe_halt() pvop */
  739. disable_cpuidle();
  740. disable_cpufreq();
  741. WARN_ON(xen_set_default_idle());
  742. fiddle_vdso();
  743. #ifdef CONFIG_NUMA
  744. numa_off = 1;
  745. #endif
  746. }