@@ -27,17 +27,23 @@
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
+#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "vdso.h"
-#include "p2m.h"
#include "mmu.h"

+#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
+
/* Amount of extra memory space we add to the e820 ranges */
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;

+/* E820 map used during setting up memory. */
+static struct e820entry xen_e820_map[E820MAX] __initdata;
+static u32 xen_e820_map_entries __initdata;
+
/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
@@ -64,62 +70,89 @@ static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
 */
#define EXTRA_MEM_RATIO (10)

-static void __init xen_add_extra_mem(phys_addr_t start, phys_addr_t size)
+static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
+
+static void __init xen_parse_512gb(void)
+{
+	bool val = false;
+	char *arg;
+
+	arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit");
+	if (!arg)
+		return;
+
+	arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit=");
+	if (!arg)
+		val = true;
+	else if (strtobool(arg + strlen("xen_512gb_limit="), &val))
+		return;
+
+	xen_512gb_limit = val;
+}
+
+static void __init xen_add_extra_mem(unsigned long start_pfn,
+				     unsigned long n_pfns)
{
	int i;

+	/*
+	 * No need to check for zero size, should happen rarely and will only
+	 * write a new entry regarded to be unused due to zero size.
+	 */
	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		/* Add new region. */
-		if (xen_extra_mem[i].size == 0) {
-			xen_extra_mem[i].start = start;
-			xen_extra_mem[i].size = size;
+		if (xen_extra_mem[i].n_pfns == 0) {
+			xen_extra_mem[i].start_pfn = start_pfn;
+			xen_extra_mem[i].n_pfns = n_pfns;
			break;
		}
		/* Append to existing region. */
-		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
-			xen_extra_mem[i].size += size;
+		if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
+		    start_pfn) {
+			xen_extra_mem[i].n_pfns += n_pfns;
			break;
		}
	}
	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
		printk(KERN_WARNING "Warning: not enough extra memory regions\n");

-	memblock_reserve(start, size);
+	memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
}

-static void __init xen_del_extra_mem(phys_addr_t start, phys_addr_t size)
+static void __init xen_del_extra_mem(unsigned long start_pfn,
+				     unsigned long n_pfns)
{
	int i;
-	phys_addr_t start_r, size_r;
+	unsigned long start_r, size_r;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
-		start_r = xen_extra_mem[i].start;
-		size_r = xen_extra_mem[i].size;
+		start_r = xen_extra_mem[i].start_pfn;
+		size_r = xen_extra_mem[i].n_pfns;

		/* Start of region. */
-		if (start_r == start) {
-			BUG_ON(size > size_r);
-			xen_extra_mem[i].start += size;
-			xen_extra_mem[i].size -= size;
+		if (start_r == start_pfn) {
+			BUG_ON(n_pfns > size_r);
+			xen_extra_mem[i].start_pfn += n_pfns;
+			xen_extra_mem[i].n_pfns -= n_pfns;
			break;
		}
		/* End of region. */
-		if (start_r + size_r == start + size) {
-			BUG_ON(size > size_r);
-			xen_extra_mem[i].size -= size;
+		if (start_r + size_r == start_pfn + n_pfns) {
+			BUG_ON(n_pfns > size_r);
+			xen_extra_mem[i].n_pfns -= n_pfns;
			break;
		}
		/* Mid of region. */
-		if (start > start_r && start < start_r + size_r) {
-			BUG_ON(start + size > start_r + size_r);
-			xen_extra_mem[i].size = start - start_r;
+		if (start_pfn > start_r && start_pfn < start_r + size_r) {
+			BUG_ON(start_pfn + n_pfns > start_r + size_r);
+			xen_extra_mem[i].n_pfns = start_pfn - start_r;
			/* Calling memblock_reserve() again is okay. */
-			xen_add_extra_mem(start + size, start_r + size_r -
-					  (start + size));
+			xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r -
+					  (start_pfn + n_pfns));
			break;
		}
	}
-	memblock_free(start, size);
+	memblock_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
}

/*
@@ -130,11 +163,10 @@ static void __init xen_del_extra_mem(phys_addr_t start, phys_addr_t size)
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{
	int i;
-	phys_addr_t addr = PFN_PHYS(pfn);

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
-		if (addr >= xen_extra_mem[i].start &&
-		    addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
+		if (pfn >= xen_extra_mem[i].start_pfn &&
+		    pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns)
			return INVALID_P2M_ENTRY;
	}

@@ -150,10 +182,10 @@ void __init xen_inv_extra_mem(void)
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
-		if (!xen_extra_mem[i].size)
+		if (!xen_extra_mem[i].n_pfns)
			continue;
-		pfn_s = PFN_DOWN(xen_extra_mem[i].start);
-		pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
+		pfn_s = xen_extra_mem[i].start_pfn;
+		pfn_e = pfn_s + xen_extra_mem[i].n_pfns;
		for (pfn = pfn_s; pfn < pfn_e; pfn++)
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
	}
@@ -164,15 +196,13 @@ void __init xen_inv_extra_mem(void)
 * This function updates min_pfn with the pfn found and returns
 * the size of that range or zero if not found.
 */
-static unsigned long __init xen_find_pfn_range(
-	const struct e820entry *list, size_t map_size,
-	unsigned long *min_pfn)
+static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
{
-	const struct e820entry *entry;
+	const struct e820entry *entry = xen_e820_map;
	unsigned int i;
	unsigned long done = 0;

-	for (i = 0, entry = list; i < map_size; i++, entry++) {
+	for (i = 0; i < xen_e820_map_entries; i++, entry++) {
		unsigned long s_pfn;
		unsigned long e_pfn;

@@ -221,7 +251,7 @@ static int __init xen_free_mfn(unsigned long mfn)
 * as a fallback if the remapping fails.
 */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
-	unsigned long end_pfn, unsigned long nr_pages, unsigned long *released)
+	unsigned long end_pfn, unsigned long nr_pages)
{
	unsigned long pfn, end;
	int ret;
@@ -241,7 +271,7 @@ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);

		if (ret == 1) {
-			(*released)++;
+			xen_released_pages++;
			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
				break;
		} else
@@ -356,9 +386,8 @@ static void __init xen_do_set_identity_and_remap_chunk(
 * to Xen and not remapped.
 */
static unsigned long __init xen_set_identity_and_remap_chunk(
-	const struct e820entry *list, size_t map_size, unsigned long start_pfn,
-	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
-	unsigned long *released, unsigned long *remapped)
+	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
+	unsigned long remap_pfn)
{
	unsigned long pfn;
	unsigned long i = 0;
@@ -379,12 +408,11 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
		if (cur_pfn + size > nr_pages)
			size = nr_pages - cur_pfn;

-		remap_range_size = xen_find_pfn_range(list, map_size,
-						      &remap_pfn);
+		remap_range_size = xen_find_pfn_range(&remap_pfn);
		if (!remap_range_size) {
			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
			xen_set_identity_and_release_chunk(cur_pfn,
-						cur_pfn + left, nr_pages, released);
+						cur_pfn + left, nr_pages);
			break;
		}
		/* Adjust size to fit in current e820 RAM region */
@@ -396,7 +424,6 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
		/* Update variables to reflect new mappings. */
		i += size;
		remap_pfn += size;
-		*remapped += size;
	}

	/*
@@ -411,15 +438,11 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
	return remap_pfn;
}

-static void __init xen_set_identity_and_remap(
-	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
-	unsigned long *released, unsigned long *remapped)
+static void __init xen_set_identity_and_remap(unsigned long nr_pages)
{
	phys_addr_t start = 0;
	unsigned long last_pfn = nr_pages;
-	const struct e820entry *entry;
-	unsigned long num_released = 0;
-	unsigned long num_remapped = 0;
+	const struct e820entry *entry = xen_e820_map;
	int i;

	/*
@@ -433,9 +456,9 @@ static void __init xen_set_identity_and_remap(
	 * example) the DMI tables in a reserved region that begins on
	 * a non-page boundary.
	 */
-	for (i = 0, entry = list; i < map_size; i++, entry++) {
+	for (i = 0; i < xen_e820_map_entries; i++, entry++) {
		phys_addr_t end = entry->addr + entry->size;
-		if (entry->type == E820_RAM || i == map_size - 1) {
+		if (entry->type == E820_RAM || i == xen_e820_map_entries - 1) {
			unsigned long start_pfn = PFN_DOWN(start);
			unsigned long end_pfn = PFN_UP(end);

@@ -444,17 +467,13 @@ static void __init xen_set_identity_and_remap(

			if (start_pfn < end_pfn)
				last_pfn = xen_set_identity_and_remap_chunk(
-						list, map_size, start_pfn,
-						end_pfn, nr_pages, last_pfn,
-						&num_released, &num_remapped);
+						start_pfn, end_pfn, nr_pages,
+						last_pfn);
			start = end;
		}
	}

-	*released = num_released;
-	*remapped = num_remapped;
-
-	pr_info("Released %ld page(s)\n", num_released);
+	pr_info("Released %ld page(s)\n", xen_released_pages);
}

/*
@@ -494,7 +513,7 @@ void __init xen_remap_memory(void)
		} else if (pfn_s + len == xen_remap_buf.target_pfn) {
			len += xen_remap_buf.size;
		} else {
-			xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
+			xen_del_extra_mem(pfn_s, len);
			pfn_s = xen_remap_buf.target_pfn;
			len = xen_remap_buf.size;
		}
@@ -504,19 +523,36 @@ void __init xen_remap_memory(void)
	}

	if (pfn_s != ~0UL && len)
-		xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
+		xen_del_extra_mem(pfn_s, len);

	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);

	pr_info("Remapped %ld page(s)\n", remapped);
}

+static unsigned long __init xen_get_pages_limit(void)
+{
+	unsigned long limit;
+
+#ifdef CONFIG_X86_32
+	limit = GB(64) / PAGE_SIZE;
+#else
+	limit = MAXMEM / PAGE_SIZE;
+	if (!xen_initial_domain() && xen_512gb_limit)
+		limit = GB(512) / PAGE_SIZE;
+#endif
+	return limit;
+}
+
static unsigned long __init xen_get_max_pages(void)
{
-	unsigned long max_pages = MAX_DOMAIN_PAGES;
+	unsigned long max_pages, limit;
	domid_t domid = DOMID_SELF;
	int ret;

+	limit = xen_get_pages_limit();
+	max_pages = limit;
+
	/*
	 * For the initial domain we use the maximum reservation as
	 * the maximum page.
@@ -532,7 +568,7 @@ static unsigned long __init xen_get_max_pages(void)
		max_pages = ret;
	}

-	return min(max_pages, MAX_DOMAIN_PAGES);
+	return min(max_pages, limit);
}

static void __init xen_align_and_add_e820_region(phys_addr_t start,
@@ -549,39 +585,188 @@ static void __init xen_align_and_add_e820_region(phys_addr_t start,
	e820_add_region(start, end - start, type);
}

-static void __init xen_ignore_unusable(struct e820entry *list, size_t map_size)
+static void __init xen_ignore_unusable(void)
{
-	struct e820entry *entry;
+	struct e820entry *entry = xen_e820_map;
	unsigned int i;

-	for (i = 0, entry = list; i < map_size; i++, entry++) {
+	for (i = 0; i < xen_e820_map_entries; i++, entry++) {
		if (entry->type == E820_UNUSABLE)
			entry->type = E820_RAM;
	}
}

+static unsigned long __init xen_count_remap_pages(unsigned long max_pfn)
+{
+	unsigned long extra = 0;
+	unsigned long start_pfn, end_pfn;
+	const struct e820entry *entry = xen_e820_map;
+	int i;
+
+	end_pfn = 0;
+	for (i = 0; i < xen_e820_map_entries; i++, entry++) {
+		start_pfn = PFN_DOWN(entry->addr);
+		/* Adjacent regions on non-page boundaries handling! */
+		end_pfn = min(end_pfn, start_pfn);
+
+		if (start_pfn >= max_pfn)
+			return extra + max_pfn - end_pfn;
+
+		/* Add any holes in map to result. */
+		extra += start_pfn - end_pfn;
+
+		end_pfn = PFN_UP(entry->addr + entry->size);
+		end_pfn = min(end_pfn, max_pfn);
+
+		if (entry->type != E820_RAM)
+			extra += end_pfn - start_pfn;
+	}
+
+	return extra;
+}
+
+bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
+{
+	struct e820entry *entry;
+	unsigned mapcnt;
+	phys_addr_t end;
+
+	if (!size)
+		return false;
+
+	end = start + size;
+	entry = xen_e820_map;
+
+	for (mapcnt = 0; mapcnt < xen_e820_map_entries; mapcnt++) {
+		if (entry->type == E820_RAM && entry->addr <= start &&
+		    (entry->addr + entry->size) >= end)
+			return false;
+
+		entry++;
+	}
+
+	return true;
+}
+
+/*
+ * Find a free area in physical memory not yet reserved and compliant with
+ * E820 map.
+ * Used to relocate pre-allocated areas like initrd or p2m list which are in
+ * conflict with the to be used E820 map.
+ * In case no area is found, return 0. Otherwise return the physical address
+ * of the area which is already reserved for convenience.
+ */
+phys_addr_t __init xen_find_free_area(phys_addr_t size)
+{
+	unsigned mapcnt;
+	phys_addr_t addr, start;
+	struct e820entry *entry = xen_e820_map;
+
+	for (mapcnt = 0; mapcnt < xen_e820_map_entries; mapcnt++, entry++) {
+		if (entry->type != E820_RAM || entry->size < size)
+			continue;
+		start = entry->addr;
+		for (addr = start; addr < start + size; addr += PAGE_SIZE) {
+			if (!memblock_is_reserved(addr))
+				continue;
+			start = addr + PAGE_SIZE;
+			if (start + size > entry->addr + entry->size)
+				break;
+		}
+		if (addr >= start + size) {
+			memblock_reserve(start, size);
+			return start;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Like memcpy, but with physical addresses for dest and src.
+ */
+static void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src,
+				   phys_addr_t n)
+{
+	phys_addr_t dest_off, src_off, dest_len, src_len, len;
+	void *from, *to;
+
+	while (n) {
+		dest_off = dest & ~PAGE_MASK;
+		src_off = src & ~PAGE_MASK;
+		dest_len = n;
+		if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off)
+			dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off;
+		src_len = n;
+		if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off)
+			src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off;
+		len = min(dest_len, src_len);
+		to = early_memremap(dest - dest_off, dest_len + dest_off);
+		from = early_memremap(src - src_off, src_len + src_off);
+		memcpy(to, from, len);
+		early_memunmap(to, dest_len + dest_off);
+		early_memunmap(from, src_len + src_off);
+		n -= len;
+		dest += len;
+		src += len;
+	}
+}
+
+/*
+ * Reserve Xen mfn_list.
+ */
+static void __init xen_reserve_xen_mfnlist(void)
+{
+	phys_addr_t start, size;
+
+	if (xen_start_info->mfn_list >= __START_KERNEL_map) {
+		start = __pa(xen_start_info->mfn_list);
+		size = PFN_ALIGN(xen_start_info->nr_pages *
+				 sizeof(unsigned long));
+	} else {
+		start = PFN_PHYS(xen_start_info->first_p2m_pfn);
+		size = PFN_PHYS(xen_start_info->nr_p2m_frames);
+	}
+
+	if (!xen_is_e820_reserved(start, size)) {
+		memblock_reserve(start, size);
+		return;
+	}
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Relocating the p2m on 32 bit system to an arbitrary virtual address
+	 * is not supported, so just give up.
+	 */
+	xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n");
+	BUG();
+#else
+	xen_relocate_p2m();
+#endif
+}
+
/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
-	static struct e820entry map[E820MAX] __initdata;
-
-	unsigned long max_pfn = xen_start_info->nr_pages;
-	phys_addr_t mem_end;
+	unsigned long max_pfn, pfn_s, n_pfns;
+	phys_addr_t mem_end, addr, size, chunk_size;
+	u32 type;
	int rc;
	struct xen_memory_map memmap;
	unsigned long max_pages;
	unsigned long extra_pages = 0;
-	unsigned long remapped_pages;
	int i;
	int op;

-	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
+	xen_parse_512gb();
+	max_pfn = xen_get_pages_limit();
+	max_pfn = min(max_pfn, xen_start_info->nr_pages);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
-	set_xen_guest_handle(memmap.buffer, map);
+	set_xen_guest_handle(memmap.buffer, xen_e820_map);

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
@@ -590,15 +775,16 @@ char * __init xen_memory_setup(void)
	if (rc == -ENOSYS) {
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
-		map[0].addr = 0ULL;
-		map[0].size = mem_end;
+		xen_e820_map[0].addr = 0ULL;
+		xen_e820_map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
-		map[0].size += 8ULL << 20;
-		map[0].type = E820_RAM;
+		xen_e820_map[0].size += 8ULL << 20;
+		xen_e820_map[0].type = E820_RAM;
		rc = 0;
	}
	BUG_ON(rc);
	BUG_ON(memmap.nr_entries == 0);
+	xen_e820_map_entries = memmap.nr_entries;

	/*
	 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
@@ -609,24 +795,19 @@ char * __init xen_memory_setup(void)
	 * a patch in the future.
	 */
	if (xen_initial_domain())
-		xen_ignore_unusable(map, memmap.nr_entries);
+		xen_ignore_unusable();

	/* Make sure the Xen-supplied memory map is well-ordered. */
-	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+	sanitize_e820_map(xen_e820_map, xen_e820_map_entries,
+			  &xen_e820_map_entries);

	max_pages = xen_get_max_pages();
-	if (max_pages > max_pfn)
-		extra_pages += max_pages - max_pfn;

-	/*
-	 * Set identity map on non-RAM pages and prepare remapping the
-	 * underlying RAM.
-	 */
-	xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
-				   &xen_released_pages, &remapped_pages);
+	/* How many extra pages do we need due to remapping? */
+	max_pages += xen_count_remap_pages(max_pfn);

-	extra_pages += xen_released_pages;
-	extra_pages += remapped_pages;
+	if (max_pages > max_pfn)
+		extra_pages += max_pages - max_pfn;

	/*
	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
@@ -635,46 +816,54 @@ char * __init xen_memory_setup(void)
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
+	 * Make sure we have no memory above max_pages, as this area
+	 * isn't handled by the p2m management.
+	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
-	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
-			  extra_pages);
+	extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+			   extra_pages, max_pages - max_pfn);
	i = 0;
-	while (i < memmap.nr_entries) {
-		phys_addr_t addr = map[i].addr;
-		phys_addr_t size = map[i].size;
-		u32 type = map[i].type;
+	addr = xen_e820_map[0].addr;
+	size = xen_e820_map[0].size;
+	while (i < xen_e820_map_entries) {
+		chunk_size = size;
+		type = xen_e820_map[i].type;

		if (type == E820_RAM) {
			if (addr < mem_end) {
-				size = min(size, mem_end - addr);
+				chunk_size = min(size, mem_end - addr);
			} else if (extra_pages) {
-				size = min(size, PFN_PHYS(extra_pages));
-				extra_pages -= PFN_DOWN(size);
-				xen_add_extra_mem(addr, size);
-				xen_max_p2m_pfn = PFN_DOWN(addr + size);
+				chunk_size = min(size, PFN_PHYS(extra_pages));
+				pfn_s = PFN_UP(addr);
+				n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s;
+				extra_pages -= n_pfns;
+				xen_add_extra_mem(pfn_s, n_pfns);
+				xen_max_p2m_pfn = pfn_s + n_pfns;
			} else
				type = E820_UNUSABLE;
		}

-		xen_align_and_add_e820_region(addr, size, type);
+		xen_align_and_add_e820_region(addr, chunk_size, type);

-		map[i].addr += size;
-		map[i].size -= size;
-		if (map[i].size == 0)
+		addr += chunk_size;
+		size -= chunk_size;
+		if (size == 0) {
			i++;
+			if (i < xen_e820_map_entries) {
+				addr = xen_e820_map[i].addr;
+				size = xen_e820_map[i].size;
+			}
+		}
	}

	/*
	 * Set the rest as identity mapped, in case PCI BARs are
	 * located here.
-	 *
-	 * PFNs above MAX_P2M_PFN are considered identity mapped as
-	 * well.
	 */
-	set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);
+	set_phys_range_identity(addr / PAGE_SIZE, ~0ul);

	/*
	 * In domU, the ISA region is normal, usable memory, but we
@@ -684,34 +873,53 @@ char * __init xen_memory_setup(void)
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+
	/*
-	 * Reserve Xen bits:
-	 * - mfn_list
-	 * - xen_start_info
-	 * See comment above "struct start_info" in <xen/interface/xen.h>
-	 * We tried to make the the memblock_reserve more selective so
-	 * that it would be clear what region is reserved. Sadly we ran
-	 * in the problem wherein on a 64-bit hypervisor with a 32-bit
-	 * initial domain, the pt_base has the cr3 value which is not
-	 * neccessarily where the pagetable starts! As Jan put it: "
-	 * Actually, the adjustment turns out to be correct: The page
-	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
-	 * "first L2", "first L3", so the offset to the page table base is
-	 * indeed 2. When reading xen/include/public/xen.h's comment
-	 * very strictly, this is not a violation (since there nothing is said
-	 * that the first thing in the page table space is pointed to by
-	 * pt_base; I admit that this seems to be implied though, namely
-	 * do I think that it is implied that the page table space is the
-	 * range [pt_base, pt_base + nt_pt_frames), whereas that
-	 * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
-	 * which - without a priori knowledge - the kernel would have
-	 * difficulty to figure out)." - so lets just fall back to the
-	 * easy way and reserve the whole region.
+	 * Check whether the kernel itself conflicts with the target E820 map.
+	 * Failing now is better than running into weird problems later due
+	 * to relocating (and even reusing) pages with kernel text or data.
	 */
-	memblock_reserve(__pa(xen_start_info->mfn_list),
-			 xen_start_info->pt_base - xen_start_info->mfn_list);
+	if (xen_is_e820_reserved(__pa_symbol(_text),
+			__pa_symbol(__bss_stop) - __pa_symbol(_text))) {
+		xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n");
+		BUG();
+	}

-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	/*
+	 * Check for a conflict of the hypervisor supplied page tables with
+	 * the target E820 map.
+	 */
+	xen_pt_check_e820();
+
+	xen_reserve_xen_mfnlist();
+
+	/* Check for a conflict of the initrd with the target E820 map. */
+	if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image,
+				 boot_params.hdr.ramdisk_size)) {
+		phys_addr_t new_area, start, size;
+
+		new_area = xen_find_free_area(boot_params.hdr.ramdisk_size);
+		if (!new_area) {
+			xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n");
+			BUG();
+		}
+
+		start = boot_params.hdr.ramdisk_image;
+		size = boot_params.hdr.ramdisk_size;
+		xen_phys_memcpy(new_area, start, size);
+		pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
+			start, start + size, new_area, new_area + size);
+		memblock_free(start, size);
+		boot_params.hdr.ramdisk_image = new_area;
+		boot_params.ext_ramdisk_image = new_area >> 32;
+	}
+
+	/*
+	 * Set identity map on non-RAM pages and prepare remapping the
+	 * underlying RAM.
+	 */
+	xen_set_identity_and_remap(max_pfn);

	return "Xen";
}
@@ -721,26 +929,30 @@ char * __init xen_memory_setup(void)
 */
char * __init xen_auto_xlated_memory_setup(void)
{
-	static struct e820entry map[E820MAX] __initdata;
-
	struct xen_memory_map memmap;
	int i;
	int rc;

	memmap.nr_entries = E820MAX;
-	set_xen_guest_handle(memmap.buffer, map);
+	set_xen_guest_handle(memmap.buffer, xen_e820_map);

	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
	if (rc < 0)
		panic("No memory map (%d)\n", rc);

-	sanitize_e820_map(map, ARRAY_SIZE(map), &memmap.nr_entries);
+	xen_e820_map_entries = memmap.nr_entries;
+
+	sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map),
+			  &xen_e820_map_entries);

-	for (i = 0; i < memmap.nr_entries; i++)
-		e820_add_region(map[i].addr, map[i].size, map[i].type);
+	for (i = 0; i < xen_e820_map_entries; i++)
+		e820_add_region(xen_e820_map[i].addr, xen_e820_map[i].size,
+				xen_e820_map[i].type);

-	memblock_reserve(__pa(xen_start_info->mfn_list),
-			 xen_start_info->pt_base - xen_start_info->mfn_list);
+	/* Remove p2m info, it is not needed. */
+	xen_start_info->mfn_list = 0;
+	xen_start_info->first_p2m_pfn = 0;
+	xen_start_info->nr_p2m_frames = 0;

	return "Xen";
}