/*
 * Low level x86 E820 memory map handling functions.
 *
 * The firmware and bootloader pass us the "E820 table", which is the primary
 * physical memory layout description available about x86 systems.
 *
 * The kernel takes the E820 memory layout and optionally modifies it with
 * quirks and other tweaks, and feeds that into the generic Linux memory
 * allocation code routines via a platform independent interface (memblock, etc.).
 */
#include <linux/crash_dump.h>
#include <linux/bootmem.h>
#include <linux/suspend.h>
#include <linux/acpi.h>
#include <linux/firmware-map.h>
#include <linux/memblock.h>
#include <linux/sort.h>

#include <asm/e820/api.h>
#include <asm/setup.h>

/*
 * We organize the E820 table into three main data structures:
 *
 * - 'e820_table_firmware': the original firmware version passed to us by the
 *   bootloader - not modified by the kernel. It is composed of two parts:
 *   the first 128 E820 memory entries in boot_params.e820_table and the remaining
 *   (if any) entries of the SETUP_E820_EXT nodes. We use this to:
 *
 *       - inform the user about the firmware's notion of memory layout
 *         via /sys/firmware/memmap
 *
 *       - the hibernation code uses it to generate a kernel-independent MD5
 *         fingerprint of the physical memory layout of a system.
 *
 * - 'e820_table_kexec': a slightly modified (by the kernel) firmware version
 *   passed to us by the bootloader - the major difference between
 *   e820_table_firmware[] and this one is that the latter marks the setup_data
 *   list created by the EFI boot stub as reserved, so that kexec can reuse the
 *   setup_data information in the second kernel. Besides, e820_table_kexec[]
 *   might also be modified by kexec itself to fake an mptable.
 *   We use this to:
 *
 *       - kexec, which is a bootloader in disguise, uses the original E820
 *         layout to pass to the kexec-ed kernel. This way the original kernel
 *         can have a restricted E820 map while the kexec()-ed kexec-kernel
 *         can have access to full memory - etc.
 *
 * - 'e820_table': this is the main E820 table that is massaged by the
 *   low level x86 platform code, or modified by boot parameters, before
 *   being passed on to higher level MM layers.
 *
 * Once the E820 map has been converted to the standard Linux memory layout
 * information its role stops - modifying it has no effect and does not get
 * re-propagated. So its main role is a temporary bootstrap storage of firmware
 * specific memory layout data during early bootup.
 */
static struct e820_table e820_table_init		__initdata;
static struct e820_table e820_table_kexec_init		__initdata;
static struct e820_table e820_table_firmware_init	__initdata;

struct e820_table *e820_table __refdata			= &e820_table_init;
struct e820_table *e820_table_kexec __refdata		= &e820_table_kexec_init;
struct e820_table *e820_table_firmware __refdata	= &e820_table_firmware_init;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;

#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif

/*
 * This function checks if any part of the range <start,end> is mapped
 * with type.
 */
bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
{
	int i;

	for (i = 0; i < e820_table->nr_entries; i++) {
		struct e820_entry *entry = &e820_table->entries[i];

		if (type && entry->type != type)
			continue;
		if (entry->addr >= end || entry->addr + entry->size <= start)
			continue;
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(e820__mapped_any);

/*
 * This function checks if the entire <start,end> range is mapped with 'type'.
 *
 * Note: this function only works correctly once the E820 table is sorted and
 * not-overlapping (at least for the range specified), which is the case normally.
 */
static struct e820_entry *__e820__mapped_all(u64 start, u64 end,
					     enum e820_type type)
{
	int i;

	for (i = 0; i < e820_table->nr_entries; i++) {
		struct e820_entry *entry = &e820_table->entries[i];

		if (type && entry->type != type)
			continue;

		/* Is the region (part) in overlap with the current region? */
		if (entry->addr >= end || entry->addr + entry->size <= start)
			continue;

		/*
		 * If the region is at the beginning of <start,end> we move
		 * 'start' to the end of the region since it's ok until there
		 */
		if (entry->addr <= start)
			start = entry->addr + entry->size;

		/*
		 * If 'start' is now at or beyond 'end', we're done, full
		 * coverage of the desired range exists:
		 */
		if (start >= end)
			return entry;
	}

	return NULL;
}

/*
 * This function checks if the entire range <start,end> is mapped with type.
 */
bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
{
	return __e820__mapped_all(start, end, type);
}

/*
 * This function returns the type associated with the range <start,end>.
 */
int e820__get_entry_type(u64 start, u64 end)
{
	struct e820_entry *entry = __e820__mapped_all(start, end, 0);

	return entry ? entry->type : -EINVAL;
}

/*
 * Add a memory region to the kernel E820 map.
 */
static void __init __e820__range_add(struct e820_table *table, u64 start, u64 size, enum e820_type type)
{
	int x = table->nr_entries;

	if (x >= ARRAY_SIZE(table->entries)) {
		pr_err("e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", start, start + size - 1);
		return;
	}

	table->entries[x].addr = start;
	table->entries[x].size = size;
	table->entries[x].type = type;
	table->nr_entries++;
}

void __init e820__range_add(u64 start, u64 size, enum e820_type type)
{
	__e820__range_add(e820_table, start, size, type);
}

static void __init e820_print_type(enum e820_type type)
{
	switch (type) {
	case E820_TYPE_RAM:		/* Fall through: */
	case E820_TYPE_RESERVED_KERN:	pr_cont("usable");			break;
	case E820_TYPE_RESERVED:	pr_cont("reserved");			break;
	case E820_TYPE_ACPI:		pr_cont("ACPI data");			break;
	case E820_TYPE_NVS:		pr_cont("ACPI NVS");			break;
	case E820_TYPE_UNUSABLE:	pr_cont("unusable");			break;
	case E820_TYPE_PMEM:		/* Fall through: */
	case E820_TYPE_PRAM:		pr_cont("persistent (type %u)", type);	break;
	default:			pr_cont("type %u", type);		break;
	}
}

void __init e820__print_table(char *who)
{
	int i;

	for (i = 0; i < e820_table->nr_entries; i++) {
		pr_info("%s: [mem %#018Lx-%#018Lx] ", who,
			e820_table->entries[i].addr,
			e820_table->entries[i].addr + e820_table->entries[i].size - 1);

		e820_print_type(e820_table->entries[i].type);
		pr_cont("\n");
	}
}

/*
 * Sanitize an E820 map.
 *
 * Some E820 layouts include overlapping entries. The following
 * replaces the original E820 map with a new one, removing overlaps,
 * and resolving conflicting memory types in favor of the highest
 * numbered type.
 *
 * The input parameter 'entries' points to an array of 'struct
 * e820_entry' which on entry has elements in the range [0, *nr_entries)
 * valid, and which has space for up to max_nr_entries entries.
 * On return, the resulting sanitized E820 map entries will be
 * overwritten in the same location, starting at 'entries'.
 *
 * The integer pointed to by nr_entries must be valid on entry (the
 * current number of valid entries located at 'entries'). If the
 * sanitizing succeeds the *nr_entries will be updated with the new
 * number of valid entries (no more than max_nr_entries).
 *
 * The return value from e820__update_table() is zero if it
 * successfully 'sanitized' the map entries passed in, and is -1
 * if it did nothing, which can happen if either of (1) it was
 * only passed one map entry, or (2) any of the input map entries
 * were invalid (start + size < start, meaning that the size was
 * so big the described memory range wrapped around through zero.)
 *
 * Visually we're performing the following
 * (1,2,3,4 = memory types)...
 *
 * Sample memory map (w/overlaps):
 *	   ____22__________________
 *	   ______________________4_
 *	   ____1111________________
 *	   _44_____________________
 *	   11111111________________
 *	   ____________________33__
 *	   ___________44___________
 *	   __________33333_________
 *	   ______________22________
 *	   ___________________2222_
 *	   _________111111111______
 *	   _____________________11_
 *	   _________________4______
 *
 * Sanitized equivalent (no overlap):
 *	   1_______________________
 *	   _44_____________________
 *	   ___1____________________
 *	   ____22__________________
 *	   ______11________________
 *	   _________1______________
 *	   __________3_____________
 *	   ___________44___________
 *	   _____________33_________
 *	   _______________2________
 *	   ________________1_______
 *	   _________________4______
 *	   ___________________2____
 *	   ____________________33__
 *	   ______________________4_
 */
struct change_member {
	/* Pointer to the original entry: */
	struct e820_entry	*entry;
	/* Address for this change point: */
	unsigned long long	addr;
};

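/*
 * Scratch buffers for e820__update_table(), sized for the worst case of
 * two change-points (a start and an end address) per E820 entry:
 */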
static struct change_member	change_point_list[2*E820_MAX_ENTRIES]	__initdata;
static struct change_member	*change_point[2*E820_MAX_ENTRIES]	__initdata;
static struct e820_entry	*overlap_list[E820_MAX_ENTRIES]		__initdata;
static struct e820_entry	new_entries[E820_MAX_ENTRIES]		__initdata;

static int __init cpcompare(const void *a, const void *b)
{
	struct change_member * const *app = a, * const *bpp = b;
	const struct change_member *ap = *app, *bp = *bpp;

	/*
	 * Inputs are pointers to two elements of change_point[]. If their
	 * addresses are not equal, their difference dominates. If the addresses
	 * are equal, then consider one that represents the end of its region
	 * to be greater than one that does not.
	 */
	if (ap->addr != bp->addr)
		return ap->addr > bp->addr ? 1 : -1;

	return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr);
}

int __init e820__update_table(struct e820_table *table)
{
	struct e820_entry *entries = table->entries;
	u32 max_nr_entries = ARRAY_SIZE(table->entries);
	enum e820_type current_type, last_type;
	unsigned long long last_addr;
	u32 new_nr_entries, overlap_entries;
	u32 i, chg_idx, chg_nr;

	/* If there's only one memory region, don't bother: */
	if (table->nr_entries < 2)
		return -1;

	BUG_ON(table->nr_entries > max_nr_entries);

	/* Bail out if we find any unreasonable addresses in the map: */
	for (i = 0; i < table->nr_entries; i++) {
		if (entries[i].addr + entries[i].size < entries[i].addr)
			return -1;
	}

	/* Create pointers for initial change-point information (for sorting): */
	for (i = 0; i < 2 * table->nr_entries; i++)
		change_point[i] = &change_point_list[i];

	/*
	 * Record all known change-points (starting and ending addresses),
	 * omitting empty memory regions:
	 */
	chg_idx = 0;
	for (i = 0; i < table->nr_entries; i++) {
		if (entries[i].size != 0) {
			change_point[chg_idx]->addr	= entries[i].addr;
			change_point[chg_idx++]->entry	= &entries[i];
			change_point[chg_idx]->addr	= entries[i].addr + entries[i].size;
			change_point[chg_idx++]->entry	= &entries[i];
		}
	}
	chg_nr = chg_idx;

	/* Sort change-point list by memory addresses (low -> high): */
	sort(change_point, chg_nr, sizeof(*change_point), cpcompare, NULL);

	/* Create a new memory map, removing overlaps: */
	overlap_entries = 0;	/* Number of entries in the overlap table */
	new_nr_entries = 0;	/* Index for creating new map entries */
	last_type = 0;		/* Start with undefined memory type */
	last_addr = 0;		/* Start with 0 as last starting address */

	/* Loop through change-points, determining effect on the new map: */
	for (chg_idx = 0; chg_idx < chg_nr; chg_idx++) {
		/* Keep track of all overlapping entries */
		if (change_point[chg_idx]->addr == change_point[chg_idx]->entry->addr) {
			/* Add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++] = change_point[chg_idx]->entry;
		} else {
			/* Remove entry from list (order independent, so swap with last): */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] == change_point[chg_idx]->entry)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/*
		 * If there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
		current_type = 0;
		for (i = 0; i < overlap_entries; i++) {
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		}

		/* Continue building up new map based on this information: */
		if (current_type != last_type || current_type == E820_TYPE_PRAM) {
			if (last_type != 0) {
				new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr;
				/* Move forward only if the new size was non-zero: */
				if (new_entries[new_nr_entries].size != 0)
					/* No more space left for new entries? */
					if (++new_nr_entries >= max_nr_entries)
						break;
			}
			if (current_type != 0) {
				new_entries[new_nr_entries].addr = change_point[chg_idx]->addr;
				new_entries[new_nr_entries].type = current_type;
				last_addr = change_point[chg_idx]->addr;
			}
			last_type = current_type;
		}
	}

	/* Copy the new entries into the original location: */
	memcpy(entries, new_entries, new_nr_entries*sizeof(*entries));
	table->nr_entries = new_nr_entries;

	return 0;
}

static int __init __append_e820_table(struct boot_e820_entry *entries, u32 nr_entries)
{
	struct boot_e820_entry *entry = entries;

	while (nr_entries) {
		u64 start = entry->addr;
		u64 size = entry->size;
		u64 end = start + size - 1;
		u32 type = entry->type;

		/* Ignore the entry on 64-bit overflow: */
		if (start > end && likely(size))
			return -1;

		e820__range_add(start, size, type);

		entry++;
		nr_entries--;
	}
	return 0;
}

/*
 * Copy the BIOS E820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory. If we aren't, we'll fake a memory map.
 */
static int __init append_e820_table(struct boot_e820_entry *entries, u32 nr_entries)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_entries < 2)
		return -1;

	return __append_e820_table(entries, nr_entries);
}

static u64 __init
__e820__range_update(struct e820_table *table, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
{
	u64 end;
	unsigned int i;
	u64 real_updated_size = 0;

	BUG_ON(old_type == new_type);

	if (size > (ULLONG_MAX - start))
		size = ULLONG_MAX - start;

	end = start + size;
	printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ", start, end - 1);
	e820_print_type(old_type);
	pr_cont(" ==> ");
	e820_print_type(new_type);
	pr_cont("\n");

	for (i = 0; i < table->nr_entries; i++) {
		struct e820_entry *entry = &table->entries[i];
		u64 final_start, final_end;
		u64 entry_end;

		if (entry->type != old_type)
			continue;

		entry_end = entry->addr + entry->size;

		/* Completely covered by new range? */
		if (entry->addr >= start && entry_end <= end) {
			entry->type = new_type;
			real_updated_size += entry->size;
			continue;
		}

		/* New range is completely covered? */
		if (entry->addr < start && entry_end > end) {
			__e820__range_add(table, start, size, new_type);
			__e820__range_add(table, end, entry_end - end, entry->type);
			entry->size = start - entry->addr;
			real_updated_size += size;
			continue;
		}

		/* Partially covered: */
		final_start = max(start, entry->addr);
		final_end = min(end, entry_end);
		if (final_start >= final_end)
			continue;

		__e820__range_add(table, final_start, final_end - final_start, new_type);

		real_updated_size += final_end - final_start;

		/*
		 * Left range could be head or tail, so need to update
		 * its size first:
		 */
		entry->size -= final_end - final_start;
		if (entry->addr < final_start)
			continue;

		entry->addr = final_end;
	}
	return real_updated_size;
}

u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
{
	return __e820__range_update(e820_table, start, size, old_type, new_type);
}

static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
{
	return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
}

/* Remove a range of memory from the E820 table: */
u64 __init e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type)
{
	int i;
	u64 end;
	u64 real_removed_size = 0;

	if (size > (ULLONG_MAX - start))
		size = ULLONG_MAX - start;

	end = start + size;
	printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ", start, end - 1);
	if (check_type)
		e820_print_type(old_type);
	pr_cont("\n");

	for (i = 0; i < e820_table->nr_entries; i++) {
		struct e820_entry *entry = &e820_table->entries[i];
		u64 final_start, final_end;
		u64 entry_end;

		if (check_type && entry->type != old_type)
			continue;

		entry_end = entry->addr + entry->size;

		/* Completely covered? */
		if (entry->addr >= start && entry_end <= end) {
			real_removed_size += entry->size;
			memset(entry, 0, sizeof(*entry));
			continue;
		}

		/* Is the new range completely covered? */
		if (entry->addr < start && entry_end > end) {
			e820__range_add(end, entry_end - end, entry->type);
			entry->size = start - entry->addr;
			real_removed_size += size;
			continue;
		}

		/* Partially covered: */
		final_start = max(start, entry->addr);
		final_end = min(end, entry_end);
		if (final_start >= final_end)
			continue;

		real_removed_size += final_end - final_start;

		/*
		 * Left range could be head or tail, so need to update
		 * the size first:
		 */
		entry->size -= final_end - final_start;
		if (entry->addr < final_start)
			continue;

		entry->addr = final_end;
	}
	return real_removed_size;
}

void __init e820__update_table_print(void)
{
	if (e820__update_table(e820_table))
		return;

	pr_info("e820: modified physical RAM map:\n");
	e820__print_table("modified");
}

static void __init e820__update_table_kexec(void)
{
	e820__update_table(e820_table_kexec);
}

#define MAX_GAP_END 0x100000000ull

/*
 * Search for a gap in the E820 memory space from 0 to MAX_GAP_END (4GB).
 */
static int __init e820_search_gap(unsigned long *gapstart, unsigned long *gapsize)
{
	unsigned long long last = MAX_GAP_END;
	int i = e820_table->nr_entries;
	int found = 0;

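	/* Walk the entries from the last one downwards; the table is expected to be sorted by address at this point. */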
	while (--i >= 0) {
		unsigned long long start = e820_table->entries[i].addr;
		unsigned long long end = start + e820_table->entries[i].size;

		/*
		 * Since "last" is at most 4GB, we know we'll
		 * fit in 32 bits if this condition is true:
		 */
		if (last > end) {
			unsigned long gap = last - end;

			if (gap >= *gapsize) {
				*gapsize = gap;
				*gapstart = end;
				found = 1;
			}
		}
		if (start < last)
			last = start;
	}
	return found;
}

/*
 * Search for the biggest gap in the low 32 bits of the E820
 * memory space. We pass this space to the PCI subsystem, so
 * that it can assign MMIO resources to hotplug or
 * unconfigured devices.
 *
 * Hopefully the BIOS left enough space for it.
 */
__init void e820__setup_pci_gap(void)
{
	unsigned long gapstart, gapsize;
	int found;

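	/* Start the search with a minimum acceptable gap size of 4 MB: */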
	gapsize = 0x400000;
	found = e820_search_gap(&gapstart, &gapsize);

	if (!found) {
#ifdef CONFIG_X86_64
		gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
		pr_err(
			"e820: Cannot find an available gap in the 32-bit address range\n"
			"e820: PCI devices with unassigned 32-bit BARs may not work!\n");
#else
		gapstart = 0x10000000;
#endif
	}

	/*
	 * e820__reserve_resources_late() protects stolen RAM already:
	 */
	pci_mem_start = gapstart;

	pr_info("e820: [mem %#010lx-%#010lx] available for PCI devices\n", gapstart, gapstart + gapsize - 1);
}

/*
 * Called late during init, in free_initmem().
 *
 * Initial e820_table and e820_table_kexec are largish __initdata arrays.
 *
 * Copy them to a (usually much smaller) dynamically allocated area that is
 * sized precisely after the number of e820 entries.
 *
 * This is done after we've performed all the fixes and tweaks to the tables.
 * All functions which modify them are __init functions, which won't exist
 * after free_initmem().
 */
__init void e820__reallocate_tables(void)
{
	struct e820_table *n;
	int size;

	size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table->nr_entries;
	n = kmalloc(size, GFP_KERNEL);
	BUG_ON(!n);
	memcpy(n, e820_table, size);
	e820_table = n;

	size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_kexec->nr_entries;
	n = kmalloc(size, GFP_KERNEL);
	BUG_ON(!n);
	memcpy(n, e820_table_kexec, size);
	e820_table_kexec = n;

	size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries;
	n = kmalloc(size, GFP_KERNEL);
	BUG_ON(!n);
	memcpy(n, e820_table_firmware, size);
	e820_table_firmware = n;
}

/*
 * Because of the small fixed size of struct boot_params, only the first
 * 128 E820 memory entries are passed to the kernel via boot_params.e820_table,
 * the remaining (if any) entries are passed via the SETUP_E820_EXT node of
 * struct setup_data, which is parsed here.
 */
void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
{
	int entries;
	struct boot_e820_entry *extmap;
	struct setup_data *sdata;

	sdata = early_memremap(phys_addr, data_len);
	entries = sdata->len / sizeof(*extmap);
	extmap = (struct boot_e820_entry *)(sdata->data);

	__append_e820_table(extmap, entries);
	e820__update_table(e820_table);

	memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));

	early_memunmap(sdata, data_len);
	pr_info("e820: extended physical RAM map:\n");
	e820__print_table("extended");
}

/*
 * Find the ranges of physical addresses that do not correspond to
 * E820 RAM areas and register the corresponding pages as 'nosave' for
 * hibernation (32-bit) or software suspend and suspend to RAM (64-bit).
 *
 * This function requires the E820 map to be sorted and without any
 * overlapping entries.
 */
void __init e820__register_nosave_regions(unsigned long limit_pfn)
{
	int i;
	unsigned long pfn = 0;

	for (i = 0; i < e820_table->nr_entries; i++) {
		struct e820_entry *entry = &e820_table->entries[i];

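		/* Register the hole between the previous entry and this one as 'nosave': */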
		if (pfn < PFN_UP(entry->addr))
			register_nosave_region(pfn, PFN_UP(entry->addr));

		pfn = PFN_DOWN(entry->addr + entry->size);

		if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
			register_nosave_region(PFN_UP(entry->addr), pfn);

		if (pfn >= limit_pfn)
			break;
	}
}

#ifdef CONFIG_ACPI
/*
 * Register ACPI NVS memory regions, so that we can save/restore them during
 * hibernation and the subsequent resume:
 */
static int __init e820__register_nvs_regions(void)
{
	int i;

	for (i = 0; i < e820_table->nr_entries; i++) {
		struct e820_entry *entry = &e820_table->entries[i];

		if (entry->type == E820_TYPE_NVS)
			acpi_nvs_register(entry->addr, entry->size);
	}

	return 0;
}
core_initcall(e820__register_nvs_regions);
#endif

/*
 * Allocate the requested number of bytes with the requested alignment
 * and return (the physical address) to the caller. Also register this
 * range in the 'kexec' E820 table as a reserved range.
 *
 * This allows kexec to fake a new mptable, as if it came from the real
 * system.
 */
u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
{
	u64 addr;

	addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
	if (addr) {
		e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
		pr_info("e820: update e820_table_kexec for e820__memblock_alloc_reserved()\n");
		e820__update_table_kexec();
	}

	return addr;
}

#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
# else
#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
# endif
#else /* CONFIG_X86_32 */
# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
#endif

/*
 * Find the highest page frame number we have available
 */
static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type type)
{
	int i;
	unsigned long last_pfn = 0;
	unsigned long max_arch_pfn = MAX_ARCH_PFN;

	for (i = 0; i < e820_table->nr_entries; i++) {
		struct e820_entry *entry = &e820_table->entries[i];
		unsigned long start_pfn;
		unsigned long end_pfn;

		if (entry->type != type)
			continue;

		start_pfn = entry->addr >> PAGE_SHIFT;
		end_pfn = (entry->addr + entry->size) >> PAGE_SHIFT;

		if (start_pfn >= limit_pfn)
			continue;

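		/* Entry crosses the limit: clamp to the limit and stop scanning: */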
		if (end_pfn > limit_pfn) {
			last_pfn = limit_pfn;
			break;
		}

		if (end_pfn > last_pfn)
			last_pfn = end_pfn;
	}

	if (last_pfn > max_arch_pfn)
		last_pfn = max_arch_pfn;

	pr_info("e820: last_pfn = %#lx max_arch_pfn = %#lx\n",
		last_pfn, max_arch_pfn);
	return last_pfn;
}

unsigned long __init e820__end_of_ram_pfn(void)
{
	return e820_end_pfn(MAX_ARCH_PFN, E820_TYPE_RAM);
}

unsigned long __init e820__end_of_low_ram_pfn(void)
{
	return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_TYPE_RAM);
}

static void __init early_panic(char *msg)
{
	early_printk(msg);
	panic(msg);
}

static int userdef __initdata;

/* The "mem=nopentium" boot option disables 4MB page tables on 32-bit kernels: */
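/* "mem=<size>" itself truncates usable memory: all RAM above <size> is removed from the E820 map below. */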
static int __init parse_memopt(char *p)
{
	u64 mem_size;

	if (!p)
		return -EINVAL;

	if (!strcmp(p, "nopentium")) {
#ifdef CONFIG_X86_32
		setup_clear_cpu_cap(X86_FEATURE_PSE);
		return 0;
#else
		pr_warn("mem=nopentium ignored! (only supported on x86_32)\n");
		return -EINVAL;
#endif
	}

	userdef = 1;
	mem_size = memparse(p, &p);

	/* Don't remove all memory when getting "mem={invalid}" parameter: */
	if (mem_size == 0)
		return -EINVAL;

	e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);

	return 0;
}
early_param("mem", parse_memopt);

static int __init parse_memmap_one(char *p)
{
	char *oldp;
	u64 start_at, mem_size;

	if (!p)
		return -EINVAL;

	if (!strncmp(p, "exactmap", 8)) {
#ifdef CONFIG_CRASH_DUMP
		/*
		 * If we are doing a crash dump, we still need to know
		 * the real memory size before the original memory map is
		 * reset.
		 */
		saved_max_pfn = e820__end_of_ram_pfn();
#endif
		e820_table->nr_entries = 0;
		userdef = 1;
		return 0;
	}

	oldp = p;
	mem_size = memparse(p, &p);
	if (p == oldp)
		return -EINVAL;

	userdef = 1;

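	/*
	 * The suffix selects the E820 type for the range: '@' adds it as RAM,
	 * '#' as ACPI data, '$' as reserved and '!' as persistent memory;
	 * with no suffix, everything above mem_size is removed from the RAM map.
	 * E.g. "memmap=64K$0x12340000" should mark 64K at 0x12340000 as reserved.
	 */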
	if (*p == '@') {
		start_at = memparse(p+1, &p);
		e820__range_add(start_at, mem_size, E820_TYPE_RAM);
	} else if (*p == '#') {
		start_at = memparse(p+1, &p);
		e820__range_add(start_at, mem_size, E820_TYPE_ACPI);
	} else if (*p == '$') {
		start_at = memparse(p+1, &p);
		e820__range_add(start_at, mem_size, E820_TYPE_RESERVED);
	} else if (*p == '!') {
		start_at = memparse(p+1, &p);
		e820__range_add(start_at, mem_size, E820_TYPE_PRAM);
	} else {
		e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
	}

	return *p == '\0' ? 0 : -EINVAL;
}

static int __init parse_memmap_opt(char *str)
{
	while (str) {
		char *k = strchr(str, ',');

		if (k)
			*k++ = 0;

		parse_memmap_one(str);
		str = k;
	}

	return 0;
}
early_param("memmap", parse_memmap_opt);

/*
 * Reserve all entries from the bootloader's extensible data nodes list,
 * because if present we are going to use it later on to fetch e820
 * entries from it:
 */
void __init e820__reserve_setup_data(void)
{
	struct setup_data *data;
	u64 pa_data;

	pa_data = boot_params.hdr.setup_data;
	if (!pa_data)
		return;

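	/* Mark each setup_data node as E820_TYPE_RESERVED_KERN in both the kernel and the kexec tables: */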
	while (pa_data) {
		data = early_memremap(pa_data, sizeof(*data));
		e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
		e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
		pa_data = data->next;
		early_memunmap(data, sizeof(*data));
	}

	e820__update_table(e820_table);
	e820__update_table(e820_table_kexec);

	pr_info("extended physical RAM map:\n");
	e820__print_table("reserve setup_data");
}

/*
 * Called after parse_early_param(), after early parameters (such as mem=)
 * have been processed, in which case we already have an E820 table filled in
 * via the parameter callback function(s), but it's not sorted and printed yet:
 */
void __init e820__finish_early_params(void)
{
	if (userdef) {
		if (e820__update_table(e820_table) < 0)
			early_panic("Invalid user supplied memory map");

		pr_info("e820: user-defined physical RAM map:\n");
		e820__print_table("user");
	}
}

static const char *__init e820_type_to_string(struct e820_entry *entry)
{
	switch (entry->type) {
	case E820_TYPE_RESERVED_KERN:	/* Fall-through: */
	case E820_TYPE_RAM:		return "System RAM";
	case E820_TYPE_ACPI:		return "ACPI Tables";
	case E820_TYPE_NVS:		return "ACPI Non-volatile Storage";
	case E820_TYPE_UNUSABLE:	return "Unusable memory";
	case E820_TYPE_PRAM:		return "Persistent Memory (legacy)";
	case E820_TYPE_PMEM:		return "Persistent Memory";
	case E820_TYPE_RESERVED:	return "Reserved";
	default:			return "Unknown E820 type";
	}
}

static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry)
{
	switch (entry->type) {
	case E820_TYPE_RESERVED_KERN:	/* Fall-through: */
	case E820_TYPE_RAM:		return IORESOURCE_SYSTEM_RAM;
	case E820_TYPE_ACPI:		/* Fall-through: */
	case E820_TYPE_NVS:		/* Fall-through: */
	case E820_TYPE_UNUSABLE:	/* Fall-through: */
	case E820_TYPE_PRAM:		/* Fall-through: */
	case E820_TYPE_PMEM:		/* Fall-through: */
	case E820_TYPE_RESERVED:	/* Fall-through: */
	default:			return IORESOURCE_MEM;
	}
}

static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
{
	switch (entry->type) {
	case E820_TYPE_ACPI:		return IORES_DESC_ACPI_TABLES;
	case E820_TYPE_NVS:		return IORES_DESC_ACPI_NV_STORAGE;
	case E820_TYPE_PMEM:		return IORES_DESC_PERSISTENT_MEMORY;
	case E820_TYPE_PRAM:		return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
	case E820_TYPE_RESERVED_KERN:	/* Fall-through: */
	case E820_TYPE_RAM:		/* Fall-through: */
	case E820_TYPE_UNUSABLE:	/* Fall-through: */
	case E820_TYPE_RESERVED:	/* Fall-through: */
	default:			return IORES_DESC_NONE;
	}
}

static bool __init do_mark_busy(enum e820_type type, struct resource *res)
{
	/* this is the legacy bios/dos rom-shadow + mmio region */
	if (res->start < (1ULL<<20))
		return true;

	/*
	 * Treat persistent memory like device memory, i.e. reserve it
	 * for exclusive use of a driver
	 */
	switch (type) {
	case E820_TYPE_RESERVED:
	case E820_TYPE_PRAM:
	case E820_TYPE_PMEM:
		return false;
	case E820_TYPE_RESERVED_KERN:
	case E820_TYPE_RAM:
	case E820_TYPE_ACPI:
	case E820_TYPE_NVS:
	case E820_TYPE_UNUSABLE:
	default:
		return true;
	}
}

/*
 * Mark E820 reserved areas as busy for the resource manager:
 */
static struct resource __initdata *e820_res;

void __init e820__reserve_resources(void)
{
	int i;
	struct resource *res;
	u64 end;

	res = alloc_bootmem(sizeof(*res) * e820_table->nr_entries);
	e820_res = res;

	for (i = 0; i < e820_table->nr_entries; i++) {
		struct e820_entry *entry = e820_table->entries + i;

		end = entry->addr + entry->size - 1;
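		/* Skip entries whose end address does not fit in resource_size_t: */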
		if (end != (resource_size_t)end) {
			res++;
			continue;
		}
		res->start = entry->addr;
		res->end   = end;
		res->name  = e820_type_to_string(entry);
		res->flags = e820_type_to_iomem_type(entry);
		res->desc  = e820_type_to_iores_desc(entry);

		/*
		 * Don't register regions that could conflict with PCI device
		 * BAR resources; those are inserted later in
		 * pcibios_resource_survey():
		 */
		if (do_mark_busy(entry->type, res)) {
			res->flags |= IORESOURCE_BUSY;
			insert_resource(&iomem_resource, res);
		}
		res++;
	}

	/* Expose the bootloader-provided memory layout to sysfs: */
	for (i = 0; i < e820_table_firmware->nr_entries; i++) {
		struct e820_entry *entry = e820_table_firmware->entries + i;

		firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry));
	}
}

/*
 * How much should we pad the end of RAM, depending on where it is?
 */
static unsigned long __init ram_alignment(resource_size_t pos)
{
	unsigned long mb = pos >> 20;

	/* To 64kB in the first megabyte */
	if (!mb)
		return 64*1024;

	/* To 1MB in the first 16MB */
	if (mb < 16)
		return 1024*1024;

	/* To 64MB for anything above that */
	return 64*1024*1024;
}

#define MAX_RESOURCE_SIZE ((resource_size_t)-1)

void __init e820__reserve_resources_late(void)
{
	int i;
	struct resource *res;

	res = e820_res;
	for (i = 0; i < e820_table->nr_entries; i++) {
		if (!res->parent && res->end)
			insert_resource_expand_to_fit(&iomem_resource, res);
		res++;
	}

	/*
	 * Try to bump up RAM regions to reasonable boundaries, to
	 * avoid stolen RAM:
	 */
	for (i = 0; i < e820_table->nr_entries; i++) {
		struct e820_entry *entry = &e820_table->entries[i];
		u64 start, end;

		if (entry->type != E820_TYPE_RAM)
			continue;

		start = entry->addr + entry->size;
		end = round_up(start, ram_alignment(start)) - 1;
		if (end > MAX_RESOURCE_SIZE)
			end = MAX_RESOURCE_SIZE;
		if (start >= end)
			continue;

		printk(KERN_DEBUG "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n", start, end);
		reserve_region_with_split(&iomem_resource, start, end, "RAM buffer");
	}
}

/*
 * Pass the firmware (bootloader) E820 map to the kernel and process it:
 */
char *__init e820__memory_setup_default(void)
{
	char *who = "BIOS-e820";

	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	if (append_e820_table(boot_params.e820_table, boot_params.e820_entries) < 0) {
		u64 mem_size;

		/* Compare results from other methods and take the one that gives more RAM: */
		if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
			mem_size = boot_params.screen_info.ext_mem_k;
			who = "BIOS-88";
		} else {
			mem_size = boot_params.alt_mem_k;
			who = "BIOS-e801";
		}

		e820_table->nr_entries = 0;
		e820__range_add(0, LOWMEMSIZE(), E820_TYPE_RAM);
		e820__range_add(HIGH_MEMORY, mem_size << 10, E820_TYPE_RAM);
	}

	/* We just appended a lot of ranges, sanitize the table: */
	e820__update_table(e820_table);

	return who;
}

/*
 * Calls e820__memory_setup_default() in essence to pick up the firmware/bootloader
 * E820 map - with an optional platform quirk available for virtual platforms
 * to override this method of boot environment processing:
 */
void __init e820__memory_setup(void)
{
	char *who;

	/* This is a firmware interface ABI - make sure we don't break it: */
	BUILD_BUG_ON(sizeof(struct boot_e820_entry) != 20);

	who = x86_init.resources.memory_setup();

	memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));

	pr_info("e820: BIOS-provided physical RAM map:\n");
	e820__print_table(who);
}

void __init e820__memblock_setup(void)
{
	int i;
	u64 end;

	/*
	 * The bootstrap memblock region count maximum is 128 entries
	 * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries
	 * than that - so allow memblock resizing.
	 *
	 * This is safe, because this call happens pretty late during x86 setup,
	 * so we know about reserved memory regions already. (This is important
	 * so that memblock resizing does not stomp over reserved areas.)
	 */
	memblock_allow_resize();

	for (i = 0; i < e820_table->nr_entries; i++) {
		struct e820_entry *entry = &e820_table->entries[i];

		end = entry->addr + entry->size;
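		/* Skip entries whose end does not fit in resource_size_t: */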
		if (end != (resource_size_t)end)
			continue;

		if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
			continue;

		memblock_add(entry->addr, entry->size);
	}

	/* Throw away partial pages: */
	memblock_trim_memory(PAGE_SIZE);

	memblock_dump_all();
}