// SPDX-License-Identifier: GPL-2.0
/*
 *	fs/proc/kcore.c kernel ELF core dumper
 *
 *	Modelled on fs/exec.c:aout_core_dump()
 *	Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *	ELF version written by David Howells <David.Howells@nexor.co.uk>
 *	Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *	Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */
#include <linux/crash_core.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/notifier.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
#include <linux/sched/task.h>
#include <asm/sections.h>
#include "internal.h"
#define CORE_STR "CORE"

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

static struct proc_dir_entry *proc_root_kcore;

#ifndef kc_vaddr_to_offset
#define	kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef	kc_offset_to_vaddr
#define	kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

static LIST_HEAD(kclist_head);
static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;

/* This doesn't grab kclist_lock, so it should only be used at init time. */
void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
		       int type)
{
	new->addr = (unsigned long)addr;
	new->size = size;
	new->type = type;

	list_add_tail(&new->list, &kclist_head);
}
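/*
 * Compute the total size of the /proc/kcore "file": the ELF header, one
 * program header per kclist entry plus one for the note segment, the note
 * segment itself, and the page-aligned data area that follows. The data
 * area must be large enough to reach the highest registered offset.
 */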
static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
			     size_t *data_offset)
{
	size_t try, size;
	struct kcore_list *m;

	*nphdr = 1; /* PT_NOTE */
	size = 0;

	list_for_each_entry(m, &kclist_head, list) {
		try = kc_vaddr_to_offset((size_t)m->addr + m->size);
		if (try > size)
			size = try;
		*nphdr = *nphdr + 1;
	}

	*phdrs_len = *nphdr * sizeof(struct elf_phdr);
	*notes_len = (4 * sizeof(struct elf_note) +
		      3 * ALIGN(sizeof(CORE_STR), 4) +
		      VMCOREINFO_NOTE_NAME_BYTES +
		      ALIGN(sizeof(struct elf_prstatus), 4) +
		      ALIGN(sizeof(struct elf_prpsinfo), 4) +
		      ALIGN(arch_task_struct_size, 4) +
		      ALIGN(vmcoreinfo_size, 4));
	*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
				  *notes_len);
	return *data_offset + size;
}
#ifdef CONFIG_HIGHMEM
/*
 * With CONFIG_HIGHMEM only the direct-mapped low memory can be dumped, so
 * treat [0...max_low_pfn) as one continuous range: any holes in lowmem are
 * small compared with the !HIGHMEM case.
 * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
 */
static int kcore_ram_list(struct list_head *head)
{
	struct kcore_list *ent;

	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return -ENOMEM;

	ent->addr = (unsigned long)__va(0);
	ent->size = max_low_pfn << PAGE_SHIFT;
	ent->type = KCORE_RAM;
	list_add(&ent->list, head);
	return 0;
}
#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* calculate vmemmap's address from given system ram pfn and register it */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
	unsigned long nr_pages = ent->size >> PAGE_SHIFT;
	unsigned long start, end;
	struct kcore_list *vmm, *tmp;

	start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
	end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
	end = PAGE_ALIGN(end);
	/* overlap check (because we have to align pages) */
	list_for_each_entry(tmp, head, list) {
		if (tmp->type != KCORE_VMEMMAP)
			continue;
		if (start < tmp->addr + tmp->size)
			if (end > tmp->addr)
				end = tmp->addr;
	}
	if (start < end) {
		vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
		if (!vmm)
			return 0;
		vmm->addr = start;
		vmm->size = end - start;
		vmm->type = KCORE_VMEMMAP;
		list_add_tail(&vmm->list, head);
	}
	return 1;
}
#else
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	return 1;
}
#endif
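/*
 * Register one chunk of System RAM (given as a pfn range) as a KCORE_RAM
 * entry, trimming off anything that is not direct-mapped, and register the
 * matching vmemmap range where SPARSEMEM_VMEMMAP is enabled. Used as the
 * walk_system_ram_range() callback; any nonzero return stops the walk.
 */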
static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
	struct list_head *head = (struct list_head *)arg;
	struct kcore_list *ent;
	struct page *p;

	if (!pfn_valid(pfn))
		return 1;

	p = pfn_to_page(pfn);
	if (!memmap_valid_within(pfn, p, page_zone(p)))
		return 1;

	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return -ENOMEM;

	ent->addr = (unsigned long)page_to_virt(p);
	ent->size = nr_pages << PAGE_SHIFT;

	if (!virt_addr_valid(ent->addr))
		goto free_out;

	/* Cut off any not-mapped tail; taken from the ppc32 code. */
	if (ULONG_MAX - ent->addr < ent->size)
		ent->size = ULONG_MAX - ent->addr;

	/*
	 * We've already checked virt_addr_valid so we know this address
	 * is a valid pointer, therefore we can check against it to determine
	 * if we need to trim
	 */
	if (VMALLOC_START > ent->addr) {
		if (VMALLOC_START - ent->addr < ent->size)
			ent->size = VMALLOC_START - ent->addr;
	}

	ent->type = KCORE_RAM;
	list_add_tail(&ent->list, head);

	if (!get_sparsemem_vmemmap_info(ent, head)) {
		list_del(&ent->list);
		goto free_out;
	}

	return 0;
free_out:
	kfree(ent);
	return 1;
}
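/*
 * Build the list of KCORE_RAM (and vmemmap) entries by walking every
 * System RAM range up to the highest pfn of any node with memory.
 */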
static int kcore_ram_list(struct list_head *list)
{
	int nid, ret;
	unsigned long end_pfn;

	/* Not initialized yet....update now */
	/* find out "max pfn" */
	end_pfn = 0;
	for_each_node_state(nid, N_MEMORY) {
		unsigned long node_end;

		node_end = node_end_pfn(nid);
		if (end_pfn < node_end)
			end_pfn = node_end;
	}

	/* scan 0 to max_pfn */
	ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
	if (ret)
		return -ENOMEM;
	return 0;
}
#endif /* CONFIG_HIGHMEM */
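/*
 * Replace all KCORE_RAM/KCORE_VMEMMAP entries with a freshly built RAM
 * list and recompute the reported size of /proc/kcore. Stale entries are
 * moved to a garbage list and freed outside kclist_lock.
 */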
static int kcore_update_ram(void)
{
	LIST_HEAD(list);
	LIST_HEAD(garbage);
	int nphdr;
	size_t phdrs_len, notes_len, data_offset;
	struct kcore_list *tmp, *pos;
	int ret = 0;

	down_write(&kclist_lock);
	if (!xchg(&kcore_need_update, 0))
		goto out;

	ret = kcore_ram_list(&list);
	if (ret) {
		/* Couldn't get the RAM list, try again next time. */
		WRITE_ONCE(kcore_need_update, 1);
		list_splice_tail(&list, &garbage);
		goto out;
	}

	list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
		if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
			list_move(&pos->list, &garbage);
	}
	list_splice_tail(&list, &kclist_head);

	proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, &notes_len,
					       &data_offset);

out:
	up_write(&kclist_lock);
	list_for_each_entry_safe(pos, tmp, &garbage, list) {
		list_del(&pos->list);
		kfree(pos);
	}
	return ret;
}
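/*
 * Append one ELF note at offset *i in the notes buffer: the elf_note
 * header, then the name and the descriptor, each padded to the 4-byte
 * alignment the ELF spec requires. *i is advanced past the note.
 */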
static void append_kcore_note(char *notes, size_t *i, const char *name,
			      unsigned int type, const void *desc,
			      size_t descsz)
{
	struct elf_note *note = (struct elf_note *)&notes[*i];

	note->n_namesz = strlen(name) + 1;
	note->n_descsz = descsz;
	note->n_type = type;
	*i += sizeof(*note);
	memcpy(&notes[*i], name, note->n_namesz);
	*i = ALIGN(*i + note->n_namesz, 4);
	memcpy(&notes[*i], desc, descsz);
	*i = ALIGN(*i + descsz, 4);
}
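/*
 * Serve reads of /proc/kcore. The file consists of four regions in offset
 * order: the ELF file header, the program headers, the note segment, and
 * the memory contents themselves, with file offsets translated to kernel
 * virtual addresses via kc_offset_to_vaddr(). The headers are regenerated
 * on every read, under kclist_lock.
 */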
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
	char *buf = file->private_data;
	size_t phdrs_offset, notes_offset, data_offset;
	size_t phdrs_len, notes_len;
	struct kcore_list *m;
	size_t tsz;
	int nphdr;
	unsigned long start;
	size_t orig_buflen = buflen;
	int ret = 0;

	down_read(&kclist_lock);

	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
	phdrs_offset = sizeof(struct elfhdr);
	notes_offset = phdrs_offset + phdrs_len;

	/* ELF file header. */
	if (buflen && *fpos < sizeof(struct elfhdr)) {
		struct elfhdr ehdr = {
			.e_ident = {
				[EI_MAG0] = ELFMAG0,
				[EI_MAG1] = ELFMAG1,
				[EI_MAG2] = ELFMAG2,
				[EI_MAG3] = ELFMAG3,
				[EI_CLASS] = ELF_CLASS,
				[EI_DATA] = ELF_DATA,
				[EI_VERSION] = EV_CURRENT,
				[EI_OSABI] = ELF_OSABI,
			},
			.e_type = ET_CORE,
			.e_machine = ELF_ARCH,
			.e_version = EV_CURRENT,
			.e_phoff = sizeof(struct elfhdr),
			.e_flags = ELF_CORE_EFLAGS,
			.e_ehsize = sizeof(struct elfhdr),
			.e_phentsize = sizeof(struct elf_phdr),
			.e_phnum = nphdr,
		};

		tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
		if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
			ret = -EFAULT;
			goto out;
		}

		buffer += tsz;
		buflen -= tsz;
		*fpos += tsz;
	}
	/* ELF program headers. */
	if (buflen && *fpos < phdrs_offset + phdrs_len) {
		struct elf_phdr *phdrs, *phdr;

		phdrs = kzalloc(phdrs_len, GFP_KERNEL);
		if (!phdrs) {
			ret = -ENOMEM;
			goto out;
		}

		phdrs[0].p_type = PT_NOTE;
		phdrs[0].p_offset = notes_offset;
		phdrs[0].p_filesz = notes_len;

		phdr = &phdrs[1];
		list_for_each_entry(m, &kclist_head, list) {
			phdr->p_type = PT_LOAD;
			phdr->p_flags = PF_R | PF_W | PF_X;
			phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
			if (m->type == KCORE_REMAP)
				phdr->p_vaddr = (size_t)m->vaddr;
			else
				phdr->p_vaddr = (size_t)m->addr;
			if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
				phdr->p_paddr = __pa(m->addr);
			else if (m->type == KCORE_TEXT)
				phdr->p_paddr = __pa_symbol(m->addr);
			else
				phdr->p_paddr = (elf_addr_t)-1;
			phdr->p_filesz = phdr->p_memsz = m->size;
			phdr->p_align = PAGE_SIZE;
			phdr++;
		}

		tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
		if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
				 tsz)) {
			kfree(phdrs);
			ret = -EFAULT;
			goto out;
		}
		kfree(phdrs);

		buffer += tsz;
		buflen -= tsz;
		*fpos += tsz;
	}
	/* ELF note segment. */
	if (buflen && *fpos < notes_offset + notes_len) {
		struct elf_prstatus prstatus = {};
		struct elf_prpsinfo prpsinfo = {
			.pr_sname = 'R',
			.pr_fname = "vmlinux",
		};
		char *notes;
		size_t i = 0;

		strlcpy(prpsinfo.pr_psargs, saved_command_line,
			sizeof(prpsinfo.pr_psargs));

		notes = kzalloc(notes_len, GFP_KERNEL);
		if (!notes) {
			ret = -ENOMEM;
			goto out;
		}

		append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
				  sizeof(prstatus));
		append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
				  sizeof(prpsinfo));
		append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
				  arch_task_struct_size);
		/*
		 * vmcoreinfo_size is mostly constant after init time, but it
		 * can be changed by crash_save_vmcoreinfo(). Racing here with a
		 * panic on another CPU before the machine goes down is insanely
		 * unlikely, but it's better to not leave potential buffer
		 * overflows lying around, regardless.
		 */
		append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
				  vmcoreinfo_data,
				  min(vmcoreinfo_size, notes_len - i));

		tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
		if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
			kfree(notes);
			ret = -EFAULT;
			goto out;
		}
		kfree(notes);

		buffer += tsz;
		buflen -= tsz;
		*fpos += tsz;
	}
	/*
	 * Check to see if our file offset matches with any of
	 * the addresses in the elf_phdr on our list.
	 */
	start = kc_offset_to_vaddr(*fpos - data_offset);
	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
		tsz = buflen;

	m = NULL;
	while (buflen) {
		/*
		 * If this is the first iteration or the address is not within
		 * the previous entry, search for a matching entry.
		 */
		if (!m || start < m->addr || start >= m->addr + m->size) {
			list_for_each_entry(m, &kclist_head, list) {
				if (start >= m->addr &&
				    start < m->addr + m->size)
					break;
			}
		}

		if (&m->list == &kclist_head) {
			if (clear_user(buffer, tsz)) {
				ret = -EFAULT;
				goto out;
			}
			m = NULL;	/* skip the list anchor */
		} else if (m->type == KCORE_VMALLOC) {
			vread(buf, (char *)start, tsz);
			/* we have to zero-fill user buffer even if no read */
			if (copy_to_user(buffer, buf, tsz)) {
				ret = -EFAULT;
				goto out;
			}
		} else if (m->type == KCORE_USER) {
			/* User page is handled prior to normal kernel page: */
			if (copy_to_user(buffer, (char *)start, tsz)) {
				ret = -EFAULT;
				goto out;
			}
		} else {
			if (kern_addr_valid(start)) {
				/*
				 * Using bounce buffer to bypass the
				 * hardened user copy kernel text checks.
				 */
				if (probe_kernel_read(buf, (void *)start, tsz)) {
					if (clear_user(buffer, tsz)) {
						ret = -EFAULT;
						goto out;
					}
				} else {
					if (copy_to_user(buffer, buf, tsz)) {
						ret = -EFAULT;
						goto out;
					}
				}
			} else {
				if (clear_user(buffer, tsz)) {
					ret = -EFAULT;
					goto out;
				}
			}
		}
		buflen -= tsz;
		*fpos += tsz;
		buffer += tsz;
		start += tsz;
		tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
	}

out:
	up_read(&kclist_lock);
	if (ret)
		return ret;
	return orig_buflen - buflen;
}
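/*
 * Opening /proc/kcore requires CAP_SYS_RAWIO. A one-page bounce buffer is
 * allocated per open file for the read path, and the inode size is synced
 * with the current kcore size.
 */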
static int open_kcore(struct inode *inode, struct file *filp)
{
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;

	if (kcore_need_update)
		kcore_update_ram();
	if (i_size_read(inode) != proc_root_kcore->size) {
		inode_lock(inode);
		i_size_write(inode, proc_root_kcore->size);
		inode_unlock(inode);
	}
	return 0;
}
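/* Free the per-open bounce buffer allocated in open_kcore(). */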
static int release_kcore(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
}

static const struct file_operations proc_kcore_operations = {
	.read = read_kcore,
	.open = open_kcore,
	.release = release_kcore,
	.llseek = default_llseek,
};
/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
				    unsigned long action, void *arg)
{
	switch (action) {
	case MEM_ONLINE:
	case MEM_OFFLINE:
		kcore_need_update = 1;
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block kcore_callback_nb __meminitdata = {
	.notifier_call = kcore_callback,
	.priority = 0,
};
static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * If defined, the architecture maps kernel text through a special segment
 * rather than the direct-map area, so we need to create a dedicated TEXT
 * entry for it.
 */
static void __init proc_kcore_text_init(void)
{
	kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * MODULES_VADDR has no intersection with the vmalloc area.
 */
struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
	if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
		kclist_add(&kcore_modules, (void *)MODULES_VADDR,
			   MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
	}
}
#else
static void __init add_modules_range(void)
{
}
#endif
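/*
 * Create /proc/kcore and register the fixed regions (kernel text, vmalloc,
 * modules), then populate the RAM list and keep it in sync with memory
 * hotplug via the notifier above.
 */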
static int __init proc_kcore_init(void)
{
	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
				      &proc_kcore_operations);
	if (!proc_root_kcore) {
		pr_err("couldn't create /proc/kcore\n");
		return 0; /* Always returns 0. */
	}
	/* Store text area if it's special */
	proc_kcore_text_init();
	/* Store vmalloc area */
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
	add_modules_range();
	/* Store direct-map area from physical memory map */
	kcore_update_ram();
	register_hotmemory_notifier(&kcore_callback_nb);
	return 0;
}
fs_initcall(proc_kcore_init);