unwind_orc.c

#include <linux/module.h>
#include <linux/sort.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#include <asm/orc_types.h>
#include <asm/orc_lookup.h>
#include <asm/sections.h>

#define orc_warn(fmt, ...) \
	printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)

extern int __start_orc_unwind_ip[];
extern int __stop_orc_unwind_ip[];
extern struct orc_entry __start_orc_unwind[];
extern struct orc_entry __stop_orc_unwind[];

static DEFINE_MUTEX(sort_mutex);
int *cur_orc_ip_table = __start_orc_unwind_ip;
struct orc_entry *cur_orc_table = __start_orc_unwind;

unsigned int lookup_num_blocks;
bool orc_init;

static inline unsigned long orc_ip(const int *ip)
{
	return (unsigned long)ip + *ip;
}
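
/*
 * A worked example with made-up numbers: each .orc_unwind_ip entry is a
 * 32-bit offset relative to its own address, which keeps the table small
 * and position-independent. An entry at 0xffffffff81000100 holding the
 * value 0x500 decodes to the text address 0xffffffff81000600.
 */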

static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
				    unsigned int num_entries, unsigned long ip)
{
	int *first = ip_table;
	int *last = ip_table + num_entries - 1;
	int *mid = first, *found = first;

	if (!num_entries)
		return NULL;

	/*
	 * Do a binary range search to find the rightmost duplicate of a given
	 * starting address.  Some entries are section terminators which are
	 * "weak" entries for ensuring there are no gaps.  They should be
	 * ignored when they conflict with a real entry.
	 */
	while (first <= last) {
		mid = first + ((last - first) / 2);

		if (orc_ip(mid) <= ip) {
			found = mid;
			first = mid + 1;
		} else
			last = mid - 1;
	}

	return u_table + (found - ip_table);
}
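
/*
 * Because the search above keeps moving right on ties, the rightmost entry
 * for a given address wins. Combined with orc_sort_cmp() sorting "weak"
 * terminators to the left of real entries at the same address, a search of
 * a hypothetical table with start addresses { 0x10, 0x40 (terminator),
 * 0x40 (real), 0x80 } for ip == 0x40 returns the real entry, as intended.
 */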

#ifdef CONFIG_MODULES
static struct orc_entry *orc_module_find(unsigned long ip)
{
	struct module *mod;

	mod = __module_address(ip);
	if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip)
		return NULL;
	return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind,
			  mod->arch.num_orcs, ip);
}
#else
static struct orc_entry *orc_module_find(unsigned long ip)
{
	return NULL;
}
#endif

static struct orc_entry *orc_find(unsigned long ip)
{
	if (!orc_init)
		return NULL;

	/* For non-init vmlinux addresses, use the fast lookup table: */
	if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
		unsigned int idx, start, stop;

		idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;

		if (unlikely(idx >= lookup_num_blocks-1)) {
			orc_warn("bad lookup idx: idx=%u num=%u ip=%pB\n",
				 idx, lookup_num_blocks, (void *)ip);
			return NULL;
		}

		start = orc_lookup[idx];
		stop = orc_lookup[idx + 1] + 1;

		if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
			     (__start_orc_unwind + stop > __stop_orc_unwind))) {
			orc_warn("bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
				 idx, lookup_num_blocks, start, stop, (void *)ip);
			return NULL;
		}

		return __orc_find(__start_orc_unwind_ip + start,
				  __start_orc_unwind + start, stop - start, ip);
	}

	/* vmlinux .init slow lookup: */
	if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
		return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
				  __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);

	/* Module lookup: */
	return orc_module_find(ip);
}
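
/*
 * Note for the swap callback below: because .orc_unwind_ip values are
 * self-relative, moving one between slots must also adjust it by the
 * distance moved. The entry at b decodes to (b + *b); after moving it to
 * slot a it must hold (*b + delta), with delta = b - a, so that
 * (a + *b + delta) still names the same text address.
 */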

static void orc_sort_swap(void *_a, void *_b, int size)
{
	struct orc_entry *orc_a, *orc_b;
	struct orc_entry orc_tmp;
	int *a = _a, *b = _b, tmp;
	int delta = _b - _a;

	/* Swap the .orc_unwind_ip entries: */
	tmp = *a;
	*a = *b + delta;
	*b = tmp - delta;

	/* Swap the corresponding .orc_unwind entries: */
	orc_a = cur_orc_table + (a - cur_orc_ip_table);
	orc_b = cur_orc_table + (b - cur_orc_ip_table);
	orc_tmp = *orc_a;
	*orc_a = *orc_b;
	*orc_b = orc_tmp;
}

static int orc_sort_cmp(const void *_a, const void *_b)
{
	struct orc_entry *orc_a;
	const int *a = _a, *b = _b;
	unsigned long a_val = orc_ip(a);
	unsigned long b_val = orc_ip(b);

	if (a_val > b_val)
		return 1;
	if (a_val < b_val)
		return -1;

	/*
	 * The "weak" section terminator entries need to always be on the left
	 * to ensure the lookup code skips them in favor of real entries.
	 * These terminator entries exist to handle any gaps created by
	 * whitelisted .o files which didn't get objtool generation.
	 */
	orc_a = cur_orc_table + (a - cur_orc_ip_table);
	return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1;
}
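
/*
 * The kernel's sort() is a heapsort, which is not stable, so entries with
 * equal addresses could land in either order. The explicit tie-break above
 * (terminators compare as "less") is what guarantees every terminator ends
 * up to the left of a real entry at the same address.
 */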

#ifdef CONFIG_MODULES
void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
			void *_orc, size_t orc_size)
{
	int *orc_ip = _orc_ip;
	struct orc_entry *orc = _orc;
	unsigned int num_entries = orc_ip_size / sizeof(int);

	WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
		     orc_size % sizeof(*orc) != 0 ||
		     num_entries != orc_size / sizeof(*orc));

	/*
	 * The 'cur_orc_*' globals allow the orc_sort_swap() callback to
	 * associate an .orc_unwind_ip table entry with its corresponding
	 * .orc_unwind entry so they can both be swapped.
	 */
	mutex_lock(&sort_mutex);
	cur_orc_ip_table = orc_ip;
	cur_orc_table = orc;
	sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap);
	mutex_unlock(&sort_mutex);

	mod->arch.orc_unwind_ip = orc_ip;
	mod->arch.orc_unwind = orc;
	mod->arch.num_orcs = num_entries;
}
#endif

void __init unwind_init(void)
{
	size_t orc_ip_size = (void *)__stop_orc_unwind_ip -
			     (void *)__start_orc_unwind_ip;
	size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
	size_t num_entries = orc_ip_size / sizeof(int);
	struct orc_entry *orc;
	int i;

	if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
	    orc_size % sizeof(struct orc_entry) != 0 ||
	    num_entries != orc_size / sizeof(struct orc_entry)) {
		orc_warn("Bad or missing .orc_unwind table.  Disabling unwinder.\n");
		return;
	}

	/* Sort the .orc_unwind and .orc_unwind_ip tables: */
	sort(__start_orc_unwind_ip, num_entries, sizeof(int), orc_sort_cmp,
	     orc_sort_swap);

	/* Initialize the fast lookup table: */
	lookup_num_blocks = orc_lookup_end - orc_lookup;
	for (i = 0; i < lookup_num_blocks-1; i++) {
		orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
				 num_entries,
				 LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
		if (!orc) {
			orc_warn("Corrupt .orc_unwind table.  Disabling unwinder.\n");
			return;
		}

		orc_lookup[i] = orc - __start_orc_unwind;
	}

	/* Initialize the ending block: */
	orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries,
			 LOOKUP_STOP_IP);
	if (!orc) {
		orc_warn("Corrupt .orc_unwind table.  Disabling unwinder.\n");
		return;
	}
	orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;

	orc_init = true;
}
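
/*
 * How the fast lookup built above works: orc_lookup[] has one slot per
 * LOOKUP_BLOCK_SIZE bytes of vmlinux text, each holding the index of the
 * last ORC entry at or before that block's start address. orc_find() can
 * then binary search just the entries between orc_lookup[idx] and
 * orc_lookup[idx + 1] instead of the whole table.
 */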

unsigned long unwind_get_return_address(struct unwind_state *state)
{
	if (unwind_done(state))
		return 0;

	return __kernel_text_address(state->ip) ? state->ip : 0;
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);

unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
{
	if (unwind_done(state))
		return NULL;

	if (state->regs)
		return &state->regs->ip;

	if (state->sp)
		return (unsigned long *)state->sp - 1;

	return NULL;
}

static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
			    size_t len)
{
	struct stack_info *info = &state->stack_info;

	/*
	 * If the address isn't on the current stack, switch to the next one.
	 *
	 * We may have to traverse multiple stacks to deal with the possibility
	 * that info->next_sp could point to an empty stack and the address
	 * could be on a subsequent stack.
	 */
	while (!on_stack(info, (void *)addr, len))
		if (get_stack_info(info->next_sp, state->task, info,
				   &state->stack_mask))
			return false;

	return true;
}
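
/*
 * Context: an unwind can cross several stacks (exception stacks, the IRQ
 * stack, then the task stack). get_stack_info() identifies which stack an
 * address belongs to and records the next stack in the chain, while
 * state->stack_mask remembers the stacks already visited so that a corrupt
 * chain can't send the loop above around in circles.
 */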

static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
			    unsigned long *val)
{
	if (!stack_access_ok(state, addr, sizeof(long)))
		return false;

	*val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr);
	return true;
}

#define REGS_SIZE (sizeof(struct pt_regs))
#define SP_OFFSET (offsetof(struct pt_regs, sp))
#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
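
/*
 * Worked example for the macros above (x86_64 layout; exact sizes can vary
 * by kernel version): struct pt_regs ends with ip, cs, flags, sp, ss, which
 * is exactly the hardware iret frame. An iret-type ORC entry points at that
 * five-word tail rather than at a full pt_regs, so IRET_REGS_SIZE is
 * 5 * 8 = 40 bytes, and deref_stack_regs() biases its regs pointer downward
 * by (REGS_SIZE - regs_size) so the usual pt_regs field offsets line up.
 */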

static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
			     unsigned long *ip, unsigned long *sp, bool full)
{
	size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
	size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
	struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);

	if (IS_ENABLED(CONFIG_X86_64)) {
		if (!stack_access_ok(state, addr, regs_size))
			return false;

		*ip = regs->ip;
		*sp = regs->sp;

		return true;
	}

	if (!stack_access_ok(state, addr, sp_offset))
		return false;

	*ip = regs->ip;

	if (user_mode(regs)) {
		if (!stack_access_ok(state, addr + sp_offset,
				     REGS_SIZE - SP_OFFSET))
			return false;

		*sp = regs->sp;
	} else
		*sp = (unsigned long)&regs->sp;

	return true;
}

bool unwind_next_frame(struct unwind_state *state)
{
	unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
	enum stack_type prev_type = state->stack_info.type;
	struct orc_entry *orc;
	struct pt_regs *ptregs;
	bool indirect = false;

	if (unwind_done(state))
		return false;

	/* Don't let modules unload while we're reading their ORC data. */
	preempt_disable();

	/* Have we reached the end? */
	if (state->regs && user_mode(state->regs))
		goto done;

	/*
	 * Find the orc_entry associated with the text address.
	 *
	 * Decrement call return addresses by one so they work for sibling
	 * calls and calls to noreturn functions.
	 */
	orc = orc_find(state->signal ? state->ip : state->ip - 1);
	if (!orc || orc->sp_reg == ORC_REG_UNDEFINED)
		goto done;
	orig_ip = state->ip;

	/* Find the previous frame's stack: */
	switch (orc->sp_reg) {
	case ORC_REG_SP:
		sp = state->sp + orc->sp_offset;
		break;

	case ORC_REG_BP:
		sp = state->bp + orc->sp_offset;
		break;

	case ORC_REG_SP_INDIRECT:
		sp = state->sp + orc->sp_offset;
		indirect = true;
		break;

	case ORC_REG_BP_INDIRECT:
		sp = state->bp + orc->sp_offset;
		indirect = true;
		break;

	case ORC_REG_R10:
		if (!state->regs || !state->full_regs) {
			orc_warn("missing regs for base reg R10 at ip %pB\n",
				 (void *)state->ip);
			goto done;
		}
		sp = state->regs->r10;
		break;

	case ORC_REG_R13:
		if (!state->regs || !state->full_regs) {
			orc_warn("missing regs for base reg R13 at ip %pB\n",
				 (void *)state->ip);
			goto done;
		}
		sp = state->regs->r13;
		break;

	case ORC_REG_DI:
		if (!state->regs || !state->full_regs) {
			orc_warn("missing regs for base reg DI at ip %pB\n",
				 (void *)state->ip);
			goto done;
		}
		sp = state->regs->di;
		break;

	case ORC_REG_DX:
		if (!state->regs || !state->full_regs) {
			orc_warn("missing regs for base reg DX at ip %pB\n",
				 (void *)state->ip);
			goto done;
		}
		sp = state->regs->dx;
		break;

	default:
		orc_warn("unknown SP base reg %d for ip %pB\n",
			 orc->sp_reg, (void *)state->ip);
		goto done;
	}

	if (indirect) {
		if (!deref_stack_reg(state, sp, &sp))
			goto done;
	}

	/* Find IP, SP and possibly regs: */
	switch (orc->type) {
	case ORC_TYPE_CALL:
		ip_p = sp - sizeof(long);

		if (!deref_stack_reg(state, ip_p, &state->ip))
			goto done;

		state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
						  state->ip, (void *)ip_p);

		state->sp = sp;
		state->regs = NULL;
		state->signal = false;
		break;

	case ORC_TYPE_REGS:
		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
			orc_warn("can't dereference registers at %p for ip %pB\n",
				 (void *)sp, (void *)orig_ip);
			goto done;
		}

		state->regs = (struct pt_regs *)sp;
		state->full_regs = true;
		state->signal = true;
		break;

	case ORC_TYPE_REGS_IRET:
		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
			orc_warn("can't dereference iret registers at %p for ip %pB\n",
				 (void *)sp, (void *)orig_ip);
			goto done;
		}

		ptregs = container_of((void *)sp, struct pt_regs, ip);
		if ((unsigned long)ptregs >= prev_sp &&
		    on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
			state->regs = ptregs;
			state->full_regs = false;
		} else
			state->regs = NULL;

		state->signal = true;
		break;

	default:
		orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
			 orc->type, (void *)orig_ip);
		break;
	}

	/* Find BP: */
	switch (orc->bp_reg) {
	case ORC_REG_UNDEFINED:
		if (state->regs && state->full_regs)
			state->bp = state->regs->bp;
		break;

	case ORC_REG_PREV_SP:
		if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
			goto done;
		break;

	case ORC_REG_BP:
		if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
			goto done;
		break;

	default:
		orc_warn("unknown BP base reg %d for ip %pB\n",
			 orc->bp_reg, (void *)orig_ip);
		goto done;
	}

	/* Prevent a recursive loop due to bad ORC data: */
	if (state->stack_info.type == prev_type &&
	    on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
	    state->sp <= prev_sp) {
		orc_warn("stack going in the wrong direction? ip=%pB\n",
			 (void *)orig_ip);
		goto done;
	}

	preempt_enable();
	return true;

done:
	preempt_enable();
	state->stack_info.type = STACK_TYPE_UNKNOWN;
	return false;
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
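
/*
 * A worked ORC_TYPE_CALL step with made-up numbers: suppose the entry for
 * the current ip has sp_reg == ORC_REG_SP and sp_offset == 16. If state->sp
 * is 0xffffc90000003f38, the caller's stack pointer is 0xffffc90000003f48
 * and the return address is the word just below it, at 0xffffc90000003f40.
 * That word becomes the new state->ip and the walk repeats from there.
 */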

void __unwind_start(struct unwind_state *state, struct task_struct *task,
		    struct pt_regs *regs, unsigned long *first_frame)
{
	memset(state, 0, sizeof(*state));
	state->task = task;

	/*
	 * Refuse to unwind the stack of a task while it's executing on another
	 * CPU.  This check is racy, but that's ok: the unwinder has other
	 * checks to prevent it from going off the rails.
	 */
	if (task_on_another_cpu(task))
		goto done;

	if (regs) {
		if (user_mode(regs))
			goto done;

		state->ip = regs->ip;
		state->sp = kernel_stack_pointer(regs);
		state->bp = regs->bp;
		state->regs = regs;
		state->full_regs = true;
		state->signal = true;

	} else if (task == current) {
		asm volatile("lea (%%rip), %0\n\t"
			     "mov %%rsp, %1\n\t"
			     "mov %%rbp, %2\n\t"
			     : "=r" (state->ip), "=r" (state->sp),
			       "=r" (state->bp));

	} else {
		struct inactive_task_frame *frame = (void *)task->thread.sp;

		state->sp = task->thread.sp;
		state->bp = READ_ONCE_NOCHECK(frame->bp);
		state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
	}

	if (get_stack_info((unsigned long *)state->sp, state->task,
			   &state->stack_info, &state->stack_mask))
		return;

	/*
	 * The caller can provide the address of the first frame directly
	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
	 * to start unwinding at.  Skip ahead until we reach it.
	 */

	/* When starting from regs, skip the regs frame: */
	if (regs) {
		unwind_next_frame(state);
		return;
	}

	/* Otherwise, skip ahead to the user-specified starting frame: */
	while (!unwind_done(state) &&
	       (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
			state->sp <= (unsigned long)first_frame))
		unwind_next_frame(state);

	return;

done:
	state->stack_info.type = STACK_TYPE_UNKNOWN;
	return;
}
EXPORT_SYMBOL_GPL(__unwind_start);
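
/*
 * Sketch of the typical consumer loop (modeled on show_trace_log_lvl() in
 * dumpstack.c; unwind_start() is the inline wrapper around __unwind_start()
 * in <asm/unwind.h>):
 *
 *	struct unwind_state state;
 *	unsigned long addr;
 *
 *	for (unwind_start(&state, task, regs, first_frame);
 *	     !unwind_done(&state);
 *	     unwind_next_frame(&state)) {
 *		addr = unwind_get_return_address(&state);
 *		if (!addr)
 *			break;
 *		printk("%pB\n", (void *)addr);
 *	}
 */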