ptrace.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634
  1. /* By Ross Biro 1/23/92 */
  2. /*
  3. * Pentium III FXSR, SSE support
  4. * Gareth Hughes <gareth@valinux.com>, May 2000
  5. */
  6. #include <linux/kernel.h>
  7. #include <linux/sched.h>
  8. #include <linux/mm.h>
  9. #include <linux/smp.h>
  10. #include <linux/errno.h>
  11. #include <linux/slab.h>
  12. #include <linux/ptrace.h>
  13. #include <linux/regset.h>
  14. #include <linux/tracehook.h>
  15. #include <linux/user.h>
  16. #include <linux/elf.h>
  17. #include <linux/security.h>
  18. #include <linux/audit.h>
  19. #include <linux/seccomp.h>
  20. #include <linux/signal.h>
  21. #include <linux/perf_event.h>
  22. #include <linux/hw_breakpoint.h>
  23. #include <linux/rcupdate.h>
  24. #include <linux/export.h>
  25. #include <linux/context_tracking.h>
  26. #include <asm/uaccess.h>
  27. #include <asm/pgtable.h>
  28. #include <asm/processor.h>
  29. #include <asm/i387.h>
  30. #include <asm/fpu-internal.h>
  31. #include <asm/debugreg.h>
  32. #include <asm/ldt.h>
  33. #include <asm/desc.h>
  34. #include <asm/prctl.h>
  35. #include <asm/proto.h>
  36. #include <asm/hw_breakpoint.h>
  37. #include <asm/traps.h>
  38. #include "tls.h"
  39. #define CREATE_TRACE_POINTS
  40. #include <trace/events/syscalls.h>
  /*
   * Indices that name the register sets handled by this file.
   * arch_ptrace() hands REGSET_GENERAL, REGSET_FP and REGSET_XFP to
   * copy_regset_to_user() / copy_regset_from_user().
   *
   * REGSET_IOPERM64 aliases REGSET_XFP, so both evaluate to the same index.
   * NOTE(review): presumably the 32-bit and 64-bit regset views use that
   * slot differently -- confirm against the view tables.
   */
  enum x86_regset {
      REGSET_GENERAL = 0,
      REGSET_FP,
      REGSET_XFP,
      REGSET_IOPERM64 = REGSET_XFP,
      REGSET_XSTATE,
      REGSET_TLS,
      REGSET_IOPERM32,
  };
  50. struct pt_regs_offset {
  51. const char *name;
  52. int offset;
  53. };
  54. #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
  55. #define REG_OFFSET_END {.name = NULL, .offset = 0}
  56. static const struct pt_regs_offset regoffset_table[] = {
  57. #ifdef CONFIG_X86_64
  58. REG_OFFSET_NAME(r15),
  59. REG_OFFSET_NAME(r14),
  60. REG_OFFSET_NAME(r13),
  61. REG_OFFSET_NAME(r12),
  62. REG_OFFSET_NAME(r11),
  63. REG_OFFSET_NAME(r10),
  64. REG_OFFSET_NAME(r9),
  65. REG_OFFSET_NAME(r8),
  66. #endif
  67. REG_OFFSET_NAME(bx),
  68. REG_OFFSET_NAME(cx),
  69. REG_OFFSET_NAME(dx),
  70. REG_OFFSET_NAME(si),
  71. REG_OFFSET_NAME(di),
  72. REG_OFFSET_NAME(bp),
  73. REG_OFFSET_NAME(ax),
  74. #ifdef CONFIG_X86_32
  75. REG_OFFSET_NAME(ds),
  76. REG_OFFSET_NAME(es),
  77. REG_OFFSET_NAME(fs),
  78. REG_OFFSET_NAME(gs),
  79. #endif
  80. REG_OFFSET_NAME(orig_ax),
  81. REG_OFFSET_NAME(ip),
  82. REG_OFFSET_NAME(cs),
  83. REG_OFFSET_NAME(flags),
  84. REG_OFFSET_NAME(sp),
  85. REG_OFFSET_NAME(ss),
  86. REG_OFFSET_END,
  87. };
  88. /**
  89. * regs_query_register_offset() - query register offset from its name
  90. * @name: the name of a register
  91. *
  92. * regs_query_register_offset() returns the offset of a register in struct
  93. * pt_regs from its name. If the name is invalid, this returns -EINVAL;
  94. */
  95. int regs_query_register_offset(const char *name)
  96. {
  97. const struct pt_regs_offset *roff;
  98. for (roff = regoffset_table; roff->name != NULL; roff++)
  99. if (!strcmp(roff->name, name))
  100. return roff->offset;
  101. return -EINVAL;
  102. }
  103. /**
  104. * regs_query_register_name() - query register name from its offset
  105. * @offset: the offset of a register in struct pt_regs.
  106. *
  107. * regs_query_register_name() returns the name of a register from its
  108. * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
  109. */
  110. const char *regs_query_register_name(unsigned int offset)
  111. {
  112. const struct pt_regs_offset *roff;
  113. for (roff = regoffset_table; roff->name != NULL; roff++)
  114. if (roff->offset == offset)
  115. return roff->name;
  116. return NULL;
  117. }
  118. static const int arg_offs_table[] = {
  119. #ifdef CONFIG_X86_32
  120. [0] = offsetof(struct pt_regs, ax),
  121. [1] = offsetof(struct pt_regs, dx),
  122. [2] = offsetof(struct pt_regs, cx)
  123. #else /* CONFIG_X86_64 */
  124. [0] = offsetof(struct pt_regs, di),
  125. [1] = offsetof(struct pt_regs, si),
  126. [2] = offsetof(struct pt_regs, dx),
  127. [3] = offsetof(struct pt_regs, cx),
  128. [4] = offsetof(struct pt_regs, r8),
  129. [5] = offsetof(struct pt_regs, r9)
  130. #endif
  131. };
  132. /*
  133. * does not yet catch signals sent when the child dies.
  134. * in exit.c or in signal.c.
  135. */
  /*
   * EFLAGS bits the user is allowed to change [1 = access, 0 = no access].
   * set_flags() only copies bits covered by FLAG_MASK, which is built from
   * this mask, into the saved flags register.
   */
  #define FLAG_MASK_32 \
      ((unsigned long)(X86_EFLAGS_CF | X86_EFLAGS_PF | \
                       X86_EFLAGS_AF | X86_EFLAGS_ZF | \
                       X86_EFLAGS_SF | X86_EFLAGS_TF | \
                       X86_EFLAGS_DF | X86_EFLAGS_OF | \
                       X86_EFLAGS_RF | X86_EFLAGS_AC))
  145. /*
  146. * Determines whether a value may be installed in a segment register.
  147. */
  148. static inline bool invalid_selector(u16 value)
  149. {
  150. return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL);
  151. }
  152. #ifdef CONFIG_X86_32
  153. #define FLAG_MASK FLAG_MASK_32
  154. /*
  155. * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
  156. * when it traps. The previous stack will be directly underneath the saved
  157. * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
  158. *
  159. * Now, if the stack is empty, '&regs->sp' is out of range. In this
  160. * case we try to take the previous stack. To always return a non-null
  161. * stack pointer we fall back to regs as stack if no previous stack
  162. * exists.
  163. *
  164. * This is valid only for kernel mode traps.
  165. */
  166. unsigned long kernel_stack_pointer(struct pt_regs *regs)
  167. {
  168. unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
  169. unsigned long sp = (unsigned long)&regs->sp;
  170. u32 *prev_esp;
  171. if (context == (sp & ~(THREAD_SIZE - 1)))
  172. return sp;
  173. prev_esp = (u32 *)(context);
  174. if (prev_esp)
  175. return (unsigned long)prev_esp;
  176. return (unsigned long)regs;
  177. }
  178. EXPORT_SYMBOL_GPL(kernel_stack_pointer);
  179. static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
  180. {
  181. BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
  182. return &regs->bx + (regno >> 2);
  183. }
  184. static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
  185. {
  186. /*
  187. * Returning the value truncates it to 16 bits.
  188. */
  189. unsigned int retval;
  190. if (offset != offsetof(struct user_regs_struct, gs))
  191. retval = *pt_regs_access(task_pt_regs(task), offset);
  192. else {
  193. if (task == current)
  194. retval = get_user_gs(task_pt_regs(task));
  195. else
  196. retval = task_user_gs(task);
  197. }
  198. return retval;
  199. }
  200. static int set_segment_reg(struct task_struct *task,
  201. unsigned long offset, u16 value)
  202. {
  203. /*
  204. * The value argument was already truncated to 16 bits.
  205. */
  206. if (invalid_selector(value))
  207. return -EIO;
  208. /*
  209. * For %cs and %ss we cannot permit a null selector.
  210. * We can permit a bogus selector as long as it has USER_RPL.
  211. * Null selectors are fine for other segment registers, but
  212. * we will never get back to user mode with invalid %cs or %ss
  213. * and will take the trap in iret instead. Much code relies
  214. * on user_mode() to distinguish a user trap frame (which can
  215. * safely use invalid selectors) from a kernel trap frame.
  216. */
  217. switch (offset) {
  218. case offsetof(struct user_regs_struct, cs):
  219. case offsetof(struct user_regs_struct, ss):
  220. if (unlikely(value == 0))
  221. return -EIO;
  222. default:
  223. *pt_regs_access(task_pt_regs(task), offset) = value;
  224. break;
  225. case offsetof(struct user_regs_struct, gs):
  226. if (task == current)
  227. set_user_gs(task_pt_regs(task), value);
  228. else
  229. task_user_gs(task) = value;
  230. }
  231. return 0;
  232. }
  233. #else /* CONFIG_X86_64 */
  234. #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
  235. static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset)
  236. {
  237. BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0);
  238. return &regs->r15 + (offset / sizeof(regs->r15));
  239. }
  240. static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
  241. {
  242. /*
  243. * Returning the value truncates it to 16 bits.
  244. */
  245. unsigned int seg;
  246. switch (offset) {
  247. case offsetof(struct user_regs_struct, fs):
  248. if (task == current) {
  249. /* Older gas can't assemble movq %?s,%r?? */
  250. asm("movl %%fs,%0" : "=r" (seg));
  251. return seg;
  252. }
  253. return task->thread.fsindex;
  254. case offsetof(struct user_regs_struct, gs):
  255. if (task == current) {
  256. asm("movl %%gs,%0" : "=r" (seg));
  257. return seg;
  258. }
  259. return task->thread.gsindex;
  260. case offsetof(struct user_regs_struct, ds):
  261. if (task == current) {
  262. asm("movl %%ds,%0" : "=r" (seg));
  263. return seg;
  264. }
  265. return task->thread.ds;
  266. case offsetof(struct user_regs_struct, es):
  267. if (task == current) {
  268. asm("movl %%es,%0" : "=r" (seg));
  269. return seg;
  270. }
  271. return task->thread.es;
  272. case offsetof(struct user_regs_struct, cs):
  273. case offsetof(struct user_regs_struct, ss):
  274. break;
  275. }
  276. return *pt_regs_access(task_pt_regs(task), offset);
  277. }
  278. static int set_segment_reg(struct task_struct *task,
  279. unsigned long offset, u16 value)
  280. {
  281. /*
  282. * The value argument was already truncated to 16 bits.
  283. */
  284. if (invalid_selector(value))
  285. return -EIO;
  286. switch (offset) {
  287. case offsetof(struct user_regs_struct,fs):
  288. /*
  289. * If this is setting fs as for normal 64-bit use but
  290. * setting fs_base has implicitly changed it, leave it.
  291. */
  292. if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
  293. task->thread.fs != 0) ||
  294. (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
  295. task->thread.fs == 0))
  296. break;
  297. task->thread.fsindex = value;
  298. if (task == current)
  299. loadsegment(fs, task->thread.fsindex);
  300. break;
  301. case offsetof(struct user_regs_struct,gs):
  302. /*
  303. * If this is setting gs as for normal 64-bit use but
  304. * setting gs_base has implicitly changed it, leave it.
  305. */
  306. if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
  307. task->thread.gs != 0) ||
  308. (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
  309. task->thread.gs == 0))
  310. break;
  311. task->thread.gsindex = value;
  312. if (task == current)
  313. load_gs_index(task->thread.gsindex);
  314. break;
  315. case offsetof(struct user_regs_struct,ds):
  316. task->thread.ds = value;
  317. if (task == current)
  318. loadsegment(ds, task->thread.ds);
  319. break;
  320. case offsetof(struct user_regs_struct,es):
  321. task->thread.es = value;
  322. if (task == current)
  323. loadsegment(es, task->thread.es);
  324. break;
  325. /*
  326. * Can't actually change these in 64-bit mode.
  327. */
  328. case offsetof(struct user_regs_struct,cs):
  329. if (unlikely(value == 0))
  330. return -EIO;
  331. task_pt_regs(task)->cs = value;
  332. break;
  333. case offsetof(struct user_regs_struct,ss):
  334. if (unlikely(value == 0))
  335. return -EIO;
  336. task_pt_regs(task)->ss = value;
  337. break;
  338. }
  339. return 0;
  340. }
  341. #endif /* CONFIG_X86_32 */
  342. static unsigned long get_flags(struct task_struct *task)
  343. {
  344. unsigned long retval = task_pt_regs(task)->flags;
  345. /*
  346. * If the debugger set TF, hide it from the readout.
  347. */
  348. if (test_tsk_thread_flag(task, TIF_FORCED_TF))
  349. retval &= ~X86_EFLAGS_TF;
  350. return retval;
  351. }
  352. static int set_flags(struct task_struct *task, unsigned long value)
  353. {
  354. struct pt_regs *regs = task_pt_regs(task);
  355. /*
  356. * If the user value contains TF, mark that
  357. * it was not "us" (the debugger) that set it.
  358. * If not, make sure it stays set if we had.
  359. */
  360. if (value & X86_EFLAGS_TF)
  361. clear_tsk_thread_flag(task, TIF_FORCED_TF);
  362. else if (test_tsk_thread_flag(task, TIF_FORCED_TF))
  363. value |= X86_EFLAGS_TF;
  364. regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK);
  365. return 0;
  366. }
  367. static int putreg(struct task_struct *child,
  368. unsigned long offset, unsigned long value)
  369. {
  370. switch (offset) {
  371. case offsetof(struct user_regs_struct, cs):
  372. case offsetof(struct user_regs_struct, ds):
  373. case offsetof(struct user_regs_struct, es):
  374. case offsetof(struct user_regs_struct, fs):
  375. case offsetof(struct user_regs_struct, gs):
  376. case offsetof(struct user_regs_struct, ss):
  377. return set_segment_reg(child, offset, value);
  378. case offsetof(struct user_regs_struct, flags):
  379. return set_flags(child, value);
  380. #ifdef CONFIG_X86_64
  381. case offsetof(struct user_regs_struct,fs_base):
  382. if (value >= TASK_SIZE_OF(child))
  383. return -EIO;
  384. /*
  385. * When changing the segment base, use do_arch_prctl
  386. * to set either thread.fs or thread.fsindex and the
  387. * corresponding GDT slot.
  388. */
  389. if (child->thread.fs != value)
  390. return do_arch_prctl(child, ARCH_SET_FS, value);
  391. return 0;
  392. case offsetof(struct user_regs_struct,gs_base):
  393. /*
  394. * Exactly the same here as the %fs handling above.
  395. */
  396. if (value >= TASK_SIZE_OF(child))
  397. return -EIO;
  398. if (child->thread.gs != value)
  399. return do_arch_prctl(child, ARCH_SET_GS, value);
  400. return 0;
  401. #endif
  402. }
  403. *pt_regs_access(task_pt_regs(child), offset) = value;
  404. return 0;
  405. }
  406. static unsigned long getreg(struct task_struct *task, unsigned long offset)
  407. {
  408. switch (offset) {
  409. case offsetof(struct user_regs_struct, cs):
  410. case offsetof(struct user_regs_struct, ds):
  411. case offsetof(struct user_regs_struct, es):
  412. case offsetof(struct user_regs_struct, fs):
  413. case offsetof(struct user_regs_struct, gs):
  414. case offsetof(struct user_regs_struct, ss):
  415. return get_segment_reg(task, offset);
  416. case offsetof(struct user_regs_struct, flags):
  417. return get_flags(task);
  418. #ifdef CONFIG_X86_64
  419. case offsetof(struct user_regs_struct, fs_base): {
  420. /*
  421. * do_arch_prctl may have used a GDT slot instead of
  422. * the MSR. To userland, it appears the same either
  423. * way, except the %fs segment selector might not be 0.
  424. */
  425. unsigned int seg = task->thread.fsindex;
  426. if (task->thread.fs != 0)
  427. return task->thread.fs;
  428. if (task == current)
  429. asm("movl %%fs,%0" : "=r" (seg));
  430. if (seg != FS_TLS_SEL)
  431. return 0;
  432. return get_desc_base(&task->thread.tls_array[FS_TLS]);
  433. }
  434. case offsetof(struct user_regs_struct, gs_base): {
  435. /*
  436. * Exactly the same here as the %fs handling above.
  437. */
  438. unsigned int seg = task->thread.gsindex;
  439. if (task->thread.gs != 0)
  440. return task->thread.gs;
  441. if (task == current)
  442. asm("movl %%gs,%0" : "=r" (seg));
  443. if (seg != GS_TLS_SEL)
  444. return 0;
  445. return get_desc_base(&task->thread.tls_array[GS_TLS]);
  446. }
  447. #endif
  448. }
  449. return *pt_regs_access(task_pt_regs(task), offset);
  450. }
  451. static int genregs_get(struct task_struct *target,
  452. const struct user_regset *regset,
  453. unsigned int pos, unsigned int count,
  454. void *kbuf, void __user *ubuf)
  455. {
  456. if (kbuf) {
  457. unsigned long *k = kbuf;
  458. while (count >= sizeof(*k)) {
  459. *k++ = getreg(target, pos);
  460. count -= sizeof(*k);
  461. pos += sizeof(*k);
  462. }
  463. } else {
  464. unsigned long __user *u = ubuf;
  465. while (count >= sizeof(*u)) {
  466. if (__put_user(getreg(target, pos), u++))
  467. return -EFAULT;
  468. count -= sizeof(*u);
  469. pos += sizeof(*u);
  470. }
  471. }
  472. return 0;
  473. }
  474. static int genregs_set(struct task_struct *target,
  475. const struct user_regset *regset,
  476. unsigned int pos, unsigned int count,
  477. const void *kbuf, const void __user *ubuf)
  478. {
  479. int ret = 0;
  480. if (kbuf) {
  481. const unsigned long *k = kbuf;
  482. while (count >= sizeof(*k) && !ret) {
  483. ret = putreg(target, pos, *k++);
  484. count -= sizeof(*k);
  485. pos += sizeof(*k);
  486. }
  487. } else {
  488. const unsigned long __user *u = ubuf;
  489. while (count >= sizeof(*u) && !ret) {
  490. unsigned long word;
  491. ret = __get_user(word, u++);
  492. if (ret)
  493. break;
  494. ret = putreg(target, pos, word);
  495. count -= sizeof(*u);
  496. pos += sizeof(*u);
  497. }
  498. }
  499. return ret;
  500. }
  501. static void ptrace_triggered(struct perf_event *bp,
  502. struct perf_sample_data *data,
  503. struct pt_regs *regs)
  504. {
  505. int i;
  506. struct thread_struct *thread = &(current->thread);
  507. /*
  508. * Store in the virtual DR6 register the fact that the breakpoint
  509. * was hit so the thread's debugger will see it.
  510. */
  511. for (i = 0; i < HBP_NUM; i++) {
  512. if (thread->ptrace_bps[i] == bp)
  513. break;
  514. }
  515. thread->debugreg6 |= (DR_TRAP0 << i);
  516. }
  517. /*
  518. * Walk through every ptrace breakpoints for this thread and
  519. * build the dr7 value on top of their attributes.
  520. *
  521. */
  522. static unsigned long ptrace_get_dr7(struct perf_event *bp[])
  523. {
  524. int i;
  525. int dr7 = 0;
  526. struct arch_hw_breakpoint *info;
  527. for (i = 0; i < HBP_NUM; i++) {
  528. if (bp[i] && !bp[i]->attr.disabled) {
  529. info = counter_arch_bp(bp[i]);
  530. dr7 |= encode_dr7(i, info->len, info->type);
  531. }
  532. }
  533. return dr7;
  534. }
  535. static int ptrace_fill_bp_fields(struct perf_event_attr *attr,
  536. int len, int type, bool disabled)
  537. {
  538. int err, bp_len, bp_type;
  539. err = arch_bp_generic_fields(len, type, &bp_len, &bp_type);
  540. if (!err) {
  541. attr->bp_len = bp_len;
  542. attr->bp_type = bp_type;
  543. attr->disabled = disabled;
  544. }
  545. return err;
  546. }
  547. static struct perf_event *
  548. ptrace_register_breakpoint(struct task_struct *tsk, int len, int type,
  549. unsigned long addr, bool disabled)
  550. {
  551. struct perf_event_attr attr;
  552. int err;
  553. ptrace_breakpoint_init(&attr);
  554. attr.bp_addr = addr;
  555. err = ptrace_fill_bp_fields(&attr, len, type, disabled);
  556. if (err)
  557. return ERR_PTR(err);
  558. return register_user_hw_breakpoint(&attr, ptrace_triggered,
  559. NULL, tsk);
  560. }
  561. static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
  562. int disabled)
  563. {
  564. struct perf_event_attr attr = bp->attr;
  565. int err;
  566. err = ptrace_fill_bp_fields(&attr, len, type, disabled);
  567. if (err)
  568. return err;
  569. return modify_user_hw_breakpoint(bp, &attr);
  570. }
  571. /*
  572. * Handle ptrace writes to debug register 7.
  573. */
  574. static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
  575. {
  576. struct thread_struct *thread = &tsk->thread;
  577. unsigned long old_dr7;
  578. bool second_pass = false;
  579. int i, rc, ret = 0;
  580. data &= ~DR_CONTROL_RESERVED;
  581. old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
  582. restore:
  583. rc = 0;
  584. for (i = 0; i < HBP_NUM; i++) {
  585. unsigned len, type;
  586. bool disabled = !decode_dr7(data, i, &len, &type);
  587. struct perf_event *bp = thread->ptrace_bps[i];
  588. if (!bp) {
  589. if (disabled)
  590. continue;
  591. bp = ptrace_register_breakpoint(tsk,
  592. len, type, 0, disabled);
  593. if (IS_ERR(bp)) {
  594. rc = PTR_ERR(bp);
  595. break;
  596. }
  597. thread->ptrace_bps[i] = bp;
  598. continue;
  599. }
  600. rc = ptrace_modify_breakpoint(bp, len, type, disabled);
  601. if (rc)
  602. break;
  603. }
  604. /* Restore if the first pass failed, second_pass shouldn't fail. */
  605. if (rc && !WARN_ON(second_pass)) {
  606. ret = rc;
  607. data = old_dr7;
  608. second_pass = true;
  609. goto restore;
  610. }
  611. return ret;
  612. }
  613. /*
  614. * Handle PTRACE_PEEKUSR calls for the debug register area.
  615. */
  616. static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
  617. {
  618. struct thread_struct *thread = &tsk->thread;
  619. unsigned long val = 0;
  620. if (n < HBP_NUM) {
  621. struct perf_event *bp = thread->ptrace_bps[n];
  622. if (bp)
  623. val = bp->hw.info.address;
  624. } else if (n == 6) {
  625. val = thread->debugreg6;
  626. } else if (n == 7) {
  627. val = thread->ptrace_dr7;
  628. }
  629. return val;
  630. }
  631. static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
  632. unsigned long addr)
  633. {
  634. struct thread_struct *t = &tsk->thread;
  635. struct perf_event *bp = t->ptrace_bps[nr];
  636. int err = 0;
  637. if (!bp) {
  638. /*
  639. * Put stub len and type to create an inactive but correct bp.
  640. *
  641. * CHECKME: the previous code returned -EIO if the addr wasn't
  642. * a valid task virtual addr. The new one will return -EINVAL in
  643. * this case.
  644. * -EINVAL may be what we want for in-kernel breakpoints users,
  645. * but -EIO looks better for ptrace, since we refuse a register
  646. * writing for the user. And anyway this is the previous
  647. * behaviour.
  648. */
  649. bp = ptrace_register_breakpoint(tsk,
  650. X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE,
  651. addr, true);
  652. if (IS_ERR(bp))
  653. err = PTR_ERR(bp);
  654. else
  655. t->ptrace_bps[nr] = bp;
  656. } else {
  657. struct perf_event_attr attr = bp->attr;
  658. attr.bp_addr = addr;
  659. err = modify_user_hw_breakpoint(bp, &attr);
  660. }
  661. return err;
  662. }
  663. /*
  664. * Handle PTRACE_POKEUSR calls for the debug register area.
  665. */
  666. static int ptrace_set_debugreg(struct task_struct *tsk, int n,
  667. unsigned long val)
  668. {
  669. struct thread_struct *thread = &tsk->thread;
  670. /* There are no DR4 or DR5 registers */
  671. int rc = -EIO;
  672. if (n < HBP_NUM) {
  673. rc = ptrace_set_breakpoint_addr(tsk, n, val);
  674. } else if (n == 6) {
  675. thread->debugreg6 = val;
  676. rc = 0;
  677. } else if (n == 7) {
  678. rc = ptrace_write_dr7(tsk, val);
  679. if (!rc)
  680. thread->ptrace_dr7 = val;
  681. }
  682. return rc;
  683. }
  684. /*
  685. * These access the current or another (stopped) task's io permission
  686. * bitmap for debugging or core dump.
  687. */
  688. static int ioperm_active(struct task_struct *target,
  689. const struct user_regset *regset)
  690. {
  691. return target->thread.io_bitmap_max / regset->size;
  692. }
  693. static int ioperm_get(struct task_struct *target,
  694. const struct user_regset *regset,
  695. unsigned int pos, unsigned int count,
  696. void *kbuf, void __user *ubuf)
  697. {
  698. if (!target->thread.io_bitmap_ptr)
  699. return -ENXIO;
  700. return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
  701. target->thread.io_bitmap_ptr,
  702. 0, IO_BITMAP_BYTES);
  703. }
  /*
   * Called by kernel/ptrace.c when detaching.
   *
   * Makes sure the single step bit is not set on @child and, on
   * configurations that define TIF_SYSCALL_EMU, clears that thread flag too.
   */
  void ptrace_disable(struct task_struct *child)
  {
      user_disable_single_step(child);

  #ifdef TIF_SYSCALL_EMU
      clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
  #endif
  }
  716. #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
  717. static const struct user_regset_view user_x86_32_view; /* Initialized below. */
  718. #endif
  719. long arch_ptrace(struct task_struct *child, long request,
  720. unsigned long addr, unsigned long data)
  721. {
  722. int ret;
  723. unsigned long __user *datap = (unsigned long __user *)data;
  724. switch (request) {
  725. /* read the word at location addr in the USER area. */
  726. case PTRACE_PEEKUSR: {
  727. unsigned long tmp;
  728. ret = -EIO;
  729. if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
  730. break;
  731. tmp = 0; /* Default return condition */
  732. if (addr < sizeof(struct user_regs_struct))
  733. tmp = getreg(child, addr);
  734. else if (addr >= offsetof(struct user, u_debugreg[0]) &&
  735. addr <= offsetof(struct user, u_debugreg[7])) {
  736. addr -= offsetof(struct user, u_debugreg[0]);
  737. tmp = ptrace_get_debugreg(child, addr / sizeof(data));
  738. }
  739. ret = put_user(tmp, datap);
  740. break;
  741. }
  742. case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
  743. ret = -EIO;
  744. if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
  745. break;
  746. if (addr < sizeof(struct user_regs_struct))
  747. ret = putreg(child, addr, data);
  748. else if (addr >= offsetof(struct user, u_debugreg[0]) &&
  749. addr <= offsetof(struct user, u_debugreg[7])) {
  750. addr -= offsetof(struct user, u_debugreg[0]);
  751. ret = ptrace_set_debugreg(child,
  752. addr / sizeof(data), data);
  753. }
  754. break;
  755. case PTRACE_GETREGS: /* Get all gp regs from the child. */
  756. return copy_regset_to_user(child,
  757. task_user_regset_view(current),
  758. REGSET_GENERAL,
  759. 0, sizeof(struct user_regs_struct),
  760. datap);
  761. case PTRACE_SETREGS: /* Set all gp regs in the child. */
  762. return copy_regset_from_user(child,
  763. task_user_regset_view(current),
  764. REGSET_GENERAL,
  765. 0, sizeof(struct user_regs_struct),
  766. datap);
  767. case PTRACE_GETFPREGS: /* Get the child FPU state. */
  768. return copy_regset_to_user(child,
  769. task_user_regset_view(current),
  770. REGSET_FP,
  771. 0, sizeof(struct user_i387_struct),
  772. datap);
  773. case PTRACE_SETFPREGS: /* Set the child FPU state. */
  774. return copy_regset_from_user(child,
  775. task_user_regset_view(current),
  776. REGSET_FP,
  777. 0, sizeof(struct user_i387_struct),
  778. datap);
  779. #ifdef CONFIG_X86_32
  780. case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
  781. return copy_regset_to_user(child, &user_x86_32_view,
  782. REGSET_XFP,
  783. 0, sizeof(struct user_fxsr_struct),
  784. datap) ? -EIO : 0;
  785. case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
  786. return copy_regset_from_user(child, &user_x86_32_view,
  787. REGSET_XFP,
  788. 0, sizeof(struct user_fxsr_struct),
  789. datap) ? -EIO : 0;
  790. #endif
  791. #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
  792. case PTRACE_GET_THREAD_AREA:
  793. if ((int) addr < 0)
  794. return -EIO;
  795. ret = do_get_thread_area(child, addr,
  796. (struct user_desc __user *)data);
  797. break;
  798. case PTRACE_SET_THREAD_AREA:
  799. if ((int) addr < 0)
  800. return -EIO;
  801. ret = do_set_thread_area(child, addr,
  802. (struct user_desc __user *)data, 0);
  803. break;
  804. #endif
  805. #ifdef CONFIG_X86_64
  806. /* normal 64bit interface to access TLS data.
  807. Works just like arch_prctl, except that the arguments
  808. are reversed. */
  809. case PTRACE_ARCH_PRCTL:
  810. ret = do_arch_prctl(child, data, addr);
  811. break;
  812. #endif
  813. default:
  814. ret = ptrace_request(child, request, addr, data);
  815. break;
  816. }
  817. return ret;
  818. }
  819. #ifdef CONFIG_IA32_EMULATION
  820. #include <linux/compat.h>
  821. #include <linux/syscalls.h>
  822. #include <asm/ia32.h>
  823. #include <asm/user32.h>
  824. #define R32(l,q) \
  825. case offsetof(struct user32, regs.l): \
  826. regs->q = value; break
  827. #define SEG32(rs) \
  828. case offsetof(struct user32, regs.rs): \
  829. return set_segment_reg(child, \
  830. offsetof(struct user_regs_struct, rs), \
  831. value); \
  832. break
  833. static int putreg32(struct task_struct *child, unsigned regno, u32 value)
  834. {
  835. struct pt_regs *regs = task_pt_regs(child);
  836. switch (regno) {
  837. SEG32(cs);
  838. SEG32(ds);
  839. SEG32(es);
  840. SEG32(fs);
  841. SEG32(gs);
  842. SEG32(ss);
  843. R32(ebx, bx);
  844. R32(ecx, cx);
  845. R32(edx, dx);
  846. R32(edi, di);
  847. R32(esi, si);
  848. R32(ebp, bp);
  849. R32(eax, ax);
  850. R32(eip, ip);
  851. R32(esp, sp);
  852. case offsetof(struct user32, regs.orig_eax):
  853. /*
  854. * A 32-bit debugger setting orig_eax means to restore
  855. * the state of the task restarting a 32-bit syscall.
  856. * Make sure we interpret the -ERESTART* codes correctly
  857. * in case the task is not actually still sitting at the
  858. * exit from a 32-bit syscall with TS_COMPAT still set.
  859. */
  860. regs->orig_ax = value;
  861. if (syscall_get_nr(child, regs) >= 0)
  862. task_thread_info(child)->status |= TS_COMPAT;
  863. break;
  864. case offsetof(struct user32, regs.eflags):
  865. return set_flags(child, value);
  866. case offsetof(struct user32, u_debugreg[0]) ...
  867. offsetof(struct user32, u_debugreg[7]):
  868. regno -= offsetof(struct user32, u_debugreg[0]);
  869. return ptrace_set_debugreg(child, regno / 4, value);
  870. default:
  871. if (regno > sizeof(struct user32) || (regno & 3))
  872. return -EIO;
  873. /*
  874. * Other dummy fields in the virtual user structure
  875. * are ignored
  876. */
  877. break;
  878. }
  879. return 0;
  880. }
  881. #undef R32
  882. #undef SEG32
  883. #define R32(l,q) \
  884. case offsetof(struct user32, regs.l): \
  885. *val = regs->q; break
  886. #define SEG32(rs) \
  887. case offsetof(struct user32, regs.rs): \
  888. *val = get_segment_reg(child, \
  889. offsetof(struct user_regs_struct, rs)); \
  890. break
  891. static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
  892. {
  893. struct pt_regs *regs = task_pt_regs(child);
  894. switch (regno) {
  895. SEG32(ds);
  896. SEG32(es);
  897. SEG32(fs);
  898. SEG32(gs);
  899. R32(cs, cs);
  900. R32(ss, ss);
  901. R32(ebx, bx);
  902. R32(ecx, cx);
  903. R32(edx, dx);
  904. R32(edi, di);
  905. R32(esi, si);
  906. R32(ebp, bp);
  907. R32(eax, ax);
  908. R32(orig_eax, orig_ax);
  909. R32(eip, ip);
  910. R32(esp, sp);
  911. case offsetof(struct user32, regs.eflags):
  912. *val = get_flags(child);
  913. break;
  914. case offsetof(struct user32, u_debugreg[0]) ...
  915. offsetof(struct user32, u_debugreg[7]):
  916. regno -= offsetof(struct user32, u_debugreg[0]);
  917. *val = ptrace_get_debugreg(child, regno / 4);
  918. break;
  919. default:
  920. if (regno > sizeof(struct user32) || (regno & 3))
  921. return -EIO;
  922. /*
  923. * Other dummy fields in the virtual user structure
  924. * are ignored
  925. */
  926. *val = 0;
  927. break;
  928. }
  929. return 0;
  930. }
  931. #undef R32
  932. #undef SEG32
  933. static int genregs32_get(struct task_struct *target,
  934. const struct user_regset *regset,
  935. unsigned int pos, unsigned int count,
  936. void *kbuf, void __user *ubuf)
  937. {
  938. if (kbuf) {
  939. compat_ulong_t *k = kbuf;
  940. while (count >= sizeof(*k)) {
  941. getreg32(target, pos, k++);
  942. count -= sizeof(*k);
  943. pos += sizeof(*k);
  944. }
  945. } else {
  946. compat_ulong_t __user *u = ubuf;
  947. while (count >= sizeof(*u)) {
  948. compat_ulong_t word;
  949. getreg32(target, pos, &word);
  950. if (__put_user(word, u++))
  951. return -EFAULT;
  952. count -= sizeof(*u);
  953. pos += sizeof(*u);
  954. }
  955. }
  956. return 0;
  957. }
  958. static int genregs32_set(struct task_struct *target,
  959. const struct user_regset *regset,
  960. unsigned int pos, unsigned int count,
  961. const void *kbuf, const void __user *ubuf)
  962. {
  963. int ret = 0;
  964. if (kbuf) {
  965. const compat_ulong_t *k = kbuf;
  966. while (count >= sizeof(*k) && !ret) {
  967. ret = putreg32(target, pos, *k++);
  968. count -= sizeof(*k);
  969. pos += sizeof(*k);
  970. }
  971. } else {
  972. const compat_ulong_t __user *u = ubuf;
  973. while (count >= sizeof(*u) && !ret) {
  974. compat_ulong_t word;
  975. ret = __get_user(word, u++);
  976. if (ret)
  977. break;
  978. ret = putreg32(target, pos, word);
  979. count -= sizeof(*u);
  980. pos += sizeof(*u);
  981. }
  982. }
  983. return ret;
  984. }
  985. #ifdef CONFIG_X86_X32_ABI
  986. static long x32_arch_ptrace(struct task_struct *child,
  987. compat_long_t request, compat_ulong_t caddr,
  988. compat_ulong_t cdata)
  989. {
  990. unsigned long addr = caddr;
  991. unsigned long data = cdata;
  992. void __user *datap = compat_ptr(data);
  993. int ret;
  994. switch (request) {
  995. /* Read 32bits at location addr in the USER area. Only allow
  996. to return the lower 32bits of segment and debug registers. */
  997. case PTRACE_PEEKUSR: {
  998. u32 tmp;
  999. ret = -EIO;
  1000. if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
  1001. addr < offsetof(struct user_regs_struct, cs))
  1002. break;
  1003. tmp = 0; /* Default return condition */
  1004. if (addr < sizeof(struct user_regs_struct))
  1005. tmp = getreg(child, addr);
  1006. else if (addr >= offsetof(struct user, u_debugreg[0]) &&
  1007. addr <= offsetof(struct user, u_debugreg[7])) {
  1008. addr -= offsetof(struct user, u_debugreg[0]);
  1009. tmp = ptrace_get_debugreg(child, addr / sizeof(data));
  1010. }
  1011. ret = put_user(tmp, (__u32 __user *)datap);
  1012. break;
  1013. }
  1014. /* Write the word at location addr in the USER area. Only allow
  1015. to update segment and debug registers with the upper 32bits
  1016. zero-extended. */
  1017. case PTRACE_POKEUSR:
  1018. ret = -EIO;
  1019. if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
  1020. addr < offsetof(struct user_regs_struct, cs))
  1021. break;
  1022. if (addr < sizeof(struct user_regs_struct))
  1023. ret = putreg(child, addr, data);
  1024. else if (addr >= offsetof(struct user, u_debugreg[0]) &&
  1025. addr <= offsetof(struct user, u_debugreg[7])) {
  1026. addr -= offsetof(struct user, u_debugreg[0]);
  1027. ret = ptrace_set_debugreg(child,
  1028. addr / sizeof(data), data);
  1029. }
  1030. break;
  1031. case PTRACE_GETREGS: /* Get all gp regs from the child. */
  1032. return copy_regset_to_user(child,
  1033. task_user_regset_view(current),
  1034. REGSET_GENERAL,
  1035. 0, sizeof(struct user_regs_struct),
  1036. datap);
  1037. case PTRACE_SETREGS: /* Set all gp regs in the child. */
  1038. return copy_regset_from_user(child,
  1039. task_user_regset_view(current),
  1040. REGSET_GENERAL,
  1041. 0, sizeof(struct user_regs_struct),
  1042. datap);
  1043. case PTRACE_GETFPREGS: /* Get the child FPU state. */
  1044. return copy_regset_to_user(child,
  1045. task_user_regset_view(current),
  1046. REGSET_FP,
  1047. 0, sizeof(struct user_i387_struct),
  1048. datap);
  1049. case PTRACE_SETFPREGS: /* Set the child FPU state. */
  1050. return copy_regset_from_user(child,
  1051. task_user_regset_view(current),
  1052. REGSET_FP,
  1053. 0, sizeof(struct user_i387_struct),
  1054. datap);
  1055. default:
  1056. return compat_ptrace_request(child, request, addr, data);
  1057. }
  1058. return ret;
  1059. }
  1060. #endif
  1061. long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
  1062. compat_ulong_t caddr, compat_ulong_t cdata)
  1063. {
  1064. unsigned long addr = caddr;
  1065. unsigned long data = cdata;
  1066. void __user *datap = compat_ptr(data);
  1067. int ret;
  1068. __u32 val;
  1069. #ifdef CONFIG_X86_X32_ABI
  1070. if (!is_ia32_task())
  1071. return x32_arch_ptrace(child, request, caddr, cdata);
  1072. #endif
  1073. switch (request) {
  1074. case PTRACE_PEEKUSR:
  1075. ret = getreg32(child, addr, &val);
  1076. if (ret == 0)
  1077. ret = put_user(val, (__u32 __user *)datap);
  1078. break;
  1079. case PTRACE_POKEUSR:
  1080. ret = putreg32(child, addr, data);
  1081. break;
  1082. case PTRACE_GETREGS: /* Get all gp regs from the child. */
  1083. return copy_regset_to_user(child, &user_x86_32_view,
  1084. REGSET_GENERAL,
  1085. 0, sizeof(struct user_regs_struct32),
  1086. datap);
  1087. case PTRACE_SETREGS: /* Set all gp regs in the child. */
  1088. return copy_regset_from_user(child, &user_x86_32_view,
  1089. REGSET_GENERAL, 0,
  1090. sizeof(struct user_regs_struct32),
  1091. datap);
  1092. case PTRACE_GETFPREGS: /* Get the child FPU state. */
  1093. return copy_regset_to_user(child, &user_x86_32_view,
  1094. REGSET_FP, 0,
  1095. sizeof(struct user_i387_ia32_struct),
  1096. datap);
  1097. case PTRACE_SETFPREGS: /* Set the child FPU state. */
  1098. return copy_regset_from_user(
  1099. child, &user_x86_32_view, REGSET_FP,
  1100. 0, sizeof(struct user_i387_ia32_struct), datap);
  1101. case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
  1102. return copy_regset_to_user(child, &user_x86_32_view,
  1103. REGSET_XFP, 0,
  1104. sizeof(struct user32_fxsr_struct),
  1105. datap);
  1106. case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
  1107. return copy_regset_from_user(child, &user_x86_32_view,
  1108. REGSET_XFP, 0,
  1109. sizeof(struct user32_fxsr_struct),
  1110. datap);
  1111. case PTRACE_GET_THREAD_AREA:
  1112. case PTRACE_SET_THREAD_AREA:
  1113. return arch_ptrace(child, request, addr, data);
  1114. default:
  1115. return compat_ptrace_request(child, request, addr, data);
  1116. }
  1117. return ret;
  1118. }
  1119. #endif /* CONFIG_IA32_EMULATION */
  1120. #ifdef CONFIG_X86_64
  1121. static struct user_regset x86_64_regsets[] __read_mostly = {
  1122. [REGSET_GENERAL] = {
  1123. .core_note_type = NT_PRSTATUS,
  1124. .n = sizeof(struct user_regs_struct) / sizeof(long),
  1125. .size = sizeof(long), .align = sizeof(long),
  1126. .get = genregs_get, .set = genregs_set
  1127. },
  1128. [REGSET_FP] = {
  1129. .core_note_type = NT_PRFPREG,
  1130. .n = sizeof(struct user_i387_struct) / sizeof(long),
  1131. .size = sizeof(long), .align = sizeof(long),
  1132. .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
  1133. },
  1134. [REGSET_XSTATE] = {
  1135. .core_note_type = NT_X86_XSTATE,
  1136. .size = sizeof(u64), .align = sizeof(u64),
  1137. .active = xstateregs_active, .get = xstateregs_get,
  1138. .set = xstateregs_set
  1139. },
  1140. [REGSET_IOPERM64] = {
  1141. .core_note_type = NT_386_IOPERM,
  1142. .n = IO_BITMAP_LONGS,
  1143. .size = sizeof(long), .align = sizeof(long),
  1144. .active = ioperm_active, .get = ioperm_get
  1145. },
  1146. };
  1147. static const struct user_regset_view user_x86_64_view = {
  1148. .name = "x86_64", .e_machine = EM_X86_64,
  1149. .regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets)
  1150. };
  1151. #else /* CONFIG_X86_32 */
  /*
   * Without CONFIG_X86_64 the native layout and accessors stand in for the
   * "32" names referenced by x86_32_regsets[].
   */
  #define user_regs_struct32	user_regs_struct
  #define genregs32_get		genregs_get
  #define genregs32_set		genregs_set
  1155. #endif /* CONFIG_X86_64 */
  1156. #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
  1157. static struct user_regset x86_32_regsets[] __read_mostly = {
  1158. [REGSET_GENERAL] = {
  1159. .core_note_type = NT_PRSTATUS,
  1160. .n = sizeof(struct user_regs_struct32) / sizeof(u32),
  1161. .size = sizeof(u32), .align = sizeof(u32),
  1162. .get = genregs32_get, .set = genregs32_set
  1163. },
  1164. [REGSET_FP] = {
  1165. .core_note_type = NT_PRFPREG,
  1166. .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
  1167. .size = sizeof(u32), .align = sizeof(u32),
  1168. .active = fpregs_active, .get = fpregs_get, .set = fpregs_set
  1169. },
  1170. [REGSET_XFP] = {
  1171. .core_note_type = NT_PRXFPREG,
  1172. .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
  1173. .size = sizeof(u32), .align = sizeof(u32),
  1174. .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
  1175. },
  1176. [REGSET_XSTATE] = {
  1177. .core_note_type = NT_X86_XSTATE,
  1178. .size = sizeof(u64), .align = sizeof(u64),
  1179. .active = xstateregs_active, .get = xstateregs_get,
  1180. .set = xstateregs_set
  1181. },
  1182. [REGSET_TLS] = {
  1183. .core_note_type = NT_386_TLS,
  1184. .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
  1185. .size = sizeof(struct user_desc),
  1186. .align = sizeof(struct user_desc),
  1187. .active = regset_tls_active,
  1188. .get = regset_tls_get, .set = regset_tls_set
  1189. },
  1190. [REGSET_IOPERM32] = {
  1191. .core_note_type = NT_386_IOPERM,
  1192. .n = IO_BITMAP_BYTES / sizeof(u32),
  1193. .size = sizeof(u32), .align = sizeof(u32),
  1194. .active = ioperm_active, .get = ioperm_get
  1195. },
  1196. };
  1197. static const struct user_regset_view user_x86_32_view = {
  1198. .name = "i386", .e_machine = EM_386,
  1199. .regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets)
  1200. };
  1201. #endif
  1202. /*
  1203. * This represents bytes 464..511 in the memory layout exported through
  1204. * the REGSET_XSTATE interface.
  1205. */
  1206. u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
  1207. void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
  1208. {
  1209. #ifdef CONFIG_X86_64
  1210. x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
  1211. #endif
  1212. #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
  1213. x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
  1214. #endif
  1215. xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
  1216. }
  /*
   * task_user_regset_view - pick the regset view that applies to @task
   *
   * With CONFIG_IA32_EMULATION, tasks flagged TIF_IA32 get the i386 view and
   * all others fall through to the x86_64 view.  A CONFIG_X86_32 build
   * always returns the i386 view; a CONFIG_X86_64 build without emulation
   * always returns the x86_64 view.
   */
  const struct user_regset_view *task_user_regset_view(struct task_struct *task)
  {
  #if defined(CONFIG_IA32_EMULATION)
  	if (test_tsk_thread_flag(task, TIF_IA32))
  		return &user_x86_32_view;
  #elif defined(CONFIG_X86_32)
  	return &user_x86_32_view;
  #endif
  #if defined(CONFIG_X86_64)
  	return &user_x86_64_view;
  #endif
  }
  1229. static void fill_sigtrap_info(struct task_struct *tsk,
  1230. struct pt_regs *regs,
  1231. int error_code, int si_code,
  1232. struct siginfo *info)
  1233. {
  1234. tsk->thread.trap_nr = X86_TRAP_DB;
  1235. tsk->thread.error_code = error_code;
  1236. memset(info, 0, sizeof(*info));
  1237. info->si_signo = SIGTRAP;
  1238. info->si_code = si_code;
  1239. info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
  1240. }
  1241. void user_single_step_siginfo(struct task_struct *tsk,
  1242. struct pt_regs *regs,
  1243. struct siginfo *info)
  1244. {
  1245. fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info);
  1246. }
  1247. void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
  1248. int error_code, int si_code)
  1249. {
  1250. struct siginfo info;
  1251. fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
  1252. /* Send us the fake SIGTRAP */
  1253. force_sig_info(SIGTRAP, &info, tsk);
  1254. }
  1255. static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
  1256. {
  1257. #ifdef CONFIG_X86_64
  1258. if (arch == AUDIT_ARCH_X86_64) {
  1259. audit_syscall_entry(regs->orig_ax, regs->di,
  1260. regs->si, regs->dx, regs->r10);
  1261. } else
  1262. #endif
  1263. {
  1264. audit_syscall_entry(regs->orig_ax, regs->bx,
  1265. regs->cx, regs->dx, regs->si);
  1266. }
  1267. }
  1268. /*
  1269. * We can return 0 to resume the syscall or anything else to go to phase
  1270. * 2. If we resume the syscall, we need to put something appropriate in
  1271. * regs->orig_ax.
  1272. *
  1273. * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
  1274. * are fully functional.
  1275. *
  1276. * For phase 2's benefit, our return value is:
  1277. * 0: resume the syscall
  1278. * 1: go to phase 2; no seccomp phase 2 needed
  1279. * anything else: go to phase 2; pass return value to seccomp
  1280. */
  1281. unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
  1282. {
  1283. unsigned long ret = 0;
  1284. u32 work;
  1285. BUG_ON(regs != task_pt_regs(current));
  1286. work = ACCESS_ONCE(current_thread_info()->flags) &
  1287. _TIF_WORK_SYSCALL_ENTRY;
  1288. /*
  1289. * If TIF_NOHZ is set, we are required to call user_exit() before
  1290. * doing anything that could touch RCU.
  1291. */
  1292. if (work & _TIF_NOHZ) {
  1293. user_exit();
  1294. work &= ~_TIF_NOHZ;
  1295. }
  1296. #ifdef CONFIG_SECCOMP
  1297. /*
  1298. * Do seccomp first -- it should minimize exposure of other
  1299. * code, and keeping seccomp fast is probably more valuable
  1300. * than the rest of this.
  1301. */
  1302. if (work & _TIF_SECCOMP) {
  1303. struct seccomp_data sd;
  1304. sd.arch = arch;
  1305. sd.nr = regs->orig_ax;
  1306. sd.instruction_pointer = regs->ip;
  1307. #ifdef CONFIG_X86_64
  1308. if (arch == AUDIT_ARCH_X86_64) {
  1309. sd.args[0] = regs->di;
  1310. sd.args[1] = regs->si;
  1311. sd.args[2] = regs->dx;
  1312. sd.args[3] = regs->r10;
  1313. sd.args[4] = regs->r8;
  1314. sd.args[5] = regs->r9;
  1315. } else
  1316. #endif
  1317. {
  1318. sd.args[0] = regs->bx;
  1319. sd.args[1] = regs->cx;
  1320. sd.args[2] = regs->dx;
  1321. sd.args[3] = regs->si;
  1322. sd.args[4] = regs->di;
  1323. sd.args[5] = regs->bp;
  1324. }
  1325. BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
  1326. BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
  1327. ret = seccomp_phase1(&sd);
  1328. if (ret == SECCOMP_PHASE1_SKIP) {
  1329. regs->orig_ax = -1;
  1330. ret = 0;
  1331. } else if (ret != SECCOMP_PHASE1_OK) {
  1332. return ret; /* Go directly to phase 2 */
  1333. }
  1334. work &= ~_TIF_SECCOMP;
  1335. }
  1336. #endif
  1337. /* Do our best to finish without phase 2. */
  1338. if (work == 0)
  1339. return ret; /* seccomp and/or nohz only (ret == 0 here) */
  1340. #ifdef CONFIG_AUDITSYSCALL
  1341. if (work == _TIF_SYSCALL_AUDIT) {
  1342. /*
  1343. * If there is no more work to be done except auditing,
  1344. * then audit in phase 1. Phase 2 always audits, so, if
  1345. * we audit here, then we can't go on to phase 2.
  1346. */
  1347. do_audit_syscall_entry(regs, arch);
  1348. return 0;
  1349. }
  1350. #endif
  1351. return 1; /* Something is enabled that we can't handle in phase 1 */
  1352. }
  1353. /* Returns the syscall nr to run (which should match regs->orig_ax). */
  1354. long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
  1355. unsigned long phase1_result)
  1356. {
  1357. long ret = 0;
  1358. u32 work = ACCESS_ONCE(current_thread_info()->flags) &
  1359. _TIF_WORK_SYSCALL_ENTRY;
  1360. BUG_ON(regs != task_pt_regs(current));
  1361. /*
  1362. * If we stepped into a sysenter/syscall insn, it trapped in
  1363. * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
  1364. * If user-mode had set TF itself, then it's still clear from
  1365. * do_debug() and we need to set it again to restore the user
  1366. * state. If we entered on the slow path, TF was already set.
  1367. */
  1368. if (work & _TIF_SINGLESTEP)
  1369. regs->flags |= X86_EFLAGS_TF;
  1370. #ifdef CONFIG_SECCOMP
  1371. /*
  1372. * Call seccomp_phase2 before running the other hooks so that
  1373. * they can see any changes made by a seccomp tracer.
  1374. */
  1375. if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
  1376. /* seccomp failures shouldn't expose any additional code. */
  1377. return -1;
  1378. }
  1379. #endif
  1380. if (unlikely(work & _TIF_SYSCALL_EMU))
  1381. ret = -1L;
  1382. if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
  1383. tracehook_report_syscall_entry(regs))
  1384. ret = -1L;
  1385. if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
  1386. trace_sys_enter(regs, regs->orig_ax);
  1387. do_audit_syscall_entry(regs, arch);
  1388. return ret ?: regs->orig_ax;
  1389. }
  1390. long syscall_trace_enter(struct pt_regs *regs)
  1391. {
  1392. u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
  1393. unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
  1394. if (phase1_result == 0)
  1395. return regs->orig_ax;
  1396. else
  1397. return syscall_trace_enter_phase2(regs, arch, phase1_result);
  1398. }
  1399. void syscall_trace_leave(struct pt_regs *regs)
  1400. {
  1401. bool step;
  1402. /*
  1403. * We may come here right after calling schedule_user()
  1404. * or do_notify_resume(), in which case we can be in RCU
  1405. * user mode.
  1406. */
  1407. user_exit();
  1408. audit_syscall_exit(regs);
  1409. if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
  1410. trace_sys_exit(regs, regs->ax);
  1411. /*
  1412. * If TIF_SYSCALL_EMU is set, we only get here because of
  1413. * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
  1414. * We already reported this syscall instruction in
  1415. * syscall_trace_enter().
  1416. */
  1417. step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
  1418. !test_thread_flag(TIF_SYSCALL_EMU);
  1419. if (step || test_thread_flag(TIF_SYSCALL_TRACE))
  1420. tracehook_report_syscall_exit(regs, step);
  1421. user_enter();
  1422. }