perf_event.h 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190
  1. /*
  2. * Performance events:
  3. *
  4. * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
  5. * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
  6. * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
  7. *
  8. * Data type definitions, declarations, prototypes.
  9. *
  10. * Started by: Thomas Gleixner and Ingo Molnar
  11. *
  12. * For licencing details see kernel-base/COPYING
  13. */
  14. #ifndef _LINUX_PERF_EVENT_H
  15. #define _LINUX_PERF_EVENT_H
  16. #include <uapi/linux/perf_event.h>
  17. /*
  18. * Kernel-internal data types and definitions:
  19. */
  20. #ifdef CONFIG_PERF_EVENTS
  21. # include <asm/perf_event.h>
  22. # include <asm/local64.h>
  23. #endif
  24. struct perf_guest_info_callbacks {
  25. int (*is_in_guest)(void);
  26. int (*is_user_mode)(void);
  27. unsigned long (*get_guest_ip)(void);
  28. };
  29. #ifdef CONFIG_HAVE_HW_BREAKPOINT
  30. #include <asm/hw_breakpoint.h>
  31. #endif
  32. #include <linux/list.h>
  33. #include <linux/mutex.h>
  34. #include <linux/rculist.h>
  35. #include <linux/rcupdate.h>
  36. #include <linux/spinlock.h>
  37. #include <linux/hrtimer.h>
  38. #include <linux/fs.h>
  39. #include <linux/pid_namespace.h>
  40. #include <linux/workqueue.h>
  41. #include <linux/ftrace.h>
  42. #include <linux/cpu.h>
  43. #include <linux/irq_work.h>
  44. #include <linux/static_key.h>
  45. #include <linux/jump_label_ratelimit.h>
  46. #include <linux/atomic.h>
  47. #include <linux/sysfs.h>
  48. #include <linux/perf_regs.h>
  49. #include <linux/workqueue.h>
  50. #include <linux/cgroup.h>
  51. #include <asm/local.h>
  52. struct perf_callchain_entry {
  53. __u64 nr;
  54. __u64 ip[PERF_MAX_STACK_DEPTH];
  55. };
  56. struct perf_raw_record {
  57. u32 size;
  58. void *data;
  59. };
  60. /*
  61. * branch stack layout:
  62. * nr: number of taken branches stored in entries[]
  63. *
  64. * Note that nr can vary from sample to sample
  65. * branches (to, from) are stored from most recent
  66. * to least recent, i.e., entries[0] contains the most
  67. * recent branch.
  68. */
  69. struct perf_branch_stack {
  70. __u64 nr;
  71. struct perf_branch_entry entries[0];
  72. };
  73. struct task_struct;
  74. /*
  75. * extra PMU register associated with an event
  76. */
  77. struct hw_perf_event_extra {
  78. u64 config; /* register value */
  79. unsigned int reg; /* register address or index */
  80. int alloc; /* extra register already allocated */
  81. int idx; /* index in shared_regs->regs[] */
  82. };
  83. /**
  84. * struct hw_perf_event - performance event hardware details:
  85. */
  86. struct hw_perf_event {
  87. #ifdef CONFIG_PERF_EVENTS
  88. union {
  89. struct { /* hardware */
  90. u64 config;
  91. u64 last_tag;
  92. unsigned long config_base;
  93. unsigned long event_base;
  94. int event_base_rdpmc;
  95. int idx;
  96. int last_cpu;
  97. int flags;
  98. struct hw_perf_event_extra extra_reg;
  99. struct hw_perf_event_extra branch_reg;
  100. };
  101. struct { /* software */
  102. struct hrtimer hrtimer;
  103. };
  104. struct { /* tracepoint */
  105. /* for tp_event->class */
  106. struct list_head tp_list;
  107. };
  108. struct { /* intel_cqm */
  109. int cqm_state;
  110. u32 cqm_rmid;
  111. struct list_head cqm_events_entry;
  112. struct list_head cqm_groups_entry;
  113. struct list_head cqm_group_entry;
  114. };
  115. struct { /* itrace */
  116. int itrace_started;
  117. };
  118. #ifdef CONFIG_HAVE_HW_BREAKPOINT
  119. struct { /* breakpoint */
  120. /*
  121. * Crufty hack to avoid the chicken and egg
  122. * problem hw_breakpoint has with context
  123. * creation and event initalization.
  124. */
  125. struct arch_hw_breakpoint info;
  126. struct list_head bp_list;
  127. };
  128. #endif
  129. };
  130. /*
  131. * If the event is a per task event, this will point to the task in
  132. * question. See the comment in perf_event_alloc().
  133. */
  134. struct task_struct *target;
  135. /*
  136. * hw_perf_event::state flags; used to track the PERF_EF_* state.
  137. */
  138. #define PERF_HES_STOPPED 0x01 /* the counter is stopped */
  139. #define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */
  140. #define PERF_HES_ARCH 0x04
  141. int state;
  142. /*
  143. * The last observed hardware counter value, updated with a
  144. * local64_cmpxchg() such that pmu::read() can be called nested.
  145. */
  146. local64_t prev_count;
  147. /*
  148. * The period to start the next sample with.
  149. */
  150. u64 sample_period;
  151. /*
  152. * The period we started this sample with.
  153. */
  154. u64 last_period;
  155. /*
  156. * However much is left of the current period; note that this is
  157. * a full 64bit value and allows for generation of periods longer
  158. * than hardware might allow.
  159. */
  160. local64_t period_left;
  161. /*
  162. * State for throttling the event, see __perf_event_overflow() and
  163. * perf_adjust_freq_unthr_context().
  164. */
  165. u64 interrupts_seq;
  166. u64 interrupts;
  167. /*
  168. * State for freq target events, see __perf_event_overflow() and
  169. * perf_adjust_freq_unthr_context().
  170. */
  171. u64 freq_time_stamp;
  172. u64 freq_count_stamp;
  173. #endif
  174. };
  175. struct perf_event;
  176. /*
  177. * Common implementation detail of pmu::{start,commit,cancel}_txn
  178. */
  179. #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */
  180. #define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */
  181. /**
  182. * pmu::capabilities flags
  183. */
  184. #define PERF_PMU_CAP_NO_INTERRUPT 0x01
  185. #define PERF_PMU_CAP_NO_NMI 0x02
  186. #define PERF_PMU_CAP_AUX_NO_SG 0x04
  187. #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08
  188. #define PERF_PMU_CAP_EXCLUSIVE 0x10
  189. #define PERF_PMU_CAP_ITRACE 0x20
  190. /**
  191. * struct pmu - generic performance monitoring unit
  192. */
  193. struct pmu {
  194. struct list_head entry;
  195. struct module *module;
  196. struct device *dev;
  197. const struct attribute_group **attr_groups;
  198. const char *name;
  199. int type;
  200. /*
  201. * various common per-pmu feature flags
  202. */
  203. int capabilities;
  204. int * __percpu pmu_disable_count;
  205. struct perf_cpu_context * __percpu pmu_cpu_context;
  206. atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
  207. int task_ctx_nr;
  208. int hrtimer_interval_ms;
  209. /*
  210. * Fully disable/enable this PMU, can be used to protect from the PMI
  211. * as well as for lazy/batch writing of the MSRs.
  212. */
  213. void (*pmu_enable) (struct pmu *pmu); /* optional */
  214. void (*pmu_disable) (struct pmu *pmu); /* optional */
  215. /*
  216. * Try and initialize the event for this PMU.
  217. *
  218. * Returns:
  219. * -ENOENT -- @event is not for this PMU
  220. *
  221. * -ENODEV -- @event is for this PMU but PMU not present
  222. * -EBUSY -- @event is for this PMU but PMU temporarily unavailable
  223. * -EINVAL -- @event is for this PMU but @event is not valid
  224. * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
  225. * -EACCESS -- @event is for this PMU, @event is valid, but no privilidges
  226. *
  227. * 0 -- @event is for this PMU and valid
  228. *
  229. * Other error return values are allowed.
  230. */
  231. int (*event_init) (struct perf_event *event);
  232. /*
  233. * Notification that the event was mapped or unmapped. Called
  234. * in the context of the mapping task.
  235. */
  236. void (*event_mapped) (struct perf_event *event); /*optional*/
  237. void (*event_unmapped) (struct perf_event *event); /*optional*/
  238. /*
  239. * Flags for ->add()/->del()/ ->start()/->stop(). There are
  240. * matching hw_perf_event::state flags.
  241. */
  242. #define PERF_EF_START 0x01 /* start the counter when adding */
  243. #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
  244. #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
  245. /*
  246. * Adds/Removes a counter to/from the PMU, can be done inside a
  247. * transaction, see the ->*_txn() methods.
  248. *
  249. * The add/del callbacks will reserve all hardware resources required
  250. * to service the event, this includes any counter constraint
  251. * scheduling etc.
  252. *
  253. * Called with IRQs disabled and the PMU disabled on the CPU the event
  254. * is on.
  255. *
  256. * ->add() called without PERF_EF_START should result in the same state
  257. * as ->add() followed by ->stop().
  258. *
  259. * ->del() must always PERF_EF_UPDATE stop an event. If it calls
  260. * ->stop() that must deal with already being stopped without
  261. * PERF_EF_UPDATE.
  262. */
  263. int (*add) (struct perf_event *event, int flags);
  264. void (*del) (struct perf_event *event, int flags);
  265. /*
  266. * Starts/Stops a counter present on the PMU.
  267. *
  268. * The PMI handler should stop the counter when perf_event_overflow()
  269. * returns !0. ->start() will be used to continue.
  270. *
  271. * Also used to change the sample period.
  272. *
  273. * Called with IRQs disabled and the PMU disabled on the CPU the event
  274. * is on -- will be called from NMI context with the PMU generates
  275. * NMIs.
  276. *
  277. * ->stop() with PERF_EF_UPDATE will read the counter and update
  278. * period/count values like ->read() would.
  279. *
  280. * ->start() with PERF_EF_RELOAD will reprogram the the counter
  281. * value, must be preceded by a ->stop() with PERF_EF_UPDATE.
  282. */
  283. void (*start) (struct perf_event *event, int flags);
  284. void (*stop) (struct perf_event *event, int flags);
  285. /*
  286. * Updates the counter value of the event.
  287. *
  288. * For sampling capable PMUs this will also update the software period
  289. * hw_perf_event::period_left field.
  290. */
  291. void (*read) (struct perf_event *event);
  292. /*
  293. * Group events scheduling is treated as a transaction, add
  294. * group events as a whole and perform one schedulability test.
  295. * If the test fails, roll back the whole group
  296. *
  297. * Start the transaction, after this ->add() doesn't need to
  298. * do schedulability tests.
  299. *
  300. * Optional.
  301. */
  302. void (*start_txn) (struct pmu *pmu, unsigned int txn_flags);
  303. /*
  304. * If ->start_txn() disabled the ->add() schedulability test
  305. * then ->commit_txn() is required to perform one. On success
  306. * the transaction is closed. On error the transaction is kept
  307. * open until ->cancel_txn() is called.
  308. *
  309. * Optional.
  310. */
  311. int (*commit_txn) (struct pmu *pmu);
  312. /*
  313. * Will cancel the transaction, assumes ->del() is called
  314. * for each successful ->add() during the transaction.
  315. *
  316. * Optional.
  317. */
  318. void (*cancel_txn) (struct pmu *pmu);
  319. /*
  320. * Will return the value for perf_event_mmap_page::index for this event,
  321. * if no implementation is provided it will default to: event->hw.idx + 1.
  322. */
  323. int (*event_idx) (struct perf_event *event); /*optional */
  324. /*
  325. * context-switches callback
  326. */
  327. void (*sched_task) (struct perf_event_context *ctx,
  328. bool sched_in);
  329. /*
  330. * PMU specific data size
  331. */
  332. size_t task_ctx_size;
  333. /*
  334. * Return the count value for a counter.
  335. */
  336. u64 (*count) (struct perf_event *event); /*optional*/
  337. /*
  338. * Set up pmu-private data structures for an AUX area
  339. */
  340. void *(*setup_aux) (int cpu, void **pages,
  341. int nr_pages, bool overwrite);
  342. /* optional */
  343. /*
  344. * Free pmu-private AUX data structures
  345. */
  346. void (*free_aux) (void *aux); /* optional */
  347. /*
  348. * Filter events for PMU-specific reasons.
  349. */
  350. int (*filter_match) (struct perf_event *event); /* optional */
  351. };
  352. /**
  353. * enum perf_event_active_state - the states of a event
  354. */
  355. enum perf_event_active_state {
  356. PERF_EVENT_STATE_DEAD = -4,
  357. PERF_EVENT_STATE_EXIT = -3,
  358. PERF_EVENT_STATE_ERROR = -2,
  359. PERF_EVENT_STATE_OFF = -1,
  360. PERF_EVENT_STATE_INACTIVE = 0,
  361. PERF_EVENT_STATE_ACTIVE = 1,
  362. };
  363. struct file;
  364. struct perf_sample_data;
  365. typedef void (*perf_overflow_handler_t)(struct perf_event *,
  366. struct perf_sample_data *,
  367. struct pt_regs *regs);
  368. enum perf_group_flag {
  369. PERF_GROUP_SOFTWARE = 0x1,
  370. };
  371. #define SWEVENT_HLIST_BITS 8
  372. #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
  373. struct swevent_hlist {
  374. struct hlist_head heads[SWEVENT_HLIST_SIZE];
  375. struct rcu_head rcu_head;
  376. };
  377. #define PERF_ATTACH_CONTEXT 0x01
  378. #define PERF_ATTACH_GROUP 0x02
  379. #define PERF_ATTACH_TASK 0x04
  380. #define PERF_ATTACH_TASK_DATA 0x08
  381. struct perf_cgroup;
  382. struct ring_buffer;
  383. /**
  384. * struct perf_event - performance event kernel representation:
  385. */
  386. struct perf_event {
  387. #ifdef CONFIG_PERF_EVENTS
  388. /*
  389. * entry onto perf_event_context::event_list;
  390. * modifications require ctx->lock
  391. * RCU safe iterations.
  392. */
  393. struct list_head event_entry;
  394. /*
  395. * XXX: group_entry and sibling_list should be mutually exclusive;
  396. * either you're a sibling on a group, or you're the group leader.
  397. * Rework the code to always use the same list element.
  398. *
  399. * Locked for modification by both ctx->mutex and ctx->lock; holding
  400. * either sufficies for read.
  401. */
  402. struct list_head group_entry;
  403. struct list_head sibling_list;
  404. /*
  405. * We need storage to track the entries in perf_pmu_migrate_context; we
  406. * cannot use the event_entry because of RCU and we want to keep the
  407. * group in tact which avoids us using the other two entries.
  408. */
  409. struct list_head migrate_entry;
  410. struct hlist_node hlist_entry;
  411. struct list_head active_entry;
  412. int nr_siblings;
  413. int group_flags;
  414. struct perf_event *group_leader;
  415. struct pmu *pmu;
  416. void *pmu_private;
  417. enum perf_event_active_state state;
  418. unsigned int attach_state;
  419. local64_t count;
  420. atomic64_t child_count;
  421. /*
  422. * These are the total time in nanoseconds that the event
  423. * has been enabled (i.e. eligible to run, and the task has
  424. * been scheduled in, if this is a per-task event)
  425. * and running (scheduled onto the CPU), respectively.
  426. *
  427. * They are computed from tstamp_enabled, tstamp_running and
  428. * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
  429. */
  430. u64 total_time_enabled;
  431. u64 total_time_running;
  432. /*
  433. * These are timestamps used for computing total_time_enabled
  434. * and total_time_running when the event is in INACTIVE or
  435. * ACTIVE state, measured in nanoseconds from an arbitrary point
  436. * in time.
  437. * tstamp_enabled: the notional time when the event was enabled
  438. * tstamp_running: the notional time when the event was scheduled on
  439. * tstamp_stopped: in INACTIVE state, the notional time when the
  440. * event was scheduled off.
  441. */
  442. u64 tstamp_enabled;
  443. u64 tstamp_running;
  444. u64 tstamp_stopped;
  445. /*
  446. * timestamp shadows the actual context timing but it can
  447. * be safely used in NMI interrupt context. It reflects the
  448. * context time as it was when the event was last scheduled in.
  449. *
  450. * ctx_time already accounts for ctx->timestamp. Therefore to
  451. * compute ctx_time for a sample, simply add perf_clock().
  452. */
  453. u64 shadow_ctx_time;
  454. struct perf_event_attr attr;
  455. u16 header_size;
  456. u16 id_header_size;
  457. u16 read_size;
  458. struct hw_perf_event hw;
  459. struct perf_event_context *ctx;
  460. atomic_long_t refcount;
  461. /*
  462. * These accumulate total time (in nanoseconds) that children
  463. * events have been enabled and running, respectively.
  464. */
  465. atomic64_t child_total_time_enabled;
  466. atomic64_t child_total_time_running;
  467. /*
  468. * Protect attach/detach and child_list:
  469. */
  470. struct mutex child_mutex;
  471. struct list_head child_list;
  472. struct perf_event *parent;
  473. int oncpu;
  474. int cpu;
  475. struct list_head owner_entry;
  476. struct task_struct *owner;
  477. /* mmap bits */
  478. struct mutex mmap_mutex;
  479. atomic_t mmap_count;
  480. struct ring_buffer *rb;
  481. struct list_head rb_entry;
  482. unsigned long rcu_batches;
  483. int rcu_pending;
  484. /* poll related */
  485. wait_queue_head_t waitq;
  486. struct fasync_struct *fasync;
  487. /* delayed work for NMIs and such */
  488. int pending_wakeup;
  489. int pending_kill;
  490. int pending_disable;
  491. struct irq_work pending;
  492. atomic_t event_limit;
  493. void (*destroy)(struct perf_event *);
  494. struct rcu_head rcu_head;
  495. struct pid_namespace *ns;
  496. u64 id;
  497. u64 (*clock)(void);
  498. perf_overflow_handler_t overflow_handler;
  499. void *overflow_handler_context;
  500. #ifdef CONFIG_EVENT_TRACING
  501. struct trace_event_call *tp_event;
  502. struct event_filter *filter;
  503. #ifdef CONFIG_FUNCTION_TRACER
  504. struct ftrace_ops ftrace_ops;
  505. #endif
  506. #endif
  507. #ifdef CONFIG_CGROUP_PERF
  508. struct perf_cgroup *cgrp; /* cgroup event is attach to */
  509. int cgrp_defer_enabled;
  510. #endif
  511. #endif /* CONFIG_PERF_EVENTS */
  512. };
  513. /**
  514. * struct perf_event_context - event context structure
  515. *
  516. * Used as a container for task events and CPU events as well:
  517. */
  518. struct perf_event_context {
  519. struct pmu *pmu;
  520. /*
  521. * Protect the states of the events in the list,
  522. * nr_active, and the list:
  523. */
  524. raw_spinlock_t lock;
  525. /*
  526. * Protect the list of events. Locking either mutex or lock
  527. * is sufficient to ensure the list doesn't change; to change
  528. * the list you need to lock both the mutex and the spinlock.
  529. */
  530. struct mutex mutex;
  531. struct list_head active_ctx_list;
  532. struct list_head pinned_groups;
  533. struct list_head flexible_groups;
  534. struct list_head event_list;
  535. int nr_events;
  536. int nr_active;
  537. int is_active;
  538. int nr_stat;
  539. int nr_freq;
  540. int rotate_disable;
  541. atomic_t refcount;
  542. struct task_struct *task;
  543. /*
  544. * Context clock, runs when context enabled.
  545. */
  546. u64 time;
  547. u64 timestamp;
  548. /*
  549. * These fields let us detect when two contexts have both
  550. * been cloned (inherited) from a common ancestor.
  551. */
  552. struct perf_event_context *parent_ctx;
  553. u64 parent_gen;
  554. u64 generation;
  555. int pin_count;
  556. int nr_cgroups; /* cgroup evts */
  557. void *task_ctx_data; /* pmu specific data */
  558. struct rcu_head rcu_head;
  559. };
  560. /*
  561. * Number of contexts where an event can trigger:
  562. * task, softirq, hardirq, nmi.
  563. */
  564. #define PERF_NR_CONTEXTS 4
  565. /**
  566. * struct perf_event_cpu_context - per cpu event context structure
  567. */
  568. struct perf_cpu_context {
  569. struct perf_event_context ctx;
  570. struct perf_event_context *task_ctx;
  571. int active_oncpu;
  572. int exclusive;
  573. raw_spinlock_t hrtimer_lock;
  574. struct hrtimer hrtimer;
  575. ktime_t hrtimer_interval;
  576. unsigned int hrtimer_active;
  577. struct pmu *unique_pmu;
  578. struct perf_cgroup *cgrp;
  579. };
  580. struct perf_output_handle {
  581. struct perf_event *event;
  582. struct ring_buffer *rb;
  583. unsigned long wakeup;
  584. unsigned long size;
  585. union {
  586. void *addr;
  587. unsigned long head;
  588. };
  589. int page;
  590. };
  591. #ifdef CONFIG_CGROUP_PERF
  592. /*
  593. * perf_cgroup_info keeps track of time_enabled for a cgroup.
  594. * This is a per-cpu dynamically allocated data structure.
  595. */
  596. struct perf_cgroup_info {
  597. u64 time;
  598. u64 timestamp;
  599. };
  600. struct perf_cgroup {
  601. struct cgroup_subsys_state css;
  602. struct perf_cgroup_info __percpu *info;
  603. };
  604. /*
  605. * Must ensure cgroup is pinned (css_get) before calling
  606. * this function. In other words, we cannot call this function
  607. * if there is no cgroup event for the current CPU context.
  608. */
  609. static inline struct perf_cgroup *
  610. perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
  611. {
  612. return container_of(task_css_check(task, perf_event_cgrp_id,
  613. ctx ? lockdep_is_held(&ctx->lock)
  614. : true),
  615. struct perf_cgroup, css);
  616. }
  617. #endif /* CONFIG_CGROUP_PERF */
  618. #ifdef CONFIG_PERF_EVENTS
  619. extern void *perf_aux_output_begin(struct perf_output_handle *handle,
  620. struct perf_event *event);
  621. extern void perf_aux_output_end(struct perf_output_handle *handle,
  622. unsigned long size, bool truncated);
  623. extern int perf_aux_output_skip(struct perf_output_handle *handle,
  624. unsigned long size);
  625. extern void *perf_get_aux(struct perf_output_handle *handle);
  626. extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
  627. extern void perf_pmu_unregister(struct pmu *pmu);
  628. extern int perf_num_counters(void);
  629. extern const char *perf_pmu_name(void);
  630. extern void __perf_event_task_sched_in(struct task_struct *prev,
  631. struct task_struct *task);
  632. extern void __perf_event_task_sched_out(struct task_struct *prev,
  633. struct task_struct *next);
  634. extern int perf_event_init_task(struct task_struct *child);
  635. extern void perf_event_exit_task(struct task_struct *child);
  636. extern void perf_event_free_task(struct task_struct *task);
  637. extern void perf_event_delayed_put(struct task_struct *task);
  638. extern struct file *perf_event_get(unsigned int fd);
  639. extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
  640. extern void perf_event_print_debug(void);
  641. extern void perf_pmu_disable(struct pmu *pmu);
  642. extern void perf_pmu_enable(struct pmu *pmu);
  643. extern void perf_sched_cb_dec(struct pmu *pmu);
  644. extern void perf_sched_cb_inc(struct pmu *pmu);
  645. extern int perf_event_task_disable(void);
  646. extern int perf_event_task_enable(void);
  647. extern int perf_event_refresh(struct perf_event *event, int refresh);
  648. extern void perf_event_update_userpage(struct perf_event *event);
  649. extern int perf_event_release_kernel(struct perf_event *event);
  650. extern struct perf_event *
  651. perf_event_create_kernel_counter(struct perf_event_attr *attr,
  652. int cpu,
  653. struct task_struct *task,
  654. perf_overflow_handler_t callback,
  655. void *context);
  656. extern void perf_pmu_migrate_context(struct pmu *pmu,
  657. int src_cpu, int dst_cpu);
  658. extern u64 perf_event_read_local(struct perf_event *event);
  659. extern u64 perf_event_read_value(struct perf_event *event,
  660. u64 *enabled, u64 *running);
  661. struct perf_sample_data {
  662. /*
  663. * Fields set by perf_sample_data_init(), group so as to
  664. * minimize the cachelines touched.
  665. */
  666. u64 addr;
  667. struct perf_raw_record *raw;
  668. struct perf_branch_stack *br_stack;
  669. u64 period;
  670. u64 weight;
  671. u64 txn;
  672. union perf_mem_data_src data_src;
  673. /*
  674. * The other fields, optionally {set,used} by
  675. * perf_{prepare,output}_sample().
  676. */
  677. u64 type;
  678. u64 ip;
  679. struct {
  680. u32 pid;
  681. u32 tid;
  682. } tid_entry;
  683. u64 time;
  684. u64 id;
  685. u64 stream_id;
  686. struct {
  687. u32 cpu;
  688. u32 reserved;
  689. } cpu_entry;
  690. struct perf_callchain_entry *callchain;
  691. /*
  692. * regs_user may point to task_pt_regs or to regs_user_copy, depending
  693. * on arch details.
  694. */
  695. struct perf_regs regs_user;
  696. struct pt_regs regs_user_copy;
  697. struct perf_regs regs_intr;
  698. u64 stack_user_size;
  699. } ____cacheline_aligned;
  700. /* default value for data source */
  701. #define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
  702. PERF_MEM_S(LVL, NA) |\
  703. PERF_MEM_S(SNOOP, NA) |\
  704. PERF_MEM_S(LOCK, NA) |\
  705. PERF_MEM_S(TLB, NA))
  706. static inline void perf_sample_data_init(struct perf_sample_data *data,
  707. u64 addr, u64 period)
  708. {
  709. /* remaining struct members initialized in perf_prepare_sample() */
  710. data->addr = addr;
  711. data->raw = NULL;
  712. data->br_stack = NULL;
  713. data->period = period;
  714. data->weight = 0;
  715. data->data_src.val = PERF_MEM_NA;
  716. data->txn = 0;
  717. }
  718. extern void perf_output_sample(struct perf_output_handle *handle,
  719. struct perf_event_header *header,
  720. struct perf_sample_data *data,
  721. struct perf_event *event);
  722. extern void perf_prepare_sample(struct perf_event_header *header,
  723. struct perf_sample_data *data,
  724. struct perf_event *event,
  725. struct pt_regs *regs);
  726. extern int perf_event_overflow(struct perf_event *event,
  727. struct perf_sample_data *data,
  728. struct pt_regs *regs);
  729. extern void perf_event_output(struct perf_event *event,
  730. struct perf_sample_data *data,
  731. struct pt_regs *regs);
  732. extern void
  733. perf_event_header__init_id(struct perf_event_header *header,
  734. struct perf_sample_data *data,
  735. struct perf_event *event);
  736. extern void
  737. perf_event__output_id_sample(struct perf_event *event,
  738. struct perf_output_handle *handle,
  739. struct perf_sample_data *sample);
  740. extern void
  741. perf_log_lost_samples(struct perf_event *event, u64 lost);
  742. static inline bool is_sampling_event(struct perf_event *event)
  743. {
  744. return event->attr.sample_period != 0;
  745. }
  746. /*
  747. * Return 1 for a software event, 0 for a hardware event
  748. */
  749. static inline int is_software_event(struct perf_event *event)
  750. {
  751. return event->pmu->task_ctx_nr == perf_sw_context;
  752. }
  753. extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
  754. extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
  755. extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
  756. #ifndef perf_arch_fetch_caller_regs
  757. static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
  758. #endif
  759. /*
  760. * Take a snapshot of the regs. Skip ip and frame pointer to
  761. * the nth caller. We only need a few of the regs:
  762. * - ip for PERF_SAMPLE_IP
  763. * - cs for user_mode() tests
  764. * - bp for callchains
  765. * - eflags, for future purposes, just in case
  766. */
  767. static inline void perf_fetch_caller_regs(struct pt_regs *regs)
  768. {
  769. memset(regs, 0, sizeof(*regs));
  770. perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
  771. }
  772. static __always_inline void
  773. perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
  774. {
  775. if (static_key_false(&perf_swevent_enabled[event_id]))
  776. __perf_sw_event(event_id, nr, regs, addr);
  777. }
  778. DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
  779. /*
  780. * 'Special' version for the scheduler, it hard assumes no recursion,
  781. * which is guaranteed by us not actually scheduling inside other swevents
  782. * because those disable preemption.
  783. */
  784. static __always_inline void
  785. perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
  786. {
  787. if (static_key_false(&perf_swevent_enabled[event_id])) {
  788. struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
  789. perf_fetch_caller_regs(regs);
  790. ___perf_sw_event(event_id, nr, regs, addr);
  791. }
  792. }
  793. extern struct static_key_false perf_sched_events;
  794. static __always_inline bool
  795. perf_sw_migrate_enabled(void)
  796. {
  797. if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
  798. return true;
  799. return false;
  800. }
  801. static inline void perf_event_task_migrate(struct task_struct *task)
  802. {
  803. if (perf_sw_migrate_enabled())
  804. task->sched_migrated = 1;
  805. }
  806. static inline void perf_event_task_sched_in(struct task_struct *prev,
  807. struct task_struct *task)
  808. {
  809. if (static_branch_unlikely(&perf_sched_events))
  810. __perf_event_task_sched_in(prev, task);
  811. if (perf_sw_migrate_enabled() && task->sched_migrated) {
  812. struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
  813. perf_fetch_caller_regs(regs);
  814. ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
  815. task->sched_migrated = 0;
  816. }
  817. }
  818. static inline void perf_event_task_sched_out(struct task_struct *prev,
  819. struct task_struct *next)
  820. {
  821. perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
  822. if (static_branch_unlikely(&perf_sched_events))
  823. __perf_event_task_sched_out(prev, next);
  824. }
  825. static inline u64 __perf_event_count(struct perf_event *event)
  826. {
  827. return local64_read(&event->count) + atomic64_read(&event->child_count);
  828. }
  /* Hooks taking a vma or a task; implemented outside this header. */
  extern void perf_event_mmap(struct vm_area_struct *vma);
  extern void perf_event_exec(void);
  extern void perf_event_comm(struct task_struct *tsk, bool exec);
  extern void perf_event_fork(struct task_struct *tsk);

  /* Guest info callbacks pointer and its register/unregister helpers. */
  extern struct perf_guest_info_callbacks *perf_guest_cbs;
  extern int
  perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
  extern int
  perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
  836. /* Callchains */
  837. DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
  838. extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
  839. extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);
  840. static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
  841. {
  842. if (entry->nr < PERF_MAX_STACK_DEPTH)
  843. entry->ip[entry->nr++] = ip;
  844. }
  845. extern int sysctl_perf_event_paranoid;
  846. extern int sysctl_perf_event_mlock;
  847. extern int sysctl_perf_event_sample_rate;
  848. extern int sysctl_perf_cpu_time_max_percent;
  849. extern void perf_sample_event_took(u64 sample_len_ns);
  850. extern int perf_proc_update_handler(struct ctl_table *table, int write,
  851. void __user *buffer, size_t *lenp,
  852. loff_t *ppos);
  853. extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
  854. void __user *buffer, size_t *lenp,
  855. loff_t *ppos);
  856. static inline bool perf_paranoid_tracepoint_raw(void)
  857. {
  858. return sysctl_perf_event_paranoid > -1;
  859. }
  860. static inline bool perf_paranoid_cpu(void)
  861. {
  862. return sysctl_perf_event_paranoid > 0;
  863. }
  864. static inline bool perf_paranoid_kernel(void)
  865. {
  866. return sysctl_perf_event_paranoid > 1;
  867. }
  868. extern void perf_event_init(void);
  869. extern void perf_tp_event(u64 addr, u64 count, void *record,
  870. int entry_size, struct pt_regs *regs,
  871. struct hlist_head *head, int rctx,
  872. struct task_struct *task);
  873. extern void perf_bp_event(struct perf_event *event, void *data);
  /*
   * Generic definitions, used only when perf_misc_flags has not been
   * defined already.
   */
  #ifndef perf_misc_flags
  #define perf_misc_flags(regs) \
  	(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
  #define perf_instruction_pointer(regs) \
  	instruction_pointer(regs)
  #endif
  879. static inline bool has_branch_stack(struct perf_event *event)
  880. {
  881. return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
  882. }
  883. static inline bool needs_branch_stack(struct perf_event *event)
  884. {
  885. return event->attr.branch_sample_type != 0;
  886. }
  887. static inline bool has_aux(struct perf_event *event)
  888. {
  889. return event->pmu->setup_aux;
  890. }
  891. extern int perf_output_begin(struct perf_output_handle *handle,
  892. struct perf_event *event, unsigned int size);
  893. extern void perf_output_end(struct perf_output_handle *handle);
  894. extern unsigned int perf_output_copy(struct perf_output_handle *handle,
  895. const void *buf, unsigned int len);
  896. extern unsigned int perf_output_skip(struct perf_output_handle *handle,
  897. unsigned int len);
  898. extern int perf_swevent_get_recursion_context(void);
  899. extern void perf_swevent_put_recursion_context(int rctx);
  900. extern u64 perf_swevent_set_period(struct perf_event *event);
  901. extern void perf_event_enable(struct perf_event *event);
  902. extern void perf_event_disable(struct perf_event *event);
  903. extern void perf_event_disable_local(struct perf_event *event);
  904. extern void perf_event_task_tick(void);
  905. #else /* !CONFIG_PERF_EVENTS: */
  /* !CONFIG_PERF_EVENTS stubs for the AUX output helpers. */
  static inline void *perf_aux_output_begin(struct perf_output_handle *handle,
  					  struct perf_event *event)
  {
  	return NULL;
  }

  static inline void perf_aux_output_end(struct perf_output_handle *handle,
  				       unsigned long size, bool truncated)
  {
  }

  static inline int perf_aux_output_skip(struct perf_output_handle *handle,
  				       unsigned long size)
  {
  	return -EINVAL;
  }

  static inline void *perf_get_aux(struct perf_output_handle *handle)
  {
  	return NULL;
  }
  /* !CONFIG_PERF_EVENTS stubs for the scheduler hooks: all do nothing. */
  static inline void perf_event_task_migrate(struct task_struct *task)
  {
  }

  static inline void perf_event_task_sched_in(struct task_struct *prev,
  					    struct task_struct *task)
  {
  }

  static inline void perf_event_task_sched_out(struct task_struct *prev,
  					     struct task_struct *next)
  {
  }
  /* !CONFIG_PERF_EVENTS stubs for the per-task helpers. */
  static inline int perf_event_init_task(struct task_struct *child)
  {
  	return 0;
  }

  static inline void perf_event_exit_task(struct task_struct *child)
  {
  }

  static inline void perf_event_free_task(struct task_struct *task)
  {
  }

  static inline void perf_event_delayed_put(struct task_struct *task)
  {
  }

  /* The two lookups below always yield ERR_PTR(-EINVAL). */
  static inline struct file *perf_event_get(unsigned int fd)
  {
  	return ERR_PTR(-EINVAL);
  }

  static inline const struct perf_event_attr *
  perf_event_attrs(struct perf_event *event)
  {
  	return ERR_PTR(-EINVAL);
  }
  934. static inline u64 perf_event_read_local(struct perf_event *event) { return -EINVAL; }
  935. static inline void perf_event_print_debug(void) { }
  936. static inline int perf_event_task_disable(void) { return -EINVAL; }
  937. static inline int perf_event_task_enable(void) { return -EINVAL; }
  938. static inline int perf_event_refresh(struct perf_event *event, int refresh)
  939. {
  940. return -EINVAL;
  941. }
  942. static inline void
  943. perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
  944. static inline void
  945. perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { }
  946. static inline void
  947. perf_bp_event(struct perf_event *event, void *data) { }
  948. static inline int perf_register_guest_info_callbacks
  949. (struct perf_guest_info_callbacks *callbacks) { return 0; }
  950. static inline int perf_unregister_guest_info_callbacks
  951. (struct perf_guest_info_callbacks *callbacks) { return 0; }
  952. static inline void perf_event_mmap(struct vm_area_struct *vma) { }
  953. static inline void perf_event_exec(void) { }
  954. static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
  955. static inline void perf_event_fork(struct task_struct *tsk) { }
  956. static inline void perf_event_init(void) { }
  957. static inline int perf_swevent_get_recursion_context(void) { return -1; }
  958. static inline void perf_swevent_put_recursion_context(int rctx) { }
  959. static inline u64 perf_swevent_set_period(struct perf_event *event) { return 0; }
  960. static inline void perf_event_enable(struct perf_event *event) { }
  961. static inline void perf_event_disable(struct perf_event *event) { }
  962. static inline int __perf_event_disable(void *info) { return -1; }
  963. static inline void perf_event_task_tick(void) { }
  964. static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
  965. #endif
  /*
   * perf_restore_debug_store() is a real function only when both
   * CONFIG_PERF_EVENTS and CONFIG_CPU_SUP_INTEL are set; otherwise it
   * is an empty inline.
   */
  #if !defined(CONFIG_PERF_EVENTS) || !defined(CONFIG_CPU_SUP_INTEL)
  static inline void perf_restore_debug_store(void)
  {
  }
  #else
  extern void perf_restore_debug_store(void);
  #endif
  /* Pass the address and sizeof() of @x to perf_output_copy() for @handle. */
  #define perf_output_put(handle, x) \
  	perf_output_copy((handle), &(x), sizeof(x))
  /*
   * The notifier registered here has to have a higher priority than
   * migration_notifier in sched/core.c.
   *
   * Before registration, @fn is called by hand for the current CPU with
   * CPU_UP_PREPARE, then CPU_STARTING (bracketed by local_irq_save() and
   * local_irq_restore()), then CPU_ONLINE.  Everything happens between
   * cpu_notifier_register_begin() and cpu_notifier_register_done().
   */
  #define perf_cpu_notifier(fn) \
  do { \
  	static struct notifier_block fn##_nb = { \
  		.notifier_call = fn, \
  		.priority = CPU_PRI_PERF, \
  	}; \
  	unsigned long cpu = smp_processor_id(); \
  	unsigned long flags; \
  	\
  	cpu_notifier_register_begin(); \
  	\
  	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \
  	   (void *)(unsigned long)cpu); \
  	\
  	local_irq_save(flags); \
  	fn(&fn##_nb, (unsigned long)CPU_STARTING, \
  	   (void *)(unsigned long)cpu); \
  	local_irq_restore(flags); \
  	\
  	fn(&fn##_nb, (unsigned long)CPU_ONLINE, \
  	   (void *)(unsigned long)cpu); \
  	\
  	__register_cpu_notifier(&fn##_nb); \
  	cpu_notifier_register_done(); \
  } while (0)
  /*
   * Stripped-down perf_cpu_notifier(): registers the notifier block only
   * and does not invoke the callback for already online CPUs.
   */
  #define __perf_cpu_notifier(fn) \
  do { \
  	static struct notifier_block fn##_nb = { \
  		.notifier_call = fn, \
  		.priority = CPU_PRI_PERF, \
  	}; \
  	\
  	__register_cpu_notifier(&fn##_nb); \
  } while (0)
  1005. struct perf_pmu_events_attr {
  1006. struct device_attribute attr;
  1007. u64 id;
  1008. const char *event_str;
  1009. };
  /* Show routine installed by PMU_EVENT_ATTR_STRING(); defined outside this header. */
  ssize_t perf_event_sysfs_show(struct device *dev,
  			      struct device_attribute *attr,
  			      char *page);
  /*
   * Define a static perf_pmu_events_attr named @_var: a 0444 attribute
   * called @_name, shown by @_show, carrying event id @_id.
   */
  #define PMU_EVENT_ATTR(_name, _var, _id, _show) \
  static struct perf_pmu_events_attr _var = { \
  	.attr = __ATTR(_name, 0444, _show, NULL), \
  	.id = _id, \
  };
  /*
   * Define a static perf_pmu_events_attr named @_var: a 0444 attribute
   * called @_name, shown by perf_event_sysfs_show(), with id 0 and
   * @_str as its event string.
   */
  #define PMU_EVENT_ATTR_STRING(_name, _var, _str) \
  static struct perf_pmu_events_attr _var = { \
  	.attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
  	.id = 0, \
  	.event_str = _str, \
  };
  /*
   * Generate <_name>_show(), which prints @_format followed by a newline
   * into @page, plus a read-only device attribute format_attr_<_name>.
   * The build fails if sizeof(@_format) is not smaller than PAGE_SIZE.
   * The expansion ends without a semicolon.
   */
  #define PMU_FORMAT_ATTR(_name, _format) \
  static ssize_t _name##_show(struct device *dev, \
  			    struct device_attribute *attr, char *page) \
  { \
  	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
  	return sprintf(page, _format "\n"); \
  } \
  \
  static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
  1034. #endif /* _LINUX_PERF_EVENT_H */