|
@@ -0,0 +1,442 @@
|
|
|
+perf.data format
|
|
|
+
|
|
|
+Up to date as of v4.7
|
|
|
+
|
|
|
+This document describes the on-disk perf.data format, generated by perf record
|
|
|
+or perf inject and consumed by the other perf tools.
|
|
|
+
|
|
|
+On a high level perf.data contains the events generated by the PMUs, plus metadata.
|
|
|
+
|
|
|
+All fields are in the native endianness of the machine that generated the perf.data file.
|
|
|
+
|
|
|
+When perf is writing to a pipe it uses a special version of the file
|
|
|
+format that does not rely on seeking to adjust data offsets. This
|
|
|
+format is not described here. The pipe version can be converted to
|
|
|
+normal perf.data with perf inject.
|
|
|
+
|
|
|
+The file starts with a perf_header:
|
|
|
+
|
|
|
+struct perf_header {
|
|
|
+ char magic[8]; /* PERFILE2 */
|
|
|
+ uint64_t size; /* size of the header */
|
|
|
+ uint64_t attr_size; /* size of an attribute in attrs */
|
|
|
+ struct perf_file_section attrs;
|
|
|
+ struct perf_file_section data;
|
|
|
+ struct perf_file_section event_types;
|
|
|
+ uint64_t flags;
|
|
|
+ uint64_t flags1[3];
|
|
|
+};
|
|
|
+
|
|
|
+The magic number identifies the perf file and the version. Current perf versions
|
|
|
+use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1
|
|
|
+is not described here. The magic number also identifies the endian. When the
|
|
|
+magic value is 64-bit byte swapped compared to the expected value, the file is in non-native
|
|
|
+endian.
|
|
|
+
|
|
|
+A perf_file_section contains a pointer to another section of the perf file.
|
|
|
+The header contains three such pointers: for attributes, data and event types.
|
|
|
+
|
|
|
+struct perf_file_section {
|
|
|
+ uint64_t offset; /* offset from start of file */
|
|
|
+ uint64_t size; /* size of the section */
|
|
|
+};
|
|
|
+
|
|
|
+Flags section:
|
|
|
+
|
|
|
+The header is followed by different optional headers, described by the bits set
|
|
|
+in flags. Only headers for which the bit is set are included. Each header
|
|
|
+consists of a perf_file_section located after the initial header.
|
|
|
+The respective perf_file_section points to the data of the additional
|
|
|
+header and defines its size.
|
|
|
+
|
|
|
+Some headers consist of strings, which are defined like this:
|
|
|
+
|
|
|
+struct perf_header_string {
|
|
|
+ uint32_t len;
|
|
|
+ char string[len]; /* zero terminated */
|
|
|
+};
|
|
|
+
|
|
|
+Some headers consist of a sequence of strings, preceded by a count of the strings:
|
|
|
+
|
|
|
+struct perf_header_string_list {
|
|
|
+ uint32_t nr;
|
|
|
+ struct perf_header_string strings[nr]; /* variable length records */
|
|
|
+};
|
|
|
+
|
|
|
+The bits are the flags bits in a 256 bit bitmap starting with
|
|
|
+flags. These define the valid bits:
|
|
|
+
|
|
|
+ HEADER_RESERVED = 0, /* always cleared */
|
|
|
+ HEADER_FIRST_FEATURE = 1,
|
|
|
+ HEADER_TRACING_DATA = 1,
|
|
|
+
|
|
|
+Describe me.
|
|
|
+
|
|
|
+ HEADER_BUILD_ID = 2,
|
|
|
+
|
|
|
+The header consists of a sequence of build_id_event records. The size of each record
|
|
|
+is defined by header.size (see perf_event.h). Each event defines an ELF build id
|
|
|
+for an executable file name for a pid. An ELF build id is a unique identifier
|
|
|
+assigned by the linker to an executable.
|
|
|
+
|
|
|
+struct build_id_event {
|
|
|
+ struct perf_event_header header;
|
|
|
+ pid_t pid;
|
|
|
+ uint8_t build_id[24];
|
|
|
+ char filename[header.size - offsetof(struct build_id_event, filename)];
|
|
|
+};
|
|
|
+
|
|
|
+ HEADER_HOSTNAME = 3,
|
|
|
+
|
|
|
+A perf_header_string with the hostname where the data was collected
|
|
|
+(uname -n)
|
|
|
+
|
|
|
+ HEADER_OSRELEASE = 4,
|
|
|
+
|
|
|
+A perf_header_string with the os release where the data was collected
|
|
|
+(uname -r)
|
|
|
+
|
|
|
+ HEADER_VERSION = 5,
|
|
|
+
|
|
|
+A perf_header_string with the perf user tool version where the
|
|
|
+data was collected. This is the same as the version of the source tree
|
|
|
+the perf tool was built from.
|
|
|
+
|
|
|
+ HEADER_ARCH = 6,
|
|
|
+
|
|
|
+A perf_header_string with the CPU architecture (uname -m)
|
|
|
+
|
|
|
+ HEADER_NRCPUS = 7,
|
|
|
+
|
|
|
+A structure defining the number of CPUs.
|
|
|
+
|
|
|
+struct nr_cpus {
|
|
|
+ uint32_t nr_cpus_online;
|
|
|
+ uint32_t nr_cpus_available; /* CPUs not yet onlined */
|
|
|
+};
|
|
|
+
|
|
|
+ HEADER_CPUDESC = 8,
|
|
|
+
|
|
|
+A perf_header_string with description of the CPU. On x86 this is the model name
|
|
|
+in /proc/cpuinfo
|
|
|
+
|
|
|
+ HEADER_CPUID = 9,
|
|
|
+
|
|
|
+A perf_header_string with the exact CPU type. On x86 this is
|
|
|
+vendor,family,model,stepping. For example: GenuineIntel,6,69,1
|
|
|
+
|
|
|
+ HEADER_TOTAL_MEM = 10,
|
|
|
+
|
|
|
+An uint64_t with the total memory in bytes.
|
|
|
+
|
|
|
+ HEADER_CMDLINE = 11,
|
|
|
+
|
|
|
+A perf_header_string with the perf command line used to collect the data.
|
|
|
+
|
|
|
+ HEADER_EVENT_DESC = 12,
|
|
|
+
|
|
|
+Another description of the perf_event_attrs, more detailed than header.attrs
|
|
|
+including IDs and names. See perf_event.h or the man page for a description
|
|
|
+of a struct perf_event_attr.
|
|
|
+
|
|
|
+struct {
|
|
|
+ uint32_t nr; /* number of events */
|
|
|
+ uint32_t attr_size; /* size of each perf_event_attr */
|
|
|
+ struct {
|
|
|
+ struct perf_event_attr attr; /* size of attr_size */
|
|
|
+ uint32_t nr_ids;
|
|
|
+ struct perf_header_string event_string;
|
|
|
+ uint64_t ids[nr_ids];
|
|
|
+ } events[nr]; /* Variable length records */
|
|
|
+};
|
|
|
+
|
|
|
+ HEADER_CPU_TOPOLOGY = 13,
|
|
|
+
|
|
|
+String lists defining the core and CPU threads topology.
|
|
|
+
|
|
|
+struct {
|
|
|
+ struct perf_header_string_list cores; /* Variable length */
|
|
|
+ struct perf_header_string_list threads; /* Variable length */
|
|
|
+};
|
|
|
+
|
|
|
+Example:
|
|
|
+ sibling cores : 0-3
|
|
|
+ sibling threads : 0-1
|
|
|
+ sibling threads : 2-3
|
|
|
+
|
|
|
+ HEADER_NUMA_TOPOLOGY = 14,
|
|
|
+
|
|
|
+ A list of NUMA node descriptions
|
|
|
+
|
|
|
+struct {
|
|
|
+ uint32_t nr;
|
|
|
+ struct {
|
|
|
+ uint32_t nodenr;
|
|
|
+ uint64_t mem_total;
|
|
|
+ uint64_t mem_free;
|
|
|
+ struct perf_header_string cpus;
|
|
|
+ } nodes[nr]; /* Variable length records */
|
|
|
+};
|
|
|
+
|
|
|
+ HEADER_BRANCH_STACK = 15,
|
|
|
+
|
|
|
+Not implemented in perf.
|
|
|
+
|
|
|
+ HEADER_PMU_MAPPINGS = 16,
|
|
|
+
|
|
|
+ A list of PMU structures, defining the different PMUs supported by perf.
|
|
|
+
|
|
|
+struct {
|
|
|
+ uint32_t nr;
|
|
|
+ struct pmu {
|
|
|
+ uint32_t pmu_type;
|
|
|
+ struct perf_header_string pmu_name;
|
|
|
+ } [nr]; /* Variable length records */
|
|
|
+};
|
|
|
+
|
|
|
+ HEADER_GROUP_DESC = 17,
|
|
|
+
|
|
|
+ Description of counter groups ({...} in perf syntax)
|
|
|
+
|
|
|
+struct {
|
|
|
+ uint32_t nr;
|
|
|
+ struct {
|
|
|
+ struct perf_header_string string;
|
|
|
+ uint32_t leader_idx;
|
|
|
+ uint32_t nr_members;
|
|
|
+ } [nr]; /* Variable length records */
|
|
|
+};
|
|
|
+
|
|
|
+ HEADER_AUXTRACE = 18,
|
|
|
+
|
|
|
+Define additional auxtrace areas in the perf.data. auxtrace is used to store
|
|
|
+undecoded hardware tracing information, such as Intel Processor Trace data.
|
|
|
+
|
|
|
+/**
|
|
|
+ * struct auxtrace_index_entry - indexes an AUX area tracing event within a
|
|
|
+ * perf.data file.
|
|
|
+ * @file_offset: offset within the perf.data file
|
|
|
+ * @sz: size of the event
|
|
|
+ */
|
|
|
+struct auxtrace_index_entry {
|
|
|
+ u64 file_offset;
|
|
|
+ u64 sz;
|
|
|
+};
|
|
|
+
|
|
|
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
|
|
|
+
|
|
|
+/**
|
|
|
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
|
|
|
+ * file.
|
|
|
+ * @list: linking a number of arrays of entries
|
|
|
+ * @nr: number of entries
|
|
|
+ * @entries: array of entries
|
|
|
+ */
|
|
|
+struct auxtrace_index {
|
|
|
+ struct list_head list;
|
|
|
+ size_t nr;
|
|
|
+ struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
|
|
|
+};
|
|
|
+
|
|
|
+ other bits are reserved and should be ignored for now
|
|
|
+ HEADER_FEAT_BITS = 256,
|
|
|
+
|
|
|
+Attributes
|
|
|
+
|
|
|
+This is an array of perf_event_attrs, each attr_size bytes long, which defines
|
|
|
+each event collected. See perf_event.h or the man page for a detailed
|
|
|
+description.
|
|
|
+
|
|
|
+Data
|
|
|
+
|
|
|
+This section is the bulk of the file. It consists of a stream of perf_events
|
|
|
+describing events. This matches the format generated by the kernel.
|
|
|
+See perf_event.h or the manpage for a detailed description.
|
|
|
+
|
|
|
+Some notes on parsing:
|
|
|
+
|
|
|
+Ordering
|
|
|
+
|
|
|
+The events are not necessarily in time stamp order, as they can be
|
|
|
+collected in parallel on different CPUs. If the events should be
|
|
|
+processed in time order they need to be sorted first. It is possible
|
|
|
+to only do a partial sort using the FINISHED_ROUND event header (see
|
|
|
+below). perf record guarantees that there is no reordering over a
|
|
|
+FINISHED_ROUND.
|
|
|
+
|
|
|
+ID vs IDENTIFIER
|
|
|
+
|
|
|
+When the event stream contains multiple events each event is identified
|
|
|
+by an ID. This can be either through the PERF_SAMPLE_ID or the
|
|
|
+PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
|
|
|
+at a fixed offset from the event header, which allows reliable
|
|
|
+parsing of the header. Relying on ID may be ambiguous.
|
|
|
+IDENTIFIER is only supported by newer Linux kernels.
|
|
|
+
|
|
|
+Perf record specific events:
|
|
|
+
|
|
|
+In addition to the kernel generated event types perf record adds its
|
|
|
+own event types (in addition it also synthesizes some kernel events,
|
|
|
+for example MMAP events)
|
|
|
+
|
|
|
+ PERF_RECORD_USER_TYPE_START = 64,
|
|
|
+ PERF_RECORD_HEADER_ATTR = 64,
|
|
|
+
|
|
|
+struct attr_event {
|
|
|
+ struct perf_event_header header;
|
|
|
+ struct perf_event_attr attr;
|
|
|
+ uint64_t id[];
|
|
|
+};
|
|
|
+
|
|
|
+ PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
|
|
|
+
|
|
|
+#define MAX_EVENT_NAME 64
|
|
|
+
|
|
|
+struct perf_trace_event_type {
|
|
|
+ uint64_t event_id;
|
|
|
+ char name[MAX_EVENT_NAME];
|
|
|
+};
|
|
|
+
|
|
|
+struct event_type_event {
|
|
|
+ struct perf_event_header header;
|
|
|
+ struct perf_trace_event_type event_type;
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
+ PERF_RECORD_HEADER_TRACING_DATA = 66,
|
|
|
+
|
|
|
+Describe me
|
|
|
+
|
|
|
+struct tracing_data_event {
|
|
|
+ struct perf_event_header header;
|
|
|
+ uint32_t size;
|
|
|
+};
|
|
|
+
|
|
|
+ PERF_RECORD_HEADER_BUILD_ID = 67,
|
|
|
+
|
|
|
+Defines an ELF build ID for a referenced executable.
|
|
|
+
|
|
|
+ struct build_id_event; /* See above */
|
|
|
+
|
|
|
+ PERF_RECORD_FINISHED_ROUND = 68,
|
|
|
+
|
|
|
+No event reordering over this header. No payload.
|
|
|
+
|
|
|
+ PERF_RECORD_ID_INDEX = 69,
|
|
|
+
|
|
|
+Map event ids to CPUs and TIDs.
|
|
|
+
|
|
|
+struct id_index_entry {
|
|
|
+ uint64_t id;
|
|
|
+ uint64_t idx;
|
|
|
+ uint64_t cpu;
|
|
|
+ uint64_t tid;
|
|
|
+};
|
|
|
+
|
|
|
+struct id_index_event {
|
|
|
+ struct perf_event_header header;
|
|
|
+ uint64_t nr;
|
|
|
+ struct id_index_entry entries[nr];
|
|
|
+};
|
|
|
+
|
|
|
+ PERF_RECORD_AUXTRACE_INFO = 70,
|
|
|
+
|
|
|
+Auxtrace type specific information. Describe me
|
|
|
+
|
|
|
+struct auxtrace_info_event {
|
|
|
+ struct perf_event_header header;
|
|
|
+ uint32_t type;
|
|
|
+ uint32_t reserved__; /* For alignment */
|
|
|
+ uint64_t priv[];
|
|
|
+};
|
|
|
+
|
|
|
+ PERF_RECORD_AUXTRACE = 71,
|
|
|
+
|
|
|
+Defines auxtrace data. Followed by the actual data. The contents of
|
|
|
+the auxtrace data is dependent on the event and the CPU. For example
|
|
|
+for Intel Processor Trace it contains Processor Trace data generated
|
|
|
+by the CPU.
|
|
|
+
|
|
|
+struct auxtrace_event {
|
|
|
+ struct perf_event_header header;
|
|
|
+ uint64_t size;
|
|
|
+ uint64_t offset;
|
|
|
+ uint64_t reference;
|
|
|
+ uint32_t idx;
|
|
|
+ uint32_t tid;
|
|
|
+ uint32_t cpu;
|
|
|
+ uint32_t reserved__; /* For alignment */
|
|
|
+};
|
|
|
+
|
|
|
+struct aux_event {
|
|
|
+ struct perf_event_header header;
|
|
|
+ uint64_t aux_offset;
|
|
|
+ uint64_t aux_size;
|
|
|
+ uint64_t flags;
|
|
|
+};
|
|
|
+
|
|
|
+ PERF_RECORD_AUXTRACE_ERROR = 72,
|
|
|
+
|
|
|
+Describes an error in hardware tracing
|
|
|
+
|
|
|
+enum auxtrace_error_type {
|
|
|
+ PERF_AUXTRACE_ERROR_ITRACE = 1,
|
|
|
+ PERF_AUXTRACE_ERROR_MAX
|
|
|
+};
|
|
|
+
|
|
|
+#define MAX_AUXTRACE_ERROR_MSG 64
|
|
|
+
|
|
|
+struct auxtrace_error_event {
|
|
|
+ struct perf_event_header header;
|
|
|
+ uint32_t type;
|
|
|
+ uint32_t code;
|
|
|
+ uint32_t cpu;
|
|
|
+ uint32_t pid;
|
|
|
+ uint32_t tid;
|
|
|
+ uint32_t reserved__; /* For alignment */
|
|
|
+ uint64_t ip;
|
|
|
+ char msg[MAX_AUXTRACE_ERROR_MSG];
|
|
|
+};
|
|
|
+
|
|
|
+Event types
|
|
|
+
|
|
|
+Define the event attributes with their IDs.
|
|
|
+
|
|
|
+An array bound by the perf_file_section size.
|
|
|
+
|
|
|
+ struct {
|
|
|
+ struct perf_event_attr attr; /* Size defined by header.attr_size */
|
|
|
+ struct perf_file_section ids;
|
|
|
+ }
|
|
|
+
|
|
|
+ids points to an array of uint64_t defining the ids for event attr attr.
|
|
|
+
|
|
|
+References:
|
|
|
+
|
|
|
+include/uapi/linux/perf_event.h
|
|
|
+
|
|
|
+This is the canonical description of the kernel generated perf_events
|
|
|
+and the perf_event_attrs.
|
|
|
+
|
|
|
+perf_events manpage
|
|
|
+
|
|
|
+A manpage describing perf_event and perf_event_attr is here:
|
|
|
+http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html
|
|
|
+This tends to be slightly behind the kernel include, but has better
|
|
|
+descriptions. A (typically older) version of the man page may be
|
|
|
+included with the standard Linux man pages, available with "man
|
|
|
+perf_events"
|
|
|
+
|
|
|
+pmu-tools
|
|
|
+
|
|
|
+https://github.com/andikleen/pmu-tools/tree/master/parser
|
|
|
+
|
|
|
+A definition of the perf.data format in python "construct" format is available
|
|
|
+in pmu-tools parser. This allows reading perf.data from Python and dumping it.
|
|
|
+
|
|
|
+quipper
|
|
|
+
|
|
|
+The quipper C++ parser is available at
|
|
|
+https://chromium.googlesource.com/chromiumos/platform/chromiumos-wide-profiling/
|
|
|
+Unfortunately this parser tends to be many versions behind and may not be able
|
|
|
+to parse data files generated by recent perf.
|