|
@@ -95,6 +95,7 @@ struct work_atoms {
|
|
|
u64 total_lat;
|
|
|
u64 nb_atoms;
|
|
|
u64 total_runtime;
|
|
|
+ int num_merged;
|
|
|
};
|
|
|
|
|
|
typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
|
|
@@ -168,9 +169,10 @@ struct perf_sched {
|
|
|
u64 all_runtime;
|
|
|
u64 all_count;
|
|
|
u64 cpu_last_switched[MAX_CPUS];
|
|
|
- struct rb_root atom_root, sorted_atom_root;
|
|
|
+ struct rb_root atom_root, sorted_atom_root, merged_atom_root;
|
|
|
struct list_head sort_list, cmp_pid;
|
|
|
bool force;
|
|
|
+ bool skip_merge;
|
|
|
};
|
|
|
|
|
|
static u64 get_nsecs(void)
|
|
@@ -1182,7 +1184,10 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
|
|
|
sched->all_runtime += work_list->total_runtime;
|
|
|
sched->all_count += work_list->nb_atoms;
|
|
|
|
|
|
- ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
|
|
|
+ if (work_list->num_merged > 1)
|
|
|
+ ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
|
|
|
+ else
|
|
|
+ ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
|
|
|
|
|
|
for (i = 0; i < 24 - ret; i++)
|
|
|
printf(" ");
|
|
@@ -1302,17 +1307,22 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
|
|
|
static void perf_sched__sort_lat(struct perf_sched *sched) /* Drain every work_atoms node from atom_root, then from merged_atom_root, re-inserting each one into sorted_atom_root. */
|
|
|
{
|
|
|
struct rb_node *node;
|
|
|
-
|
|
|
+ struct rb_root *root = &sched->atom_root; /* first pass drains atom_root */
|
|
|
+again: /* re-entered exactly once, after root has been switched to merged_atom_root */
|
|
|
for (;;) {
|
|
|
struct work_atoms *data;
|
|
|
- node = rb_first(&sched->atom_root);
|
|
|
+ node = rb_first(root); /* next node still in the tree being drained; NULL ends this pass */
|
|
|
if (!node)
|
|
|
break;
|
|
|
|
|
|
|
|
|
- rb_erase(node, &sched->atom_root);
|
|
|
+ rb_erase(node, root); /* unlink from the source tree before re-inserting elsewhere */
|
|
|
data = rb_entry(node, struct work_atoms, node);
|
|
|
__thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); /* presumably ordered by the keys in sort_list - confirm in __thread_latency_insert */
|
|
|
}
|
|
|
+ if (root == &sched->atom_root) { /* atom_root is empty now: repeat the loop for merged_atom_root */
|
|
|
+ root = &sched->merged_atom_root;
|
|
|
+ goto again;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
static int process_sched_wakeup_event(struct perf_tool *tool,
|
|
@@ -1572,6 +1582,59 @@ static void print_bad_events(struct perf_sched *sched)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) /* Insert @data into @root keyed by its thread's comm string; if a node with the same comm already exists, fold @data's totals into it and free @data instead. */
|
|
|
+{
|
|
|
+ struct rb_node **new = &(root->rb_node), *parent = NULL; /* new: link slot being examined; parent: node owning that slot */
|
|
|
+ struct work_atoms *this;
|
|
|
+ const char *comm = thread__comm_str(data->thread), *this_comm;
|
|
|
+
|
|
|
+ while (*new) { /* walk down until an empty slot or an equal comm is found */
|
|
|
+ int cmp;
|
|
|
+
|
|
|
+ this = container_of(*new, struct work_atoms, node);
|
|
|
+ parent = *new;
|
|
|
+
|
|
|
+ this_comm = thread__comm_str(this->thread);
|
|
|
+ cmp = strcmp(comm, this_comm); /* NOTE(review): assumes thread__comm_str() never returns NULL here - confirm */
|
|
|
+ if (cmp > 0) { /* comm compares greater than this node's: descend left */
|
|
|
+ new = &((*new)->rb_left);
|
|
|
+ } else if (cmp < 0) { /* comm compares smaller: descend right */
|
|
|
+ new = &((*new)->rb_right);
|
|
|
+ } else { /* same comm: merge @data into the existing node */
|
|
|
+ this->num_merged++; /* one more work_atoms folded into this node */
|
|
|
+ this->total_runtime += data->total_runtime;
|
|
|
+ this->nb_atoms += data->nb_atoms;
|
|
|
+ this->total_lat += data->total_lat;
|
|
|
+ list_splice(&data->work_list, &this->work_list); /* hand data's work_list entries over to this node */
|
|
|
+ if (this->max_lat < data->max_lat) { /* keep the larger max_lat together with its max_lat_at */
|
|
|
+ this->max_lat = data->max_lat;
|
|
|
+ this->max_lat_at = data->max_lat_at;
|
|
|
+ }
|
|
|
+ zfree(&data); /* data's totals and list now live in 'this'; drop the redundant entry */
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ data->num_merged++; /* no node with this comm yet: data is inserted and counts toward its own num_merged */
|
|
|
+ rb_link_node(&data->node, parent, new); /* attach at the empty slot reached by the walk above */
|
|
|
+ rb_insert_color(&data->node, root);
|
|
|
+}
|
|
|
+
|
|
|
+static void perf_sched__merge_lat(struct perf_sched *sched) /* Unless skip_merge is set, empty atom_root and merge its entries per comm into merged_atom_root. */
|
|
|
+{
|
|
|
+ struct work_atoms *data;
|
|
|
+ struct rb_node *node;
|
|
|
+
|
|
|
+ if (sched->skip_merge) /* merging disabled: leave atom_root untouched */
|
|
|
+ return;
|
|
|
+
|
|
|
+ while ((node = rb_first(&sched->atom_root))) { /* pop nodes until atom_root is empty */
|
|
|
+ rb_erase(node, &sched->atom_root);
|
|
|
+ data = rb_entry(node, struct work_atoms, node);
|
|
|
+ __merge_work_atoms(&sched->merged_atom_root, data); /* inserts data or folds it into a same-comm node */
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static int perf_sched__lat(struct perf_sched *sched)
|
|
|
{
|
|
|
struct rb_node *next;
|
|
@@ -1581,6 +1644,7 @@ static int perf_sched__lat(struct perf_sched *sched)
|
|
|
if (perf_sched__read_events(sched))
|
|
|
return -1;
|
|
|
|
|
|
+ perf_sched__merge_lat(sched);
|
|
|
perf_sched__sort_lat(sched);
|
|
|
|
|
|
printf("\n -----------------------------------------------------------------------------------------------------------------\n");
|
|
@@ -1732,6 +1796,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
|
|
|
.profile_cpu = -1,
|
|
|
.next_shortname1 = 'A',
|
|
|
.next_shortname2 = '0',
|
|
|
+ .skip_merge = 0,
|
|
|
};
|
|
|
const struct option latency_options[] = {
|
|
|
OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
|
|
@@ -1742,6 +1807,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
|
|
|
"CPU to profile on"),
|
|
|
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
|
|
|
"dump raw trace in ASCII"),
|
|
|
+ OPT_BOOLEAN('p', "pids", &sched.skip_merge,
|
|
|
+ "latency stats per pid instead of per comm"),
|
|
|
OPT_END()
|
|
|
};
|
|
|
const struct option replay_options[] = {
|