@@ -12,6 +12,7 @@
 #include <stdbool.h>
 #include <symbol/kallsyms.h>
 #include "unwind.h"
+#include "linux/hash.h"
 
 static void dsos__init(struct dsos *dsos)
 {
@@ -1391,7 +1392,15 @@ static int add_callchain_ip(struct thread *thread,
 
 	al.filtered = 0;
 	al.sym = NULL;
-	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
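+	/*
+	 * A cpumode of -1 means no mode was recorded for this address
+	 * (branch stack entries), so try each cpumode in turn.
+	 */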
+	if (cpumode == -1)
+		thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
+						   ip, &al);
+	else
+		thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
 				   ip, &al);
 	if (al.sym != NULL) {
 		if (sort__has_parent && !*parent &&
@@ -1427,8 +1436,55 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/*
+ * Remove loops from the branch stack: when a 'from' address repeats
+ * and the entries in between match, drop the duplicated iteration so
+ * a tight loop does not flood the callchain.
+ */
+static int remove_loops(struct branch_entry *l, int nr)
+{
+	int i, j, off;
+	unsigned char chash[CHASHSZ];
+
+	memset(chash, NO_ENTRY, sizeof(chash));
+
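+	/* indices are stored in an unsigned char, hence the 255 limit */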
+	BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
+
+	for (i = 0; i < nr; i++) {
+		int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+		/* no collision handling for now */
+		if (chash[h] == NO_ENTRY) {
+			chash[h] = i;
+		} else if (l[chash[h]].from == l[i].from) {
+			bool is_loop = true;
+			/* check if it is a real loop */
+			off = 0;
+			for (j = chash[h]; j < i && i + off < nr; j++, off++)
+				if (l[j].from != l[i + off].from) {
+					is_loop = false;
+					break;
+				}
+			if (is_loop) {
+				memmove(l + i, l + i + off,
+					(nr - (i + off)) * sizeof(*l));
+				nr -= off;
+			}
+		}
+	}
+	return nr;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
 					    struct ip_callchain *chain,
+					    struct branch_stack *branch,
 					    struct symbol **parent,
 					    struct addr_location *root_al,
 					    int max_stack)
@@ -1438,22 +1494,86 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i;
 	int j;
 	int err;
-	int skip_idx __maybe_unused;
+	int skip_idx = -1;
+	int first_call = 0;
+
+	/*
+	 * Based on DWARF debug information, some architectures skip
+	 * a callchain entry saved by the kernel.
+	 */
+	if (chain->nr < PERF_MAX_STACK_DEPTH)
+		skip_idx = arch_skip_callchain_idx(thread, chain);
 
 	callchain_cursor_reset(&callchain_cursor);
 
+	/*
+	 * Add branches to the call stack for easier browsing. This gives
+	 * more context for a sample than just the callers.
+	 *
+	 * This uses individual histograms of paths compared to the
+	 * aggregated histograms the normal LBR mode uses.
+	 *
+	 * Limitations for now:
+	 * - No extra filters
+	 * - No annotations (should annotate somehow)
+	 */
+
+	if (branch && callchain_param.branch_callstack) {
+		int nr = min(max_stack, (int)branch->nr);
+		struct branch_entry be[nr];
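+		/* the VLA is bounded: nr is at most max_stack */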
+
+		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+			pr_warning("corrupted branch chain. skipping...\n");
+			goto check_calls;
+		}
+
+		for (i = 0; i < nr; i++) {
+			if (callchain_param.order == ORDER_CALLEE) {
+				be[i] = branch->entries[i];
+				/*
+				 * Check for overlap into the callchain.
+				 * The return address is one off compared to
+				 * the branch entry. To adjust for this
+				 * assume the calling instruction is not longer
+				 * than 8 bytes.
+				 */
+				if (i == skip_idx ||
+				    chain->ips[first_call] >= PERF_CONTEXT_MAX)
+					first_call++;
+				else if (be[i].from < chain->ips[first_call] &&
+				    be[i].from >= chain->ips[first_call] - 8)
+					first_call++;
+			} else
+				be[i] = branch->entries[branch->nr - i - 1];
+		}
+
+		nr = remove_loops(be, nr);
+
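+		/* branch addresses carry no cpumode, so pass -1 and search all */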
+		for (i = 0; i < nr; i++) {
+			err = add_callchain_ip(thread, parent, root_al,
+					       -1, be[i].to);
+			if (!err)
+				err = add_callchain_ip(thread, parent, root_al,
+						       -1, be[i].from);
+			if (err == -EINVAL)
+				break;
+			if (err)
+				return err;
+		}
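+		/* shrink the regular callchain walk to stay within max_stack */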
+		chain_nr -= nr;
+	}
+
+check_calls:
 	if (chain->nr > PERF_MAX_STACK_DEPTH) {
 		pr_warning("corrupted callchain. skipping...\n");
 		return 0;
 	}
 
-	/*
-	 * Based on DWARF debug information, some architectures skip
-	 * a callchain entry saved by the kernel.
-	 */
-	skip_idx = arch_skip_callchain_idx(thread, chain);
-
-	for (i = 0; i < chain_nr; i++) {
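+	/* first_call skips returns already covered by the branch stack */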
+	for (i = first_call; i < chain_nr; i++) {
 		u64 ip;
 
 		if (callchain_param.order == ORDER_CALLEE)
@@ -1517,6 +1637,7 @@ int thread__resolve_callchain(struct thread *thread,
 			      int max_stack)
 {
 	int ret = thread__resolve_callchain_sample(thread, sample->callchain,
+						   sample->branch_stack,
 						   parent, root_al, max_stack);
 	if (ret)
 		return ret;