9 年之前 · cae15db749
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -113,6 +113,7 @@ libperf-y += scripting-engines/
 
				 libperf-$(CONFIG_ZLIB) += zlib.o
			
 
				 libperf-$(CONFIG_LZMA) += lzma.o
			
 
				 libperf-y += demangle-java.o
			
 
				+libperf-y += demangle-rust.o
			
 
				 
			
 
				 ifdef CONFIG_JITDUMP
			
 
				 libperf-$(CONFIG_LIBELF) += jitdump.o
			
--- a/tools/perf/util/demangle-rust.c
+++ b/tools/perf/util/demangle-rust.c
@@ -0,0 +1,269 @@
 
				+#include <string.h>
			
 
				+#include "util.h"
			
 
				+#include "debug.h"
			
 
				+
			
 
				+#include "demangle-rust.h"
			
 
				+
			
 
				+/*
			
 
				+ * Mangled Rust symbols look like this:
			
 
				+ *
			
 
				+ *     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
			
 
				+ *
			
 
				+ * The original symbol is:
			
 
				+ *
			
 
				+ *     <std::sys::fd::FileDesc as core::ops::Drop>::drop
			
 
				+ *
			
 
				+ * The last component of the path is a 64-bit hash in lowercase hex, prefixed
			
 
				+ * with "h". Rust does not have a global namespace between crates, an illusion
			
 
				+ * which Rust maintains by using the hash to distinguish things that would
			
 
				+ * otherwise have the same symbol.
			
 
				+ *
			
 
				+ * Any path component not starting with a XID_Start character is prefixed with
			
 
				+ * "_".
			
 
				+ *
			
 
				+ * The following escape sequences are used:
			
 
				+ *
			
 
				+ *     ","  =>  $C$
			
 
				+ *     "@"  =>  $SP$
			
 
				+ *     "*"  =>  $BP$
			
 
				+ *     "&"  =>  $RF$
			
 
				+ *     "<"  =>  $LT$
			
 
				+ *     ">"  =>  $GT$
			
 
				+ *     "("  =>  $LP$
			
 
				+ *     ")"  =>  $RP$
			
 
				+ *     " "  =>  $u20$
			
 
				+ *     "'"  =>  $u27$
			
 
				+ *     "["  =>  $u5b$
			
 
				+ *     "]"  =>  $u5d$
			
 
				+ *     "~"  =>  $u7e$
			
 
				+ *
			
 
				+ * A double ".." means "::" and a single "." means "-".
			
 
				+ *
			
 
				+ * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
			
 
				+ */
			
 
				+
			
 
				+static const char *hash_prefix = "::h";
			
 
				+static const size_t hash_prefix_len = 3;
			
 
				+static const size_t hash_len = 16;
			
 
				+
			
 
				+static bool is_prefixed_hash(const char *start);
			
 
				+static bool looks_like_rust(const char *sym, size_t len);
			
 
				+static bool unescape(const char **in, char **out, const char *seq, char value);
			
 
				+
			
 
				+/*
			
 
				+ * INPUT:
			
 
				+ *     sym: symbol that has been through BFD-demangling
			
 
				+ *
			
 
				+ * This function looks for the following indicators:
			
 
				+ *
			
 
				+ *  1. The hash must consist of "h" followed by 16 lowercase hex digits.
			
 
				+ *
			
 
				+ *  2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
			
 
				+ *     hex digits. This is true of 99.9998% of hashes so once in your life you
			
 
				+ *     may see a false negative. The point is to notice path components that
			
 
				+ *     could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
			
 
				+ *     this case a false positive (non-Rust symbol has an important path
			
 
				+ *     component removed because it looks like a Rust hash) is worse than a
			
 
				+ *     false negative (the rare Rust symbol is not demangled) so this sets the
			
 
				+ *     balance in favor of false negatives.
			
 
				+ *
			
 
				+ *  3. There must be no characters other than a-zA-Z0-9 and _.:$
			
 
				+ *
			
 
				+ *  4. There must be no unrecognized $-sign sequences.
			
 
				+ *
			
 
				+ *  5. There must be no sequence of three or more dots in a row ("...").
			
 
				+ */
			
 
				+bool
			
 
				+rust_is_mangled(const char *sym)
			
 
				+{
			
 
				+	size_t len, len_without_hash;
			
 
				+
			
 
				+	if (!sym)
			
 
				+		return false;
			
 
				+
			
 
				+	len = strlen(sym);
			
 
				+	if (len <= hash_prefix_len + hash_len)
			
 
				+		/* Not long enough to contain "::h" + hash + something else */
			
 
				+		return false;
			
 
				+
			
 
				+	len_without_hash = len - (hash_prefix_len + hash_len);
			
 
				+	if (!is_prefixed_hash(sym + len_without_hash))
			
 
				+		return false;
			
 
				+
			
 
				+	return looks_like_rust(sym, len_without_hash);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
			
 
				+ * digits must comprise between 5 and 15 (inclusive) distinct digits.
			
 
				+ */
			
 
				+static bool is_prefixed_hash(const char *str)
			
 
				+{
			
 
				+	const char *end;
			
 
				+	bool seen[16];
			
 
				+	size_t i;
			
 
				+	int count;
			
 
				+
			
 
				+	if (strncmp(str, hash_prefix, hash_prefix_len))
			
 
				+		return false;
			
 
				+	str += hash_prefix_len;
			
 
				+
			
 
				+	memset(seen, false, sizeof(seen));
			
 
				+	for (end = str + hash_len; str < end; str++)
			
 
				+		if (*str >= '0' && *str <= '9')
			
 
				+			seen[*str - '0'] = true;
			
 
				+		else if (*str >= 'a' && *str <= 'f')
			
 
				+			seen[*str - 'a' + 10] = true;
			
 
				+		else
			
 
				+			return false;
			
 
				+
			
 
				+	/* Count how many distinct digits seen */
			
 
				+	count = 0;
			
 
				+	for (i = 0; i < 16; i++)
			
 
				+		if (seen[i])
			
 
				+			count++;
			
 
				+
			
 
				+	return count >= 5 && count <= 15;
			
 
				+}
			
 
				+
			
 
				+static bool looks_like_rust(const char *str, size_t len)
			
 
				+{
			
 
				+	const char *end = str + len;
			
 
				+
			
 
				+	while (str < end)
			
 
				+		switch (*str) {
			
 
				+		case '$':
			
 
				+			if (!strncmp(str, "$C$", 3))
			
 
				+				str += 3;
			
 
				+			else if (!strncmp(str, "$SP$", 4)
			
 
				+					|| !strncmp(str, "$BP$", 4)
			
 
				+					|| !strncmp(str, "$RF$", 4)
			
 
				+					|| !strncmp(str, "$LT$", 4)
			
 
				+					|| !strncmp(str, "$GT$", 4)
			
 
				+					|| !strncmp(str, "$LP$", 4)
			
 
				+					|| !strncmp(str, "$RP$", 4))
			
 
				+				str += 4;
			
 
				+			else if (!strncmp(str, "$u20$", 5)
			
 
				+					|| !strncmp(str, "$u27$", 5)
			
 
				+					|| !strncmp(str, "$u5b$", 5)
			
 
				+					|| !strncmp(str, "$u5d$", 5)
			
 
				+					|| !strncmp(str, "$u7e$", 5))
			
 
				+				str += 5;
			
 
				+			else
			
 
				+				return false;
			
 
				+			break;
			
 
				+		case '.':
			
 
				+			/* Do not allow three or more consecutive dots */
			
 
				+			if (!strncmp(str, "...", 3))
			
 
				+				return false;
			
 
				+			/* Fall through */
			
 
				+		case 'a' ... 'z':
			
 
				+		case 'A' ... 'Z':
			
 
				+		case '0' ... '9':
			
 
				+		case '_':
			
 
				+		case ':':
			
 
				+			str++;
			
 
				+			break;
			
 
				+		default:
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * INPUT:
			
 
				+ *     sym: symbol for which rust_is_mangled(sym) returns true
			
 
				+ *
			
 
				+ * The input is demangled in-place because the mangled name is always longer
			
 
				+ * than the demangled one.
			
 
				+ */
			
 
				+void
			
 
				+rust_demangle_sym(char *sym)
			
 
				+{
			
 
				+	const char *in;
			
 
				+	char *out;
			
 
				+	const char *end;
			
 
				+
			
 
				+	if (!sym)
			
 
				+		return;
			
 
				+
			
 
				+	in = sym;
			
 
				+	out = sym;
			
 
				+	end = sym + strlen(sym) - (hash_prefix_len + hash_len);
			
 
				+
			
 
				+	while (in < end)
			
 
				+		switch (*in) {
			
 
				+		case '$':
			
 
				+			if (!(unescape(&in, &out, "$C$", ',')
			
 
				+					|| unescape(&in, &out, "$SP$", '@')
			
 
				+					|| unescape(&in, &out, "$BP$", '*')
			
 
				+					|| unescape(&in, &out, "$RF$", '&')
			
 
				+					|| unescape(&in, &out, "$LT$", '<')
			
 
				+					|| unescape(&in, &out, "$GT$", '>')
			
 
				+					|| unescape(&in, &out, "$LP$", '(')
			
 
				+					|| unescape(&in, &out, "$RP$", ')')
			
 
				+					|| unescape(&in, &out, "$u20$", ' ')
			
 
				+					|| unescape(&in, &out, "$u27$", '\'')
			
 
				+					|| unescape(&in, &out, "$u5b$", '[')
			
 
				+					|| unescape(&in, &out, "$u5d$", ']')
			
 
				+					|| unescape(&in, &out, "$u7e$", '~'))) {
			
 
				+				pr_err("demangle-rust: unexpected escape sequence");
			
 
				+				goto done;
			
 
				+			}
			
 
				+			break;
			
 
				+		case '_':
			
 
				+			/*
			
 
				+			 * If this is the start of a path component and the next
			
 
				+			 * character is an escape sequence, ignore the
			
 
				+			 * underscore. The mangler inserts an underscore to make
			
 
				+			 * sure the path component begins with a XID_Start
			
 
				+			 * character.
			
 
				+			 */
			
 
				+			if ((in == sym || in[-1] == ':') && in[1] == '$')
			
 
				+				in++;
			
 
				+			else
			
 
				+				*out++ = *in++;
			
 
				+			break;
			
 
				+		case '.':
			
 
				+			if (in[1] == '.') {
			
 
				+				/* ".." becomes "::" */
			
 
				+				*out++ = ':';
			
 
				+				*out++ = ':';
			
 
				+				in += 2;
			
 
				+			} else {
			
 
				+				/* "." becomes "-" */
			
 
				+				*out++ = '-';
			
 
				+				in++;
			
 
				+			}
			
 
				+			break;
			
 
				+		case 'a' ... 'z':
			
 
				+		case 'A' ... 'Z':
			
 
				+		case '0' ... '9':
			
 
				+		case ':':
			
 
				+			*out++ = *in++;
			
 
				+			break;
			
 
				+		default:
			
 
				+			pr_err("demangle-rust: unexpected character '%c' in symbol\n",
			
 
				+				*in);
			
 
				+			goto done;
			
 
				+		}
			
 
				+
			
 
				+done:
			
 
				+	*out = '\0';
			
 
				+}
			
 
				+
			
 
				+static bool unescape(const char **in, char **out, const char *seq, char value)
			
 
				+{
			
 
				+	size_t len = strlen(seq);
			
 
				+
			
 
				+	if (strncmp(*in, seq, len))
			
 
				+		return false;
			
 
				+
			
 
				+	**out = value;
			
 
				+
			
 
				+	*in += len;
			
 
				+	*out += 1;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
--- a/tools/perf/util/demangle-rust.h
+++ b/tools/perf/util/demangle-rust.h
@@ -0,0 +1,7 @@
 
				+#ifndef __PERF_DEMANGLE_RUST
			
 
				+#define __PERF_DEMANGLE_RUST 1
			
 
				+
			
 
				+bool rust_is_mangled(const char *str);
			
 
				+void rust_demangle_sym(char *str);
			
 
				+
			
 
				+#endif /* __PERF_DEMANGLE_RUST */
			
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -7,6 +7,7 @@
 
				 
			
 
				 #include "symbol.h"
			
 
				 #include "demangle-java.h"
			
 
				+#include "demangle-rust.h"
			
 
				 #include "machine.h"
			
 
				 #include "vdso.h"
			
 
				 #include <symbol/kallsyms.h>
			
@@ -1081,6 +1082,13 @@ new_symbol:
 
				 			demangled = bfd_demangle(NULL, elf_name, demangle_flags);
			
 
				 			if (demangled == NULL)
			
 
				 				demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
			
 
				+			else if (rust_is_mangled(demangled))
			
 
				+				/*
			
 
				+				 * Input to Rust demangling is the BFD-demangled
			
 
				+				 * name which it Rust-demangles in place.
			
 
				+				 */
			
 
				+				rust_demangle_sym(demangled);
			
 
				+
			
 
				 			if (demangled != NULL)
			
 
				 				elf_name = demangled;
			
 
				 		}