export-to-postgresql.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. # export-to-postgresql.py: export perf data to a postgresql database
  2. # Copyright (c) 2014, Intel Corporation.
  3. #
  4. # This program is free software; you can redistribute it and/or modify it
  5. # under the terms and conditions of the GNU General Public License,
  6. # version 2, as published by the Free Software Foundation.
  7. #
  8. # This program is distributed in the hope it will be useful, but WITHOUT
  9. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  11. # more details.
  12. import os
  13. import sys
  14. import struct
  15. import datetime
  16. from PySide.QtSql import *
  17. # Need to access PostgreSQL C library directly to use COPY FROM STDIN
  18. from ctypes import *
  19. libpq = CDLL("libpq.so.5")
  20. PQconnectdb = libpq.PQconnectdb
  21. PQconnectdb.restype = c_void_p
  22. PQfinish = libpq.PQfinish
  23. PQstatus = libpq.PQstatus
  24. PQexec = libpq.PQexec
  25. PQexec.restype = c_void_p
  26. PQresultStatus = libpq.PQresultStatus
  27. PQputCopyData = libpq.PQputCopyData
  28. PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
  29. PQputCopyEnd = libpq.PQputCopyEnd
  30. PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]
  31. sys.path.append(os.environ['PERF_EXEC_PATH'] + \
  32. '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
  33. # These perf imports are not used at present
  34. #from perf_trace_context import *
  35. #from Core import *
  36. perf_db_export_mode = True
  37. perf_db_export_calls = False
  38. def usage():
  39. print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
  40. print >> sys.stderr, "where: columns 'all' or 'branches'"
  41. print >> sys.stderr, " calls 'calls' => create calls table"
  42. raise Exception("Too few arguments")
  43. if (len(sys.argv) < 2):
  44. usage()
  45. dbname = sys.argv[1]
  46. if (len(sys.argv) >= 3):
  47. columns = sys.argv[2]
  48. else:
  49. columns = "all"
  50. if columns not in ("all", "branches"):
  51. usage()
  52. branches = (columns == "branches")
  53. if (len(sys.argv) >= 4):
  54. if (sys.argv[3] == "calls"):
  55. perf_db_export_calls = True
  56. else:
  57. usage()
  58. output_dir_name = os.getcwd() + "/" + dbname + "-perf-data"
  59. os.mkdir(output_dir_name)
  60. def do_query(q, s):
  61. if (q.exec_(s)):
  62. return
  63. raise Exception("Query failed: " + q.lastError().text())
  64. print datetime.datetime.today(), "Creating database..."
  65. db = QSqlDatabase.addDatabase('QPSQL')
  66. query = QSqlQuery(db)
  67. db.setDatabaseName('postgres')
  68. db.open()
  69. try:
  70. do_query(query, 'CREATE DATABASE ' + dbname)
  71. except:
  72. os.rmdir(output_dir_name)
  73. raise
  74. query.finish()
  75. query.clear()
  76. db.close()
  77. db.setDatabaseName(dbname)
  78. db.open()
  79. query = QSqlQuery(db)
  80. do_query(query, 'SET client_min_messages TO WARNING')
  81. do_query(query, 'CREATE TABLE selected_events ('
  82. 'id bigint NOT NULL,'
  83. 'name varchar(80))')
  84. do_query(query, 'CREATE TABLE machines ('
  85. 'id bigint NOT NULL,'
  86. 'pid integer,'
  87. 'root_dir varchar(4096))')
  88. do_query(query, 'CREATE TABLE threads ('
  89. 'id bigint NOT NULL,'
  90. 'machine_id bigint,'
  91. 'process_id bigint,'
  92. 'pid integer,'
  93. 'tid integer)')
  94. do_query(query, 'CREATE TABLE comms ('
  95. 'id bigint NOT NULL,'
  96. 'comm varchar(16))')
  97. do_query(query, 'CREATE TABLE comm_threads ('
  98. 'id bigint NOT NULL,'
  99. 'comm_id bigint,'
  100. 'thread_id bigint)')
  101. do_query(query, 'CREATE TABLE dsos ('
  102. 'id bigint NOT NULL,'
  103. 'machine_id bigint,'
  104. 'short_name varchar(256),'
  105. 'long_name varchar(4096),'
  106. 'build_id varchar(64))')
  107. do_query(query, 'CREATE TABLE symbols ('
  108. 'id bigint NOT NULL,'
  109. 'dso_id bigint,'
  110. 'sym_start bigint,'
  111. 'sym_end bigint,'
  112. 'binding integer,'
  113. 'name varchar(2048))')
  114. do_query(query, 'CREATE TABLE branch_types ('
  115. 'id integer NOT NULL,'
  116. 'name varchar(80))')
  117. if branches:
  118. do_query(query, 'CREATE TABLE samples ('
  119. 'id bigint NOT NULL,'
  120. 'evsel_id bigint,'
  121. 'machine_id bigint,'
  122. 'thread_id bigint,'
  123. 'comm_id bigint,'
  124. 'dso_id bigint,'
  125. 'symbol_id bigint,'
  126. 'sym_offset bigint,'
  127. 'ip bigint,'
  128. 'time bigint,'
  129. 'cpu integer,'
  130. 'to_dso_id bigint,'
  131. 'to_symbol_id bigint,'
  132. 'to_sym_offset bigint,'
  133. 'to_ip bigint,'
  134. 'branch_type integer,'
  135. 'in_tx boolean)')
  136. else:
  137. do_query(query, 'CREATE TABLE samples ('
  138. 'id bigint NOT NULL,'
  139. 'evsel_id bigint,'
  140. 'machine_id bigint,'
  141. 'thread_id bigint,'
  142. 'comm_id bigint,'
  143. 'dso_id bigint,'
  144. 'symbol_id bigint,'
  145. 'sym_offset bigint,'
  146. 'ip bigint,'
  147. 'time bigint,'
  148. 'cpu integer,'
  149. 'to_dso_id bigint,'
  150. 'to_symbol_id bigint,'
  151. 'to_sym_offset bigint,'
  152. 'to_ip bigint,'
  153. 'period bigint,'
  154. 'weight bigint,'
  155. 'transaction bigint,'
  156. 'data_src bigint,'
  157. 'branch_type integer,'
  158. 'in_tx boolean)')
  159. if perf_db_export_calls:
  160. do_query(query, 'CREATE TABLE call_paths ('
  161. 'id bigint NOT NULL,'
  162. 'parent_id bigint,'
  163. 'symbol_id bigint,'
  164. 'ip bigint)')
  165. do_query(query, 'CREATE TABLE calls ('
  166. 'id bigint NOT NULL,'
  167. 'thread_id bigint,'
  168. 'comm_id bigint,'
  169. 'call_path_id bigint,'
  170. 'call_time bigint,'
  171. 'return_time bigint,'
  172. 'branch_count bigint,'
  173. 'call_id bigint,'
  174. 'return_id bigint,'
  175. 'parent_call_path_id bigint,'
  176. 'flags integer)')
  177. do_query(query, 'CREATE VIEW samples_view AS '
  178. 'SELECT '
  179. 'id,'
  180. 'time,'
  181. 'cpu,'
  182. '(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
  183. '(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
  184. '(SELECT comm FROM comms WHERE id = comm_id) AS command,'
  185. '(SELECT name FROM selected_events WHERE id = evsel_id) AS event,'
  186. 'to_hex(ip) AS ip_hex,'
  187. '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
  188. 'sym_offset,'
  189. '(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,'
  190. 'to_hex(to_ip) AS to_ip_hex,'
  191. '(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,'
  192. 'to_sym_offset,'
  193. '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
  194. '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
  195. 'in_tx'
  196. ' FROM samples')
  197. file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0)
  198. file_trailer = "\377\377"
  199. def open_output_file(file_name):
  200. path_name = output_dir_name + "/" + file_name
  201. file = open(path_name, "w+")
  202. file.write(file_header)
  203. return file
  204. def close_output_file(file):
  205. file.write(file_trailer)
  206. file.close()
  207. def copy_output_file_direct(file, table_name):
  208. close_output_file(file)
  209. sql = "COPY " + table_name + " FROM '" + file.name + "' (FORMAT 'binary')"
  210. do_query(query, sql)
  211. # Use COPY FROM STDIN because security may prevent postgres from accessing the files directly
  212. def copy_output_file(file, table_name):
  213. conn = PQconnectdb("dbname = " + dbname)
  214. if (PQstatus(conn)):
  215. raise Exception("COPY FROM STDIN PQconnectdb failed")
  216. file.write(file_trailer)
  217. file.seek(0)
  218. sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')"
  219. res = PQexec(conn, sql)
  220. if (PQresultStatus(res) != 4):
  221. raise Exception("COPY FROM STDIN PQexec failed")
  222. data = file.read(65536)
  223. while (len(data)):
  224. ret = PQputCopyData(conn, data, len(data))
  225. if (ret != 1):
  226. raise Exception("COPY FROM STDIN PQputCopyData failed, error " + str(ret))
  227. data = file.read(65536)
  228. ret = PQputCopyEnd(conn, None)
  229. if (ret != 1):
  230. raise Exception("COPY FROM STDIN PQputCopyEnd failed, error " + str(ret))
  231. PQfinish(conn)
  232. def remove_output_file(file):
  233. name = file.name
  234. file.close()
  235. os.unlink(name)
  236. evsel_file = open_output_file("evsel_table.bin")
  237. machine_file = open_output_file("machine_table.bin")
  238. thread_file = open_output_file("thread_table.bin")
  239. comm_file = open_output_file("comm_table.bin")
  240. comm_thread_file = open_output_file("comm_thread_table.bin")
  241. dso_file = open_output_file("dso_table.bin")
  242. symbol_file = open_output_file("symbol_table.bin")
  243. branch_type_file = open_output_file("branch_type_table.bin")
  244. sample_file = open_output_file("sample_table.bin")
  245. if perf_db_export_calls:
  246. call_path_file = open_output_file("call_path_table.bin")
  247. call_file = open_output_file("call_table.bin")
  248. def trace_begin():
  249. print datetime.datetime.today(), "Writing to intermediate files..."
  250. # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs
  251. evsel_table(0, "unknown")
  252. machine_table(0, 0, "unknown")
  253. thread_table(0, 0, 0, -1, -1)
  254. comm_table(0, "unknown")
  255. dso_table(0, 0, "unknown", "unknown", "")
  256. symbol_table(0, 0, 0, 0, 0, "unknown")
  257. sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
  258. if perf_db_export_calls:
  259. call_path_table(0, 0, 0, 0)
  260. unhandled_count = 0
  261. def trace_end():
  262. print datetime.datetime.today(), "Copying to database..."
  263. copy_output_file(evsel_file, "selected_events")
  264. copy_output_file(machine_file, "machines")
  265. copy_output_file(thread_file, "threads")
  266. copy_output_file(comm_file, "comms")
  267. copy_output_file(comm_thread_file, "comm_threads")
  268. copy_output_file(dso_file, "dsos")
  269. copy_output_file(symbol_file, "symbols")
  270. copy_output_file(branch_type_file, "branch_types")
  271. copy_output_file(sample_file, "samples")
  272. if perf_db_export_calls:
  273. copy_output_file(call_path_file, "call_paths")
  274. copy_output_file(call_file, "calls")
  275. print datetime.datetime.today(), "Removing intermediate files..."
  276. remove_output_file(evsel_file)
  277. remove_output_file(machine_file)
  278. remove_output_file(thread_file)
  279. remove_output_file(comm_file)
  280. remove_output_file(comm_thread_file)
  281. remove_output_file(dso_file)
  282. remove_output_file(symbol_file)
  283. remove_output_file(branch_type_file)
  284. remove_output_file(sample_file)
  285. if perf_db_export_calls:
  286. remove_output_file(call_path_file)
  287. remove_output_file(call_file)
  288. os.rmdir(output_dir_name)
  289. print datetime.datetime.today(), "Adding primary keys"
  290. do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
  291. do_query(query, 'ALTER TABLE machines ADD PRIMARY KEY (id)')
  292. do_query(query, 'ALTER TABLE threads ADD PRIMARY KEY (id)')
  293. do_query(query, 'ALTER TABLE comms ADD PRIMARY KEY (id)')
  294. do_query(query, 'ALTER TABLE comm_threads ADD PRIMARY KEY (id)')
  295. do_query(query, 'ALTER TABLE dsos ADD PRIMARY KEY (id)')
  296. do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)')
  297. do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)')
  298. do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)')
  299. if perf_db_export_calls:
  300. do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)')
  301. do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)')
  302. print datetime.datetime.today(), "Adding foreign keys"
  303. do_query(query, 'ALTER TABLE threads '
  304. 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
  305. 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)')
  306. do_query(query, 'ALTER TABLE comm_threads '
  307. 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
  308. 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id)')
  309. do_query(query, 'ALTER TABLE dsos '
  310. 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id)')
  311. do_query(query, 'ALTER TABLE symbols '
  312. 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id)')
  313. do_query(query, 'ALTER TABLE samples '
  314. 'ADD CONSTRAINT evselfk FOREIGN KEY (evsel_id) REFERENCES selected_events (id),'
  315. 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
  316. 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
  317. 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
  318. 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id),'
  319. 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),'
  320. 'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),'
  321. 'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)')
  322. if perf_db_export_calls:
  323. do_query(query, 'ALTER TABLE call_paths '
  324. 'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),'
  325. 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id)')
  326. do_query(query, 'ALTER TABLE calls '
  327. 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
  328. 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
  329. 'ADD CONSTRAINT call_pathfk FOREIGN KEY (call_path_id) REFERENCES call_paths (id),'
  330. 'ADD CONSTRAINT callfk FOREIGN KEY (call_id) REFERENCES samples (id),'
  331. 'ADD CONSTRAINT returnfk FOREIGN KEY (return_id) REFERENCES samples (id),'
  332. 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)')
  333. do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
  334. if (unhandled_count):
  335. print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
  336. print datetime.datetime.today(), "Done"
  337. def trace_unhandled(event_name, context, event_fields_dict):
  338. global unhandled_count
  339. unhandled_count += 1
  340. def sched__sched_switch(*x):
  341. pass
  342. def evsel_table(evsel_id, evsel_name, *x):
  343. n = len(evsel_name)
  344. fmt = "!hiqi" + str(n) + "s"
  345. value = struct.pack(fmt, 2, 8, evsel_id, n, evsel_name)
  346. evsel_file.write(value)
  347. def machine_table(machine_id, pid, root_dir, *x):
  348. n = len(root_dir)
  349. fmt = "!hiqiii" + str(n) + "s"
  350. value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir)
  351. machine_file.write(value)
  352. def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
  353. value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid)
  354. thread_file.write(value)
  355. def comm_table(comm_id, comm_str, *x):
  356. n = len(comm_str)
  357. fmt = "!hiqi" + str(n) + "s"
  358. value = struct.pack(fmt, 2, 8, comm_id, n, comm_str)
  359. comm_file.write(value)
  360. def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
  361. fmt = "!hiqiqiq"
  362. value = struct.pack(fmt, 3, 8, comm_thread_id, 8, comm_id, 8, thread_id)
  363. comm_thread_file.write(value)
  364. def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x):
  365. n1 = len(short_name)
  366. n2 = len(long_name)
  367. n3 = len(build_id)
  368. fmt = "!hiqiqi" + str(n1) + "si" + str(n2) + "si" + str(n3) + "s"
  369. value = struct.pack(fmt, 5, 8, dso_id, 8, machine_id, n1, short_name, n2, long_name, n3, build_id)
  370. dso_file.write(value)
  371. def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x):
  372. n = len(symbol_name)
  373. fmt = "!hiqiqiqiqiii" + str(n) + "s"
  374. value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name)
  375. symbol_file.write(value)
  376. def branch_type_table(branch_type, name, *x):
  377. n = len(name)
  378. fmt = "!hiii" + str(n) + "s"
  379. value = struct.pack(fmt, 2, 4, branch_type, n, name)
  380. branch_type_file.write(value)
  381. def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
  382. if branches:
  383. value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
  384. else:
  385. value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
  386. sample_file.write(value)
  387. def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
  388. fmt = "!hiqiqiqiq"
  389. value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip)
  390. call_path_file.write(value)
  391. def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, *x):
  392. fmt = "!hiqiqiqiqiqiqiqiqiqiqii"
  393. value = struct.pack(fmt, 11, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags)
  394. call_file.write(value)