export-to-postgresql.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. # export-to-postgresql.py: export perf data to a postgresql database
  2. # Copyright (c) 2014, Intel Corporation.
  3. #
  4. # This program is free software; you can redistribute it and/or modify it
  5. # under the terms and conditions of the GNU General Public License,
  6. # version 2, as published by the Free Software Foundation.
  7. #
  8. # This program is distributed in the hope it will be useful, but WITHOUT
  9. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  11. # more details.
  12. import os
  13. import sys
  14. import struct
  15. import datetime
  16. from PySide.QtSql import *
  17. # Need to access PostgreSQL C library directly to use COPY FROM STDIN
  18. from ctypes import *
  19. libpq = CDLL("libpq.so.5")
  20. PQconnectdb = libpq.PQconnectdb
  21. PQconnectdb.restype = c_void_p
  22. PQfinish = libpq.PQfinish
  23. PQstatus = libpq.PQstatus
  24. PQexec = libpq.PQexec
  25. PQexec.restype = c_void_p
  26. PQresultStatus = libpq.PQresultStatus
  27. PQputCopyData = libpq.PQputCopyData
  28. PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
  29. PQputCopyEnd = libpq.PQputCopyEnd
  30. PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]
  31. sys.path.append(os.environ['PERF_EXEC_PATH'] + \
  32. '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
  33. # These perf imports are not used at present
  34. #from perf_trace_context import *
  35. #from Core import *
  36. perf_db_export_mode = True
  37. def usage():
  38. print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>]"
  39. print >> sys.stderr, "where: columns 'all' or 'branches'"
  40. raise Exception("Too few arguments")
  41. if (len(sys.argv) < 2):
  42. usage()
  43. dbname = sys.argv[1]
  44. if (len(sys.argv) >= 3):
  45. columns = sys.argv[2]
  46. else:
  47. columns = "all"
  48. if columns not in ("all", "branches"):
  49. usage()
  50. branches = (columns == "branches")
  51. output_dir_name = os.getcwd() + "/" + dbname + "-perf-data"
  52. os.mkdir(output_dir_name)
  53. def do_query(q, s):
  54. if (q.exec_(s)):
  55. return
  56. raise Exception("Query failed: " + q.lastError().text())
  57. print datetime.datetime.today(), "Creating database..."
  58. db = QSqlDatabase.addDatabase('QPSQL')
  59. query = QSqlQuery(db)
  60. db.setDatabaseName('postgres')
  61. db.open()
  62. try:
  63. do_query(query, 'CREATE DATABASE ' + dbname)
  64. except:
  65. os.rmdir(output_dir_name)
  66. raise
  67. query.finish()
  68. query.clear()
  69. db.close()
  70. db.setDatabaseName(dbname)
  71. db.open()
  72. query = QSqlQuery(db)
  73. do_query(query, 'SET client_min_messages TO WARNING')
  74. do_query(query, 'CREATE TABLE selected_events ('
  75. 'id bigint NOT NULL,'
  76. 'name varchar(80))')
  77. do_query(query, 'CREATE TABLE machines ('
  78. 'id bigint NOT NULL,'
  79. 'pid integer,'
  80. 'root_dir varchar(4096))')
  81. do_query(query, 'CREATE TABLE threads ('
  82. 'id bigint NOT NULL,'
  83. 'machine_id bigint,'
  84. 'process_id bigint,'
  85. 'pid integer,'
  86. 'tid integer)')
  87. do_query(query, 'CREATE TABLE comms ('
  88. 'id bigint NOT NULL,'
  89. 'comm varchar(16))')
  90. do_query(query, 'CREATE TABLE comm_threads ('
  91. 'id bigint NOT NULL,'
  92. 'comm_id bigint,'
  93. 'thread_id bigint)')
  94. do_query(query, 'CREATE TABLE dsos ('
  95. 'id bigint NOT NULL,'
  96. 'machine_id bigint,'
  97. 'short_name varchar(256),'
  98. 'long_name varchar(4096),'
  99. 'build_id varchar(64))')
  100. do_query(query, 'CREATE TABLE symbols ('
  101. 'id bigint NOT NULL,'
  102. 'dso_id bigint,'
  103. 'sym_start bigint,'
  104. 'sym_end bigint,'
  105. 'binding integer,'
  106. 'name varchar(2048))')
  107. do_query(query, 'CREATE TABLE branch_types ('
  108. 'id integer NOT NULL,'
  109. 'name varchar(80))')
  110. if branches:
  111. do_query(query, 'CREATE TABLE samples ('
  112. 'id bigint NOT NULL,'
  113. 'evsel_id bigint,'
  114. 'machine_id bigint,'
  115. 'thread_id bigint,'
  116. 'comm_id bigint,'
  117. 'dso_id bigint,'
  118. 'symbol_id bigint,'
  119. 'sym_offset bigint,'
  120. 'ip bigint,'
  121. 'time bigint,'
  122. 'cpu integer,'
  123. 'to_dso_id bigint,'
  124. 'to_symbol_id bigint,'
  125. 'to_sym_offset bigint,'
  126. 'to_ip bigint,'
  127. 'branch_type integer,'
  128. 'in_tx boolean)')
  129. else:
  130. do_query(query, 'CREATE TABLE samples ('
  131. 'id bigint NOT NULL,'
  132. 'evsel_id bigint,'
  133. 'machine_id bigint,'
  134. 'thread_id bigint,'
  135. 'comm_id bigint,'
  136. 'dso_id bigint,'
  137. 'symbol_id bigint,'
  138. 'sym_offset bigint,'
  139. 'ip bigint,'
  140. 'time bigint,'
  141. 'cpu integer,'
  142. 'to_dso_id bigint,'
  143. 'to_symbol_id bigint,'
  144. 'to_sym_offset bigint,'
  145. 'to_ip bigint,'
  146. 'period bigint,'
  147. 'weight bigint,'
  148. 'transaction bigint,'
  149. 'data_src bigint,'
  150. 'branch_type integer,'
  151. 'in_tx boolean)')
  152. do_query(query, 'CREATE VIEW samples_view AS '
  153. 'SELECT '
  154. 'id,'
  155. 'time,'
  156. 'cpu,'
  157. '(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
  158. '(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
  159. '(SELECT comm FROM comms WHERE id = comm_id) AS command,'
  160. '(SELECT name FROM selected_events WHERE id = evsel_id) AS event,'
  161. 'to_hex(ip) AS ip_hex,'
  162. '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
  163. 'sym_offset,'
  164. '(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,'
  165. 'to_hex(to_ip) AS to_ip_hex,'
  166. '(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,'
  167. 'to_sym_offset,'
  168. '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
  169. '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
  170. 'in_tx'
  171. ' FROM samples')
  172. file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0)
  173. file_trailer = "\377\377"
  174. def open_output_file(file_name):
  175. path_name = output_dir_name + "/" + file_name
  176. file = open(path_name, "w+")
  177. file.write(file_header)
  178. return file
  179. def close_output_file(file):
  180. file.write(file_trailer)
  181. file.close()
  182. def copy_output_file_direct(file, table_name):
  183. close_output_file(file)
  184. sql = "COPY " + table_name + " FROM '" + file.name + "' (FORMAT 'binary')"
  185. do_query(query, sql)
  186. # Use COPY FROM STDIN because security may prevent postgres from accessing the files directly
  187. def copy_output_file(file, table_name):
  188. conn = PQconnectdb("dbname = " + dbname)
  189. if (PQstatus(conn)):
  190. raise Exception("COPY FROM STDIN PQconnectdb failed")
  191. file.write(file_trailer)
  192. file.seek(0)
  193. sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')"
  194. res = PQexec(conn, sql)
  195. if (PQresultStatus(res) != 4):
  196. raise Exception("COPY FROM STDIN PQexec failed")
  197. data = file.read(65536)
  198. while (len(data)):
  199. ret = PQputCopyData(conn, data, len(data))
  200. if (ret != 1):
  201. raise Exception("COPY FROM STDIN PQputCopyData failed, error " + str(ret))
  202. data = file.read(65536)
  203. ret = PQputCopyEnd(conn, None)
  204. if (ret != 1):
  205. raise Exception("COPY FROM STDIN PQputCopyEnd failed, error " + str(ret))
  206. PQfinish(conn)
  207. def remove_output_file(file):
  208. name = file.name
  209. file.close()
  210. os.unlink(name)
  211. evsel_file = open_output_file("evsel_table.bin")
  212. machine_file = open_output_file("machine_table.bin")
  213. thread_file = open_output_file("thread_table.bin")
  214. comm_file = open_output_file("comm_table.bin")
  215. comm_thread_file = open_output_file("comm_thread_table.bin")
  216. dso_file = open_output_file("dso_table.bin")
  217. symbol_file = open_output_file("symbol_table.bin")
  218. branch_type_file = open_output_file("branch_type_table.bin")
  219. sample_file = open_output_file("sample_table.bin")
  220. def trace_begin():
  221. print datetime.datetime.today(), "Writing to intermediate files..."
  222. # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs
  223. evsel_table(0, "unknown")
  224. machine_table(0, 0, "unknown")
  225. thread_table(0, 0, 0, -1, -1)
  226. comm_table(0, "unknown")
  227. dso_table(0, 0, "unknown", "unknown", "")
  228. symbol_table(0, 0, 0, 0, 0, "unknown")
  229. unhandled_count = 0
  230. def trace_end():
  231. print datetime.datetime.today(), "Copying to database..."
  232. copy_output_file(evsel_file, "selected_events")
  233. copy_output_file(machine_file, "machines")
  234. copy_output_file(thread_file, "threads")
  235. copy_output_file(comm_file, "comms")
  236. copy_output_file(comm_thread_file, "comm_threads")
  237. copy_output_file(dso_file, "dsos")
  238. copy_output_file(symbol_file, "symbols")
  239. copy_output_file(branch_type_file, "branch_types")
  240. copy_output_file(sample_file, "samples")
  241. print datetime.datetime.today(), "Removing intermediate files..."
  242. remove_output_file(evsel_file)
  243. remove_output_file(machine_file)
  244. remove_output_file(thread_file)
  245. remove_output_file(comm_file)
  246. remove_output_file(comm_thread_file)
  247. remove_output_file(dso_file)
  248. remove_output_file(symbol_file)
  249. remove_output_file(branch_type_file)
  250. remove_output_file(sample_file)
  251. os.rmdir(output_dir_name)
  252. print datetime.datetime.today(), "Adding primary keys"
  253. do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
  254. do_query(query, 'ALTER TABLE machines ADD PRIMARY KEY (id)')
  255. do_query(query, 'ALTER TABLE threads ADD PRIMARY KEY (id)')
  256. do_query(query, 'ALTER TABLE comms ADD PRIMARY KEY (id)')
  257. do_query(query, 'ALTER TABLE comm_threads ADD PRIMARY KEY (id)')
  258. do_query(query, 'ALTER TABLE dsos ADD PRIMARY KEY (id)')
  259. do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)')
  260. do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)')
  261. do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)')
  262. print datetime.datetime.today(), "Adding foreign keys"
  263. do_query(query, 'ALTER TABLE threads '
  264. 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
  265. 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)')
  266. do_query(query, 'ALTER TABLE comm_threads '
  267. 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
  268. 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id)')
  269. do_query(query, 'ALTER TABLE dsos '
  270. 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id)')
  271. do_query(query, 'ALTER TABLE symbols '
  272. 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id)')
  273. do_query(query, 'ALTER TABLE samples '
  274. 'ADD CONSTRAINT evselfk FOREIGN KEY (evsel_id) REFERENCES selected_events (id),'
  275. 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
  276. 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
  277. 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
  278. 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id),'
  279. 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),'
  280. 'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),'
  281. 'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)')
  282. if (unhandled_count):
  283. print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
  284. print datetime.datetime.today(), "Done"
  285. def trace_unhandled(event_name, context, event_fields_dict):
  286. global unhandled_count
  287. unhandled_count += 1
  288. def sched__sched_switch(*x):
  289. pass
  290. def evsel_table(evsel_id, evsel_name, *x):
  291. n = len(evsel_name)
  292. fmt = "!hiqi" + str(n) + "s"
  293. value = struct.pack(fmt, 2, 8, evsel_id, n, evsel_name)
  294. evsel_file.write(value)
  295. def machine_table(machine_id, pid, root_dir, *x):
  296. n = len(root_dir)
  297. fmt = "!hiqiii" + str(n) + "s"
  298. value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir)
  299. machine_file.write(value)
  300. def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
  301. value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid)
  302. thread_file.write(value)
  303. def comm_table(comm_id, comm_str, *x):
  304. n = len(comm_str)
  305. fmt = "!hiqi" + str(n) + "s"
  306. value = struct.pack(fmt, 2, 8, comm_id, n, comm_str)
  307. comm_file.write(value)
  308. def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
  309. fmt = "!hiqiqiq"
  310. value = struct.pack(fmt, 3, 8, comm_thread_id, 8, comm_id, 8, thread_id)
  311. comm_thread_file.write(value)
  312. def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x):
  313. n1 = len(short_name)
  314. n2 = len(long_name)
  315. n3 = len(build_id)
  316. fmt = "!hiqiqi" + str(n1) + "si" + str(n2) + "si" + str(n3) + "s"
  317. value = struct.pack(fmt, 5, 8, dso_id, 8, machine_id, n1, short_name, n2, long_name, n3, build_id)
  318. dso_file.write(value)
  319. def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x):
  320. n = len(symbol_name)
  321. fmt = "!hiqiqiqiqiii" + str(n) + "s"
  322. value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name)
  323. symbol_file.write(value)
  324. def branch_type_table(branch_type, name, *x):
  325. n = len(name)
  326. fmt = "!hiii" + str(n) + "s"
  327. value = struct.pack(fmt, 2, 4, branch_type, n, name)
  328. branch_type_file.write(value)
  329. def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
  330. if branches:
  331. value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
  332. else:
  333. value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
  334. sample_file.write(value)