export-to-postgresql.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. # export-to-postgresql.py: export perf data to a postgresql database
  2. # Copyright (c) 2014, Intel Corporation.
  3. #
  4. # This program is free software; you can redistribute it and/or modify it
  5. # under the terms and conditions of the GNU General Public License,
  6. # version 2, as published by the Free Software Foundation.
  7. #
  8. # This program is distributed in the hope it will be useful, but WITHOUT
  9. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  11. # more details.
  12. import os
  13. import sys
  14. import struct
  15. import datetime
  16. from PySide.QtSql import *
  17. # Need to access PostgreSQL C library directly to use COPY FROM STDIN
  18. from ctypes import *
  19. libpq = CDLL("libpq.so.5")
  20. PQconnectdb = libpq.PQconnectdb
  21. PQconnectdb.restype = c_void_p
  22. PQfinish = libpq.PQfinish
  23. PQstatus = libpq.PQstatus
  24. PQexec = libpq.PQexec
  25. PQexec.restype = c_void_p
  26. PQresultStatus = libpq.PQresultStatus
  27. PQputCopyData = libpq.PQputCopyData
  28. PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
  29. PQputCopyEnd = libpq.PQputCopyEnd
  30. PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]
  31. sys.path.append(os.environ['PERF_EXEC_PATH'] + \
  32. '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
  33. # These perf imports are not used at present
  34. #from perf_trace_context import *
  35. #from Core import *
  36. perf_db_export_mode = True
  37. def usage():
  38. print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>]"
  39. print >> sys.stderr, "where: columns 'all' or 'branches'"
  40. raise Exception("Too few arguments")
  41. if (len(sys.argv) < 2):
  42. usage()
  43. dbname = sys.argv[1]
  44. if (len(sys.argv) >= 3):
  45. columns = sys.argv[2]
  46. else:
  47. columns = "all"
  48. if columns not in ("all", "branches"):
  49. usage()
  50. branches = (columns == "branches")
  51. output_dir_name = os.getcwd() + "/" + dbname + "-perf-data"
  52. os.mkdir(output_dir_name)
  53. def do_query(q, s):
  54. if (q.exec_(s)):
  55. return
  56. raise Exception("Query failed: " + q.lastError().text())
  57. print datetime.datetime.today(), "Creating database..."
  58. db = QSqlDatabase.addDatabase('QPSQL')
  59. query = QSqlQuery(db)
  60. db.setDatabaseName('postgres')
  61. db.open()
  62. try:
  63. do_query(query, 'CREATE DATABASE ' + dbname)
  64. except:
  65. os.rmdir(output_dir_name)
  66. raise
  67. query.finish()
  68. query.clear()
  69. db.close()
  70. db.setDatabaseName(dbname)
  71. db.open()
  72. query = QSqlQuery(db)
  73. do_query(query, 'SET client_min_messages TO WARNING')
  74. do_query(query, 'CREATE TABLE selected_events ('
  75. 'id bigint NOT NULL,'
  76. 'name varchar(80))')
  77. do_query(query, 'CREATE TABLE machines ('
  78. 'id bigint NOT NULL,'
  79. 'pid integer,'
  80. 'root_dir varchar(4096))')
  81. do_query(query, 'CREATE TABLE threads ('
  82. 'id bigint NOT NULL,'
  83. 'machine_id bigint,'
  84. 'process_id bigint,'
  85. 'pid integer,'
  86. 'tid integer)')
  87. do_query(query, 'CREATE TABLE comms ('
  88. 'id bigint NOT NULL,'
  89. 'comm varchar(16))')
  90. do_query(query, 'CREATE TABLE comm_threads ('
  91. 'id bigint NOT NULL,'
  92. 'comm_id bigint,'
  93. 'thread_id bigint)')
  94. do_query(query, 'CREATE TABLE dsos ('
  95. 'id bigint NOT NULL,'
  96. 'machine_id bigint,'
  97. 'short_name varchar(256),'
  98. 'long_name varchar(4096),'
  99. 'build_id varchar(64))')
  100. do_query(query, 'CREATE TABLE symbols ('
  101. 'id bigint NOT NULL,'
  102. 'dso_id bigint,'
  103. 'sym_start bigint,'
  104. 'sym_end bigint,'
  105. 'binding integer,'
  106. 'name varchar(2048))')
  107. if branches:
  108. do_query(query, 'CREATE TABLE samples ('
  109. 'id bigint NOT NULL,'
  110. 'evsel_id bigint,'
  111. 'machine_id bigint,'
  112. 'thread_id bigint,'
  113. 'comm_id bigint,'
  114. 'dso_id bigint,'
  115. 'symbol_id bigint,'
  116. 'sym_offset bigint,'
  117. 'ip bigint,'
  118. 'time bigint,'
  119. 'cpu integer,'
  120. 'to_dso_id bigint,'
  121. 'to_symbol_id bigint,'
  122. 'to_sym_offset bigint,'
  123. 'to_ip bigint)')
  124. else:
  125. do_query(query, 'CREATE TABLE samples ('
  126. 'id bigint NOT NULL,'
  127. 'evsel_id bigint,'
  128. 'machine_id bigint,'
  129. 'thread_id bigint,'
  130. 'comm_id bigint,'
  131. 'dso_id bigint,'
  132. 'symbol_id bigint,'
  133. 'sym_offset bigint,'
  134. 'ip bigint,'
  135. 'time bigint,'
  136. 'cpu integer,'
  137. 'to_dso_id bigint,'
  138. 'to_symbol_id bigint,'
  139. 'to_sym_offset bigint,'
  140. 'to_ip bigint,'
  141. 'period bigint,'
  142. 'weight bigint,'
  143. 'transaction bigint,'
  144. 'data_src bigint)')
  145. do_query(query, 'CREATE VIEW samples_view AS '
  146. 'SELECT '
  147. 'id,'
  148. 'time,'
  149. 'cpu,'
  150. '(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
  151. '(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
  152. '(SELECT comm FROM comms WHERE id = comm_id) AS command,'
  153. '(SELECT name FROM selected_events WHERE id = evsel_id) AS event,'
  154. 'to_hex(ip) AS ip_hex,'
  155. '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
  156. 'sym_offset,'
  157. '(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,'
  158. 'to_hex(to_ip) AS to_ip_hex,'
  159. '(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,'
  160. 'to_sym_offset,'
  161. '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name'
  162. ' FROM samples')
  163. file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0)
  164. file_trailer = "\377\377"
  165. def open_output_file(file_name):
  166. path_name = output_dir_name + "/" + file_name
  167. file = open(path_name, "w+")
  168. file.write(file_header)
  169. return file
  170. def close_output_file(file):
  171. file.write(file_trailer)
  172. file.close()
  173. def copy_output_file_direct(file, table_name):
  174. close_output_file(file)
  175. sql = "COPY " + table_name + " FROM '" + file.name + "' (FORMAT 'binary')"
  176. do_query(query, sql)
  177. # Use COPY FROM STDIN because security may prevent postgres from accessing the files directly
  178. def copy_output_file(file, table_name):
  179. conn = PQconnectdb("dbname = " + dbname)
  180. if (PQstatus(conn)):
  181. raise Exception("COPY FROM STDIN PQconnectdb failed")
  182. file.write(file_trailer)
  183. file.seek(0)
  184. sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')"
  185. res = PQexec(conn, sql)
  186. if (PQresultStatus(res) != 4):
  187. raise Exception("COPY FROM STDIN PQexec failed")
  188. data = file.read(65536)
  189. while (len(data)):
  190. ret = PQputCopyData(conn, data, len(data))
  191. if (ret != 1):
  192. raise Exception("COPY FROM STDIN PQputCopyData failed, error " + str(ret))
  193. data = file.read(65536)
  194. ret = PQputCopyEnd(conn, None)
  195. if (ret != 1):
  196. raise Exception("COPY FROM STDIN PQputCopyEnd failed, error " + str(ret))
  197. PQfinish(conn)
  198. def remove_output_file(file):
  199. name = file.name
  200. file.close()
  201. os.unlink(name)
  202. evsel_file = open_output_file("evsel_table.bin")
  203. machine_file = open_output_file("machine_table.bin")
  204. thread_file = open_output_file("thread_table.bin")
  205. comm_file = open_output_file("comm_table.bin")
  206. comm_thread_file = open_output_file("comm_thread_table.bin")
  207. dso_file = open_output_file("dso_table.bin")
  208. symbol_file = open_output_file("symbol_table.bin")
  209. sample_file = open_output_file("sample_table.bin")
  210. def trace_begin():
  211. print datetime.datetime.today(), "Writing to intermediate files..."
  212. # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs
  213. evsel_table(0, "unknown")
  214. machine_table(0, 0, "unknown")
  215. thread_table(0, 0, 0, -1, -1)
  216. comm_table(0, "unknown")
  217. dso_table(0, 0, "unknown", "unknown", "")
  218. symbol_table(0, 0, 0, 0, 0, "unknown")
# Number of events passed to trace_unhandled(); trace_end() prints a warning if it is non-zero
unhandled_count = 0
  220. def trace_end():
  221. print datetime.datetime.today(), "Copying to database..."
  222. copy_output_file(evsel_file, "selected_events")
  223. copy_output_file(machine_file, "machines")
  224. copy_output_file(thread_file, "threads")
  225. copy_output_file(comm_file, "comms")
  226. copy_output_file(comm_thread_file, "comm_threads")
  227. copy_output_file(dso_file, "dsos")
  228. copy_output_file(symbol_file, "symbols")
  229. copy_output_file(sample_file, "samples")
  230. print datetime.datetime.today(), "Removing intermediate files..."
  231. remove_output_file(evsel_file)
  232. remove_output_file(machine_file)
  233. remove_output_file(thread_file)
  234. remove_output_file(comm_file)
  235. remove_output_file(comm_thread_file)
  236. remove_output_file(dso_file)
  237. remove_output_file(symbol_file)
  238. remove_output_file(sample_file)
  239. os.rmdir(output_dir_name)
  240. print datetime.datetime.today(), "Adding primary keys"
  241. do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
  242. do_query(query, 'ALTER TABLE machines ADD PRIMARY KEY (id)')
  243. do_query(query, 'ALTER TABLE threads ADD PRIMARY KEY (id)')
  244. do_query(query, 'ALTER TABLE comms ADD PRIMARY KEY (id)')
  245. do_query(query, 'ALTER TABLE comm_threads ADD PRIMARY KEY (id)')
  246. do_query(query, 'ALTER TABLE dsos ADD PRIMARY KEY (id)')
  247. do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)')
  248. do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)')
  249. print datetime.datetime.today(), "Adding foreign keys"
  250. do_query(query, 'ALTER TABLE threads '
  251. 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
  252. 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)')
  253. do_query(query, 'ALTER TABLE comm_threads '
  254. 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
  255. 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id)')
  256. do_query(query, 'ALTER TABLE dsos '
  257. 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id)')
  258. do_query(query, 'ALTER TABLE symbols '
  259. 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id)')
  260. do_query(query, 'ALTER TABLE samples '
  261. 'ADD CONSTRAINT evselfk FOREIGN KEY (evsel_id) REFERENCES selected_events (id),'
  262. 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
  263. 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
  264. 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
  265. 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id),'
  266. 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),'
  267. 'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),'
  268. 'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)')
  269. if (unhandled_count):
  270. print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
  271. print datetime.datetime.today(), "Done"
  272. def trace_unhandled(event_name, context, event_fields_dict):
  273. global unhandled_count
  274. unhandled_count += 1
  275. def sched__sched_switch(*x):
  276. pass
  277. def evsel_table(evsel_id, evsel_name, *x):
  278. n = len(evsel_name)
  279. fmt = "!hiqi" + str(n) + "s"
  280. value = struct.pack(fmt, 2, 8, evsel_id, n, evsel_name)
  281. evsel_file.write(value)
  282. def machine_table(machine_id, pid, root_dir, *x):
  283. n = len(root_dir)
  284. fmt = "!hiqiii" + str(n) + "s"
  285. value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir)
  286. machine_file.write(value)
  287. def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
  288. value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid)
  289. thread_file.write(value)
  290. def comm_table(comm_id, comm_str, *x):
  291. n = len(comm_str)
  292. fmt = "!hiqi" + str(n) + "s"
  293. value = struct.pack(fmt, 2, 8, comm_id, n, comm_str)
  294. comm_file.write(value)
  295. def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
  296. fmt = "!hiqiqiq"
  297. value = struct.pack(fmt, 3, 8, comm_thread_id, 8, comm_id, 8, thread_id)
  298. comm_thread_file.write(value)
  299. def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x):
  300. n1 = len(short_name)
  301. n2 = len(long_name)
  302. n3 = len(build_id)
  303. fmt = "!hiqiqi" + str(n1) + "si" + str(n2) + "si" + str(n3) + "s"
  304. value = struct.pack(fmt, 5, 8, dso_id, 8, machine_id, n1, short_name, n2, long_name, n3, build_id)
  305. dso_file.write(value)
  306. def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x):
  307. n = len(symbol_name)
  308. fmt = "!hiqiqiqiqiii" + str(n) + "s"
  309. value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name)
  310. symbol_file.write(value)
  311. def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, *x):
  312. if branches:
  313. value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiq", 15, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip)
  314. else:
  315. value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiq", 19, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src)
  316. sample_file.write(value)