/* builtin-sched.c */
  1. #include "builtin.h"
  2. #include "perf.h"
  3. #include "util/util.h"
  4. #include "util/evlist.h"
  5. #include "util/cache.h"
  6. #include "util/evsel.h"
  7. #include "util/symbol.h"
  8. #include "util/thread.h"
  9. #include "util/header.h"
  10. #include "util/session.h"
  11. #include "util/tool.h"
  12. #include "util/cloexec.h"
  13. #include "util/parse-options.h"
  14. #include "util/trace-event.h"
  15. #include "util/debug.h"
  16. #include <sys/prctl.h>
  17. #include <sys/resource.h>
  18. #include <semaphore.h>
  19. #include <pthread.h>
  20. #include <math.h>
  21. #include <api/fs/fs.h>
  /* prctl() option used to name a thread; only defined here if no header did. */
  #ifndef PR_SET_NAME
  #define PR_SET_NAME 15 /* Set process name */
  #endif

  /* Size of the per-CPU arrays in struct perf_sched. */
  #define MAX_CPUS 4096
  /* Size of the comm buffer in struct task_desc. */
  #define COMM_LEN 20
  #define SYM_LEN 129
  /* Fallback PID table size when kernel/pid_max cannot be read. */
  #define MAX_PID 1024000
  27. struct sched_atom;
  28. struct task_desc {
  29. unsigned long nr;
  30. unsigned long pid;
  31. char comm[COMM_LEN];
  32. unsigned long nr_events;
  33. unsigned long curr_event;
  34. struct sched_atom **atoms;
  35. pthread_t thread;
  36. sem_t sleep_sem;
  37. sem_t ready_for_work;
  38. sem_t work_done_sem;
  39. u64 cpu_usage;
  40. };
  /* Kind of a replayed scheduling atom (see struct sched_atom). */
  enum sched_event_type {
      SCHED_EVENT_RUN       = 0,
      SCHED_EVENT_SLEEP     = 1,
      SCHED_EVENT_WAKEUP    = 2,
      SCHED_EVENT_MIGRATION = 3,
  };
  47. struct sched_atom {
  48. enum sched_event_type type;
  49. int specific_wait;
  50. u64 timestamp;
  51. u64 duration;
  52. unsigned long nr;
  53. sem_t *wait_sem;
  54. struct task_desc *wakee;
  55. };
  /* Characters that sched_out_state() indexes with a task's prev_state. */
  #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"

  /* State of a work_atom in the latency accounting. */
  enum thread_state {
      THREAD_SLEEPING = 0,
      THREAD_WAIT_CPU = 1,
      THREAD_SCHED_IN = 2,
      THREAD_IGNORE   = 3
  };
  63. struct work_atom {
  64. struct list_head list;
  65. enum thread_state state;
  66. u64 sched_out_time;
  67. u64 wake_up_time;
  68. u64 sched_in_time;
  69. u64 runtime;
  70. };
  71. struct work_atoms {
  72. struct list_head work_list;
  73. struct thread *thread;
  74. struct rb_node node;
  75. u64 max_lat;
  76. u64 max_lat_at;
  77. u64 total_lat;
  78. u64 nb_atoms;
  79. u64 total_runtime;
  80. };
  /* Comparison callback used to order two work_atoms entries. */
  typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

  struct perf_sched;

  /* Set of per-event callbacks selected through perf_sched.tp_handler. */
  struct trace_sched_handler {
      int (*switch_event)(struct perf_sched *sched,
                          struct perf_evsel *evsel,
                          struct perf_sample *sample,
                          struct machine *machine);

      int (*runtime_event)(struct perf_sched *sched,
                           struct perf_evsel *evsel,
                           struct perf_sample *sample,
                           struct machine *machine);

      int (*wakeup_event)(struct perf_sched *sched,
                          struct perf_evsel *evsel,
                          struct perf_sample *sample,
                          struct machine *machine);

      /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */
      int (*fork_event)(struct perf_sched *sched,
                        union perf_event *event,
                        struct machine *machine);

      int (*migrate_task_event)(struct perf_sched *sched,
                                struct perf_evsel *evsel,
                                struct perf_sample *sample,
                                struct machine *machine);
  };
  98. struct perf_sched {
  99. struct perf_tool tool;
  100. const char *sort_order;
  101. unsigned long nr_tasks;
  102. struct task_desc **pid_to_task;
  103. struct task_desc **tasks;
  104. const struct trace_sched_handler *tp_handler;
  105. pthread_mutex_t start_work_mutex;
  106. pthread_mutex_t work_done_wait_mutex;
  107. int profile_cpu;
  108. /*
  109. * Track the current task - that way we can know whether there's any
  110. * weird events, such as a task being switched away that is not current.
  111. */
  112. int max_cpu;
  113. u32 curr_pid[MAX_CPUS];
  114. struct thread *curr_thread[MAX_CPUS];
  115. char next_shortname1;
  116. char next_shortname2;
  117. unsigned int replay_repeat;
  118. unsigned long nr_run_events;
  119. unsigned long nr_sleep_events;
  120. unsigned long nr_wakeup_events;
  121. unsigned long nr_sleep_corrections;
  122. unsigned long nr_run_events_optimized;
  123. unsigned long targetless_wakeups;
  124. unsigned long multitarget_wakeups;
  125. unsigned long nr_runs;
  126. unsigned long nr_timestamps;
  127. unsigned long nr_unordered_timestamps;
  128. unsigned long nr_context_switch_bugs;
  129. unsigned long nr_events;
  130. unsigned long nr_lost_chunks;
  131. unsigned long nr_lost_events;
  132. u64 run_measurement_overhead;
  133. u64 sleep_measurement_overhead;
  134. u64 start_time;
  135. u64 cpu_usage;
  136. u64 runavg_cpu_usage;
  137. u64 parent_cpu_usage;
  138. u64 runavg_parent_cpu_usage;
  139. u64 sum_runtime;
  140. u64 sum_fluct;
  141. u64 run_avg;
  142. u64 all_runtime;
  143. u64 all_count;
  144. u64 cpu_last_switched[MAX_CPUS];
  145. struct rb_root atom_root, sorted_atom_root;
  146. struct list_head sort_list, cmp_pid;
  147. };
  148. static u64 get_nsecs(void)
  149. {
  150. struct timespec ts;
  151. clock_gettime(CLOCK_MONOTONIC, &ts);
  152. return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
  153. }
  154. static void burn_nsecs(struct perf_sched *sched, u64 nsecs)
  155. {
  156. u64 T0 = get_nsecs(), T1;
  157. do {
  158. T1 = get_nsecs();
  159. } while (T1 + sched->run_measurement_overhead < T0 + nsecs);
  160. }
  161. static void sleep_nsecs(u64 nsecs)
  162. {
  163. struct timespec ts;
  164. ts.tv_nsec = nsecs % 999999999;
  165. ts.tv_sec = nsecs / 999999999;
  166. nanosleep(&ts, NULL);
  167. }
  168. static void calibrate_run_measurement_overhead(struct perf_sched *sched)
  169. {
  170. u64 T0, T1, delta, min_delta = 1000000000ULL;
  171. int i;
  172. for (i = 0; i < 10; i++) {
  173. T0 = get_nsecs();
  174. burn_nsecs(sched, 0);
  175. T1 = get_nsecs();
  176. delta = T1-T0;
  177. min_delta = min(min_delta, delta);
  178. }
  179. sched->run_measurement_overhead = min_delta;
  180. printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
  181. }
  182. static void calibrate_sleep_measurement_overhead(struct perf_sched *sched)
  183. {
  184. u64 T0, T1, delta, min_delta = 1000000000ULL;
  185. int i;
  186. for (i = 0; i < 10; i++) {
  187. T0 = get_nsecs();
  188. sleep_nsecs(10000);
  189. T1 = get_nsecs();
  190. delta = T1-T0;
  191. min_delta = min(min_delta, delta);
  192. }
  193. min_delta -= 10000;
  194. sched->sleep_measurement_overhead = min_delta;
  195. printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
  196. }
  197. static struct sched_atom *
  198. get_new_event(struct task_desc *task, u64 timestamp)
  199. {
  200. struct sched_atom *event = zalloc(sizeof(*event));
  201. unsigned long idx = task->nr_events;
  202. size_t size;
  203. event->timestamp = timestamp;
  204. event->nr = idx;
  205. task->nr_events++;
  206. size = sizeof(struct sched_atom *) * task->nr_events;
  207. task->atoms = realloc(task->atoms, size);
  208. BUG_ON(!task->atoms);
  209. task->atoms[idx] = event;
  210. return event;
  211. }
  212. static struct sched_atom *last_event(struct task_desc *task)
  213. {
  214. if (!task->nr_events)
  215. return NULL;
  216. return task->atoms[task->nr_events - 1];
  217. }
  218. static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task,
  219. u64 timestamp, u64 duration)
  220. {
  221. struct sched_atom *event, *curr_event = last_event(task);
  222. /*
  223. * optimize an existing RUN event by merging this one
  224. * to it:
  225. */
  226. if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
  227. sched->nr_run_events_optimized++;
  228. curr_event->duration += duration;
  229. return;
  230. }
  231. event = get_new_event(task, timestamp);
  232. event->type = SCHED_EVENT_RUN;
  233. event->duration = duration;
  234. sched->nr_run_events++;
  235. }
  236. static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task,
  237. u64 timestamp, struct task_desc *wakee)
  238. {
  239. struct sched_atom *event, *wakee_event;
  240. event = get_new_event(task, timestamp);
  241. event->type = SCHED_EVENT_WAKEUP;
  242. event->wakee = wakee;
  243. wakee_event = last_event(wakee);
  244. if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
  245. sched->targetless_wakeups++;
  246. return;
  247. }
  248. if (wakee_event->wait_sem) {
  249. sched->multitarget_wakeups++;
  250. return;
  251. }
  252. wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
  253. sem_init(wakee_event->wait_sem, 0, 0);
  254. wakee_event->specific_wait = 1;
  255. event->wait_sem = wakee_event->wait_sem;
  256. sched->nr_wakeup_events++;
  257. }
  258. static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task,
  259. u64 timestamp, u64 task_state __maybe_unused)
  260. {
  261. struct sched_atom *event = get_new_event(task, timestamp);
  262. event->type = SCHED_EVENT_SLEEP;
  263. sched->nr_sleep_events++;
  264. }
  265. static struct task_desc *register_pid(struct perf_sched *sched,
  266. unsigned long pid, const char *comm)
  267. {
  268. struct task_desc *task;
  269. static int pid_max;
  270. if (sched->pid_to_task == NULL) {
  271. if (sysctl__read_int("kernel/pid_max", &pid_max) < 0)
  272. pid_max = MAX_PID;
  273. BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL);
  274. }
  275. if (pid >= (unsigned long)pid_max) {
  276. BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) *
  277. sizeof(struct task_desc *))) == NULL);
  278. while (pid >= (unsigned long)pid_max)
  279. sched->pid_to_task[pid_max++] = NULL;
  280. }
  281. task = sched->pid_to_task[pid];
  282. if (task)
  283. return task;
  284. task = zalloc(sizeof(*task));
  285. task->pid = pid;
  286. task->nr = sched->nr_tasks;
  287. strcpy(task->comm, comm);
  288. /*
  289. * every task starts in sleeping state - this gets ignored
  290. * if there's no wakeup pointing to this sleep state:
  291. */
  292. add_sched_event_sleep(sched, task, 0, 0);
  293. sched->pid_to_task[pid] = task;
  294. sched->nr_tasks++;
  295. sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *));
  296. BUG_ON(!sched->tasks);
  297. sched->tasks[task->nr] = task;
  298. if (verbose)
  299. printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm);
  300. return task;
  301. }
  302. static void print_task_traces(struct perf_sched *sched)
  303. {
  304. struct task_desc *task;
  305. unsigned long i;
  306. for (i = 0; i < sched->nr_tasks; i++) {
  307. task = sched->tasks[i];
  308. printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
  309. task->nr, task->comm, task->pid, task->nr_events);
  310. }
  311. }
  312. static void add_cross_task_wakeups(struct perf_sched *sched)
  313. {
  314. struct task_desc *task1, *task2;
  315. unsigned long i, j;
  316. for (i = 0; i < sched->nr_tasks; i++) {
  317. task1 = sched->tasks[i];
  318. j = i + 1;
  319. if (j == sched->nr_tasks)
  320. j = 0;
  321. task2 = sched->tasks[j];
  322. add_sched_event_wakeup(sched, task1, 0, task2);
  323. }
  324. }
  325. static void perf_sched__process_event(struct perf_sched *sched,
  326. struct sched_atom *atom)
  327. {
  328. int ret = 0;
  329. switch (atom->type) {
  330. case SCHED_EVENT_RUN:
  331. burn_nsecs(sched, atom->duration);
  332. break;
  333. case SCHED_EVENT_SLEEP:
  334. if (atom->wait_sem)
  335. ret = sem_wait(atom->wait_sem);
  336. BUG_ON(ret);
  337. break;
  338. case SCHED_EVENT_WAKEUP:
  339. if (atom->wait_sem)
  340. ret = sem_post(atom->wait_sem);
  341. BUG_ON(ret);
  342. break;
  343. case SCHED_EVENT_MIGRATION:
  344. break;
  345. default:
  346. BUG_ON(1);
  347. }
  348. }
  349. static u64 get_cpu_usage_nsec_parent(void)
  350. {
  351. struct rusage ru;
  352. u64 sum;
  353. int err;
  354. err = getrusage(RUSAGE_SELF, &ru);
  355. BUG_ON(err);
  356. sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
  357. sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;
  358. return sum;
  359. }
  360. static int self_open_counters(void)
  361. {
  362. struct perf_event_attr attr;
  363. char sbuf[STRERR_BUFSIZE];
  364. int fd;
  365. memset(&attr, 0, sizeof(attr));
  366. attr.type = PERF_TYPE_SOFTWARE;
  367. attr.config = PERF_COUNT_SW_TASK_CLOCK;
  368. fd = sys_perf_event_open(&attr, 0, -1, -1,
  369. perf_event_open_cloexec_flag());
  370. if (fd < 0)
  371. pr_err("Error: sys_perf_event_open() syscall returned "
  372. "with %d (%s)\n", fd,
  373. strerror_r(errno, sbuf, sizeof(sbuf)));
  374. return fd;
  375. }
  376. static u64 get_cpu_usage_nsec_self(int fd)
  377. {
  378. u64 runtime;
  379. int ret;
  380. ret = read(fd, &runtime, sizeof(runtime));
  381. BUG_ON(ret != sizeof(runtime));
  382. return runtime;
  383. }
  /* Heap-allocated argument bundle handed to thread_func(), which frees it. */
  struct sched_thread_parms {
      struct task_desc   *task;
      struct perf_sched  *sched;
      int                fd;      /* result of self_open_counters(); negative on failure */
  };
  389. static void *thread_func(void *ctx)
  390. {
  391. struct sched_thread_parms *parms = ctx;
  392. struct task_desc *this_task = parms->task;
  393. struct perf_sched *sched = parms->sched;
  394. u64 cpu_usage_0, cpu_usage_1;
  395. unsigned long i, ret;
  396. char comm2[22];
  397. int fd = parms->fd;
  398. zfree(&parms);
  399. sprintf(comm2, ":%s", this_task->comm);
  400. prctl(PR_SET_NAME, comm2);
  401. if (fd < 0)
  402. return NULL;
  403. again:
  404. ret = sem_post(&this_task->ready_for_work);
  405. BUG_ON(ret);
  406. ret = pthread_mutex_lock(&sched->start_work_mutex);
  407. BUG_ON(ret);
  408. ret = pthread_mutex_unlock(&sched->start_work_mutex);
  409. BUG_ON(ret);
  410. cpu_usage_0 = get_cpu_usage_nsec_self(fd);
  411. for (i = 0; i < this_task->nr_events; i++) {
  412. this_task->curr_event = i;
  413. perf_sched__process_event(sched, this_task->atoms[i]);
  414. }
  415. cpu_usage_1 = get_cpu_usage_nsec_self(fd);
  416. this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
  417. ret = sem_post(&this_task->work_done_sem);
  418. BUG_ON(ret);
  419. ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
  420. BUG_ON(ret);
  421. ret = pthread_mutex_unlock(&sched->work_done_wait_mutex);
  422. BUG_ON(ret);
  423. goto again;
  424. }
  425. static void create_tasks(struct perf_sched *sched)
  426. {
  427. struct task_desc *task;
  428. pthread_attr_t attr;
  429. unsigned long i;
  430. int err;
  431. err = pthread_attr_init(&attr);
  432. BUG_ON(err);
  433. err = pthread_attr_setstacksize(&attr,
  434. (size_t) max(16 * 1024, PTHREAD_STACK_MIN));
  435. BUG_ON(err);
  436. err = pthread_mutex_lock(&sched->start_work_mutex);
  437. BUG_ON(err);
  438. err = pthread_mutex_lock(&sched->work_done_wait_mutex);
  439. BUG_ON(err);
  440. for (i = 0; i < sched->nr_tasks; i++) {
  441. struct sched_thread_parms *parms = malloc(sizeof(*parms));
  442. BUG_ON(parms == NULL);
  443. parms->task = task = sched->tasks[i];
  444. parms->sched = sched;
  445. parms->fd = self_open_counters();
  446. sem_init(&task->sleep_sem, 0, 0);
  447. sem_init(&task->ready_for_work, 0, 0);
  448. sem_init(&task->work_done_sem, 0, 0);
  449. task->curr_event = 0;
  450. err = pthread_create(&task->thread, &attr, thread_func, parms);
  451. BUG_ON(err);
  452. }
  453. }
  454. static void wait_for_tasks(struct perf_sched *sched)
  455. {
  456. u64 cpu_usage_0, cpu_usage_1;
  457. struct task_desc *task;
  458. unsigned long i, ret;
  459. sched->start_time = get_nsecs();
  460. sched->cpu_usage = 0;
  461. pthread_mutex_unlock(&sched->work_done_wait_mutex);
  462. for (i = 0; i < sched->nr_tasks; i++) {
  463. task = sched->tasks[i];
  464. ret = sem_wait(&task->ready_for_work);
  465. BUG_ON(ret);
  466. sem_init(&task->ready_for_work, 0, 0);
  467. }
  468. ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
  469. BUG_ON(ret);
  470. cpu_usage_0 = get_cpu_usage_nsec_parent();
  471. pthread_mutex_unlock(&sched->start_work_mutex);
  472. for (i = 0; i < sched->nr_tasks; i++) {
  473. task = sched->tasks[i];
  474. ret = sem_wait(&task->work_done_sem);
  475. BUG_ON(ret);
  476. sem_init(&task->work_done_sem, 0, 0);
  477. sched->cpu_usage += task->cpu_usage;
  478. task->cpu_usage = 0;
  479. }
  480. cpu_usage_1 = get_cpu_usage_nsec_parent();
  481. if (!sched->runavg_cpu_usage)
  482. sched->runavg_cpu_usage = sched->cpu_usage;
  483. sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10;
  484. sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
  485. if (!sched->runavg_parent_cpu_usage)
  486. sched->runavg_parent_cpu_usage = sched->parent_cpu_usage;
  487. sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 +
  488. sched->parent_cpu_usage)/10;
  489. ret = pthread_mutex_lock(&sched->start_work_mutex);
  490. BUG_ON(ret);
  491. for (i = 0; i < sched->nr_tasks; i++) {
  492. task = sched->tasks[i];
  493. sem_init(&task->sleep_sem, 0, 0);
  494. task->curr_event = 0;
  495. }
  496. }
  497. static void run_one_test(struct perf_sched *sched)
  498. {
  499. u64 T0, T1, delta, avg_delta, fluct;
  500. T0 = get_nsecs();
  501. wait_for_tasks(sched);
  502. T1 = get_nsecs();
  503. delta = T1 - T0;
  504. sched->sum_runtime += delta;
  505. sched->nr_runs++;
  506. avg_delta = sched->sum_runtime / sched->nr_runs;
  507. if (delta < avg_delta)
  508. fluct = avg_delta - delta;
  509. else
  510. fluct = delta - avg_delta;
  511. sched->sum_fluct += fluct;
  512. if (!sched->run_avg)
  513. sched->run_avg = delta;
  514. sched->run_avg = (sched->run_avg * 9 + delta) / 10;
  515. printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0);
  516. printf("ravg: %0.2f, ", (double)sched->run_avg / 1e6);
  517. printf("cpu: %0.2f / %0.2f",
  518. (double)sched->cpu_usage / 1e6, (double)sched->runavg_cpu_usage / 1e6);
  519. #if 0
  520. /*
  521. * rusage statistics done by the parent, these are less
  522. * accurate than the sched->sum_exec_runtime based statistics:
  523. */
  524. printf(" [%0.2f / %0.2f]",
  525. (double)sched->parent_cpu_usage/1e6,
  526. (double)sched->runavg_parent_cpu_usage/1e6);
  527. #endif
  528. printf("\n");
  529. if (sched->nr_sleep_corrections)
  530. printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections);
  531. sched->nr_sleep_corrections = 0;
  532. }
  533. static void test_calibrations(struct perf_sched *sched)
  534. {
  535. u64 T0, T1;
  536. T0 = get_nsecs();
  537. burn_nsecs(sched, 1e6);
  538. T1 = get_nsecs();
  539. printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);
  540. T0 = get_nsecs();
  541. sleep_nsecs(1e6);
  542. T1 = get_nsecs();
  543. printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
  544. }
  545. static int
  546. replay_wakeup_event(struct perf_sched *sched,
  547. struct perf_evsel *evsel, struct perf_sample *sample,
  548. struct machine *machine __maybe_unused)
  549. {
  550. const char *comm = perf_evsel__strval(evsel, sample, "comm");
  551. const u32 pid = perf_evsel__intval(evsel, sample, "pid");
  552. struct task_desc *waker, *wakee;
  553. if (verbose) {
  554. printf("sched_wakeup event %p\n", evsel);
  555. printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid);
  556. }
  557. waker = register_pid(sched, sample->tid, "<unknown>");
  558. wakee = register_pid(sched, pid, comm);
  559. add_sched_event_wakeup(sched, waker, sample->time, wakee);
  560. return 0;
  561. }
  562. static int replay_switch_event(struct perf_sched *sched,
  563. struct perf_evsel *evsel,
  564. struct perf_sample *sample,
  565. struct machine *machine __maybe_unused)
  566. {
  567. const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"),
  568. *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
  569. const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
  570. next_pid = perf_evsel__intval(evsel, sample, "next_pid");
  571. const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
  572. struct task_desc *prev, __maybe_unused *next;
  573. u64 timestamp0, timestamp = sample->time;
  574. int cpu = sample->cpu;
  575. s64 delta;
  576. if (verbose)
  577. printf("sched_switch event %p\n", evsel);
  578. if (cpu >= MAX_CPUS || cpu < 0)
  579. return 0;
  580. timestamp0 = sched->cpu_last_switched[cpu];
  581. if (timestamp0)
  582. delta = timestamp - timestamp0;
  583. else
  584. delta = 0;
  585. if (delta < 0) {
  586. pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
  587. return -1;
  588. }
  589. pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
  590. prev_comm, prev_pid, next_comm, next_pid, delta);
  591. prev = register_pid(sched, prev_pid, prev_comm);
  592. next = register_pid(sched, next_pid, next_comm);
  593. sched->cpu_last_switched[cpu] = timestamp;
  594. add_sched_event_run(sched, prev, timestamp, delta);
  595. add_sched_event_sleep(sched, prev, timestamp, prev_state);
  596. return 0;
  597. }
  598. static int replay_fork_event(struct perf_sched *sched,
  599. union perf_event *event,
  600. struct machine *machine)
  601. {
  602. struct thread *child, *parent;
  603. child = machine__findnew_thread(machine, event->fork.pid,
  604. event->fork.tid);
  605. parent = machine__findnew_thread(machine, event->fork.ppid,
  606. event->fork.ptid);
  607. if (child == NULL || parent == NULL) {
  608. pr_debug("thread does not exist on fork event: child %p, parent %p\n",
  609. child, parent);
  610. return 0;
  611. }
  612. if (verbose) {
  613. printf("fork event\n");
  614. printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid);
  615. printf("... child: %s/%d\n", thread__comm_str(child), child->tid);
  616. }
  617. register_pid(sched, parent->tid, thread__comm_str(parent));
  618. register_pid(sched, child->tid, thread__comm_str(child));
  619. return 0;
  620. }
  621. struct sort_dimension {
  622. const char *name;
  623. sort_fn_t cmp;
  624. struct list_head list;
  625. };
  626. static int
  627. thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
  628. {
  629. struct sort_dimension *sort;
  630. int ret = 0;
  631. BUG_ON(list_empty(list));
  632. list_for_each_entry(sort, list, list) {
  633. ret = sort->cmp(l, r);
  634. if (ret)
  635. return ret;
  636. }
  637. return ret;
  638. }
  639. static struct work_atoms *
  640. thread_atoms_search(struct rb_root *root, struct thread *thread,
  641. struct list_head *sort_list)
  642. {
  643. struct rb_node *node = root->rb_node;
  644. struct work_atoms key = { .thread = thread };
  645. while (node) {
  646. struct work_atoms *atoms;
  647. int cmp;
  648. atoms = container_of(node, struct work_atoms, node);
  649. cmp = thread_lat_cmp(sort_list, &key, atoms);
  650. if (cmp > 0)
  651. node = node->rb_left;
  652. else if (cmp < 0)
  653. node = node->rb_right;
  654. else {
  655. BUG_ON(thread != atoms->thread);
  656. return atoms;
  657. }
  658. }
  659. return NULL;
  660. }
  661. static void
  662. __thread_latency_insert(struct rb_root *root, struct work_atoms *data,
  663. struct list_head *sort_list)
  664. {
  665. struct rb_node **new = &(root->rb_node), *parent = NULL;
  666. while (*new) {
  667. struct work_atoms *this;
  668. int cmp;
  669. this = container_of(*new, struct work_atoms, node);
  670. parent = *new;
  671. cmp = thread_lat_cmp(sort_list, data, this);
  672. if (cmp > 0)
  673. new = &((*new)->rb_left);
  674. else
  675. new = &((*new)->rb_right);
  676. }
  677. rb_link_node(&data->node, parent, new);
  678. rb_insert_color(&data->node, root);
  679. }
  680. static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
  681. {
  682. struct work_atoms *atoms = zalloc(sizeof(*atoms));
  683. if (!atoms) {
  684. pr_err("No memory at %s\n", __func__);
  685. return -1;
  686. }
  687. atoms->thread = thread__get(thread);
  688. INIT_LIST_HEAD(&atoms->work_list);
  689. __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
  690. return 0;
  691. }
  692. static char sched_out_state(u64 prev_state)
  693. {
  694. const char *str = TASK_STATE_TO_CHAR_STR;
  695. return str[prev_state];
  696. }
  697. static int
  698. add_sched_out_event(struct work_atoms *atoms,
  699. char run_state,
  700. u64 timestamp)
  701. {
  702. struct work_atom *atom = zalloc(sizeof(*atom));
  703. if (!atom) {
  704. pr_err("Non memory at %s", __func__);
  705. return -1;
  706. }
  707. atom->sched_out_time = timestamp;
  708. if (run_state == 'R') {
  709. atom->state = THREAD_WAIT_CPU;
  710. atom->wake_up_time = atom->sched_out_time;
  711. }
  712. list_add_tail(&atom->list, &atoms->work_list);
  713. return 0;
  714. }
  715. static void
  716. add_runtime_event(struct work_atoms *atoms, u64 delta,
  717. u64 timestamp __maybe_unused)
  718. {
  719. struct work_atom *atom;
  720. BUG_ON(list_empty(&atoms->work_list));
  721. atom = list_entry(atoms->work_list.prev, struct work_atom, list);
  722. atom->runtime += delta;
  723. atoms->total_runtime += delta;
  724. }
  725. static void
  726. add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
  727. {
  728. struct work_atom *atom;
  729. u64 delta;
  730. if (list_empty(&atoms->work_list))
  731. return;
  732. atom = list_entry(atoms->work_list.prev, struct work_atom, list);
  733. if (atom->state != THREAD_WAIT_CPU)
  734. return;
  735. if (timestamp < atom->wake_up_time) {
  736. atom->state = THREAD_IGNORE;
  737. return;
  738. }
  739. atom->state = THREAD_SCHED_IN;
  740. atom->sched_in_time = timestamp;
  741. delta = atom->sched_in_time - atom->wake_up_time;
  742. atoms->total_lat += delta;
  743. if (delta > atoms->max_lat) {
  744. atoms->max_lat = delta;
  745. atoms->max_lat_at = timestamp;
  746. }
  747. atoms->nb_atoms++;
  748. }
  749. static int latency_switch_event(struct perf_sched *sched,
  750. struct perf_evsel *evsel,
  751. struct perf_sample *sample,
  752. struct machine *machine)
  753. {
  754. const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
  755. next_pid = perf_evsel__intval(evsel, sample, "next_pid");
  756. const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
  757. struct work_atoms *out_events, *in_events;
  758. struct thread *sched_out, *sched_in;
  759. u64 timestamp0, timestamp = sample->time;
  760. int cpu = sample->cpu;
  761. s64 delta;
  762. BUG_ON(cpu >= MAX_CPUS || cpu < 0);
  763. timestamp0 = sched->cpu_last_switched[cpu];
  764. sched->cpu_last_switched[cpu] = timestamp;
  765. if (timestamp0)
  766. delta = timestamp - timestamp0;
  767. else
  768. delta = 0;
  769. if (delta < 0) {
  770. pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
  771. return -1;
  772. }
  773. sched_out = machine__findnew_thread(machine, -1, prev_pid);
  774. sched_in = machine__findnew_thread(machine, -1, next_pid);
  775. out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
  776. if (!out_events) {
  777. if (thread_atoms_insert(sched, sched_out))
  778. return -1;
  779. out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
  780. if (!out_events) {
  781. pr_err("out-event: Internal tree error");
  782. return -1;
  783. }
  784. }
  785. if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
  786. return -1;
  787. in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
  788. if (!in_events) {
  789. if (thread_atoms_insert(sched, sched_in))
  790. return -1;
  791. in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
  792. if (!in_events) {
  793. pr_err("in-event: Internal tree error");
  794. return -1;
  795. }
  796. /*
  797. * Take came in we have not heard about yet,
  798. * add in an initial atom in runnable state:
  799. */
  800. if (add_sched_out_event(in_events, 'R', timestamp))
  801. return -1;
  802. }
  803. add_sched_in_event(in_events, timestamp);
  804. return 0;
  805. }
  806. static int latency_runtime_event(struct perf_sched *sched,
  807. struct perf_evsel *evsel,
  808. struct perf_sample *sample,
  809. struct machine *machine)
  810. {
  811. const u32 pid = perf_evsel__intval(evsel, sample, "pid");
  812. const u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
  813. struct thread *thread = machine__findnew_thread(machine, -1, pid);
  814. struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
  815. u64 timestamp = sample->time;
  816. int cpu = sample->cpu;
  817. BUG_ON(cpu >= MAX_CPUS || cpu < 0);
  818. if (!atoms) {
  819. if (thread_atoms_insert(sched, thread))
  820. return -1;
  821. atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
  822. if (!atoms) {
  823. pr_err("in-event: Internal tree error");
  824. return -1;
  825. }
  826. if (add_sched_out_event(atoms, 'R', timestamp))
  827. return -1;
  828. }
  829. add_runtime_event(atoms, runtime, timestamp);
  830. return 0;
  831. }
  832. static int latency_wakeup_event(struct perf_sched *sched,
  833. struct perf_evsel *evsel,
  834. struct perf_sample *sample,
  835. struct machine *machine)
  836. {
  837. const u32 pid = perf_evsel__intval(evsel, sample, "pid");
  838. struct work_atoms *atoms;
  839. struct work_atom *atom;
  840. struct thread *wakee;
  841. u64 timestamp = sample->time;
  842. wakee = machine__findnew_thread(machine, -1, pid);
  843. atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
  844. if (!atoms) {
  845. if (thread_atoms_insert(sched, wakee))
  846. return -1;
  847. atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
  848. if (!atoms) {
  849. pr_err("wakeup-event: Internal tree error");
  850. return -1;
  851. }
  852. if (add_sched_out_event(atoms, 'S', timestamp))
  853. return -1;
  854. }
  855. BUG_ON(list_empty(&atoms->work_list));
  856. atom = list_entry(atoms->work_list.prev, struct work_atom, list);
  857. /*
  858. * As we do not guarantee the wakeup event happens when
  859. * task is out of run queue, also may happen when task is
  860. * on run queue and wakeup only change ->state to TASK_RUNNING,
  861. * then we should not set the ->wake_up_time when wake up a
  862. * task which is on run queue.
  863. *
  864. * You WILL be missing events if you've recorded only
  865. * one CPU, or are only looking at only one, so don't
  866. * skip in this case.
  867. */
  868. if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
  869. return 0;
  870. sched->nr_timestamps++;
  871. if (atom->sched_out_time > timestamp) {
  872. sched->nr_unordered_timestamps++;
  873. return 0;
  874. }
  875. atom->state = THREAD_WAIT_CPU;
  876. atom->wake_up_time = timestamp;
  877. return 0;
  878. }
  879. static int latency_migrate_task_event(struct perf_sched *sched,
  880. struct perf_evsel *evsel,
  881. struct perf_sample *sample,
  882. struct machine *machine)
  883. {
  884. const u32 pid = perf_evsel__intval(evsel, sample, "pid");
  885. u64 timestamp = sample->time;
  886. struct work_atoms *atoms;
  887. struct work_atom *atom;
  888. struct thread *migrant;
  889. /*
  890. * Only need to worry about migration when profiling one CPU.
  891. */
  892. if (sched->profile_cpu == -1)
  893. return 0;
  894. migrant = machine__findnew_thread(machine, -1, pid);
  895. atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
  896. if (!atoms) {
  897. if (thread_atoms_insert(sched, migrant))
  898. return -1;
  899. register_pid(sched, migrant->tid, thread__comm_str(migrant));
  900. atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
  901. if (!atoms) {
  902. pr_err("migration-event: Internal tree error");
  903. return -1;
  904. }
  905. if (add_sched_out_event(atoms, 'R', timestamp))
  906. return -1;
  907. }
  908. BUG_ON(list_empty(&atoms->work_list));
  909. atom = list_entry(atoms->work_list.prev, struct work_atom, list);
  910. atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
  911. sched->nr_timestamps++;
  912. if (atom->sched_out_time > timestamp)
  913. sched->nr_unordered_timestamps++;
  914. return 0;
  915. }
  916. static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
  917. {
  918. int i;
  919. int ret;
  920. u64 avg;
  921. if (!work_list->nb_atoms)
  922. return;
  923. /*
  924. * Ignore idle threads:
  925. */
  926. if (!strcmp(thread__comm_str(work_list->thread), "swapper"))
  927. return;
  928. sched->all_runtime += work_list->total_runtime;
  929. sched->all_count += work_list->nb_atoms;
  930. ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
  931. for (i = 0; i < 24 - ret; i++)
  932. printf(" ");
  933. avg = work_list->total_lat / work_list->nb_atoms;
  934. printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n",
  935. (double)work_list->total_runtime / 1e6,
  936. work_list->nb_atoms, (double)avg / 1e6,
  937. (double)work_list->max_lat / 1e6,
  938. (double)work_list->max_lat_at / 1e9);
  939. }
  940. static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
  941. {
  942. if (l->thread->tid < r->thread->tid)
  943. return -1;
  944. if (l->thread->tid > r->thread->tid)
  945. return 1;
  946. return 0;
  947. }
  948. static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
  949. {
  950. u64 avgl, avgr;
  951. if (!l->nb_atoms)
  952. return -1;
  953. if (!r->nb_atoms)
  954. return 1;
  955. avgl = l->total_lat / l->nb_atoms;
  956. avgr = r->total_lat / r->nb_atoms;
  957. if (avgl < avgr)
  958. return -1;
  959. if (avgl > avgr)
  960. return 1;
  961. return 0;
  962. }
  963. static int max_cmp(struct work_atoms *l, struct work_atoms *r)
  964. {
  965. if (l->max_lat < r->max_lat)
  966. return -1;
  967. if (l->max_lat > r->max_lat)
  968. return 1;
  969. return 0;
  970. }
  971. static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
  972. {
  973. if (l->nb_atoms < r->nb_atoms)
  974. return -1;
  975. if (l->nb_atoms > r->nb_atoms)
  976. return 1;
  977. return 0;
  978. }
  979. static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
  980. {
  981. if (l->total_runtime < r->total_runtime)
  982. return -1;
  983. if (l->total_runtime > r->total_runtime)
  984. return 1;
  985. return 0;
  986. }
  987. static int sort_dimension__add(const char *tok, struct list_head *list)
  988. {
  989. size_t i;
  990. static struct sort_dimension avg_sort_dimension = {
  991. .name = "avg",
  992. .cmp = avg_cmp,
  993. };
  994. static struct sort_dimension max_sort_dimension = {
  995. .name = "max",
  996. .cmp = max_cmp,
  997. };
  998. static struct sort_dimension pid_sort_dimension = {
  999. .name = "pid",
  1000. .cmp = pid_cmp,
  1001. };
  1002. static struct sort_dimension runtime_sort_dimension = {
  1003. .name = "runtime",
  1004. .cmp = runtime_cmp,
  1005. };
  1006. static struct sort_dimension switch_sort_dimension = {
  1007. .name = "switch",
  1008. .cmp = switch_cmp,
  1009. };
  1010. struct sort_dimension *available_sorts[] = {
  1011. &pid_sort_dimension,
  1012. &avg_sort_dimension,
  1013. &max_sort_dimension,
  1014. &switch_sort_dimension,
  1015. &runtime_sort_dimension,
  1016. };
  1017. for (i = 0; i < ARRAY_SIZE(available_sorts); i++) {
  1018. if (!strcmp(available_sorts[i]->name, tok)) {
  1019. list_add_tail(&available_sorts[i]->list, list);
  1020. return 0;
  1021. }
  1022. }
  1023. return -1;
  1024. }
  1025. static void perf_sched__sort_lat(struct perf_sched *sched)
  1026. {
  1027. struct rb_node *node;
  1028. for (;;) {
  1029. struct work_atoms *data;
  1030. node = rb_first(&sched->atom_root);
  1031. if (!node)
  1032. break;
  1033. rb_erase(node, &sched->atom_root);
  1034. data = rb_entry(node, struct work_atoms, node);
  1035. __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
  1036. }
  1037. }
  1038. static int process_sched_wakeup_event(struct perf_tool *tool,
  1039. struct perf_evsel *evsel,
  1040. struct perf_sample *sample,
  1041. struct machine *machine)
  1042. {
  1043. struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
  1044. if (sched->tp_handler->wakeup_event)
  1045. return sched->tp_handler->wakeup_event(sched, evsel, sample, machine);
  1046. return 0;
  1047. }
  1048. static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
  1049. struct perf_sample *sample, struct machine *machine)
  1050. {
  1051. const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
  1052. struct thread *sched_in;
  1053. int new_shortname;
  1054. u64 timestamp0, timestamp = sample->time;
  1055. s64 delta;
  1056. int cpu, this_cpu = sample->cpu;
  1057. BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
  1058. if (this_cpu > sched->max_cpu)
  1059. sched->max_cpu = this_cpu;
  1060. timestamp0 = sched->cpu_last_switched[this_cpu];
  1061. sched->cpu_last_switched[this_cpu] = timestamp;
  1062. if (timestamp0)
  1063. delta = timestamp - timestamp0;
  1064. else
  1065. delta = 0;
  1066. if (delta < 0) {
  1067. pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
  1068. return -1;
  1069. }
  1070. sched_in = machine__findnew_thread(machine, -1, next_pid);
  1071. sched->curr_thread[this_cpu] = sched_in;
  1072. printf(" ");
  1073. new_shortname = 0;
  1074. if (!sched_in->shortname[0]) {
  1075. if (!strcmp(thread__comm_str(sched_in), "swapper")) {
  1076. /*
  1077. * Don't allocate a letter-number for swapper:0
  1078. * as a shortname. Instead, we use '.' for it.
  1079. */
  1080. sched_in->shortname[0] = '.';
  1081. sched_in->shortname[1] = ' ';
  1082. } else {
  1083. sched_in->shortname[0] = sched->next_shortname1;
  1084. sched_in->shortname[1] = sched->next_shortname2;
  1085. if (sched->next_shortname1 < 'Z') {
  1086. sched->next_shortname1++;
  1087. } else {
  1088. sched->next_shortname1 = 'A';
  1089. if (sched->next_shortname2 < '9')
  1090. sched->next_shortname2++;
  1091. else
  1092. sched->next_shortname2 = '0';
  1093. }
  1094. }
  1095. new_shortname = 1;
  1096. }
  1097. for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
  1098. if (cpu != this_cpu)
  1099. printf(" ");
  1100. else
  1101. printf("*");
  1102. if (sched->curr_thread[cpu])
  1103. printf("%2s ", sched->curr_thread[cpu]->shortname);
  1104. else
  1105. printf(" ");
  1106. }
  1107. printf(" %12.6f secs ", (double)timestamp/1e9);
  1108. if (new_shortname) {
  1109. printf("%s => %s:%d\n",
  1110. sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
  1111. } else {
  1112. printf("\n");
  1113. }
  1114. return 0;
  1115. }
  1116. static int process_sched_switch_event(struct perf_tool *tool,
  1117. struct perf_evsel *evsel,
  1118. struct perf_sample *sample,
  1119. struct machine *machine)
  1120. {
  1121. struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
  1122. int this_cpu = sample->cpu, err = 0;
  1123. u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
  1124. next_pid = perf_evsel__intval(evsel, sample, "next_pid");
  1125. if (sched->curr_pid[this_cpu] != (u32)-1) {
  1126. /*
  1127. * Are we trying to switch away a PID that is
  1128. * not current?
  1129. */
  1130. if (sched->curr_pid[this_cpu] != prev_pid)
  1131. sched->nr_context_switch_bugs++;
  1132. }
  1133. if (sched->tp_handler->switch_event)
  1134. err = sched->tp_handler->switch_event(sched, evsel, sample, machine);
  1135. sched->curr_pid[this_cpu] = next_pid;
  1136. return err;
  1137. }
  1138. static int process_sched_runtime_event(struct perf_tool *tool,
  1139. struct perf_evsel *evsel,
  1140. struct perf_sample *sample,
  1141. struct machine *machine)
  1142. {
  1143. struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
  1144. if (sched->tp_handler->runtime_event)
  1145. return sched->tp_handler->runtime_event(sched, evsel, sample, machine);
  1146. return 0;
  1147. }
  1148. static int perf_sched__process_fork_event(struct perf_tool *tool,
  1149. union perf_event *event,
  1150. struct perf_sample *sample,
  1151. struct machine *machine)
  1152. {
  1153. struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
  1154. /* run the fork event through the perf machineruy */
  1155. perf_event__process_fork(tool, event, sample, machine);
  1156. /* and then run additional processing needed for this command */
  1157. if (sched->tp_handler->fork_event)
  1158. return sched->tp_handler->fork_event(sched, event, machine);
  1159. return 0;
  1160. }
  1161. static int process_sched_migrate_task_event(struct perf_tool *tool,
  1162. struct perf_evsel *evsel,
  1163. struct perf_sample *sample,
  1164. struct machine *machine)
  1165. {
  1166. struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
  1167. if (sched->tp_handler->migrate_task_event)
  1168. return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine);
  1169. return 0;
  1170. }
  1171. typedef int (*tracepoint_handler)(struct perf_tool *tool,
  1172. struct perf_evsel *evsel,
  1173. struct perf_sample *sample,
  1174. struct machine *machine);
  1175. static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused,
  1176. union perf_event *event __maybe_unused,
  1177. struct perf_sample *sample,
  1178. struct perf_evsel *evsel,
  1179. struct machine *machine)
  1180. {
  1181. int err = 0;
  1182. if (evsel->handler != NULL) {
  1183. tracepoint_handler f = evsel->handler;
  1184. err = f(tool, evsel, sample, machine);
  1185. }
  1186. return err;
  1187. }
  1188. static int perf_sched__read_events(struct perf_sched *sched)
  1189. {
  1190. const struct perf_evsel_str_handler handlers[] = {
  1191. { "sched:sched_switch", process_sched_switch_event, },
  1192. { "sched:sched_stat_runtime", process_sched_runtime_event, },
  1193. { "sched:sched_wakeup", process_sched_wakeup_event, },
  1194. { "sched:sched_wakeup_new", process_sched_wakeup_event, },
  1195. { "sched:sched_migrate_task", process_sched_migrate_task_event, },
  1196. };
  1197. struct perf_session *session;
  1198. struct perf_data_file file = {
  1199. .path = input_name,
  1200. .mode = PERF_DATA_MODE_READ,
  1201. };
  1202. int rc = -1;
  1203. session = perf_session__new(&file, false, &sched->tool);
  1204. if (session == NULL) {
  1205. pr_debug("No Memory for session\n");
  1206. return -1;
  1207. }
  1208. symbol__init(&session->header.env);
  1209. if (perf_session__set_tracepoints_handlers(session, handlers))
  1210. goto out_delete;
  1211. if (perf_session__has_traces(session, "record -R")) {
  1212. int err = perf_session__process_events(session);
  1213. if (err) {
  1214. pr_err("Failed to process events, error %d", err);
  1215. goto out_delete;
  1216. }
  1217. sched->nr_events = session->evlist->stats.nr_events[0];
  1218. sched->nr_lost_events = session->evlist->stats.total_lost;
  1219. sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
  1220. }
  1221. rc = 0;
  1222. out_delete:
  1223. perf_session__delete(session);
  1224. return rc;
  1225. }
  1226. static void print_bad_events(struct perf_sched *sched)
  1227. {
  1228. if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
  1229. printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
  1230. (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0,
  1231. sched->nr_unordered_timestamps, sched->nr_timestamps);
  1232. }
  1233. if (sched->nr_lost_events && sched->nr_events) {
  1234. printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
  1235. (double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
  1236. sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
  1237. }
  1238. if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
  1239. printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
  1240. (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
  1241. sched->nr_context_switch_bugs, sched->nr_timestamps);
  1242. if (sched->nr_lost_events)
  1243. printf(" (due to lost events?)");
  1244. printf("\n");
  1245. }
  1246. }
  1247. static int perf_sched__lat(struct perf_sched *sched)
  1248. {
  1249. struct rb_node *next;
  1250. setup_pager();
  1251. if (perf_sched__read_events(sched))
  1252. return -1;
  1253. perf_sched__sort_lat(sched);
  1254. printf("\n -----------------------------------------------------------------------------------------------------------------\n");
  1255. printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
  1256. printf(" -----------------------------------------------------------------------------------------------------------------\n");
  1257. next = rb_first(&sched->sorted_atom_root);
  1258. while (next) {
  1259. struct work_atoms *work_list;
  1260. work_list = rb_entry(next, struct work_atoms, node);
  1261. output_lat_thread(sched, work_list);
  1262. next = rb_next(next);
  1263. thread__zput(work_list->thread);
  1264. }
  1265. printf(" -----------------------------------------------------------------------------------------------------------------\n");
  1266. printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n",
  1267. (double)sched->all_runtime / 1e6, sched->all_count);
  1268. printf(" ---------------------------------------------------\n");
  1269. print_bad_events(sched);
  1270. printf("\n");
  1271. return 0;
  1272. }
  1273. static int perf_sched__map(struct perf_sched *sched)
  1274. {
  1275. sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
  1276. setup_pager();
  1277. if (perf_sched__read_events(sched))
  1278. return -1;
  1279. print_bad_events(sched);
  1280. return 0;
  1281. }
  1282. static int perf_sched__replay(struct perf_sched *sched)
  1283. {
  1284. unsigned long i;
  1285. calibrate_run_measurement_overhead(sched);
  1286. calibrate_sleep_measurement_overhead(sched);
  1287. test_calibrations(sched);
  1288. if (perf_sched__read_events(sched))
  1289. return -1;
  1290. printf("nr_run_events: %ld\n", sched->nr_run_events);
  1291. printf("nr_sleep_events: %ld\n", sched->nr_sleep_events);
  1292. printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events);
  1293. if (sched->targetless_wakeups)
  1294. printf("target-less wakeups: %ld\n", sched->targetless_wakeups);
  1295. if (sched->multitarget_wakeups)
  1296. printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups);
  1297. if (sched->nr_run_events_optimized)
  1298. printf("run atoms optimized: %ld\n",
  1299. sched->nr_run_events_optimized);
  1300. print_task_traces(sched);
  1301. add_cross_task_wakeups(sched);
  1302. create_tasks(sched);
  1303. printf("------------------------------------------------------------\n");
  1304. for (i = 0; i < sched->replay_repeat; i++)
  1305. run_one_test(sched);
  1306. return 0;
  1307. }
  1308. static void setup_sorting(struct perf_sched *sched, const struct option *options,
  1309. const char * const usage_msg[])
  1310. {
  1311. char *tmp, *tok, *str = strdup(sched->sort_order);
  1312. for (tok = strtok_r(str, ", ", &tmp);
  1313. tok; tok = strtok_r(NULL, ", ", &tmp)) {
  1314. if (sort_dimension__add(tok, &sched->sort_list) < 0) {
  1315. error("Unknown --sort key: `%s'", tok);
  1316. usage_with_options(usage_msg, options);
  1317. }
  1318. }
  1319. free(str);
  1320. sort_dimension__add("pid", &sched->cmp_pid);
  1321. }
  /*
   * "record" subcommand: build an argv consisting of the fixed record
   * arguments below followed by the caller's arguments (argv[1..argc-1])
   * and hand it to cmd_record().
   *
   * Returns cmd_record()'s result, or -ENOMEM when the argument vector
   * cannot be allocated.
   */
  static int __cmd_record(int argc, const char **argv)
  {
      unsigned int rec_argc, i, j;
      const char **rec_argv;
      const char * const record_args[] = {
          "record",
          "-a",
          "-R",
          "-m", "1024",
          "-c", "1",
          "-e", "sched:sched_switch",
          "-e", "sched:sched_stat_wait",
          "-e", "sched:sched_stat_sleep",
          "-e", "sched:sched_stat_iowait",
          "-e", "sched:sched_stat_runtime",
          "-e", "sched:sched_process_fork",
          "-e", "sched:sched_wakeup",
          "-e", "sched:sched_wakeup_new",
          "-e", "sched:sched_migrate_task",
      };

      rec_argc = ARRAY_SIZE(record_args) + argc - 1;
      rec_argv = calloc(rec_argc + 1, sizeof(char *));

      if (rec_argv == NULL)
          return -ENOMEM;

      for (i = 0; i < ARRAY_SIZE(record_args); i++) {
          rec_argv[i] = strdup(record_args[i]);
          if (rec_argv[i] == NULL) {
              /* A NULL here would cut the argv short: undo and bail out. */
              while (i--)
                  free((void *)rec_argv[i]);
              free(rec_argv);
              return -ENOMEM;
          }
      }

      /* The caller's own arguments are passed through without copying. */
      for (j = 1; j < (unsigned int)argc; j++, i++)
          rec_argv[i] = argv[j];

      BUG_ON(i != rec_argc);

      return cmd_record(i, rec_argv, NULL);
  }
  /*
   * Entry point of the "sched" command.
   *
   * Parses the top-level options, then dispatches on the subcommand in
   * argv[0]: "script" is forwarded to cmd_script(), "rec*" to
   * __cmd_record(), "lat*" to perf_sched__lat(), "map" to perf_sched__map()
   * and "rep*" to perf_sched__replay().  "script" and "map" are matched
   * exactly; the others are matched on their first three characters.
   * Anything else prints the usage text.
   *
   * Returns the selected subcommand's result; the final fall-through
   * returns 0.
   */
  int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
  {
      const char default_sort_order[] = "avg, max, switch, runtime";
      /* Per-invocation state, including the tool callbacks used while reading events. */
      struct perf_sched sched = {
          .tool = {
              .sample = perf_sched__process_tracepoint_sample,
              .comm = perf_event__process_comm,
              .lost = perf_event__process_lost,
              .fork = perf_sched__process_fork_event,
              .ordered_events = true,
          },
          .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid),
          .sort_list = LIST_HEAD_INIT(sched.sort_list),
          .start_work_mutex = PTHREAD_MUTEX_INITIALIZER,
          .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
          .sort_order = default_sort_order,
          .replay_repeat = 10,
          /* -1 is the value the latency handlers test for "no single CPU selected". */
          .profile_cpu = -1,
          /* First shortname handed out by the map handler is "A0". */
          .next_shortname1 = 'A',
          .next_shortname2 = '0',
      };
      /* Options accepted after "latency". */
      const struct option latency_options[] = {
          OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
                     "sort by key(s): runtime, switch, avg, max"),
          OPT_INCR('v', "verbose", &verbose,
                   "be more verbose (show symbol address, etc)"),
          OPT_INTEGER('C', "CPU", &sched.profile_cpu,
                      "CPU to profile on"),
          OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                      "dump raw trace in ASCII"),
          OPT_END()
      };
      /* Options accepted after "replay". */
      const struct option replay_options[] = {
          OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
                       "repeat the workload replay N times (-1: infinite)"),
          OPT_INCR('v', "verbose", &verbose,
                   "be more verbose (show symbol address, etc)"),
          OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                      "dump raw trace in ASCII"),
          OPT_END()
      };
      /* Options accepted before the subcommand name. */
      const struct option sched_options[] = {
          OPT_STRING('i', "input", &input_name, "file",
                     "input file name"),
          OPT_INCR('v', "verbose", &verbose,
                   "be more verbose (show symbol address, etc)"),
          OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                      "dump raw trace in ASCII"),
          OPT_END()
      };
      const char * const latency_usage[] = {
          "perf sched latency [<options>]",
          NULL
      };
      const char * const replay_usage[] = {
          "perf sched replay [<options>]",
          NULL
      };
      const char *const sched_subcommands[] = { "record", "latency", "map",
                                                "replay", "script", NULL };
      /* Starts empty; passed to parse_options_subcommand() together with the subcommand list. */
      const char *sched_usage[] = {
          NULL,
          NULL
      };
      /* Callback tables, one per analysis mode; unset callbacks are skipped by the dispatchers. */
      struct trace_sched_handler lat_ops = {
          .wakeup_event = latency_wakeup_event,
          .switch_event = latency_switch_event,
          .runtime_event = latency_runtime_event,
          .migrate_task_event = latency_migrate_task_event,
      };
      struct trace_sched_handler map_ops = {
          .switch_event = map_switch_event,
      };
      struct trace_sched_handler replay_ops = {
          .wakeup_event = replay_wakeup_event,
          .switch_event = replay_switch_event,
          .fork_event = replay_fork_event,
      };
      unsigned int i;

      /* (u32)-1 is what the switch dispatcher treats as "no pid recorded yet". */
      for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
          sched.curr_pid[i] = -1;

      argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
                                      sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
      if (!argc)
          usage_with_options(sched_usage, sched_options);

      /*
       * Aliased to 'perf script' for now:
       */
      if (!strcmp(argv[0], "script"))
          return cmd_script(argc, argv, prefix);

      if (!strncmp(argv[0], "rec", 3)) {
          return __cmd_record(argc, argv);
      } else if (!strncmp(argv[0], "lat", 3)) {
          sched.tp_handler = &lat_ops;
          /* Only parse latency options when something follows the subcommand. */
          if (argc > 1) {
              argc = parse_options(argc, argv, latency_options, latency_usage, 0);
              /* Leftover arguments are not accepted. */
              if (argc)
                  usage_with_options(latency_usage, latency_options);
          }
          setup_sorting(&sched, latency_options, latency_usage);
          return perf_sched__lat(&sched);
      } else if (!strcmp(argv[0], "map")) {
          sched.tp_handler = &map_ops;
          /* Note: sorting is set up with the latency option/usage tables here too. */
          setup_sorting(&sched, latency_options, latency_usage);
          return perf_sched__map(&sched);
      } else if (!strncmp(argv[0], "rep", 3)) {
          sched.tp_handler = &replay_ops;
          if (argc) {
              argc = parse_options(argc, argv, replay_options, replay_usage, 0);
              /* Leftover arguments are not accepted. */
              if (argc)
                  usage_with_options(replay_usage, replay_options);
          }
          return perf_sched__replay(&sched);
      } else {
          usage_with_options(sched_usage, sched_options);
      }

      return 0;
  }