perf_event_intel_rapl.c

/*
 * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
 * Copyright (C) 2013 Google, Inc., Stephane Eranian
 *
 * Intel RAPL interface is specified in the IA-32 Manual Vol3b
 * section 14.7.1 (September 2013)
 *
 * RAPL provides more controls than just reporting energy consumption;
 * however, here we only expose the energy consumption free running
 * counters (pp0, pkg, dram, pp1).
 *
 * Each of those counters increments in a power unit defined by the
 * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
 * but it can vary.
 *
 * Counter to rapl events mappings:
 *
 *  pp0 counter: consumption of all physical cores (power plane 0)
 *	  event: rapl_energy_cores
 *    perf code: 0x1
 *
 *  pkg counter: consumption of the whole processor package
 *	  event: rapl_energy_pkg
 *    perf code: 0x2
 *
 * dram counter: consumption of the dram domain (servers only)
 *	  event: rapl_energy_dram
 *    perf code: 0x3
 *
 *  pp1 counter: consumption of the built-in gpu domain (clients only)
 *	  event: rapl_energy_gpu
 *    perf code: 0x4
 *
 * We manage those counters as free running (read-only). They may be
 * used simultaneously by other tools, such as turbostat.
 *
 * The events only support system-wide mode counting. There is no
 * sampling support because it does not make sense and is not
 * supported by the RAPL hardware.
 *
 * Because we want to avoid floating-point operations in the kernel,
 * the events are all reported in fixed point arithmetic (32.32).
 * Tools must convert the counts to Joules, e.g. with
 * ldexp(raw_count, -32), and divide by the duration of the
 * measurement to obtain Watts.
 */
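/*
 * Illustrative user-space conversion (a sketch, not part of this driver;
 * the fd and elapsed_sec variables are hypothetical):
 *
 *	u64 raw;
 *	read(fd, &raw, sizeof(raw));		 // 32.32 fixed point count
 *	double joules = ldexp((double)raw, -32); // scale back to Joules
 *	double watts  = joules / elapsed_sec;	 // average power over window
 */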
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
#include "perf_event.h"

/*
 * RAPL energy status counters
 */
#define RAPL_IDX_PP0_NRG_STAT	0	/* all cores */
#define INTEL_RAPL_PP0		0x1	/* pseudo-encoding */
#define RAPL_IDX_PKG_NRG_STAT	1	/* entire package */
#define INTEL_RAPL_PKG		0x2	/* pseudo-encoding */
#define RAPL_IDX_RAM_NRG_STAT	2	/* DRAM */
#define INTEL_RAPL_RAM		0x3	/* pseudo-encoding */
#define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
#define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */

/* Clients have PP0, PKG, PP1 */
#define RAPL_IDX_CLN	(1<<RAPL_IDX_PP0_NRG_STAT|\
			 1<<RAPL_IDX_PKG_NRG_STAT|\
			 1<<RAPL_IDX_PP1_NRG_STAT)

/* Servers have PP0, PKG, RAM */
#define RAPL_IDX_SRV	(1<<RAPL_IDX_PP0_NRG_STAT|\
			 1<<RAPL_IDX_PKG_NRG_STAT|\
			 1<<RAPL_IDX_RAM_NRG_STAT)

/* Haswell clients have PP0, PKG, RAM, PP1 */
#define RAPL_IDX_HSW	(1<<RAPL_IDX_PP0_NRG_STAT|\
			 1<<RAPL_IDX_PKG_NRG_STAT|\
			 1<<RAPL_IDX_RAM_NRG_STAT|\
			 1<<RAPL_IDX_PP1_NRG_STAT)
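/*
 * For reference: RAPL_IDX_SRV expands to 0x7 (bits 0, 1 and 2 for pp0,
 * pkg and ram), RAPL_IDX_CLN to 0xb and RAPL_IDX_HSW to 0xf.
 */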
/*
 * event code: LSB 8 bits, passed in attr->config
 * any other bit is reserved
 */
#define RAPL_EVENT_MASK	0xFFULL

#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)		\
static ssize_t __rapl_##_var##_show(struct kobject *kobj,	\
				struct kobj_attribute *attr,	\
				char *page)			\
{								\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\
	return sprintf(page, _format "\n");			\
}								\
static struct kobj_attribute format_attr_##_var =		\
	__ATTR(_name, 0444, __rapl_##_var##_show, NULL)

#define RAPL_EVENT_DESC(_name, _config)				\
{								\
	.attr	= __ATTR(_name, 0444, rapl_event_show, NULL),	\
	.config	= _config,					\
}

#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */

struct rapl_pmu {
	spinlock_t	 lock;
	int		 hw_unit;		/* 1/2^hw_unit Joule */
	int		 n_active;		/* number of active events */
	struct list_head active_list;
	struct pmu	 *pmu;			/* pointer to rapl_pmu_class */
	ktime_t		 timer_interval;	/* in ktime_t unit */
	struct hrtimer	 hrtimer;
};

static struct pmu rapl_pmu_class;
static cpumask_t rapl_cpu_mask;
static int rapl_cntr_mask;
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);

static inline u64 rapl_read_counter(struct perf_event *event)
{
	u64 raw;

	rdmsrl(event->hw.event_base, raw);
	return raw;
}

static inline u64 rapl_scale(u64 v)
{
	/*
	 * scale delta to smallest unit (1/2^32)
	 * users must then scale back: count * 1/2^32 to get Joules,
	 * or use ldexp(count, -32).
	 * Watts = Joules/Time delta
	 */
	return v << (32 - __this_cpu_read(rapl_pmu->hw_unit));
}
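/*
 * Worked example, assuming the typical SandyBridge hw_unit of 16:
 * one raw increment is 1/2^16 Joule; rapl_scale() shifts it left by
 * 32 - 16 = 16, yielding 2^16 units of 2^-32 Joule, i.e. the same
 * 1/2^16 Joule expressed in the fixed 2^-32 API unit.
 */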
static u64 rapl_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_raw_count, new_raw_count;
	s64 delta, sdelta;
	int shift = RAPL_CNTR_WIDTH;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	rdmsrl(event->hw.event_base, new_raw_count);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count) {
		cpu_relax();
		goto again;
	}

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	sdelta = rapl_scale(delta);

	local64_add(sdelta, &event->count);

	return new_raw_count;
}
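/*
 * Note on the shift dance above: with RAPL_CNTR_WIDTH == 32, shifting
 * both raw values up by 32 bits makes the subtraction wrap modulo 2^64,
 * so even when the 32-bit MSR counter rolled over between reads
 * (new_raw_count < prev_raw_count) the arithmetic shift back down
 * recovers the correct positive delta.
 */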
static void rapl_start_hrtimer(struct rapl_pmu *pmu)
{
	__hrtimer_start_range_ns(&pmu->hrtimer,
			pmu->timer_interval, 0,
			HRTIMER_MODE_REL_PINNED, 0);
}

static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
{
	hrtimer_cancel(&pmu->hrtimer);
}

static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
{
	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
	struct perf_event *event;
	unsigned long flags;

	if (!pmu->n_active)
		return HRTIMER_NORESTART;

	spin_lock_irqsave(&pmu->lock, flags);

	list_for_each_entry(event, &pmu->active_list, active_entry) {
		rapl_event_update(event);
	}

	spin_unlock_irqrestore(&pmu->lock, flags);

	hrtimer_forward_now(hrtimer, pmu->timer_interval);

	return HRTIMER_RESTART;
}

static void rapl_hrtimer_init(struct rapl_pmu *pmu)
{
	struct hrtimer *hr = &pmu->hrtimer;

	hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hr->function = rapl_hrtimer_handle;
}

static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
				   struct perf_event *event)
{
	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	event->hw.state = 0;

	list_add_tail(&event->active_entry, &pmu->active_list);

	local64_set(&event->hw.prev_count, rapl_read_counter(event));

	pmu->n_active++;
	if (pmu->n_active == 1)
		rapl_start_hrtimer(pmu);
}

static void rapl_pmu_event_start(struct perf_event *event, int mode)
{
	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
	unsigned long flags;

	spin_lock_irqsave(&pmu->lock, flags);
	__rapl_pmu_event_start(pmu, event);
	spin_unlock_irqrestore(&pmu->lock, flags);
}
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
{
	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
	struct hw_perf_event *hwc = &event->hw;
	unsigned long flags;

	spin_lock_irqsave(&pmu->lock, flags);

	/* mark event as deactivated and stopped */
	if (!(hwc->state & PERF_HES_STOPPED)) {
		WARN_ON_ONCE(pmu->n_active <= 0);
		pmu->n_active--;
		if (pmu->n_active == 0)
			rapl_stop_hrtimer(pmu);

		list_del(&event->active_entry);

		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	/* check if update of sw counter is necessary */
	if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		rapl_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}

	spin_unlock_irqrestore(&pmu->lock, flags);
}
static int rapl_pmu_event_add(struct perf_event *event, int mode)
{
	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
	struct hw_perf_event *hwc = &event->hw;
	unsigned long flags;

	spin_lock_irqsave(&pmu->lock, flags);

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (mode & PERF_EF_START)
		__rapl_pmu_event_start(pmu, event);

	spin_unlock_irqrestore(&pmu->lock, flags);

	return 0;
}

static void rapl_pmu_event_del(struct perf_event *event, int flags)
{
	rapl_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int rapl_pmu_event_init(struct perf_event *event)
{
	u64 cfg = event->attr.config & RAPL_EVENT_MASK;
	int bit, msr, ret = 0;

	/* only look at RAPL events */
	if (event->attr.type != rapl_pmu_class.type)
		return -ENOENT;

	/* check only supported bits are set */
	if (event->attr.config & ~RAPL_EVENT_MASK)
		return -EINVAL;

	/*
	 * check event is known (determines counter)
	 */
	switch (cfg) {
	case INTEL_RAPL_PP0:
		bit = RAPL_IDX_PP0_NRG_STAT;
		msr = MSR_PP0_ENERGY_STATUS;
		break;
	case INTEL_RAPL_PKG:
		bit = RAPL_IDX_PKG_NRG_STAT;
		msr = MSR_PKG_ENERGY_STATUS;
		break;
	case INTEL_RAPL_RAM:
		bit = RAPL_IDX_RAM_NRG_STAT;
		msr = MSR_DRAM_ENERGY_STATUS;
		break;
	case INTEL_RAPL_PP1:
		bit = RAPL_IDX_PP1_NRG_STAT;
		msr = MSR_PP1_ENERGY_STATUS;
		break;
	default:
		return -EINVAL;
	}

	/* check event supported */
	if (!(rapl_cntr_mask & (1 << bit)))
		return -EINVAL;

	/* unsupported modes and filters */
	if (event->attr.exclude_user   ||
	    event->attr.exclude_kernel ||
	    event->attr.exclude_hv     ||
	    event->attr.exclude_idle   ||
	    event->attr.exclude_host   ||
	    event->attr.exclude_guest  ||
	    event->attr.sample_period) /* no sampling */
		return -EINVAL;

	/* must be done before validate_group */
	event->hw.event_base = msr;
	event->hw.config = cfg;
	event->hw.idx = bit;

	return ret;
}
static void rapl_pmu_event_read(struct perf_event *event)
{
	rapl_event_update(event);
}

static ssize_t rapl_get_attr_cpumask(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &rapl_cpu_mask);

	buf[n++] = '\n';
	buf[n] = '\0';
	return n;
}

static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);

static struct attribute *rapl_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group rapl_pmu_attr_group = {
	.attrs = rapl_pmu_attrs,
};
EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
EVENT_ATTR_STR(energy-pkg  , rapl_pkg, "event=0x02");
EVENT_ATTR_STR(energy-ram  , rapl_ram, "event=0x03");
EVENT_ATTR_STR(energy-gpu  , rapl_gpu, "event=0x04");

EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
EVENT_ATTR_STR(energy-pkg.unit  , rapl_pkg_unit, "Joules");
EVENT_ATTR_STR(energy-ram.unit  , rapl_ram_unit, "Joules");
EVENT_ATTR_STR(energy-gpu.unit  , rapl_gpu_unit, "Joules");

/*
 * we compute in 0.23 nJ increments (2^-32 J) regardless of the MSR unit
 */
EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
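/*
 * With the unit and scale attributes above, perf can report Joules
 * directly. Illustrative usage from user space:
 *
 *	# perf stat -a -e power/energy-pkg/ sleep 1
 *
 * perf multiplies the raw 2^-32 J count by the advertised .scale and
 * appends the advertised .unit string.
 */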
static struct attribute *rapl_events_srv_attr[] = {
	EVENT_PTR(rapl_cores),
	EVENT_PTR(rapl_pkg),
	EVENT_PTR(rapl_ram),

	EVENT_PTR(rapl_cores_unit),
	EVENT_PTR(rapl_pkg_unit),
	EVENT_PTR(rapl_ram_unit),

	EVENT_PTR(rapl_cores_scale),
	EVENT_PTR(rapl_pkg_scale),
	EVENT_PTR(rapl_ram_scale),
	NULL,
};

static struct attribute *rapl_events_cln_attr[] = {
	EVENT_PTR(rapl_cores),
	EVENT_PTR(rapl_pkg),
	EVENT_PTR(rapl_gpu),

	EVENT_PTR(rapl_cores_unit),
	EVENT_PTR(rapl_pkg_unit),
	EVENT_PTR(rapl_gpu_unit),

	EVENT_PTR(rapl_cores_scale),
	EVENT_PTR(rapl_pkg_scale),
	EVENT_PTR(rapl_gpu_scale),
	NULL,
};

static struct attribute *rapl_events_hsw_attr[] = {
	EVENT_PTR(rapl_cores),
	EVENT_PTR(rapl_pkg),
	EVENT_PTR(rapl_gpu),
	EVENT_PTR(rapl_ram),

	EVENT_PTR(rapl_cores_unit),
	EVENT_PTR(rapl_pkg_unit),
	EVENT_PTR(rapl_gpu_unit),
	EVENT_PTR(rapl_ram_unit),

	EVENT_PTR(rapl_cores_scale),
	EVENT_PTR(rapl_pkg_scale),
	EVENT_PTR(rapl_gpu_scale),
	EVENT_PTR(rapl_ram_scale),
	NULL,
};

static struct attribute_group rapl_pmu_events_group = {
	.name = "events",
	.attrs = NULL, /* patched at runtime */
};

DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
static struct attribute *rapl_formats_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group rapl_pmu_format_group = {
	.name = "format",
	.attrs = rapl_formats_attr,
};

const struct attribute_group *rapl_attr_groups[] = {
	&rapl_pmu_attr_group,
	&rapl_pmu_format_group,
	&rapl_pmu_events_group,
	NULL,
};

static struct pmu rapl_pmu_class = {
	.attr_groups	= rapl_attr_groups,
	.task_ctx_nr	= perf_invalid_context, /* system-wide only */
	.event_init	= rapl_pmu_event_init,
	.add		= rapl_pmu_event_add, /* must have */
	.del		= rapl_pmu_event_del, /* must have */
	.start		= rapl_pmu_event_start,
	.stop		= rapl_pmu_event_stop,
	.read		= rapl_pmu_event_read,
};
static void rapl_cpu_exit(int cpu)
{
	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
	int i, phys_id = topology_physical_package_id(cpu);
	int target = -1;

	/* find a new cpu on same package */
	for_each_online_cpu(i) {
		if (i == cpu)
			continue;
		if (phys_id == topology_physical_package_id(i)) {
			target = i;
			break;
		}
	}
	/*
	 * clear the cpu from the cpumask; if it was set and another
	 * cpu remains on the package, hand the role over to that cpu
	 */
	if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
		cpumask_set_cpu(target, &rapl_cpu_mask);

	WARN_ON(cpumask_empty(&rapl_cpu_mask));
	/*
	 * migrate events and context to new cpu
	 */
	if (target >= 0)
		perf_pmu_migrate_context(pmu->pmu, cpu, target);

	/* cancel overflow polling timer for CPU */
	rapl_stop_hrtimer(pmu);
}
static void rapl_cpu_init(int cpu)
{
	int i, phys_id = topology_physical_package_id(cpu);

	/* check if phys_id is already covered */
	for_each_cpu(i, &rapl_cpu_mask) {
		if (phys_id == topology_physical_package_id(i))
			return;
	}
	/* was not found, so add it */
	cpumask_set_cpu(cpu, &rapl_cpu_mask);
}
static int rapl_cpu_prepare(int cpu)
{
	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
	int phys_id = topology_physical_package_id(cpu);
	u64 ms;
	u64 msr_rapl_power_unit_bits;

	if (pmu)
		return 0;

	if (phys_id < 0)
		return -1;

	/* protect rdmsrl() to handle virtualization */
	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
		return -1;

	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
	if (!pmu)
		return -1;

	spin_lock_init(&pmu->lock);

	INIT_LIST_HEAD(&pmu->active_list);

	/*
	 * grab power unit as: 1/2^unit Joules
	 *
	 * we cache in local PMU instance
	 */
	pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
	pmu->pmu = &rapl_pmu_class;

	/*
	 * use reference of 200W for scaling the timeout
	 * to avoid missing counter overflows.
	 * 200W = 200 Joules/sec
	 * divide interval by 2 to avoid lockstep (2 * 100)
	 * if hw unit is 32, then we use 2 ms (1/200/2 s, rounded down)
	 */
	if (pmu->hw_unit < 32)
		ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
	else
		ms = 2;

	pmu->timer_interval = ms_to_ktime(ms);
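	/*
	 * Worked example with hw_unit == 16 (SandyBridge): the 32-bit
	 * counter spans 2^32 * 2^-16 J = 65536 J, which at the 200 W
	 * reference lasts ~327 s; the formula above gives
	 * 5 * 2^15 ms = 163840 ms, i.e. half the overflow period.
	 */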
	rapl_hrtimer_init(pmu);

	/* set RAPL pmu for this cpu for now */
	per_cpu(rapl_pmu, cpu) = pmu;
	per_cpu(rapl_pmu_to_free, cpu) = NULL;

	return 0;
}
static void rapl_cpu_kfree(int cpu)
{
	struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);

	kfree(pmu);

	per_cpu(rapl_pmu_to_free, cpu) = NULL;
}

static int rapl_cpu_dying(int cpu)
{
	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);

	if (!pmu)
		return 0;

	per_cpu(rapl_pmu, cpu) = NULL;

	per_cpu(rapl_pmu_to_free, cpu) = pmu;

	return 0;
}
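/*
 * Hotplug ordering: rapl_cpu_dying() only unpublishes the per-cpu
 * rapl_pmu pointer (CPU_UP_CANCELED/CPU_DYING); the structure itself
 * is kfree()'d later from rapl_cpu_kfree() (CPU_ONLINE/CPU_DEAD), so
 * code still running on the dying CPU cannot observe freed memory.
 */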
static int rapl_cpu_notifier(struct notifier_block *self,
			     unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		rapl_cpu_prepare(cpu);
		break;
	case CPU_STARTING:
		rapl_cpu_init(cpu);
		break;
	case CPU_UP_CANCELED:
	case CPU_DYING:
		rapl_cpu_dying(cpu);
		break;
	case CPU_ONLINE:
	case CPU_DEAD:
		rapl_cpu_kfree(cpu);
		break;
	case CPU_DOWN_PREPARE:
		rapl_cpu_exit(cpu);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static const struct x86_cpu_id rapl_cpu_match[] = {
	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
	[1] = {},
};
static int __init rapl_pmu_init(void)
{
	struct rapl_pmu *pmu;
	int cpu, ret;

	/*
	 * check for Intel processor family 6
	 */
	if (!x86_match_cpu(rapl_cpu_match))
		return 0;

	/* check supported CPU */
	switch (boot_cpu_data.x86_model) {
	case 42: /* Sandy Bridge */
	case 58: /* Ivy Bridge */
		rapl_cntr_mask = RAPL_IDX_CLN;
		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
		break;
	case 60: /* Haswell */
	case 69: /* Haswell-ULT */
		rapl_cntr_mask = RAPL_IDX_HSW;
		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
		break;
	case 45: /* Sandy Bridge-EP */
	case 62: /* IvyTown */
		rapl_cntr_mask = RAPL_IDX_SRV;
		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
		break;
	default:
		/* unsupported */
		return 0;
	}

	cpu_notifier_register_begin();

	for_each_online_cpu(cpu) {
		ret = rapl_cpu_prepare(cpu);
		if (ret)
			goto out;
		rapl_cpu_init(cpu);
	}

	__perf_cpu_notifier(rapl_cpu_notifier);

	ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
	if (WARN_ON(ret)) {
		pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
		cpu_notifier_register_done();
		return -1;
	}

	pmu = __this_cpu_read(rapl_pmu);

	pr_info("RAPL PMU detected,"
		" hw unit 2^-%d Joules,"
		" API unit is 2^-32 Joules,"
		" %d fixed counters,"
		" %llu ms ovfl timer\n",
		pmu->hw_unit,
		hweight32(rapl_cntr_mask),
		ktime_to_ms(pmu->timer_interval));
out:
	cpu_notifier_register_done();

	return 0;
}
device_initcall(rapl_pmu_init);