i915_pmu.c

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/irq.h>
#include "i915_pmu.h"
#include "intel_ringbuffer.h"
#include "i915_drv.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;
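
/*
 * Note: for engine events, event->attr.config packs the engine class,
 * instance and sample type as defined by the i915_drm.h uapi; configs at or
 * above __I915_PMU_OTHER(0) select the global (non-engine) counters. The
 * helpers below only decode that layout.
 */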

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}
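
/*
 * pmu_needs_timer - check whether any currently enabled event still requires
 * the sampling timer, given whether the GPU is active.
 */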

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, when software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as a proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}
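
/*
 * i915_pmu_gt_parked() / i915_pmu_gt_unparked() below are the GT idle and
 * active notifications: they stop the sampling timer while the GPU is parked
 * and restart it when the GPU becomes busy again.
 */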

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		i915->pmu.timer_last = ktime_get();
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}
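
/*
 * engines_sample - accumulate busy/wait/sema time for each engine over the
 * elapsed period, based on seqno progress and the RING_CTL wait bits. The
 * hardware is only touched under a runtime PM reference and, when needed,
 * forcewake.
 */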

static void
engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);

		if (val)
			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
				   period_ns);

		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		if (val & RING_WAIT)
			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
				   period_ns);

		if (val & RING_WAIT_SEMAPHORE)
			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
				   period_ns);
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}

static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}
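
/*
 * frequency_sample - accumulate the actual (CAGF) and requested GPU
 * frequency, weighted by the elapsed period in microseconds; the read side
 * divides the accumulated value by USEC_PER_SEC to report MHz.
 */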

static void
frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
				intel_gpu_freq(dev_priv, val),
				period_ns / 1000);
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
				intel_gpu_freq(dev_priv,
					       dev_priv->gt_pm.rps.cur_freq),
				period_ns / 1000);
	}
}
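
/*
 * i915_sample - hrtimer callback: measure the time elapsed since the last
 * invocation, feed it to the engine and frequency samplers, then re-arm the
 * timer unless sampling has been disabled in the meantime.
 */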

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
	i915->pmu.timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculation, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer
	 * callback delay greatly dominates this so we keep it simple.
	 */
	engines_sample(i915, period_ns);
	frequency_sample(i915, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}
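
/*
 * __get_rc6 - read the raw RC6 residency in nanoseconds, adding in the
 * deeper RC6p/RC6pp counters on platforms which have them.
 */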

static u64 __get_rc6(struct drm_i915_private *i915)
{
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}

static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
	unsigned long flags;
	u64 val;

	if (intel_runtime_pm_get_if_in_use(i915)) {
		val = __get_rc6(i915);
		intel_runtime_pm_put(i915);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);

		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	} else {
		struct pci_dev *pdev = i915->drm.pdev;
		struct device *kdev = &pdev->dev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);
		spin_lock(&kdev->power.lock);

		/*
		 * Since intel_runtime_pm_get_if_in_use above failed to get the
		 * runtime PM reference, we cannot assume we are in runtime
		 * suspend: we can either a) race with coming out of it before
		 * we took the power.lock, or b) be in some other state than
		 * suspended which can bring us here.
		 *
		 * We need to double-check that we are indeed currently runtime
		 * suspended and if not we cannot do better than report the
		 * last known RC6 value.
		 */
		if (kdev->power.runtime_status == RPM_SUSPENDED) {
			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				i915->pmu.suspended_jiffies_last =
						kdev->power.suspended_jiffies;

			val = kdev->power.suspended_jiffies -
			      i915->pmu.suspended_jiffies_last;
			val += jiffies - kdev->power.accounting_timestamp;

			val = jiffies_to_nsecs(val);
			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock(&kdev->power.lock);
		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	}

	return val;
#else
	return __get_rc6(i915);
#endif
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915);
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}
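
/*
 * i915_pmu_enable - account a new listener: update the global (and, for
 * engine events, per-engine) enable bitmask and reference counts, start the
 * sampling timer if it is now needed, and snapshot the current counter value
 * so this event reports deltas from this point onwards.
 */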

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}
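
/*
 * The format, events and cpumask attribute groups below are exposed by perf
 * core under /sys/bus/event_source/devices/i915/ once the PMU is registered.
 */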

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};
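
/*
 * Event attributes (and their ".unit" companions) are generated at runtime
 * by create_event_attributes() below, since the set of engines and supported
 * counters depends on the hardware that was probed.
 */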

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	i915->pmu.i915_attr = i915_attr;
	i915->pmu.pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct drm_i915_private *i915)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(i915->pmu.i915_attr);
	kfree(i915->pmu.pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	i915->pmu.i915_attr = NULL;
	i915->pmu.pmu_attr = NULL;
}
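
/*
 * The PMU is CPU-affine: all events are counted on a single designated CPU
 * tracked in i915_pmu_cpumask. The hotplug callbacks below pick the first
 * online CPU and migrate the perf context if that CPU goes away.
 */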

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}
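
/*
 * Once registered, the counters can be read from userspace with the perf
 * tool, for example (illustrative invocation only; the exact event names
 * follow the attributes created above):
 *
 *   perf stat -e i915/rc6-residency/ -e i915/actual-frequency/ -a sleep 1
 */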

void i915_pmu_register(struct drm_i915_private *i915)
{
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;

	free_event_attributes(i915);
}