/* i915_pmu.c */
  1. /*
  2. * Copyright © 2017 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. *
  23. */
  24. #include <linux/perf_event.h>
  25. #include <linux/pm_runtime.h>
  26. #include "i915_drv.h"
  27. #include "i915_pmu.h"
  28. #include "intel_ringbuffer.h"
/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

/* Enable-bitmask bits covering the three per-engine sample types. */
#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

/* Number of low enable-bitmask bits reserved for engine samples. */
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

/* CPUs allowed to open i915 events; a single designated reader CPU. */
static cpumask_t i915_pmu_cpumask;
  38. static u8 engine_config_sample(u64 config)
  39. {
  40. return config & I915_PMU_SAMPLE_MASK;
  41. }
  42. static u8 engine_event_sample(struct perf_event *event)
  43. {
  44. return engine_config_sample(event->attr.config);
  45. }
  46. static u8 engine_event_class(struct perf_event *event)
  47. {
  48. return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
  49. }
  50. static u8 engine_event_instance(struct perf_event *event)
  51. {
  52. return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
  53. }
  54. static bool is_engine_config(u64 config)
  55. {
  56. return config < __I915_PMU_OTHER(0);
  57. }
  58. static unsigned int config_enabled_bit(u64 config)
  59. {
  60. if (is_engine_config(config))
  61. return engine_config_sample(config);
  62. else
  63. return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
  64. }
  65. static u64 config_enabled_mask(u64 config)
  66. {
  67. return BIT_ULL(config_enabled_bit(config));
  68. }
  69. static bool is_engine_event(struct perf_event *event)
  70. {
  71. return is_engine_config(event->attr.config);
  72. }
  73. static unsigned int event_enabled_bit(struct perf_event *event)
  74. {
  75. return config_enabled_bit(event->attr.config);
  76. }
/*
 * pmu_needs_timer - does the current event mix require the sampling timer?
 * @i915: i915 device instance
 * @gpu_active: whether to treat the GT as currently active
 *
 * Returns true if at least one currently enabled event can only be
 * serviced by the periodic sampling timer. All callers in this file hold
 * i915->pmu.lock.
 */
static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also there is software busyness tracking available we do not
	 * need the timer for I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}
/*
 * i915_pmu_gt_parked - notification that the GT has gone idle.
 * @i915: i915 device instance
 *
 * Re-evaluates whether the sampling timer should keep running now that
 * per-engine sampling is no longer required.
 */
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	/* event_init doubles as the "PMU is registered" flag. */
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}
/*
 * Start the sampling timer if it is needed but not already running.
 * Callers hold i915->pmu.lock.
 */
static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}
/*
 * i915_pmu_gt_unparked - notification that the GT has become active.
 * @i915: i915 device instance
 */
void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	/* event_init doubles as the "PMU is registered" flag. */
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}
  144. static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
  145. {
  146. if (!fw)
  147. intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
  148. return true;
  149. }
  150. static void
  151. update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
  152. {
  153. sample->cur += mul_u32_u32(val, unit);
  154. }
/*
 * Sample the busy/wait/sema counters for every engine, called from the
 * sampling timer. Bails out early when no engine events are enabled, the
 * GT is asleep, or the device is runtime suspended (we must not wake the
 * device just to sample it).
 */
static void engines_sample(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	/* Only sample if already awake — do not wake the device up. */
	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		/* Busy while the last submitted seqno has not been passed. */
		val = !i915_seqno_passed(current_seqno, last_seqno);

		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
			      PERIOD, val);

		/*
		 * RING_CTL is only read (under forcewake, taken at most once
		 * for the whole loop) when the engine is busy and WAIT/SEMA
		 * sampling is actually enabled for it.
		 */
		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
			      PERIOD, !!(val & RING_WAIT));

		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}
/*
 * Sample the actual and requested GPU frequency counters, called from the
 * sampling timer.
 */
static void frequency_sample(struct drm_i915_private *dev_priv)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		/*
		 * Start from the last known requested frequency; only read
		 * the actual frequency from the hardware when the device is
		 * awake and already runtime resumed.
		 */
		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
			      1, intel_gpu_freq(dev_priv, val));
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
			      intel_gpu_freq(dev_priv,
					     dev_priv->gt_pm.rps.cur_freq));
	}
}
/* Periodic sampling timer callback; re-arms itself while enabled. */
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);

	/*
	 * timer_enabled is cleared elsewhere without cancelling the timer,
	 * so stop re-arming here once it goes false.
	 */
	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	engines_sample(i915);
	frequency_sample(i915);

	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
	return HRTIMER_RESTART;
}
/*
 * Sum the interrupt counts for the device IRQ line over all possible CPUs.
 */
static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	/* No descriptor or no per-cpu stats yet: report zero interrupts. */
	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}
/*
 * perf core destroy callback — nothing is allocated per event, so only
 * sanity check that we are not destroying a child event (destroy is only
 * installed on parent events in i915_pmu_event_init()).
 */
static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);
}
/*
 * Validate a per-engine event: the addressed engine must exist and the
 * requested sample type must be supported on this hardware.
 */
static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);

	if (!intel_engine_lookup_user(i915, engine_event_class(event),
				      engine_event_instance(event)))
		return -ENODEV;

	switch (engine_event_sample(event)) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		/* Semaphore sampling requires gen6+. */
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
/*
 * perf core event_init callback: validate event attributes and hardware
 * support, returning -ENOENT for configs we do not own and -EINVAL /
 * -ENODEV / -EOPNOTSUPP for unsupported modes.
 */
static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event)) {
		ret = engine_event_init(event);
	} else {
		ret = 0;
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
				 /* Requires a mutex for sampling! */
				ret = -ENODEV;
			/* Fall-through: also apply the gen6 check below. */
		case I915_PMU_REQUESTED_FREQUENCY:
			if (INTEL_GEN(i915) < 6)
				ret = -ENODEV;
			break;
		case I915_PMU_INTERRUPTS:
			break;
		case I915_PMU_RC6_RESIDENCY:
			if (!HAS_RC6(i915))
				ret = -ENODEV;
			break;
		default:
			ret = -ENOENT;
			break;
		}
	}

	if (ret)
		return ret;

	/* Only parent events get (and need) the destroy callback. */
	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}
/*
 * Read the current raw counter value for an event. Engine busyness comes
 * from software busy stats when available, otherwise from the timer-driven
 * samples; global counters are derived per config below.
 */
static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   engine->pmu.busy_stats) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			/* Samples accumulate MHz * PERIOD; divide back out. */
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   FREQUENCY);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   FREQUENCY);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			/* Reading RC6 residency wakes the device. */
			intel_runtime_pm_get(i915);
			val = intel_rc6_residency_ns(i915,
						     IS_VALLEYVIEW(i915) ?
						     VLV_GT_RENDER_RC6 :
						     GEN6_GT_GFX_RC6);
			if (HAS_RC6p(i915))
				val += intel_rc6_residency_ns(i915,
							      GEN6_GT_GFX_RC6p);
			if (HAS_RC6pp(i915))
				val += intel_rc6_residency_ns(i915,
							      GEN6_GT_GFX_RC6pp);
			intel_runtime_pm_put(i915);
			break;
		}
	}

	return val;
}
/*
 * perf core read callback: publish the delta since the last read into
 * event->count. The cmpxchg loop makes the prev_count update lock-free
 * against concurrent readers — retry until no one raced with us.
 */
static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}
  368. static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
  369. {
  370. return intel_engine_supports_stats(engine) &&
  371. (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
  372. }
/*
 * Enable bookkeeping for one event: reference count it in the global (and,
 * for engine events, per-engine) enable masks, kick the sampling timer if
 * now required, and snapshot the counter for delta reporting.
 */
static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		if (engine->pmu.enable_count[sample]++ == 0) {
			/*
			 * Enable engine busy stats tracking if needed or
			 * alternatively cancel the scheduled disable.
			 *
			 * If the delayed disable was pending, cancel it and
			 * in this case do not enable since it already is.
			 */
			if (engine_needs_busy_stats(engine) &&
			    !engine->pmu.busy_stats) {
				engine->pmu.busy_stats = true;
				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
					intel_enable_engine_stats(engine);
			}
		}
	}

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
/* Delayed worker which finally turns engine busy stats tracking off. */
static void __disable_busy_stats(struct work_struct *work)
{
	struct intel_engine_cs *engine =
	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);

	intel_disable_engine_stats(engine);
}
/*
 * Disable bookkeeping for one event; mirrors i915_pmu_enable(). Busy stats
 * are not switched off immediately but via a delayed work item.
 */
static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0) {
			engine->pmu.enable &= ~BIT(sample);
			if (!engine_needs_busy_stats(engine) &&
			    engine->pmu.busy_stats) {
				engine->pmu.busy_stats = false;
				/*
				 * We request a delayed disable to handle the
				 * rapid on/off cycles on events, which can
				 * happen when tools like perf stat start, in a
				 * nicer way.
				 *
				 * In addition, this also helps with busy stats
				 * accuracy with background CPU offline/online
				 * migration events.
				 */
				queue_delayed_work(system_wq,
						   &engine->pmu.disable_busy_stats,
						   round_jiffies_up_relative(HZ));
			}
		}
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
/* perf core callback: enable counting and mark the event as running. */
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

/* perf core callback: optionally publish a final count, then disable. */
static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

/* perf core callback: add the event to the PMU; cannot fail. */
static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

/* perf core callback: remove the event, publishing the final count. */
static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

/* No per-event index is exposed for userspace mmap reads. */
static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}
/* Device attribute bundled with the fixed string it displays. */
struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};
  519. static ssize_t i915_pmu_format_show(struct device *dev,
  520. struct device_attribute *attr, char *buf)
  521. {
  522. struct i915_str_attribute *eattr;
  523. eattr = container_of(attr, struct i915_str_attribute, attr);
  524. return sprintf(buf, "%s\n", eattr->str);
  525. }
/* Build a read-only sysfs "format" attribute showing a fixed string. */
#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

/* The event id occupies config bits 0-20. */
static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};
/* Device attribute bundled with the event config value it displays. */
struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};
  543. static ssize_t i915_pmu_event_show(struct device *dev,
  544. struct device_attribute *attr, char *buf)
  545. {
  546. struct i915_ext_attribute *eattr;
  547. eattr = container_of(attr, struct i915_ext_attribute, attr);
  548. return sprintf(buf, "config=0x%lx\n", eattr->val);
  549. }
/* Read-only sysfs event attribute exposing a config value. */
#define I915_EVENT_ATTR(_name, _config) \
	(&((struct i915_ext_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \
		  .val = _config, } \
	})[0].attr.attr)

/* Companion attribute exposing a fixed string (e.g. the unit). */
#define I915_EVENT_STR(_name, _str) \
	(&((struct perf_pmu_events_attr[]) { \
		{ .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
		  .id = 0, \
		  .event_str = _str, } \
	})[0].attr.attr)

/* Event plus its "<name>.unit" companion attribute. */
#define I915_EVENT(_name, _config, _unit) \
	I915_EVENT_ATTR(_name, _config), \
	I915_EVENT_STR(_name.unit, _unit)

/* Per-engine event; always reported in nanoseconds. */
#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \
	I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \
	I915_EVENT_STR(_name.unit, "ns")

/* The busy/sema/wait triplet for one engine instance. */
#define I915_ENGINE_EVENTS(_name, _class, _instance) \
	I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
	I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
	I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT)
/* All events exposed in sysfs under the "events" group. */
static struct attribute *i915_pmu_events_attrs[] = {
	I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0),
	I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0),
	I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0),
	I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1),
	I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
	I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"),
	I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"),
	I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS),
	I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"),
	NULL,
};

static const struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	.attrs = i915_pmu_events_attrs,
};
/* sysfs "cpumask" show handler: print the designated reader CPU mask. */
static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};
  599. static struct attribute_group i915_pmu_cpumask_attr_group = {
  600. .attrs = i915_cpumask_attrs,
  601. };
/* Top-level sysfs attribute groups registered with the PMU. */
static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};
  608. static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
  609. {
  610. struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
  611. GEM_BUG_ON(!pmu->base.event_init);
  612. /* Select the first online CPU as a designated reader. */
  613. if (!cpumask_weight(&i915_pmu_cpumask))
  614. cpumask_set_cpu(cpu, &i915_pmu_cpumask);
  615. return 0;
  616. }
/*
 * CPU hotplug callback: if the designated reader CPU goes offline, select
 * a replacement and migrate the perf context over to it.
 */
static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}
/*
 * Dynamically allocated CPU hotplug state slot. NOTE(review): this is a
 * single file-scope slot — appears to assume one PMU instance per system;
 * confirm if multiple devices ever register.
 */
static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	/* With CPUHP_AP_ONLINE_DYN a positive return is the allocated slot. */
	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}
/* Tear down the hotplug instance and dynamic state from registration. */
static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}
  658. void i915_pmu_register(struct drm_i915_private *i915)
  659. {
  660. struct intel_engine_cs *engine;
  661. enum intel_engine_id id;
  662. int ret;
  663. if (INTEL_GEN(i915) <= 2) {
  664. DRM_INFO("PMU not supported for this GPU.");
  665. return;
  666. }
  667. i915->pmu.base.attr_groups = i915_pmu_attr_groups;
  668. i915->pmu.base.task_ctx_nr = perf_invalid_context;
  669. i915->pmu.base.event_init = i915_pmu_event_init;
  670. i915->pmu.base.add = i915_pmu_event_add;
  671. i915->pmu.base.del = i915_pmu_event_del;
  672. i915->pmu.base.start = i915_pmu_event_start;
  673. i915->pmu.base.stop = i915_pmu_event_stop;
  674. i915->pmu.base.read = i915_pmu_event_read;
  675. i915->pmu.base.event_idx = i915_pmu_event_event_idx;
  676. spin_lock_init(&i915->pmu.lock);
  677. hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  678. i915->pmu.timer.function = i915_sample;
  679. for_each_engine(engine, i915, id)
  680. INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
  681. __disable_busy_stats);
  682. ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
  683. if (ret)
  684. goto err;
  685. ret = i915_pmu_register_cpuhp_state(i915);
  686. if (ret)
  687. goto err_unreg;
  688. return;
  689. err_unreg:
  690. perf_pmu_unregister(&i915->pmu.base);
  691. err:
  692. i915->pmu.base.event_init = NULL;
  693. DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
  694. }
/**
 * i915_pmu_unregister - stop sampling and unregister the i915 PMU.
 * @i915: i915 device instance
 */
void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (!i915->pmu.base.event_init)
		return;

	/* All events should have been disabled by now. */
	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	/* Ensure any pending delayed busy-stats disables have run. */
	for_each_engine(engine, i915, id) {
		GEM_BUG_ON(engine->pmu.busy_stats);
		flush_delayed_work(&engine->pmu.disable_busy_stats);
	}

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
}