i915_pmu.c

/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/perf_event.h>
#include <linux/pm_runtime.h>

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_ringbuffer.h"
/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
        (BIT(I915_SAMPLE_BUSY) | \
         BIT(I915_SAMPLE_WAIT) | \
         BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
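
/*
 * Note (added for clarity, not in the original source): with FREQUENCY == 200
 * the nominal period is NSEC_PER_SEC / 200 == 5,000,000 ns, i.e. the sampling
 * timer fires roughly every 5 ms. The max_t() clamp to 10,000 ns only matters
 * if FREQUENCY were raised above 100,000.
 */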

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
        return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
        return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
        return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
        return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
        return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
        if (is_engine_config(config))
                return engine_config_sample(config);
        else
                return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
        return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
        return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
        return config_enabled_bit(event->attr.config);
}
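
/*
 * Note (inferred from the decoders above; see the uAPI header for the
 * authoritative layout): an engine event config packs the sample type into
 * the low I915_PMU_SAMPLE_BITS, the engine instance into the next byte and
 * the engine class from I915_PMU_CLASS_SHIFT upwards, roughly:
 *
 *      config = (class << I915_PMU_CLASS_SHIFT) |
 *               (instance << I915_PMU_SAMPLE_BITS) |
 *               sample;
 *
 * Anything at or above __I915_PMU_OTHER(0) is a global (non-engine) event
 * such as I915_PMU_INTERRUPTS or I915_PMU_RC6_RESIDENCY.
 */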

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
        u64 enable;

        /*
         * Only some counters need the sampling timer.
         *
         * We start with a bitmask of all currently enabled events.
         */
        enable = i915->pmu.enable;

        /*
         * Mask out all the ones which do not need the timer, or in
         * other words keep all the ones that could need the timer.
         */
        enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
                  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
                  ENGINE_SAMPLE_MASK;

        /*
         * When the GPU is idle per-engine counters do not need to be
         * running so clear those bits out.
         */
        if (!gpu_active)
                enable &= ~ENGINE_SAMPLE_MASK;
        /*
         * Also, when software busyness tracking is available we do not need
         * the timer for the I915_SAMPLE_BUSY counter.
         *
         * Use RCS as a proxy for all engines.
         */
        else if (intel_engine_supports_stats(i915->engine[RCS]))
                enable &= ~BIT(I915_SAMPLE_BUSY);

        /*
         * If some bits remain it means we need the sampling timer running.
         */
        return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
        if (!i915->pmu.base.event_init)
                return;

        spin_lock_irq(&i915->pmu.lock);
        /*
         * Signal sampling timer to stop if only engine events are enabled and
         * GPU went idle.
         */
        i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
        spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
        if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
                i915->pmu.timer_enabled = true;
                hrtimer_start_range_ns(&i915->pmu.timer,
                                       ns_to_ktime(PERIOD), 0,
                                       HRTIMER_MODE_REL_PINNED);
        }
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
        if (!i915->pmu.base.event_init)
                return;

        spin_lock_irq(&i915->pmu.lock);
        /*
         * Re-enable sampling timer when GPU goes active.
         */
        __i915_pmu_maybe_start_timer(i915);
        spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
        if (!fw)
                intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

        return true;
}

static void
update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
{
        sample->cur += mul_u32_u32(val, unit);
}
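
/*
 * Summary of the sampling strategy below (added for clarity): busyness is
 * approximated by checking whether the last submitted seqno has completed,
 * while WAIT and SEMA are taken from the RING_CTL register, which requires
 * forcewake. Each timer tick credits one PERIOD worth of nanoseconds to
 * whichever counters were observed asserted.
 */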

static void engines_sample(struct drm_i915_private *dev_priv)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        bool fw = false;

        if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
                return;

        if (!dev_priv->gt.awake)
                return;

        if (!intel_runtime_pm_get_if_in_use(dev_priv))
                return;

        for_each_engine(engine, dev_priv, id) {
                u32 current_seqno = intel_engine_get_seqno(engine);
                u32 last_seqno = intel_engine_last_submit(engine);
                u32 val;

                val = !i915_seqno_passed(current_seqno, last_seqno);

                update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
                              PERIOD, val);

                if (val && (engine->pmu.enable &
                    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
                        fw = grab_forcewake(dev_priv, fw);

                        val = I915_READ_FW(RING_CTL(engine->mmio_base));
                } else {
                        val = 0;
                }

                update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
                              PERIOD, !!(val & RING_WAIT));

                update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
                              PERIOD, !!(val & RING_WAIT_SEMAPHORE));
        }

        if (fw)
                intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

        intel_runtime_pm_put(dev_priv);
}

static void frequency_sample(struct drm_i915_private *dev_priv)
{
        if (dev_priv->pmu.enable &
            config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
                u32 val;

                val = dev_priv->gt_pm.rps.cur_freq;
                if (dev_priv->gt.awake &&
                    intel_runtime_pm_get_if_in_use(dev_priv)) {
                        val = intel_get_cagf(dev_priv,
                                             I915_READ_NOTRACE(GEN6_RPSTAT1));
                        intel_runtime_pm_put(dev_priv);
                }

                update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
                              1, intel_gpu_freq(dev_priv, val));
        }

        if (dev_priv->pmu.enable &
            config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
                update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
                              intel_gpu_freq(dev_priv,
                                             dev_priv->gt_pm.rps.cur_freq));
        }
}

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
        struct drm_i915_private *i915 =
                container_of(hrtimer, struct drm_i915_private, pmu.timer);

        if (!READ_ONCE(i915->pmu.timer_enabled))
                return HRTIMER_NORESTART;

        engines_sample(i915);
        frequency_sample(i915);

        hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
        return HRTIMER_RESTART;
}
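
/*
 * Note (added for clarity): kstat_irqs() is open-coded below, presumably
 * because it is not exported for module use; summing the per-CPU counts over
 * all possible CPUs mirrors what kstat_irqs() itself does.
 */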

static u64 count_interrupts(struct drm_i915_private *i915)
{
        /* open-coded kstat_irqs() */
        struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
        u64 sum = 0;
        int cpu;

        if (!desc || !desc->kstat_irqs)
                return 0;

        for_each_possible_cpu(cpu)
                sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

        return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct intel_engine_cs *engine;

        engine = intel_engine_lookup_user(i915,
                                          engine_event_class(event),
                                          engine_event_instance(event));
        if (WARN_ON_ONCE(!engine))
                return;

        if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
            intel_engine_supports_stats(engine))
                intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
        WARN_ON(event->parent);

        if (is_engine_event(event))
                engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
                    enum drm_i915_pmu_engine_sample sample)
{
        switch (sample) {
        case I915_SAMPLE_BUSY:
        case I915_SAMPLE_WAIT:
                break;
        case I915_SAMPLE_SEMA:
                if (INTEL_GEN(engine->i915) < 6)
                        return -ENODEV;
                break;
        default:
                return -ENOENT;
        }

        return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
        switch (config) {
        case I915_PMU_ACTUAL_FREQUENCY:
                if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
                        /* Requires a mutex for sampling! */
                        return -ENODEV;
                /* Fall-through. */
        case I915_PMU_REQUESTED_FREQUENCY:
                if (INTEL_GEN(i915) < 6)
                        return -ENODEV;
                break;
        case I915_PMU_INTERRUPTS:
                break;
        case I915_PMU_RC6_RESIDENCY:
                if (!HAS_RC6(i915))
                        return -ENODEV;
                break;
        default:
                return -ENOENT;
        }

        return 0;
}

static int engine_event_init(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct intel_engine_cs *engine;
        u8 sample;
        int ret;

        engine = intel_engine_lookup_user(i915, engine_event_class(event),
                                          engine_event_instance(event));
        if (!engine)
                return -ENODEV;

        sample = engine_event_sample(event);
        ret = engine_event_status(engine, sample);
        if (ret)
                return ret;

        if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
                ret = intel_enable_engine_stats(engine);

        return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        int ret;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /* unsupported modes and filters */
        if (event->attr.sample_period) /* no sampling */
                return -EINVAL;

        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        if (event->cpu < 0)
                return -EINVAL;

        /* only allow running on one cpu at a time */
        if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
                return -EINVAL;

        if (is_engine_event(event))
                ret = engine_event_init(event);
        else
                ret = config_status(i915, event->attr.config);
        if (ret)
                return ret;

        if (!event->parent)
                event->destroy = i915_pmu_event_destroy;

        return 0;
}

static u64 __get_rc6(struct drm_i915_private *i915)
{
        u64 val;

        val = intel_rc6_residency_ns(i915,
                                     IS_VALLEYVIEW(i915) ?
                                     VLV_GT_RENDER_RC6 :
                                     GEN6_GT_GFX_RC6);

        if (HAS_RC6p(i915))
                val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

        if (HAS_RC6pp(i915))
                val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

        return val;
}

static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
        unsigned long flags;
        u64 val;

        if (intel_runtime_pm_get_if_in_use(i915)) {
                val = __get_rc6(i915);
                intel_runtime_pm_put(i915);

                /*
                 * If we are coming back from being runtime suspended we must
                 * be careful not to report a larger value than returned
                 * previously.
                 */
                spin_lock_irqsave(&i915->pmu.lock, flags);
                if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
                        i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
                        i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
                } else {
                        val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
                }
                spin_unlock_irqrestore(&i915->pmu.lock, flags);
        } else {
                struct pci_dev *pdev = i915->drm.pdev;
                struct device *kdev = &pdev->dev;

                /*
                 * We are runtime suspended.
                 *
                 * Report the delta from when the device was suspended to now,
                 * on top of the last known real value, as the approximated RC6
                 * counter value.
                 */
                spin_lock_irqsave(&i915->pmu.lock, flags);
                spin_lock(&kdev->power.lock);

                /*
                 * Although intel_runtime_pm_get_if_in_use failed above to get
                 * the runtime PM reference, we cannot assume we are in runtime
                 * suspend since we can either: a) race with coming out of it
                 * before we took the power.lock, or b) there are other states
                 * than suspended which can bring us here.
                 *
                 * We need to double-check that we are indeed currently runtime
                 * suspended and if not we cannot do better than report the
                 * last known RC6 value.
                 */
                if (kdev->power.runtime_status == RPM_SUSPENDED) {
                        if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
                                i915->pmu.suspended_jiffies_last =
                                                kdev->power.suspended_jiffies;

                        val = kdev->power.suspended_jiffies -
                              i915->pmu.suspended_jiffies_last;
                        val += jiffies - kdev->power.accounting_timestamp;

                        val = jiffies_to_nsecs(val);
                        val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

                        i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
                } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
                        val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
                } else {
                        val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
                }

                spin_unlock(&kdev->power.lock);
                spin_unlock_irqrestore(&i915->pmu.lock, flags);
        }

        return val;
#else
        return __get_rc6(i915);
#endif
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        u64 val = 0;

        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                if (WARN_ON_ONCE(!engine)) {
                        /* Do nothing */
                } else if (sample == I915_SAMPLE_BUSY &&
                           intel_engine_supports_stats(engine)) {
                        val = ktime_to_ns(intel_engine_get_busy_time(engine));
                } else {
                        val = engine->pmu.sample[sample].cur;
                }
        } else {
                switch (event->attr.config) {
                case I915_PMU_ACTUAL_FREQUENCY:
                        val =
                           div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
                                   FREQUENCY);
                        break;
                case I915_PMU_REQUESTED_FREQUENCY:
                        val =
                           div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
                                   FREQUENCY);
                        break;
                case I915_PMU_INTERRUPTS:
                        val = count_interrupts(i915);
                        break;
                case I915_PMU_RC6_RESIDENCY:
                        val = get_rc6(i915);
                        break;
                }
        }

        return val;
}
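
/*
 * Note (added for clarity): the read below publishes the delta since the
 * previous read via a cmpxchg retry loop, so concurrent readers racing on
 * prev_count each account a given counter increment exactly once.
 */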

static void i915_pmu_event_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;

again:
        prev = local64_read(&hwc->prev_count);
        new = __i915_pmu_event_read(event);

        if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
                goto again;

        local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        unsigned int bit = event_enabled_bit(event);
        unsigned long flags;

        spin_lock_irqsave(&i915->pmu.lock, flags);

        /*
         * Update the bitmask of enabled events and increment
         * the event reference counter.
         */
        GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
        GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
        i915->pmu.enable |= BIT_ULL(bit);
        i915->pmu.enable_count[bit]++;

        /*
         * Start the sampling timer if needed and not already enabled.
         */
        __i915_pmu_maybe_start_timer(i915);

        /*
         * For per-engine events the bitmask and reference counting
         * is stored per engine.
         */
        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));
                GEM_BUG_ON(!engine);
                engine->pmu.enable |= BIT(sample);

                GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
                GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
                engine->pmu.enable_count[sample]++;
        }

        spin_unlock_irqrestore(&i915->pmu.lock, flags);

        /*
         * Store the current counter value so we can report the correct delta
         * for all listeners. Even when the event was already enabled and has
         * an existing non-zero value.
         */
        local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        unsigned int bit = event_enabled_bit(event);
        unsigned long flags;

        spin_lock_irqsave(&i915->pmu.lock, flags);

        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));
                GEM_BUG_ON(!engine);
                GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
                GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
                /*
                 * Decrement the reference count and clear the enabled
                 * bitmask when the last listener on an event goes away.
                 */
                if (--engine->pmu.enable_count[sample] == 0)
                        engine->pmu.enable &= ~BIT(sample);
        }

        GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
        GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
        /*
         * Decrement the reference count and clear the enabled
         * bitmask when the last listener on an event goes away.
         */
        if (--i915->pmu.enable_count[bit] == 0) {
                i915->pmu.enable &= ~BIT_ULL(bit);
                i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
        }

        spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
        i915_pmu_enable(event);
        event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
        if (flags & PERF_EF_UPDATE)
                i915_pmu_event_read(event);
        i915_pmu_disable(event);
        event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
        if (flags & PERF_EF_START)
                i915_pmu_event_start(event, flags);

        return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
        i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
        return 0;
}
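
/*
 * Note (added for clarity, based on core perf behaviour as understood at the
 * time of writing): returning 0 from event_idx reports "no index" in the
 * mmap'd user page, so these counters cannot be read directly from user space
 * and always go through the kernel read path.
 */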

struct i915_str_attribute {
        struct device_attribute attr;
        const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
{
        struct i915_str_attribute *eattr;

        eattr = container_of(attr, struct i915_str_attribute, attr);
        return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
        (&((struct i915_str_attribute[]) { \
                { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
                  .str = _config, } \
        })[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
        I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
        NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
        .name = "format",
        .attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
        struct device_attribute attr;
        unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        struct i915_ext_attribute *eattr;

        eattr = container_of(attr, struct i915_ext_attribute, attr);
        return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
        .name = "events",
        /* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
                          struct device_attribute *attr,
                          char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
        .attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
        &i915_pmu_format_attr_group,
        &i915_pmu_events_attr_group,
        &i915_pmu_cpumask_attr_group,
        NULL
};
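
/*
 * Note on how the groups above surface to user space (illustrative, not from
 * the original source): once registered, the PMU appears under
 * /sys/bus/event_source/devices/i915/ with "events", "format" and "cpumask"
 * entries, so a typical invocation might look like:
 *
 *      perf stat -e i915/actual-frequency/ -e i915/rcs0-busy/ -a sleep 1
 *
 * where the exact engine event names depend on the engines discovered at
 * runtime.
 */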

#define __event(__config, __name, __unit) \
{ \
        .config = (__config), \
        .name = (__name), \
        .unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
        .sample = (__sample), \
        .name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = name;
        attr->attr.attr.mode = 0444;
        attr->attr.show = i915_pmu_event_show;
        attr->val = config;

        return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
             const char *str)
{
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = name;
        attr->attr.attr.mode = 0444;
        attr->attr.show = perf_event_sysfs_show;
        attr->event_str = str;

        return ++attr;
}

static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
        static const struct {
                u64 config;
                const char *name;
                const char *unit;
        } events[] = {
                __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
                __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
                __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
                __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
        };
        static const struct {
                enum drm_i915_pmu_engine_sample sample;
                char *name;
        } engine_events[] = {
                __engine_event(I915_SAMPLE_BUSY, "busy"),
                __engine_event(I915_SAMPLE_SEMA, "sema"),
                __engine_event(I915_SAMPLE_WAIT, "wait"),
        };
        unsigned int count = 0;
        struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
        struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
        struct attribute **attr = NULL, **attr_iter;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned int i;

        /* Count how many counters we will be exposing. */
        for (i = 0; i < ARRAY_SIZE(events); i++) {
                if (!config_status(i915, events[i].config))
                        count++;
        }

        for_each_engine(engine, i915, id) {
                for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
                        if (!engine_event_status(engine,
                                                 engine_events[i].sample))
                                count++;
                }
        }

        /* Allocate attribute objects and table. */
        i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
        if (!i915_attr)
                goto err_alloc;

        pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
        if (!pmu_attr)
                goto err_alloc;

        /* Max one pointer of each attribute type plus a termination entry. */
        attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
        if (!attr)
                goto err_alloc;

        i915_iter = i915_attr;
        pmu_iter = pmu_attr;
        attr_iter = attr;

        /* Initialize supported non-engine counters. */
        for (i = 0; i < ARRAY_SIZE(events); i++) {
                char *str;

                if (config_status(i915, events[i].config))
                        continue;

                str = kstrdup(events[i].name, GFP_KERNEL);
                if (!str)
                        goto err;

                *attr_iter++ = &i915_iter->attr.attr;
                i915_iter = add_i915_attr(i915_iter, str, events[i].config);

                if (events[i].unit) {
                        str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &pmu_iter->attr.attr;
                        pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
                }
        }

        /* Initialize supported engine counters. */
        for_each_engine(engine, i915, id) {
                for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
                        char *str;

                        if (engine_event_status(engine,
                                                engine_events[i].sample))
                                continue;

                        str = kasprintf(GFP_KERNEL, "%s-%s",
                                        engine->name, engine_events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &i915_iter->attr.attr;
                        i915_iter =
                                add_i915_attr(i915_iter, str,
                                              __I915_PMU_ENGINE(engine->uabi_class,
                                                                engine->instance,
                                                                engine_events[i].sample));

                        str = kasprintf(GFP_KERNEL, "%s-%s.unit",
                                        engine->name, engine_events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &pmu_iter->attr.attr;
                        pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
                }
        }

        i915->pmu.i915_attr = i915_attr;
        i915->pmu.pmu_attr = pmu_attr;

        return attr;

err:
        for (attr_iter = attr; *attr_iter; attr_iter++)
                kfree((*attr_iter)->name);

err_alloc:
        kfree(attr);
        kfree(i915_attr);
        kfree(pmu_attr);

        return NULL;
}

static void free_event_attributes(struct drm_i915_private *i915)
{
        struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

        for (; *attr_iter; attr_iter++)
                kfree((*attr_iter)->name);

        kfree(i915_pmu_events_attr_group.attrs);
        kfree(i915->pmu.i915_attr);
        kfree(i915->pmu.pmu_attr);

        i915_pmu_events_attr_group.attrs = NULL;
        i915->pmu.i915_attr = NULL;
        i915->pmu.pmu_attr = NULL;
}

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

        GEM_BUG_ON(!pmu->base.event_init);

        /* Select the first online CPU as a designated reader. */
        if (!cpumask_weight(&i915_pmu_cpumask))
                cpumask_set_cpu(cpu, &i915_pmu_cpumask);

        return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
        struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
        unsigned int target;

        GEM_BUG_ON(!pmu->base.event_init);

        if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
                target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
                /* Migrate events if there is a valid target */
                if (target < nr_cpu_ids) {
                        cpumask_set_cpu(target, &i915_pmu_cpumask);
                        perf_pmu_migrate_context(&pmu->base, cpu, target);
                }
        }

        return 0;
}
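
/*
 * Note (added for clarity): the counters here are not per-CPU, so the hotplug
 * callbacks above keep a single designated reader CPU in i915_pmu_cpumask
 * (exported via the "cpumask" attribute) and migrate the perf context to a
 * sibling CPU if the current reader goes offline.
 */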

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
        enum cpuhp_state slot;
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
                                      "perf/x86/intel/i915:online",
                                      i915_pmu_cpu_online,
                                      i915_pmu_cpu_offline);
        if (ret < 0)
                return ret;

        slot = ret;
        ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
        if (ret) {
                cpuhp_remove_multi_state(slot);
                return ret;
        }

        cpuhp_slot = slot;
        return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
        WARN_ON(cpuhp_slot == CPUHP_INVALID);
        WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
        cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
        int ret;

        if (INTEL_GEN(i915) <= 2) {
                DRM_INFO("PMU not supported for this GPU.");
                return;
        }

        i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
        if (!i915_pmu_events_attr_group.attrs) {
                ret = -ENOMEM;
                goto err;
        }

        i915->pmu.base.attr_groups = i915_pmu_attr_groups;
        i915->pmu.base.task_ctx_nr = perf_invalid_context;
        i915->pmu.base.event_init = i915_pmu_event_init;
        i915->pmu.base.add = i915_pmu_event_add;
        i915->pmu.base.del = i915_pmu_event_del;
        i915->pmu.base.start = i915_pmu_event_start;
        i915->pmu.base.stop = i915_pmu_event_stop;
        i915->pmu.base.read = i915_pmu_event_read;
        i915->pmu.base.event_idx = i915_pmu_event_event_idx;

        spin_lock_init(&i915->pmu.lock);
        hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        i915->pmu.timer.function = i915_sample;

        ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
        if (ret)
                goto err;

        ret = i915_pmu_register_cpuhp_state(i915);
        if (ret)
                goto err_unreg;

        return;

err_unreg:
        perf_pmu_unregister(&i915->pmu.base);
err:
        i915->pmu.base.event_init = NULL;
        free_event_attributes(i915);
        DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
        if (!i915->pmu.base.event_init)
                return;

        WARN_ON(i915->pmu.enable);

        hrtimer_cancel(&i915->pmu.timer);

        i915_pmu_unregister_cpuhp_state(i915);

        perf_pmu_unregister(&i915->pmu.base);
        i915->pmu.base.event_init = NULL;

        free_event_attributes(i915);
}