/*
 * Copyright © 2015-2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *   Robert Bragg <robert@sixbynine.org>
 */

/**
 * DOC: i915 Perf Overview
 *
 * Gen graphics supports a large number of performance counters that can help
 * driver and application developers understand and optimize their use of the
 * GPU.
 *
 * This i915 perf interface enables userspace to configure and open a file
 * descriptor representing a stream of GPU metrics which can then be read() as
 * a stream of sample records.
 *
 * The interface is particularly suited to exposing buffered metrics that are
 * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
 *
 * Streams representing a single context are accessible to applications with a
 * corresponding drm file descriptor, such that OpenGL can use the interface
 * without special privileges. Access to system-wide metrics requires root
 * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
 * sysctl option.
 */

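/*
 * For illustration, a hedged userspace-side sketch (not driver code) of
 * opening a periodic, system-wide OA stream with the uAPI declared in
 * uapi/drm/i915_drm.h and reading records from it. The metrics set ID (1,
 * the test config) and the exponent are arbitrary example values:
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, 1,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *	ssize_t len;
 *
 *	// without I915_PERF_FLAG_DISABLED the stream starts enabled
 *	while ((len = read(stream_fd, buf, sizeof(buf))) > 0)
 *		; // buf now holds drm_i915_perf_record_header framed records
 */
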
/**
 * DOC: i915 Perf History and Comparison with Core Perf
 *
 * The interface was initially inspired by the core Perf infrastructure but
 * some notable differences are:
 *
 * i915 perf file descriptors represent a "stream" instead of an "event"; where
 * a perf event primarily corresponds to a single 64bit value, while a stream
 * might sample sets of tightly-coupled counters, depending on the
 * configuration. For example the Gen OA unit isn't designed to support
 * orthogonal configurations of individual counters; it's configured for a set
 * of related counters. Samples for an i915 perf stream capturing OA metrics
 * will include a set of counter values packed in a compact HW specific format.
 * The OA unit supports a number of different packing formats which can be
 * selected by the user opening the stream. Perf has support for grouping
 * events, but each event in the group is configured, validated and
 * authenticated individually with separate system calls.
 *
 * i915 perf stream configurations are provided as an array of u64 (key, value)
 * pairs, instead of a fixed struct with multiple miscellaneous config members,
 * interleaved with event-type specific members.
 *
 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
 * The supported metrics are being written to memory by the GPU unsynchronized
 * with the CPU, using HW specific packing formats for counter sets. Sometimes
 * the constraints on HW configuration require reports to be filtered before
 * they would be acceptable to expose to unprivileged applications - to hide
 * the metrics of other processes/contexts. For these use cases a read() based
 * interface is a good fit, and provides an opportunity to filter data as it
 * gets copied from the GPU mapped buffers to userspace buffers.
 *
 *
 * Issues hit with first prototype based on Core Perf
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The first prototype of this driver was based on the core perf
 * infrastructure, and while we did make that mostly work, with some changes to
 * perf, we found we were breaking or working around too many assumptions baked
 * into perf's current, cpu-centric design.
 *
 * In the end we didn't see a clear benefit to making perf's implementation and
 * interface more complex by changing design assumptions while we knew we still
 * wouldn't be able to use any existing perf based userspace tools.
 *
 * Also considering the Gen specific nature of the Observability hardware and
 * how userspace will sometimes need to combine i915 perf OA metrics with
 * side-band OA data captured via MI_REPORT_PERF_COUNT commands, we expect the
 * interface to be used by a platform specific userspace such as OpenGL or
 * tools. This is to say: we aren't inherently missing out on having a
 * standard vendor/architecture agnostic interface by not using perf.
 *
 *
 * For posterity, in case we might revisit trying to adapt core perf to be
 * better suited to exposing i915 metrics, these were the main pain points we
 * hit:
 *
 * - The perf based OA PMU driver broke some significant design assumptions:
 *
 *   Existing perf pmus are used for profiling work on a cpu and we were
 *   introducing the idea of _IS_DEVICE pmus with different security
 *   implications, the need to fake cpu-related data (such as user/kernel
 *   registers) to fit with perf's current design, and adding _DEVICE records
 *   as a way to forward device-specific status records.
 *
 *   The OA unit writes reports of counters into a circular buffer, without
 *   involvement from the CPU, making our PMU driver the first of its kind.
 *
 *   Given the way we periodically forwarded data from the GPU-mapped OA
 *   buffer to perf's buffer, those bursts of sample writes looked to perf
 *   like we were sampling too fast and so we had to subvert its throttling
 *   checks.
 *
 *   Perf supports groups of counters and allows those to be read via
 *   transactions internally but transactions currently seem designed to be
 *   explicitly initiated from the cpu (say in response to a userspace read())
 *   and while we could pull a report out of the OA buffer we can't
 *   trigger a report from the cpu on demand.
 *
 *   Related to being report based; the OA counters are configured in HW as a
 *   set while perf generally expects counter configurations to be orthogonal.
 *   Although counters can be associated with a group leader as they are
 *   opened, there's no clear precedent for being able to provide group-wide
 *   configuration attributes (for example we want to let userspace choose the
 *   OA unit report format used to capture all counters in a set, or specify a
 *   GPU context to filter metrics on). We avoided using perf's grouping
 *   feature and forwarded OA reports to userspace via perf's 'raw' sample
 *   field. This suited our userspace well considering how coupled the counters
 *   are when dealing with normalizing. It would be inconvenient to split
 *   counters up into separate events, only to require userspace to recombine
 *   them. For Mesa it's also convenient to be forwarded raw, periodic reports
 *   for combining with the side-band raw reports it captures using
 *   MI_REPORT_PERF_COUNT commands.
 *
 *   - As a side note on perf's grouping feature; there was also some concern
 *     that using PERF_FORMAT_GROUP as a way to pack together counter values
 *     would quite drastically inflate our sample sizes, which would likely
 *     lower the effective sampling resolutions we could use when the available
 *     memory bandwidth is limited.
 *
 *     With the OA unit's report formats, counters are packed together as 32
 *     or 40bit values, with the largest report size being 256 bytes.
 *
 *     PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
 *     documented ordering to the values, implying PERF_FORMAT_ID must also be
 *     used to add a 64bit ID before each value; giving 16 bytes per counter.
 *
 *   Related to counter orthogonality; we can't time share the OA unit, while
 *   event scheduling is a central design idea within perf for allowing
 *   userspace to open + enable more events than can be configured in HW at any
 *   one time. The OA unit is not designed to allow re-configuration while in
 *   use. We can't reconfigure the OA unit without losing internal OA unit
 *   state which we can't access explicitly to save and restore. Reconfiguring
 *   the OA unit is also relatively slow, involving ~100 register writes. From
 *   userspace Mesa also depends on a stable OA configuration when emitting
 *   MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
 *   disabled while there are outstanding MI_RPC commands lest we hang the
 *   command streamer.
 *
 *   The contents of sample records aren't extensible by device drivers (i.e.
 *   the sample_type bits). As an example, Sourab Gupta had been looking to
 *   attach GPU timestamps to our OA samples. We were shoehorning OA reports
 *   into sample records by using the 'raw' field, but it's tricky to pack more
 *   than one thing into this field because events/core.c currently only lets a
 *   pmu give a single raw data pointer plus len which will be copied into the
 *   ring buffer. To include more than the OA report we'd have to copy the
 *   report into an intermediate larger buffer. I'd been considering allowing a
 *   vector of data+len values to be specified for copying the raw data, but
 *   it felt like a kludge to be using the raw field for this purpose.
 *
 * - It felt like our perf based PMU was making some technical compromises
 *   just for the sake of using perf:
 *
 *   perf_event_open() requires events to either relate to a pid or a specific
 *   cpu core, while our device pmu related to neither. Events opened with a
 *   pid will be automatically enabled/disabled according to the scheduling of
 *   that process - so not appropriate for us. When an event is related to a
 *   cpu id, perf ensures pmu methods will be invoked via an inter-processor
 *   interrupt on that core. To avoid invasive changes our userspace opened OA
 *   perf events for a specific cpu. This was workable but it meant the
 *   majority of the OA driver ran in atomic context, including all OA report
 *   forwarding, which wasn't really necessary in our case and seems to make
 *   our locking requirements somewhat complex as we handled the interaction
 *   with the rest of the i915 driver.
 */

#include <linux/anon_inodes.h>
#include <linux/sizes.h>
#include <linux/uuid.h>

#include "i915_drv.h"
#include "i915_oa_hsw.h"
#include "i915_oa_bdw.h"
#include "i915_oa_chv.h"
#include "i915_oa_sklgt2.h"
#include "i915_oa_sklgt3.h"
#include "i915_oa_sklgt4.h"
#include "i915_oa_bxt.h"
#include "i915_oa_kblgt2.h"
#include "i915_oa_kblgt3.h"
#include "i915_oa_glk.h"
#include "i915_oa_cflgt2.h"
#include "i915_oa_cflgt3.h"
#include "i915_oa_cnl.h"
#include "i915_oa_icl.h"

/* HW requires this to be a power of two, between 128k and 16M, though the
 * driver is currently generally designed assuming the largest 16M size is
 * used such that the overflow cases are unlikely in normal operation.
 */
#define OA_BUFFER_SIZE		SZ_16M

#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))

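/*
 * Worked example for the macro above (an illustration, not driver code):
 * with OA_BUFFER_SIZE = 16M the mask is 0xffffff, so OA_TAKEN() handles the
 * circular wrap, e.g.
 *
 *	OA_TAKEN(0x000040, 0xffffc0) == (0x40 - 0xffffc0) & 0xffffff == 0x80
 *
 * i.e. 128 bytes are available even though the raw tail offset has wrapped
 * past the end of the buffer while the head has not.
 */
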
/**
 * DOC: OA Tail Pointer Race
 *
 * There's a HW race condition between OA unit tail pointer register updates and
 * writes to memory whereby the tail pointer can sometimes get ahead of what's
 * been written out to the OA buffer so far (in terms of what's visible to the
 * CPU).
 *
 * Although this can be observed explicitly while copying reports to userspace
 * by checking for a zeroed report-id field in tail reports, we want to account
 * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
 * read() attempts.
 *
 * In effect we define a tail pointer for reading that lags the real tail
 * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
 * time for the corresponding reports to become visible to the CPU.
 *
 * To manage this we actually track two tail pointers:
 *  1) An 'aging' tail with an associated timestamp that is tracked until we
 *     can trust the corresponding data is visible to the CPU; at which point
 *     it is considered 'aged'.
 *  2) An 'aged' tail that can be used for read()ing.
 *
 * The two separate pointers let us decouple read()s from tail pointer aging.
 *
 * The tail pointers are checked and updated at a limited rate within a hrtimer
 * callback (the same callback that is used for delivering EPOLLIN events).
 *
 * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
 * indicates that an updated tail pointer is needed.
 *
 * Most of the implementation details for this workaround are in
 * oa_buffer_check_unlocked() and _append_oa_reports().
 *
 * Note for posterity: previously the driver used to define an effective tail
 * pointer that lagged the real pointer by a 'tail margin' measured in bytes
 * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
 * This was flawed considering that the OA unit may also automatically generate
 * non-periodic reports (such as on context switch) or the OA unit may be
 * enabled without any periodic sampling.
 */

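/*
 * A compressed sketch of the aging scheme described above (pseudo-code for
 * illustration only; the real state lives in dev_priv->perf.oa.oa_buffer and
 * the real logic in oa_buffer_check_unlocked() below):
 *
 *	tails[aged_idx]  -> offset for read()s (or INVALID_TAIL_PTR)
 *	tails[!aged_idx] -> 'aging' offset read from HW, plus aging_timestamp
 *
 *	// in the hrtimer callback, once the aging tail is old enough:
 *	if (now - aging_timestamp > OA_TAIL_MARGIN_NSEC) {
 *		aged_idx ^= 1;	// the aging tail becomes the aged tail
 *		tails[!aged_idx].offset = INVALID_TAIL_PTR; // age a new one
 *	}
 */
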
#define OA_TAIL_MARGIN_NSEC	100000ULL
#define INVALID_TAIL_PTR	0xffffffff

/* frequency for checking whether the OA unit has written new reports to the
 * circular OA buffer, i.e. 200 times a second (a 5ms poll period)...
 */
#define POLL_FREQUENCY 200
#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)

/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
static int zero;
static int one = 1;
static u32 i915_perf_stream_paranoid = true;

/* The maximum exponent the hardware accepts is 63 (essentially it selects one
 * of the 64bit timestamp bits to trigger reports from) but there's currently
 * no known use case for sampling as infrequently as once per 47 thousand years.
 *
 * Since the timestamps included in OA reports are only 32bits it seems
 * reasonable to limit the OA exponent where it's still possible to account for
 * overflow in OA report timestamps.
 */
#define OA_EXPONENT_MAX 31

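/*
 * For reference, illustrative arithmetic (not driver code): the sampling
 * period selected by an exponent follows
 *
 *	period_ns = (2^(exponent + 1)) * NSEC_PER_SEC / timestamp_frequency
 *
 * so with Haswell's 12.5MHz timestamp frequency, exponent 0 gives the 160ns
 * minimum period mentioned below, while OA_EXPONENT_MAX (31) gives a period
 * of roughly 343 seconds.
 */
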
#define INVALID_CTX_ID 0xffffffff

/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
#define OAREPORT_REASON_MASK		0x3f
#define OAREPORT_REASON_SHIFT		19
#define OAREPORT_REASON_TIMER		(1<<0)
#define OAREPORT_REASON_CTX_SWITCH	(1<<3)
#define OAREPORT_REASON_CLK_RATIO	(1<<5)

/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
 *
 * The highest sampling frequency we can theoretically program the OA unit
 * with is always half the timestamp frequency: e.g. 6.25MHz for Haswell.
 *
 * Initialized just before we register the sysctl parameter.
 */
static int oa_sample_rate_hard_limit;

/* Theoretically we can program the OA unit to sample every 160ns but don't
 * allow that by default unless root...
 *
 * The default threshold of 100000Hz is based on perf's similar
 * kernel.perf_event_max_sample_rate sysctl parameter.
 */
static u32 i915_oa_max_sample_rate = 100000;

/* XXX: beware if future OA HW adds new report formats that the current
 * code assumes all reports have a power-of-two size and ~(size - 1) can
 * be used as a mask to align the OA tail pointer.
 */
static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]	    = { 0, 64 },
	[I915_OA_FORMAT_A29]	    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
};

static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12]		    = { 0, 64 },
	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
};

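/*
 * Illustrating the power-of-two assumption above (not driver code): with the
 * largest report size of 256 bytes, ~(256 - 1) == 0xffffff00, so
 *
 *	hw_tail &= ~(report_size - 1);
 *
 * as used in oa_buffer_check_unlocked() below, rounds a raw tail pointer
 * down to a report boundary.
 */
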
#define SAMPLE_OA_REPORT	(1<<0)

/**
 * struct perf_open_properties - for validated properties given to open a stream
 * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
 * @single_context: Whether a single or all gpu contexts should be monitored
 * @ctx_handle: A gem ctx handle for use with @single_context
 * @metrics_set: An ID for an OA unit metric set advertised via sysfs
 * @oa_format: An OA unit HW report format
 * @oa_periodic: Whether to enable periodic OA unit sampling
 * @oa_period_exponent: The OA unit sampling period is derived from this
 *
 * As read_properties_unlocked() enumerates and validates the properties given
 * to open a stream of metrics, the configuration is built up in this
 * structure, which starts out zero initialized.
 */
struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;
};

static void free_oa_config(struct drm_i915_private *dev_priv,
			   struct i915_oa_config *oa_config)
{
	/* The register lists may hold ERR_PTR() values from a failed
	 * allocation, so only pass real (or NULL) pointers to kfree();
	 * kfree(NULL) is a no-op.
	 */
	if (!IS_ERR(oa_config->flex_regs))
		kfree(oa_config->flex_regs);
	if (!IS_ERR(oa_config->b_counter_regs))
		kfree(oa_config->b_counter_regs);
	if (!IS_ERR(oa_config->mux_regs))
		kfree(oa_config->mux_regs);
	kfree(oa_config);
}

static void put_oa_config(struct drm_i915_private *dev_priv,
			  struct i915_oa_config *oa_config)
{
	if (!atomic_dec_and_test(&oa_config->ref_count))
		return;

	free_oa_config(dev_priv, oa_config);
}

static int get_oa_config(struct drm_i915_private *dev_priv,
			 int metrics_set,
			 struct i915_oa_config **out_config)
{
	int ret;

	if (metrics_set == 1) {
		*out_config = &dev_priv->perf.oa.test_config;
		atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
		return 0;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		return ret;

	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
	if (!*out_config)
		ret = -EINVAL;
	else
		atomic_inc(&(*out_config)->ref_count);

	mutex_unlock(&dev_priv->perf.metrics_lock);

	return ret;
}

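/*
 * Usage sketch for the pair above (illustrative, not driver code):
 * get_oa_config() returns with the config's reference count raised, so each
 * successful call must be balanced by put_oa_config():
 *
 *	struct i915_oa_config *oa_config;
 *	int err = get_oa_config(dev_priv, props->metrics_set, &oa_config);
 *	if (err)
 *		return err;
 *
 *	// ... program the HW from oa_config ...
 *
 *	put_oa_config(dev_priv, oa_config);
 */
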
static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
{
	return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}

static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
{
	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);

	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}

/**
 * oa_buffer_check_unlocked - check for data and update tail ptr state
 * @dev_priv: i915 device instance
 *
 * This is either called via fops (for blocking reads in user ctx) or the poll
 * check hrtimer (atomic ctx) to check the OA buffer tail pointer and
 * determine whether there is data available for userspace to read.
 *
 * This function is central to providing a workaround for the OA unit tail
 * pointer having a race with respect to what data is visible to the CPU.
 * It is responsible for reading tail pointers from the hardware and giving
 * the pointers time to 'age' before they are made available for reading.
 * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
 *
 * Besides returning true when there is data available to read() this function
 * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
 * and .aged_tail_idx state used for reading.
 *
 * Note: It's safe to read OA config state here unlocked, assuming that this is
 * only called while the stream is enabled, while the global OA configuration
 * can't be modified.
 *
 * Returns: %true if the OA buffer contains data, else %false
 */
static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
{
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	unsigned long flags;
	unsigned int aged_idx;
	u32 head, hw_tail, aged_tail, aging_tail;
	u64 now;

	/* We have to consider the (unlikely) possibility that read() errors
	 * could result in an OA buffer reset which might reset the head,
	 * tails[] and aged_tail state.
	 */
	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* NB: The head we observe here might effectively be a little out of
	 * date (between head and tails[aged_idx].offset) if there is currently
	 * a read() in progress.
	 */
	head = dev_priv->perf.oa.oa_buffer.head;

	aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
	aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;

	hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);

	/* The tail pointer increases in 64 byte increments,
	 * not in report_size steps...
	 */
	hw_tail &= ~(report_size - 1);

	now = ktime_get_mono_fast_ns();

	/* Update the aged tail
	 *
	 * Flip the tail pointer available for read()s once the aging tail is
	 * old enough to trust that the corresponding data will be visible to
	 * the CPU...
	 *
	 * Do this before updating the aging pointer in case we may be able to
	 * immediately start aging a new pointer too (if new data has become
	 * available) without needing to wait for a later hrtimer callback.
	 */
	if (aging_tail != INVALID_TAIL_PTR &&
	    ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
	     OA_TAIL_MARGIN_NSEC)) {

		aged_idx ^= 1;
		dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;

		aged_tail = aging_tail;

		/* Mark that we need a new pointer to start aging... */
		dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
		aging_tail = INVALID_TAIL_PTR;
	}

	/* Update the aging tail
	 *
	 * We throttle aging tail updates until we have a new tail that
	 * represents >= one report more data than is already available for
	 * reading. This ensures there will be enough data for a successful
	 * read once this new pointer has aged and ensures we will give the new
	 * pointer time to age.
	 */
	if (aging_tail == INVALID_TAIL_PTR &&
	    (aged_tail == INVALID_TAIL_PTR ||
	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
		struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
		u32 gtt_offset = i915_ggtt_offset(vma);

		/* Be paranoid and do a bounds check on the pointer read back
		 * from hardware, just in case some spurious hardware condition
		 * could put the tail out of bounds...
		 */
		if (hw_tail >= gtt_offset &&
		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
			dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
				aging_tail = hw_tail;
			dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
		} else {
			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
				  hw_tail);
		}
	}

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	return aged_tail == INVALID_TAIL_PTR ?
		false : OA_TAKEN(aged_tail, head) >= report_size;
}

/**
 * append_oa_status - Appends a status record to a userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @type: The kind of status to report to userspace
 *
 * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
 * into the userspace read() buffer.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}

/**
 * append_oa_sample - Copies single OA report into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @report: A single OA report to (optionally) include as part of the sample
 *
 * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
 * properties when opening a stream, tracked as `stream->sample_flags`. This
 * function copies the requested components of a single sample to the given
 * read() @buf.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	struct drm_i915_perf_record_header header;
	u32 sample_flags = stream->sample_flags;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (sample_flags & SAMPLE_OA_REPORT) {
		if (copy_to_user(buf, report, report_size))
			return -EFAULT;
	}

	(*offset) += header.size;

	return 0;
}

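/*
 * For reference, a hedged userspace-side sketch (not driver code) showing
 * how the records framed by append_oa_status()/append_oa_sample() can be
 * walked after a read(); process_record() is a hypothetical helper:
 *
 *	const uint8_t *p = buf, *end = buf + len;
 *
 *	while (p + sizeof(struct drm_i915_perf_record_header) <= end) {
 *		const struct drm_i915_perf_record_header *h =
 *			(const void *)p;
 *
 *		if (h->size == 0 || p + h->size > end)
 *			break;	// malformed/truncated: stop parsing
 *		process_record(h->type, h + 1, h->size - sizeof(*h));
 *		p += h->size;
 *	}
 */
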
/**
 * gen8_append_oa_reports - Copies all buffered OA reports into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;

	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;
		u32 ctx_id;
		u32 reason;

		/*
		 * All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The reason field includes flags identifying what
		 * triggered this specific report (mostly timer
		 * triggered or e.g. due to a context switch).
		 *
		 * This field is never expected to be zero so we can
		 * check that the report isn't invalid before copying
		 * it to userspace...
		 */
		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
			  OAREPORT_REASON_MASK);
		if (reason == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		/*
		 * XXX: Just keep the lower 21 bits for now since I'm not
		 * entirely sure if the HW touches any of the higher bits in
		 * this field
		 */
		ctx_id = report32[2] & 0x1fffff;

		/*
		 * Squash whatever is in the CTX_ID field if it's marked as
		 * invalid to be sure we avoid false-positive, single-context
		 * filtering below...
		 *
		 * Note that we don't clear the valid_ctx_bit so userspace can
		 * understand that the ID has been squashed by the kernel.
		 */
		if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
			ctx_id = report32[2] = INVALID_CTX_ID;

		/*
		 * NB: For Gen 8 the OA unit no longer supports clock gating
		 * off for a specific context and the kernel can't securely
		 * stop the counters from updating as system-wide / global
		 * values.
		 *
		 * Automatic reports now include a context ID so reports can be
		 * filtered on the cpu but it's not worth trying to
		 * automatically subtract/hide counter progress for other
		 * contexts while filtering since we can't stop userspace
		 * issuing MI_REPORT_PERF_COUNT commands which would still
		 * provide a side-band view of the real values.
		 *
		 * To allow userspace (such as Mesa/GL_INTEL_performance_query)
		 * to normalize counters for a single filtered context then it
		 * needs to be forwarded bookend context-switch reports so that
		 * it can track switches in between MI_REPORT_PERF_COUNT
		 * commands and can itself subtract/ignore the progress of
		 * counters associated with other contexts. Note that the
		 * hardware automatically triggers reports when switching to a
		 * new context which are tagged with the ID of the newly active
		 * context. To avoid the complexity (and likely fragility) of
		 * reading ahead while parsing reports to try and minimize
		 * forwarding redundant context switch reports (i.e. between
		 * other, unrelated contexts) we simply elect to forward them
		 * all.
		 *
		 * We don't rely solely on the reason field to identify context
		 * switches since it's not uncommon for periodic samples to
		 * identify a switch before any 'context switch' report.
		 */
		if (!dev_priv->perf.oa.exclusive_stream->ctx ||
		    dev_priv->perf.oa.specific_ctx_id == ctx_id ||
		    (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
		     dev_priv->perf.oa.specific_ctx_id) ||
		    reason & OAREPORT_REASON_CTX_SWITCH) {

			/*
			 * While filtering for a single context we avoid
			 * leaking the IDs of other contexts.
			 */
			if (dev_priv->perf.oa.exclusive_stream->ctx &&
			    dev_priv->perf.oa.specific_ctx_id != ctx_id) {
				report32[2] = INVALID_CTX_ID;
			}

			ret = append_oa_sample(stream, buf, count, offset,
					       report);
			if (ret)
				break;

			dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
		}

		/*
		 * The above reason field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen8_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks OA unit status registers and if necessary appends corresponding
 * status records for userspace (such as for a buffer full condition) and then
 * initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * NB: some data may be successfully copied to the userspace buffer
 * even if an error is returned, and this is reflected in the
 * updated @offset.
 *
 * Returns: zero on success or a negative error code
 */
static int gen8_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus = I915_READ(GEN8_OASTATUS);

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * Although theoretically we could handle this more gracefully
	 * sometimes, some Gens don't correctly suppress certain
	 * automatically triggered reports in this condition and so we
	 * have to assume that old reports are now being trampled
	 * over.
	 *
	 * Considering how we don't currently give userspace control
	 * over the OA buffer size and always configure a large 16MB
	 * buffer, then a buffer overflow does anyway likely indicate
	 * that something has gone quite badly wrong.
	 */
	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(dev_priv);
		dev_priv->perf.oa.ops.oa_enable(dev_priv);

		/*
		 * Note: .oa_enable() is expected to re-init the oabuffer and
		 * reset GEN8_OASTATUS for us
		 */
		oastatus = I915_READ(GEN8_OASTATUS);
	}

	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		I915_WRITE(GEN8_OASTATUS,
			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
	}

	return gen8_append_oa_reports(stream, buf, count, offset);
}

/**
 * gen7_append_oa_reports - Copies all buffered OA reports into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/* An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;

	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/* All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/* The report-ID field for periodic samples includes
		 * some undocumented flags related to what triggered
		 * the report and is never expected to be zero so we
		 * can check that the report isn't invalid before
		 * copying it to userspace...
		 */
		if (report32[0] == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/* The above report-id field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/* We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		I915_WRITE(GEN7_OASTATUS2,
			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}
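
/*
 * Illustrative note (not part of the original source): OA_TAKEN() is modular
 * ring arithmetic, so the copy loop above also works across a buffer
 * wrap-around. A minimal sketch, assuming the usual definition
 * ((tail - head) & (OA_BUFFER_SIZE - 1)) and a 16M buffer:
 *
 *	head = OA_BUFFER_SIZE - 256;	// 256 bytes before the end
 *	tail = 256;			// already wrapped past the start
 *	taken = OA_TAKEN(tail, head);	// == 512, never negative
 *
 * With a 256 byte report format the loop would then copy two reports: one
 * ending exactly at the buffer end and one starting again at offset 0.
 */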
/**
 * gen7_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks Gen 7 specific OA unit status registers and if necessary appends
 * corresponding status records for userspace (such as for a buffer full
 * condition) and then initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus1;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus1 = I915_READ(GEN7_OASTATUS1);

	/* XXX: On Haswell we don't have a safe way to clear oastatus1
	 * bits while the OA unit is enabled (while the tail pointer
	 * may be updated asynchronously) so we ignore status bits
	 * that have already been reported to userspace.
	 */
	oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;

	/* We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * - The status can be interpreted to mean that the buffer is
	 *   currently full (with a higher precedence than OA_TAKEN()
	 *   which will start to report a near-empty buffer after an
	 *   overflow) but it's awkward that we can't clear the status
	 *   on Haswell, so without a reset we won't be able to catch
	 *   the state again.
	 *
	 * - Since it also implies the HW has started overwriting old
	 *   reports it may also affect our sanity checks for invalid
	 *   reports when copying to userspace that assume new reports
	 *   are being written to cleared memory.
	 *
	 * - In the future we may want to introduce a flight recorder
	 *   mode where the driver will automatically maintain a safe
	 *   guard band between head/tail, avoiding this overflow
	 *   condition, but we avoid the added driver complexity for
	 *   now.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(dev_priv);
		dev_priv->perf.oa.ops.oa_enable(dev_priv);

		oastatus1 = I915_READ(GEN7_OASTATUS1);
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		dev_priv->perf.oa.gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	return gen7_append_oa_reports(stream, buf, count, offset);
}

/**
 * i915_oa_wait_unlocked - handles blocking IO until OA data available
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Called when userspace tries to read() from a blocking stream FD opened
 * for OA metrics. It waits until the hrtimer callback finds a non-empty
 * OA buffer and wakes us.
 *
 * Note: it's acceptable to have this return with some false positives
 * since any subsequent read handling will return -EAGAIN if there isn't
 * really data ready for userspace yet.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	/* We would wait indefinitely if periodic sampling is not enabled */
	if (!dev_priv->perf.oa.periodic)
		return -EIO;

	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
					oa_buffer_check_unlocked(dev_priv));
}

/**
 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
 * @stream: An i915-perf stream opened for OA metrics
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream opened for OA metrics,
 * this starts a poll_wait with the wait queue that our hrtimer callback wakes
 * when it sees data ready to read in the circular OA buffer.
 */
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
}

/**
 * i915_oa_read - just calls through to &i915_oa_ops->read
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
}

/**
 * oa_get_render_ctx_id - determine and hold ctx hw id
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This ensures that we don't have to worry about
 * updating the context ID in OACONTROL on the fly.
 *
 * Returns: zero on success or a negative error code
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
	} else {
		struct intel_engine_cs *engine = dev_priv->engine[RCS];
		struct intel_ring *ring;
		int ret;

		ret = i915_mutex_lock_interruptible(&dev_priv->drm);
		if (ret)
			return ret;

		/*
		 * As the ID is the gtt offset of the context's vma we
		 * pin the vma to ensure the ID remains fixed.
		 *
		 * NB: implied RCS engine...
		 */
		ring = intel_context_pin(stream->ctx, engine);
		mutex_unlock(&dev_priv->drm.struct_mutex);
		if (IS_ERR(ring))
			return PTR_ERR(ring);

		/*
		 * Explicitly track the ID (instead of calling
		 * i915_ggtt_offset() on the fly) considering the difference
		 * with gen8+ and execlists
		 */
		dev_priv->perf.oa.specific_ctx_id =
			i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state);
	}

	return 0;
}

/**
 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
 * @stream: An i915-perf stream opened for OA metrics
 *
 * In case anything needed doing to ensure the context HW ID would remain valid
 * for the lifetime of the stream, then that can be undone here.
 */
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
	} else {
		struct intel_engine_cs *engine = dev_priv->engine[RCS];

		mutex_lock(&dev_priv->drm.struct_mutex);

		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
		intel_context_unpin(stream->ctx, engine);

		mutex_unlock(&dev_priv->drm.struct_mutex);
	}
}

static void
free_oa_buffer(struct drm_i915_private *i915)
{
	mutex_lock(&i915->drm.struct_mutex);

	i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
	i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
	i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);

	i915->perf.oa.oa_buffer.vma = NULL;
	i915->perf.oa.oa_buffer.vaddr = NULL;

	mutex_unlock(&i915->drm.struct_mutex);
}

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);

	/*
	 * Unset exclusive_stream first, it will be checked while disabling
	 * the metric set on gen8+.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	dev_priv->perf.oa.exclusive_stream = NULL;
	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	free_oa_buffer(dev_priv);

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	put_oa_config(dev_priv, stream->oa_config);

	if (dev_priv->perf.oa.spurious_report_rs.missed) {
		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
			 dev_priv->perf.oa.spurious_report_rs.missed);
	}
}

static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* Pre-DevBDW: OABUFFER must be set with counters off,
	 * before OASTATUS1, but after OASTATUS2
	 */
	I915_WRITE(GEN7_OASTATUS2,
		   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	I915_WRITE(GEN7_OABUFFER, gtt_offset);

	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */

	/* Mark that we need updated tail pointers to read from... */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* On Haswell we have to track which OASTATUS1 flags we've
	 * already seen since they can't be cleared while periodic
	 * sampling is enabled.
	 */
	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;

	/* NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen7_append_oa_reports() that looks at the
	 * report-id field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/* Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}

static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	I915_WRITE(GEN8_OASTATUS, 0);
	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	I915_WRITE(GEN8_OABUFFER_UDW, 0);

	/*
	 * PRM says:
	 *
	 *  "This MMIO must be set before the OATAILPTR
	 *  register and after the OAHEADPTR register. This is
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
	I915_WRITE(GEN8_OABUFFER, gtt_offset |
		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from... */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	/*
	 * Reset state used to recognise context switches, affecting which
	 * reports we will forward to userspace while filtering for a single
	 * context.
	 */
	dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen8_append_oa_reports() that looks at the
	 * reason field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/*
	 * Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}

static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *bo;
	struct i915_vma *vma;
	int ret;

	if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ret;

	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);

	bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
	if (IS_ERR(bo)) {
		DRM_ERROR("Failed to allocate OA buffer\n");
		ret = PTR_ERR(bo);
		goto unlock;
	}

	ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
	if (ret)
		goto err_unref;

	/* PreHSW required 512K alignment, HSW requires 16M */
	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}
	dev_priv->perf.oa.oa_buffer.vma = vma;

	dev_priv->perf.oa.oa_buffer.vaddr =
		i915_gem_object_pin_map(bo, I915_MAP_WB);
	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
		goto err_unpin;
	}

	dev_priv->perf.oa.ops.init_oa_buffer(dev_priv);

	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
			 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
			 dev_priv->perf.oa.oa_buffer.vaddr);

	goto unlock;

err_unpin:
	__i915_vma_unpin(vma);

err_unref:
	i915_gem_object_put(bo);

	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
	dev_priv->perf.oa.oa_buffer.vma = NULL;

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
}

static void config_oa_regs(struct drm_i915_private *dev_priv,
			   const struct i915_oa_reg *regs,
			   u32 n_regs)
{
	u32 i;

	for (i = 0; i < n_regs; i++) {
		const struct i915_oa_reg *reg = regs + i;

		I915_WRITE(reg->addr, reg->value);
	}
}
static int hsw_enable_metric_set(struct drm_i915_private *dev_priv,
				 const struct i915_oa_config *oa_config)
{
	/* PRM:
	 *
	 * OA unit is using “crclk” for its functionality. When trunk
	 * level clock gating takes place, OA clock would be gated,
	 * unable to count the events from non-render clock domain.
	 * Render clock gating must be disabled when OA is enabled to
	 * count the events from non-render domain. Unit level clock
	 * gating for RCS should also be disabled.
	 */
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
				    ~GEN7_DOP_CLOCK_GATE_ENABLE));
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);

	/* It apparently takes a fairly long time for a new MUX
	 * configuration to be applied after these register writes.
	 * This delay duration was derived empirically based on the
	 * render_basic config but hopefully it covers the maximum
	 * configuration latency.
	 *
	 * As a fallback, the checks in _append_oa_reports() to skip
	 * invalid OA reports do also seem to work to discard reports
	 * generated before this config has completed - albeit not
	 * silently.
	 *
	 * Unfortunately this is essentially a magic number, since we
	 * don't currently know of a reliable mechanism for predicting
	 * how long the MUX config will take to apply and besides
	 * seeing invalid reports we don't know of a reliable way to
	 * explicitly check that the MUX config has landed.
	 *
	 * It's even possible we've mischaracterized the underlying
	 * problem - it just seems like the simplest explanation why
	 * a delay at this location would mitigate any invalid reports.
	 */
	usleep_range(15000, 20000);

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}
static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
				  ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
				    GEN7_DOP_CLOCK_GATE_ENABLE));

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}

/*
 * NB: It must always remain pointer safe to run this even if the OA unit
 * has been disabled.
 *
 * It's fine to put out-of-date values into these per-context registers
 * in the case that the OA unit has been disabled.
 */
static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
					   u32 *reg_state,
					   const struct i915_oa_config *oa_config)
{
	struct drm_i915_private *dev_priv = ctx->i915;
	u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
	u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	u32 flex_mmio[] = {
		i915_mmio_reg_offset(EU_PERF_CNTL0),
		i915_mmio_reg_offset(EU_PERF_CNTL1),
		i915_mmio_reg_offset(EU_PERF_CNTL2),
		i915_mmio_reg_offset(EU_PERF_CNTL3),
		i915_mmio_reg_offset(EU_PERF_CNTL4),
		i915_mmio_reg_offset(EU_PERF_CNTL5),
		i915_mmio_reg_offset(EU_PERF_CNTL6),
	};
	int i;

	reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
	reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
				      GEN8_OA_TIMER_PERIOD_SHIFT) |
				     (dev_priv->perf.oa.periodic ?
				      GEN8_OA_TIMER_ENABLE : 0) |
				     GEN8_OA_COUNTER_RESUME;

	for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
		u32 state_offset = ctx_flexeu0 + i * 2;
		u32 mmio = flex_mmio[i];

		/*
		 * This arbitrary default will select the 'EU FPU0 Pipeline
		 * Active' event. In the future it's anticipated that there
		 * will be an explicit 'No Event' we can select, but not yet...
		 */
		u32 value = 0;

		if (oa_config) {
			u32 j;

			for (j = 0; j < oa_config->flex_regs_len; j++) {
				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
					value = oa_config->flex_regs[j].value;
					break;
				}
			}
		}

		reg_state[state_offset] = mmio;
		reg_state[state_offset+1] = value;
	}
}
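
/*
 * Illustrative note (not part of the original source): the register state
 * context written above is a flat array of (MMIO offset, value) dword
 * pairs, which is why each register occupies two consecutive slots. A
 * minimal sketch of the resulting layout, with the offsets hypothetical:
 *
 *	reg_state[ctx_flexeu0 + 0] = i915_mmio_reg_offset(EU_PERF_CNTL0);
 *	reg_state[ctx_flexeu0 + 1] = value_for_cntl0;
 *	reg_state[ctx_flexeu0 + 2] = i915_mmio_reg_offset(EU_PERF_CNTL1);
 *	reg_state[ctx_flexeu0 + 3] = value_for_cntl1;
 *	...
 *
 * which the hardware reloads on every context restore, keeping the OA
 * configuration consistent across context switches.
 */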
/*
 * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This
 * is only used by the kernel context.
 */
static int gen8_emit_oa_config(struct i915_request *rq,
			       const struct i915_oa_config *oa_config)
{
	struct drm_i915_private *dev_priv = rq->i915;
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	u32 flex_mmio[] = {
		i915_mmio_reg_offset(EU_PERF_CNTL0),
		i915_mmio_reg_offset(EU_PERF_CNTL1),
		i915_mmio_reg_offset(EU_PERF_CNTL2),
		i915_mmio_reg_offset(EU_PERF_CNTL3),
		i915_mmio_reg_offset(EU_PERF_CNTL4),
		i915_mmio_reg_offset(EU_PERF_CNTL5),
		i915_mmio_reg_offset(EU_PERF_CNTL6),
	};
	u32 *cs;
	int i;

	cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1);

	*cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
	*cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		GEN8_OA_COUNTER_RESUME;

	for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
		u32 mmio = flex_mmio[i];

		/*
		 * This arbitrary default will select the 'EU FPU0 Pipeline
		 * Active' event. In the future it's anticipated that there
		 * will be an explicit 'No Event' we can select, but not
		 * yet...
		 */
		u32 value = 0;

		if (oa_config) {
			u32 j;

			for (j = 0; j < oa_config->flex_regs_len; j++) {
				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
					value = oa_config->flex_regs[j].value;
					break;
				}
			}
		}

		*cs++ = mmio;
		*cs++ = value;
	}

	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}
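
/*
 * Illustrative note (not part of the original source): the ring space
 * reserved above is sized for a single MI_LOAD_REGISTER_IMM packet. For the
 * seven flex EU registers the dword budget works out as:
 *
 *	 1 dword   MI_LOAD_REGISTER_IMM(8) header
 *	 2 dwords  GEN8_OACTXCONTROL (offset, value)
 *	14 dwords  7 flex registers x (offset, value)
 *	 1 dword   MI_NOOP padding
 *
 * i.e. ARRAY_SIZE(flex_mmio) * 2 + 4 == 18 dwords, matching the
 * intel_ring_begin() argument, with the LRI count of ARRAY_SIZE(flex_mmio)
 * + 1 covering the eight (offset, value) pairs that follow the header.
 */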
static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
						 const struct i915_oa_config *oa_config)
{
	struct intel_engine_cs *engine = dev_priv->engine[RCS];
	struct i915_timeline *timeline;
	struct i915_request *rq;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	i915_retire_requests(dev_priv);

	rq = i915_request_alloc(engine, dev_priv->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	ret = gen8_emit_oa_config(rq, oa_config);
	if (ret) {
		i915_request_add(rq);
		return ret;
	}

	/* Queue this switch after all other activity */
	list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
		struct i915_request *prev;

		prev = i915_gem_active_raw(&timeline->last_request,
					   &dev_priv->drm.struct_mutex);
		if (prev)
			i915_request_await_dma_fence(rq, &prev->fence);
	}

	i915_request_add(rq);

	return 0;
}

/*
 * Manages updating the per-context aspects of the OA stream
 * configuration across all contexts.
 *
 * The awkward consideration here is that OACTXCONTROL controls the
 * exponent for periodic sampling which is primarily used for system
 * wide profiling where we'd like a consistent sampling period even in
 * the face of context switches.
 *
 * Our approach of updating the register state context (as opposed to
 * say using a workaround batch buffer) ensures that the hardware
 * won't automatically reload an out-of-date timer exponent even
 * transiently before a WA BB could be parsed.
 *
 * This function needs to:
 * - Ensure the currently running context's per-context OA state is
 *   updated
 * - Ensure that all existing contexts will have the correct per-context
 *   OA state if they are scheduled for use.
 * - Ensure any new contexts will be initialized with the correct
 *   per-context OA state.
 *
 * Note: it's only the RCS/Render context that has any OA state.
 */
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
				       const struct i915_oa_config *oa_config)
{
	struct intel_engine_cs *engine = dev_priv->engine[RCS];
	struct i915_gem_context *ctx;
	int ret;
	unsigned int wait_flags = I915_WAIT_LOCKED;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/* Switch away from any user context. */
	ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
	if (ret)
		goto out;

	/*
	 * The OA register config is setup through the context image. This image
	 * might be written to by the GPU on context switch (in particular on
	 * lite-restore). This means we can't safely update a context's image
	 * if this context is scheduled/submitted to run on the GPU.
	 *
	 * We could emit the OA register config through the batch buffer but
	 * this might leave a small interval of time where the OA unit is
	 * configured at an invalid sampling period.
	 *
	 * So far the best way to work around this issue seems to be draining
	 * the GPU from any submitted work.
	 */
	ret = i915_gem_wait_for_idle(dev_priv, wait_flags);
	if (ret)
		goto out;

	/* Update all contexts now that we've stalled the submission. */
	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
		struct intel_context *ce = to_intel_context(ctx, engine);
		u32 *regs;

		/* OA settings will be set upon first use */
		if (!ce->state)
			continue;

		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
		if (IS_ERR(regs)) {
			ret = PTR_ERR(regs);
			goto out;
		}

		ce->state->obj->mm.dirty = true;
		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);

		gen8_update_reg_state_unlocked(ctx, regs, oa_config);

		i915_gem_object_unpin_map(ce->state->obj);
	}

out:
	return ret;
}
static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
				  const struct i915_oa_config *oa_config)
{
	int ret;

	/*
	 * We disable slice/unslice clock ratio change reports on SKL since
	 * they are too noisy. The HW generates a lot of redundant reports
	 * where the ratio hasn't really changed, causing a lot of redundant
	 * work for userspace processes and increasing the chances we'll hit
	 * buffer overruns.
	 *
	 * Although we don't currently use the 'disable overrun' OABUFFER
	 * feature it's worth noting that clock ratio reports have to be
	 * disabled before considering use of that feature since the HW doesn't
	 * correctly block these reports.
	 *
	 * Currently none of the high-level metrics we have depend on knowing
	 * this ratio to normalize.
	 *
	 * Note: This register is not power context saved and restored, but
	 * that's OK considering that we disable RC6 while the OA unit is
	 * enabled.
	 *
	 * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to
	 * be read back from automatically triggered reports, as part of the
	 * RPT_ID field.
	 */
	if (IS_GEN(dev_priv, 9, 11)) {
		I915_WRITE(GEN8_OA_DEBUG,
			   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
					      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
	}

	/*
	 * Update all contexts prior to writing the mux configurations as we
	 * need to make sure all slices/subslices are ON before writing to NOA
	 * registers.
	 */
	ret = gen8_configure_all_contexts(dev_priv, oa_config);
	if (ret)
		return ret;

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}
static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' slices/subslices configurations. */
	gen8_configure_all_contexts(dev_priv, NULL);

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}

static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' slices/subslices configurations. */
	gen8_configure_all_contexts(dev_priv, NULL);

	/* Make sure we disable noa to save power. */
	I915_WRITE(RPM_CONFIG1,
		   I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE);
}
static void gen7_oa_enable(struct drm_i915_private *dev_priv)
{
	struct i915_gem_context *ctx =
			dev_priv->perf.oa.exclusive_stream->ctx;
	u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
	bool periodic = dev_priv->perf.oa.periodic;
	u32 period_exponent = dev_priv->perf.oa.period_exponent;
	u32 report_format = dev_priv->perf.oa.oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it
	 * also ensures status flags and the buffer itself are cleared
	 * in error paths, and we have checks for invalid reports based
	 * on the assumption that certain fields are written to zeroed
	 * memory which this helps maintain.
	 */
	gen7_init_oa_buffer(dev_priv);

	I915_WRITE(GEN7_OACONTROL,
		   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
		   (period_exponent <<
		    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
		   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
		   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
		   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
		   GEN7_OACONTROL_ENABLE);
}

static void gen8_oa_enable(struct drm_i915_private *dev_priv)
{
	u32 report_format = dev_priv->perf.oa.oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it
	 * also ensures status flags and the buffer itself are cleared
	 * in error paths, and we have checks for invalid reports based
	 * on the assumption that certain fields are written to zeroed
	 * memory which this helps maintain.
	 */
	gen8_init_oa_buffer(dev_priv);

	/*
	 * Note: we don't rely on the hardware to perform single context
	 * filtering and instead filter on the cpu based on the context-id
	 * field of reports
	 */
	I915_WRITE(GEN8_OACONTROL, (report_format <<
				    GEN8_OA_REPORT_FORMAT_SHIFT) |
				   GEN8_OA_COUNTER_ENABLE);
}
/**
 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * [Re]enables hardware periodic sampling according to the period configured
 * when opening the stream. This also starts a hrtimer that will periodically
 * check for data in the circular OA buffer for notifying userspace (e.g.
 * during a read() or poll()).
 */
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_enable(dev_priv);

	if (dev_priv->perf.oa.periodic)
		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
			      ns_to_ktime(POLL_PERIOD),
			      HRTIMER_MODE_REL_PINNED);
}

static void gen7_oa_disable(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN7_OACONTROL, 0);
	if (intel_wait_for_register(dev_priv,
				    GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
				    50))
		DRM_ERROR("wait for OA to be disabled timed out\n");
}

static void gen8_oa_disable(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN8_OACONTROL, 0);
	if (intel_wait_for_register(dev_priv,
				    GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
				    50))
		DRM_ERROR("wait for OA to be disabled timed out\n");
}

/**
 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * Stops the OA unit from periodically writing counter reports into the
 * circular OA buffer. This also stops the hrtimer that periodically checks for
 * data in the circular OA buffer, for notifying userspace.
 */
static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_disable(dev_priv);

	if (dev_priv->perf.oa.periodic)
		hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
}

static const struct i915_perf_stream_ops i915_oa_stream_ops = {
	.destroy = i915_oa_stream_destroy,
	.enable = i915_oa_stream_enable,
	.disable = i915_oa_stream_disable,
	.wait_unlocked = i915_oa_wait_unlocked,
	.poll_wait = i915_oa_poll_wait,
	.read = i915_oa_read,
};
/**
 * i915_oa_stream_init - validate combined props for OA stream and init
 * @stream: An i915 perf stream
 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
 * @props: The property state that configures stream (individually validated)
 *
 * While read_properties_unlocked() validates properties in isolation, it
 * doesn't ensure that the combination necessarily makes sense.
 *
 * At this point it has been determined that userspace wants a stream of
 * OA metrics, but still we need to further validate the combined
 * properties are OK.
 *
 * If the configuration makes sense then we can allocate memory for
 * a circular OA buffer and apply the requested metric set configuration.
 *
 * Returns: zero on success or a negative error code.
 */
static int i915_oa_stream_init(struct i915_perf_stream *stream,
			       struct drm_i915_perf_open_param *param,
			       struct perf_open_properties *props)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int format_size;
	int ret;

	/* If the sysfs metrics/ directory wasn't registered for some
	 * reason then don't let userspace try their luck with config
	 * IDs
	 */
	if (!dev_priv->perf.metrics_kobj) {
		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
		DRM_DEBUG("Only OA report sampling supported\n");
		return -EINVAL;
	}

	if (!dev_priv->perf.oa.ops.init_oa_buffer) {
		DRM_DEBUG("OA unit not supported\n");
		return -ENODEV;
	}

	/* To avoid the complexity of having to accurately filter
	 * counter reports and marshal to the appropriate client
	 * we currently only allow exclusive access
	 */
	if (dev_priv->perf.oa.exclusive_stream) {
		DRM_DEBUG("OA unit already in use\n");
		return -EBUSY;
	}

	if (!props->oa_format) {
		DRM_DEBUG("OA report format not specified\n");
		return -EINVAL;
	}

	/* We set up some ratelimit state to potentially throttle any _NOTES
	 * about spurious, invalid OA reports which we don't forward to
	 * userspace.
	 *
	 * The initialization is associated with opening the stream (not driver
	 * init) considering we print a _NOTE about any throttling when closing
	 * the stream instead of waiting until driver _fini which no one would
	 * ever see.
	 *
	 * Using the same limiting factors as printk_ratelimit()
	 */
	ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
			     5 * HZ, 10);
	/* Since we use a DRM_NOTE for spurious reports it would be
	 * inconsistent to let __ratelimit() automatically print a warning for
	 * throttling.
	 */
	ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
			    RATELIMIT_MSG_ON_RELEASE);

	stream->sample_size = sizeof(struct drm_i915_perf_record_header);

	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;

	stream->sample_flags |= SAMPLE_OA_REPORT;
	stream->sample_size += format_size;

	dev_priv->perf.oa.oa_buffer.format_size = format_size;
	if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
		return -EINVAL;

	dev_priv->perf.oa.oa_buffer.format =
		dev_priv->perf.oa.oa_formats[props->oa_format].format;

	dev_priv->perf.oa.periodic = props->oa_periodic;
	if (dev_priv->perf.oa.periodic)
		dev_priv->perf.oa.period_exponent = props->oa_period_exponent;

	if (stream->ctx) {
		ret = oa_get_render_ctx_id(stream);
		if (ret) {
			DRM_DEBUG("Invalid context id to filter with\n");
			return ret;
		}
	}

	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
	if (ret) {
		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
		goto err_config;
	}

	/* PRM - observability performance counters:
	 *
	 *   OACONTROL, performance counter enable, note:
	 *
	 *   "When this bit is set, in order to have coherent counts,
	 *   RC6 power state and trunk clock gating must be disabled.
	 *   This can be achieved by programming MMIO registers as
	 *   0xA094=0 and 0xA090[31]=1"
	 *
	 *   In our case we are expecting that taking pm + FORCEWAKE
	 *   references will effectively disable RC6.
	 */
	intel_runtime_pm_get(dev_priv);
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = alloc_oa_buffer(dev_priv);
	if (ret)
		goto err_oa_buf_alloc;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		goto err_lock;

	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
						      stream->oa_config);
	if (ret) {
		DRM_DEBUG("Unable to enable metric set\n");
		goto err_enable;
	}

	stream->ops = &i915_oa_stream_ops;

	dev_priv->perf.oa.exclusive_stream = stream;

	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

err_enable:
	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

err_lock:
	free_oa_buffer(dev_priv);

err_oa_buf_alloc:
	put_oa_config(dev_priv, stream->oa_config);

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv);

err_config:
	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	return ret;
}

void i915_oa_init_reg_state(struct intel_engine_cs *engine,
			    struct i915_gem_context *ctx,
			    u32 *reg_state)
{
	struct i915_perf_stream *stream;

	if (engine->id != RCS)
		return;

	stream = engine->i915->perf.oa.exclusive_stream;
	if (stream)
		gen8_update_reg_state_unlocked(ctx, reg_state, stream->oa_config);
}

/**
 * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
 * @stream: An i915 perf stream
 * @file: An i915 perf stream file
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @ppos: (inout) file seek position (unused)
 *
 * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
 * ensure that if we've successfully copied any data then reporting that takes
 * precedence over any internal error status, so the data isn't lost.
 *
 * For example ret will be -ENOSPC whenever there is more buffered data than
 * can be copied to userspace, but that's only interesting if we weren't able
 * to copy some data because it implies the userspace buffer is too small to
 * receive a single record (and we never split records).
 *
 * Another case with ret == -EFAULT is more of a grey area since it would seem
 * like bad form for userspace to ask us to overrun its buffer, but the user
 * knows best:
 *
 *   http://yarchive.net/comp/linux/partial_reads_writes.html
 *
 * Returns: The number of bytes copied or a negative error code on failure.
 */
static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
				     struct file *file,
				     char __user *buf,
				     size_t count,
				     loff_t *ppos)
{
	/* Note we keep the offset (aka bytes read) separate from any
	 * error status so that the final check for whether we return
	 * the bytes read with a higher precedence than any error (see
	 * comment below) doesn't need to be handled/duplicated in
	 * stream->ops->read() implementations.
	 */
	size_t offset = 0;
	int ret = stream->ops->read(stream, buf, count, &offset);

	return offset ?: (ret ?: -EAGAIN);
}
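
/*
 * Illustrative note (not part of the original source): the chained "?:"
 * return expression above gives copied bytes precedence over any error.
 * For example:
 *
 *	offset == 512, ret == -EFAULT	-> read() returns 512
 *	offset == 0,   ret == -EFAULT	-> read() returns -EFAULT
 *	offset == 0,   ret == 0		-> read() returns -EAGAIN
 *
 * so an error is only surfaced to userspace on a call that copied nothing.
 */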
/**
 * i915_perf_read - handles read() FOP for i915 perf stream FDs
 * @file: An i915 perf stream file
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @ppos: (inout) file seek position (unused)
 *
 * The entry point for handling a read() on a stream file descriptor from
 * userspace. Most of the work is left to the i915_perf_read_locked() and
 * &i915_perf_stream_ops->read but to save having stream implementations (of
 * which we might have multiple later) we handle blocking read here.
 *
 * We can also consistently treat trying to read from a disabled stream
 * as an IO error so implementations can assume the stream is enabled
 * while reading.
 *
 * Returns: The number of bytes copied or a negative error code on failure.
 */
static ssize_t i915_perf_read(struct file *file,
			      char __user *buf,
			      size_t count,
			      loff_t *ppos)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	ssize_t ret;

	/* To ensure it's handled consistently we simply treat all reads of a
	 * disabled stream as an error. In particular it might otherwise lead
	 * to a deadlock for blocking file descriptors...
	 */
	if (!stream->enabled)
		return -EIO;

	if (!(file->f_flags & O_NONBLOCK)) {
		/* There's the small chance of false positives from
		 * stream->ops->wait_unlocked.
		 *
		 * E.g. with single context filtering since we only wait until
		 * oabuffer has >= 1 report we don't immediately know whether
		 * any reports really belong to the current context
		 */
		do {
			ret = stream->ops->wait_unlocked(stream);
			if (ret)
				return ret;

			mutex_lock(&dev_priv->perf.lock);
			ret = i915_perf_read_locked(stream, file,
						    buf, count, ppos);
			mutex_unlock(&dev_priv->perf.lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&dev_priv->perf.lock);
		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
		mutex_unlock(&dev_priv->perf.lock);
	}

	/* We allow the poll checking to sometimes report false positive EPOLLIN
	 * events where we might actually report EAGAIN on read() if there's
	 * not really any data available. In this situation though we don't
	 * want to enter a busy loop between poll() reporting a EPOLLIN event
	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
	 * effectively ensures we back off until the next hrtimer callback
	 * before reporting another EPOLLIN event.
	 */
	if (ret >= 0 || ret == -EAGAIN) {
		/* Maybe make ->pollin per-stream state if we support multiple
		 * concurrent streams in the future.
		 */
		dev_priv->perf.oa.pollin = false;
	}

	return ret;
}

static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
	struct drm_i915_private *dev_priv =
		container_of(hrtimer, typeof(*dev_priv),
			     perf.oa.poll_check_timer);

	if (oa_buffer_check_unlocked(dev_priv)) {
		dev_priv->perf.oa.pollin = true;
		wake_up(&dev_priv->perf.oa.poll_wq);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));

	return HRTIMER_RESTART;
}

/**
 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
 * @dev_priv: i915 device instance
 * @stream: An i915 perf stream
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream, this calls through to
 * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
 * will be woken for new stream data.
 *
 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 *
 * Returns: any poll events that are ready without sleeping
 */
static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
				      struct i915_perf_stream *stream,
				      struct file *file,
				      poll_table *wait)
{
	__poll_t events = 0;

	stream->ops->poll_wait(stream, file, wait);

	/* Note: we don't explicitly check whether there's something to read
	 * here since this path may be very hot depending on what else
	 * userspace is polling, or on the timeout in use. We rely solely on
	 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
	 * samples to read.
	 */
	if (dev_priv->perf.oa.pollin)
		events |= EPOLLIN;

	return events;
}

/**
 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream, this ensures
 * poll_wait() gets called with a wait queue that will be woken for new stream
 * data.
 *
 * Note: Implementation deferred to i915_perf_poll_locked()
 *
 * Returns: any poll events that are ready without sleeping
 */
static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	__poll_t ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

/**
 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
 * @stream: A disabled i915 perf stream
 *
 * [Re]enables the associated capture of data for this stream.
 *
 * If a stream was previously enabled then there's currently no intention
 * to provide userspace any guarantee about the preservation of previously
 * buffered data.
 */
static void i915_perf_enable_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		return;

	/* Allow stream->ops->enable() to refer to this */
	stream->enabled = true;

	if (stream->ops->enable)
		stream->ops->enable(stream);
}
/**
 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
 * @stream: An enabled i915 perf stream
 *
 * Disables the associated capture of data for this stream.
 *
 * The intention is that disabling and re-enabling a stream will ideally be
 * cheaper than destroying and re-opening a stream with the same configuration,
 * though there are no formal guarantees about what state or buffered data
 * must be retained between disabling and re-enabling a stream.
 *
 * Note: while a stream is disabled it's considered an error for userspace
 * to attempt to read from the stream (-EIO).
 */
static void i915_perf_disable_locked(struct i915_perf_stream *stream)
{
	if (!stream->enabled)
		return;

	/* Allow stream->ops->disable() to refer to this */
	stream->enabled = false;

	if (stream->ops->disable)
		stream->ops->disable(stream);
}
/**
 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
 * @stream: An i915 perf stream
 * @cmd: the ioctl request
 * @arg: the ioctl data
 *
 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 *
 * Returns: zero on success or a negative error code. Returns -EINVAL for
 * an unknown ioctl request.
 */
static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
				   unsigned int cmd,
				   unsigned long arg)
{
	switch (cmd) {
	case I915_PERF_IOCTL_ENABLE:
		i915_perf_enable_locked(stream);
		return 0;
	case I915_PERF_IOCTL_DISABLE:
		i915_perf_disable_locked(stream);
		return 0;
	}

	return -EINVAL;
}
/**
 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
 * @file: An i915 perf stream file
 * @cmd: the ioctl request
 * @arg: the ioctl data
 *
 * Implementation deferred to i915_perf_ioctl_locked().
 *
 * Returns: zero on success or a negative error code. Returns -EINVAL for
 * an unknown ioctl request.
 */
static long i915_perf_ioctl(struct file *file,
			    unsigned int cmd,
			    unsigned long arg)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	long ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

/**
 * i915_perf_destroy_locked - destroy an i915 perf stream
 * @stream: An i915 perf stream
 *
 * Frees all resources associated with the given i915 perf @stream, disabling
 * any associated data capture in the process.
 *
 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 */
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		i915_perf_disable_locked(stream);

	if (stream->ops->destroy)
		stream->ops->destroy(stream);

	list_del(&stream->link);

	if (stream->ctx)
		i915_gem_context_put(stream->ctx);

	kfree(stream);
}

/**
 * i915_perf_release - handles userspace close() of a stream file
 * @inode: anonymous inode associated with file
 * @file: An i915 perf stream file
 *
 * Cleans up any resources associated with an open i915 perf stream file.
 *
 * NB: close() can't really fail from the userspace point of view.
 *
 * Returns: zero on success or a negative error code.
 */
static int i915_perf_release(struct inode *inode, struct file *file)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;

	mutex_lock(&dev_priv->perf.lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&dev_priv->perf.lock);

	return 0;
}
static const struct file_operations fops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.release	= i915_perf_release,
	.poll		= i915_perf_poll,
	.read		= i915_perf_read,
	.unlocked_ioctl	= i915_perf_ioctl,
	/* Our ioctls have no arguments, so it's safe to use the same function
	 * to handle 32-bit compatibility.
	 */
	.compat_ioctl	= i915_perf_ioctl,
};
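
/*
 * Illustrative note (not part of the original source): a minimal userspace
 * sketch of driving these file operations, assuming the uapi property,
 * flag and format definitions from i915_drm.h (the metrics set ID below is
 * a placeholder that would normally be read from sysfs):
 *
 *	u64 properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = sizeof(properties) / 16,
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *
 * after which read()/poll() on stream_fd yield a stream of records each
 * framed by a struct drm_i915_perf_record_header, and
 * ioctl(stream_fd, I915_PERF_IOCTL_DISABLE) / ..._ENABLE pause and resume
 * the capture without closing the stream.
 */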
/**
 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
 * @dev_priv: i915 device instance
 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
 * @props: individually validated u64 property value pairs
 * @file: drm file
 *
 * See i915_perf_open_ioctl() for interface details.
 *
 * Implements further stream config validation and stream initialization on
 * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex
 * taken to serialize with any non-file-operation driver hooks.
 *
 * Note: at this point the @props have only been validated in isolation and
 * it's still necessary to validate that the combination of properties makes
 * sense.
 *
 * In the case where userspace is interested in OA unit metrics then further
 * config validation and stream initialization details will be handled by
 * i915_oa_stream_init(). The code here should only validate config state that
 * will be relevant to all stream types / backends.
 *
 * Returns: zero on success or a negative error code.
 */
static int
i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
			    struct drm_i915_perf_open_param *param,
			    struct perf_open_properties *props,
			    struct drm_file *file)
{
	struct i915_gem_context *specific_ctx = NULL;
	struct i915_perf_stream *stream = NULL;
	unsigned long f_flags = 0;
	bool privileged_op = true;
	int stream_fd;
	int ret;

	if (props->single_context) {
		u32 ctx_handle = props->ctx_handle;
		struct drm_i915_file_private *file_priv = file->driver_priv;

		specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
		if (!specific_ctx) {
			DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
				  ctx_handle);
			ret = -ENOENT;
			goto err;
		}
	}

	/*
	 * On Haswell the OA unit supports clock gating off for a specific
	 * context and in this mode there's no visibility of metrics for the
	 * rest of the system, which we consider acceptable for a
	 * non-privileged client.
	 *
	 * For Gen8+ the OA unit no longer supports clock gating off for a
	 * specific context and the kernel can't securely stop the counters
	 * from updating as system-wide / global values. Even though we can
	 * filter reports based on the included context ID we can't block
	 * clients from seeing the raw / global counter values via
	 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
	 * enable the OA unit by default.
	 */
	if (IS_HASWELL(dev_priv) && specific_ctx)
		privileged_op = false;

	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
	 * we check a dev.i915.perf_stream_paranoid sysctl option
	 * to determine if it's ok to access system wide OA counters
	 * without CAP_SYS_ADMIN privileges.
	 */
	if (privileged_op &&
	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
		ret = -EACCES;
		goto err_ctx;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		ret = -ENOMEM;
		goto err_ctx;
	}

	stream->dev_priv = dev_priv;
	stream->ctx = specific_ctx;

	ret = i915_oa_stream_init(stream, param, props);
	if (ret)
		goto err_alloc;

	/* we avoid simply assigning stream->sample_flags = props->sample_flags
	 * to have _stream_init check the combination of sample flags more
	 * thoroughly, but still this is the expected result at this point.
	 */
	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
		ret = -ENODEV;
		goto err_flags;
	}

	list_add(&stream->link, &dev_priv->perf.streams);

	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
		f_flags |= O_NONBLOCK;

	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		goto err_open;
	}

	if (!(param->flags & I915_PERF_FLAG_DISABLED))
		i915_perf_enable_locked(stream);

	return stream_fd;

err_open:
	list_del(&stream->link);
err_flags:
	if (stream->ops->destroy)
		stream->ops->destroy(stream);
err_alloc:
	kfree(stream);
err_ctx:
	if (specific_ctx)
		i915_gem_context_put(specific_ctx);
err:
	return ret;
}

static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
{
	return div64_u64(1000000000ULL * (2ULL << exponent),
			 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz);
}
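
/*
 * Illustrative note (not from the original source): the OA unit samples
 * every 2^(exponent + 1) timestamp ticks, so the period computed above is
 * (2 << exponent) * NSEC_PER_SEC / (cs_timestamp_frequency_khz * 1000).
 * For example, assuming a hypothetical 12000kHz timestamp frequency, an
 * exponent of 5 gives (2 << 5) * 1e9 / 12e6 ~= 5333ns, i.e. roughly a
 * 187.5kHz sampling frequency.
 */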

/**
 * read_properties_unlocked - validate + copy userspace stream open properties
 * @dev_priv: i915 device instance
 * @uprops: The array of u64 key value pairs given by userspace
 * @n_props: The number of key value pairs expected in @uprops
 * @props: The stream configuration built up while validating properties
 *
 * Note this function only validates properties in isolation; it doesn't
 * validate that the combination of properties makes sense or that all
 * properties necessary for a particular kind of stream have been set.
 *
 * Note that there currently aren't any ordering requirements for properties,
 * so we shouldn't validate or assume anything about ordering here. This
 * doesn't rule out defining new properties with ordering requirements in the
 * future.
 */
static int read_properties_unlocked(struct drm_i915_private *dev_priv,
				    u64 __user *uprops,
				    u32 n_props,
				    struct perf_open_properties *props)
{
	u64 __user *uprop = uprops;
	u32 i;

	memset(props, 0, sizeof(struct perf_open_properties));

	if (!n_props) {
		DRM_DEBUG("No i915 perf properties given\n");
		return -EINVAL;
	}

	/* Considering that ID = 0 is reserved and assuming that we don't
	 * (currently) expect any configurations to ever specify duplicate
	 * values for a particular property ID then the last _PROP_MAX value is
	 * one greater than the maximum number of properties we expect to get
	 * from userspace.
	 */
	if (n_props >= DRM_I915_PERF_PROP_MAX) {
		DRM_DEBUG("More i915 perf properties specified than exist\n");
		return -EINVAL;
	}

	for (i = 0; i < n_props; i++) {
		u64 oa_period, oa_freq_hz;
		u64 id, value;
		int ret;

		ret = get_user(id, uprop);
		if (ret)
			return ret;

		ret = get_user(value, uprop + 1);
		if (ret)
			return ret;

		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
			DRM_DEBUG("Unknown i915 perf property ID\n");
			return -EINVAL;
		}

		switch ((enum drm_i915_perf_property_id)id) {
		case DRM_I915_PERF_PROP_CTX_HANDLE:
			props->single_context = 1;
			props->ctx_handle = value;
			break;
		case DRM_I915_PERF_PROP_SAMPLE_OA:
			if (value)
				props->sample_flags |= SAMPLE_OA_REPORT;
			break;
		case DRM_I915_PERF_PROP_OA_METRICS_SET:
			if (value == 0) {
				DRM_DEBUG("Unknown OA metric set ID\n");
				return -EINVAL;
			}
			props->metrics_set = value;
			break;
		case DRM_I915_PERF_PROP_OA_FORMAT:
			if (value == 0 || value >= I915_OA_FORMAT_MAX) {
				DRM_DEBUG("Out-of-range OA report format %llu\n",
					  value);
				return -EINVAL;
			}
			if (!dev_priv->perf.oa.oa_formats[value].size) {
				DRM_DEBUG("Unsupported OA report format %llu\n",
					  value);
				return -EINVAL;
			}
			props->oa_format = value;
			break;
		case DRM_I915_PERF_PROP_OA_EXPONENT:
			if (value > OA_EXPONENT_MAX) {
				DRM_DEBUG("OA timer exponent too high (> %u)\n",
					  OA_EXPONENT_MAX);
				return -EINVAL;
			}

			/* Theoretically we can program the OA unit to sample
			 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns
			 * for BXT. We don't allow such high sampling
			 * frequencies by default unless root.
			 */

			BUILD_BUG_ON(sizeof(oa_period) != 8);
			oa_period = oa_exponent_to_ns(dev_priv, value);

			/* This check is primarily to ensure that oa_period <=
			 * UINT32_MAX (before passing to do_div which only
			 * accepts a u32 denominator), but we can also skip
			 * checking anything < 1Hz which implicitly can't be
			 * limited via an integer oa_max_sample_rate.
			 */
			if (oa_period <= NSEC_PER_SEC) {
				u64 tmp = NSEC_PER_SEC;

				do_div(tmp, oa_period);
				oa_freq_hz = tmp;
			} else
				oa_freq_hz = 0;

			if (oa_freq_hz > i915_oa_max_sample_rate &&
			    !capable(CAP_SYS_ADMIN)) {
				DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
					  i915_oa_max_sample_rate);
				return -EACCES;
			}

			props->oa_periodic = true;
			props->oa_period_exponent = value;
			break;
		case DRM_I915_PERF_PROP_MAX:
			MISSING_CASE(id);
			return -EINVAL;
		}

		uprop += 2;
	}

	return 0;
}
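
/*
 * Illustrative note (not from the original source): @uprops is a flat array
 * of (ID, value) u64 pairs in no particular order, e.g. a sketch of two
 * properties as userspace would lay them out:
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *	};
 *
 * where metrics_set_id is a placeholder read from the sysfs metrics/
 * directory by the caller.
 */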

/**
 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
 * @dev: drm device
 * @data: ioctl data copied from userspace (unvalidated)
 * @file: drm file
 *
 * Validates the stream open parameters given by userspace including flags
 * and an array of u64 key, value pair properties.
 *
 * Very little is assumed up front about the nature of the stream being
 * opened (for instance we don't assume it's for periodic OA unit metrics). An
 * i915-perf stream is expected to be a suitable interface for other forms of
 * buffered data written by the GPU besides periodic OA metrics.
 *
 * Note we copy the properties from userspace outside of the i915 perf
 * mutex to avoid an awkward lockdep with mmap_sem.
 *
 * Most of the implementation details are handled by
 * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock
 * mutex for serializing with any non-file-operation driver hooks.
 *
 * Returns: A newly opened i915 Perf stream file descriptor or negative
 * error code on failure.
 */
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_perf_open_param *param = data;
	struct perf_open_properties props;
	u32 known_open_flags;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
			   I915_PERF_FLAG_FD_NONBLOCK |
			   I915_PERF_FLAG_DISABLED;
	if (param->flags & ~known_open_flags) {
		DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n");
		return -EINVAL;
	}

	ret = read_properties_unlocked(dev_priv,
				       u64_to_user_ptr(param->properties_ptr),
				       param->num_properties,
				       &props);
	if (ret)
		return ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}
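
/*
 * Illustrative only (not part of the driver): a minimal userspace sketch of
 * opening a stream, assuming a libdrm-style drmIoctl() wrapper, an open DRM
 * fd and a properties[] array of u64 pairs as sketched above
 * read_properties_unlocked():
 *
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *
 * On success the returned fd can be read() and poll()ed for OA reports, and
 * is closed like any other file descriptor.
 */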

/**
 * i915_perf_register - exposes i915-perf to userspace
 * @dev_priv: i915 device instance
 *
 * In particular OA metric sets are advertised under a sysfs metrics/
 * directory allowing userspace to enumerate valid IDs that can be
 * used to open an i915-perf stream.
 */
void i915_perf_register(struct drm_i915_private *dev_priv)
{
	int ret;

	if (!dev_priv->perf.initialized)
		return;

	/* Make sure we're synchronized with any attempted
	 * i915_perf_open_ioctl(), since we register after the driver is
	 * already exposed to userspace.
	 */
	mutex_lock(&dev_priv->perf.lock);

	dev_priv->perf.metrics_kobj =
		kobject_create_and_add("metrics",
				       &dev_priv->drm.primary->kdev->kobj);
	if (!dev_priv->perf.metrics_kobj)
		goto exit;

	sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr);

	if (IS_HASWELL(dev_priv)) {
		i915_perf_load_test_config_hsw(dev_priv);
	} else if (IS_BROADWELL(dev_priv)) {
		i915_perf_load_test_config_bdw(dev_priv);
	} else if (IS_CHERRYVIEW(dev_priv)) {
		i915_perf_load_test_config_chv(dev_priv);
	} else if (IS_SKYLAKE(dev_priv)) {
		if (IS_SKL_GT2(dev_priv))
			i915_perf_load_test_config_sklgt2(dev_priv);
		else if (IS_SKL_GT3(dev_priv))
			i915_perf_load_test_config_sklgt3(dev_priv);
		else if (IS_SKL_GT4(dev_priv))
			i915_perf_load_test_config_sklgt4(dev_priv);
	} else if (IS_BROXTON(dev_priv)) {
		i915_perf_load_test_config_bxt(dev_priv);
	} else if (IS_KABYLAKE(dev_priv)) {
		if (IS_KBL_GT2(dev_priv))
			i915_perf_load_test_config_kblgt2(dev_priv);
		else if (IS_KBL_GT3(dev_priv))
			i915_perf_load_test_config_kblgt3(dev_priv);
	} else if (IS_GEMINILAKE(dev_priv)) {
		i915_perf_load_test_config_glk(dev_priv);
	} else if (IS_COFFEELAKE(dev_priv)) {
		if (IS_CFL_GT2(dev_priv))
			i915_perf_load_test_config_cflgt2(dev_priv);
		else if (IS_CFL_GT3(dev_priv))
			i915_perf_load_test_config_cflgt3(dev_priv);
	} else if (IS_CANNONLAKE(dev_priv)) {
		i915_perf_load_test_config_cnl(dev_priv);
	} else if (IS_ICELAKE(dev_priv)) {
		i915_perf_load_test_config_icl(dev_priv);
	}

	if (dev_priv->perf.oa.test_config.id == 0)
		goto sysfs_error;

	ret = sysfs_create_group(dev_priv->perf.metrics_kobj,
				 &dev_priv->perf.oa.test_config.sysfs_metric);
	if (ret)
		goto sysfs_error;

	atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);

	goto exit;

sysfs_error:
	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;

exit:
	mutex_unlock(&dev_priv->perf.lock);
}

/**
 * i915_perf_unregister - hide i915-perf from userspace
 * @dev_priv: i915 device instance
 *
 * i915-perf state cleanup is split up into an 'unregister' and
 * 'deinit' phase where the interface is first hidden from
 * userspace by i915_perf_unregister() before cleaning up
 * remaining state in i915_perf_fini().
 */
void i915_perf_unregister(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.metrics_kobj)
		return;

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &dev_priv->perf.oa.test_config.sysfs_metric);

	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;
}

static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	static const i915_reg_t flex_eu_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
		if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
			return true;
	}
	return false;
}

static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
		addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
	       (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
		addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
	       (addr >= i915_mmio_reg_offset(OACEC0_0) &&
		addr <= i915_mmio_reg_offset(OACEC7_1));
}

static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
	       (addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
		addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
	       (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
		addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
	       (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
		addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
}

static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
	       addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
	       (addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
		addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
}

static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen8_is_valid_mux_addr(dev_priv, addr) ||
	       (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
		addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
}

static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
	       (addr >= 0x25100 && addr <= 0x2FF90) ||
	       (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
		addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
	       addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
}

static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
	       (addr >= 0x182300 && addr <= 0x1823A4);
}

static u32 mask_reg_value(u32 reg, u32 val)
{
	/* HALF_SLICE_CHICKEN2 is programmed with the
	 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
	 * programmed by userspace doesn't change this.
	 */
	if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);

	/* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
	 * indicated by its name, plus a bunch of selection fields used by OA
	 * configs.
	 */
	if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg)
		val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);

	return val;
}

static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
					 bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr),
					 u32 __user *regs,
					 u32 n_regs)
{
	struct i915_oa_reg *oa_regs;
	int err;
	u32 i;

	if (!n_regs)
		return NULL;

	if (!access_ok(VERIFY_READ, regs, n_regs * sizeof(u32) * 2))
		return ERR_PTR(-EFAULT);

	/* No is_valid function means we're not allowing any register to be programmed. */
	GEM_BUG_ON(!is_valid);
	if (!is_valid)
		return ERR_PTR(-EINVAL);

	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
	if (!oa_regs)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < n_regs; i++) {
		u32 addr, value;

		err = get_user(addr, regs);
		if (err)
			goto addr_err;

		if (!is_valid(dev_priv, addr)) {
			DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
			err = -EINVAL;
			goto addr_err;
		}

		err = get_user(value, regs + 1);
		if (err)
			goto addr_err;

		oa_regs[i].addr = _MMIO(addr);
		oa_regs[i].value = mask_reg_value(addr, value);

		regs += 2;
	}

	return oa_regs;

addr_err:
	kfree(oa_regs);
	return ERR_PTR(err);
}
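
/*
 * Illustrative note (not from the original source): like the stream open
 * properties, @regs is a flat userspace array of u32 (address, value) pairs,
 * e.g. (addresses and values here are hypothetical):
 *
 *	uint32_t mux_regs[] = {
 *		0x9888, 0x198b0000,
 *		0x9888, 0x078b0066,
 *	};
 *
 * Every address must be accepted by the platform's is_valid() callback
 * before the pair is copied into the kernel's i915_oa_reg array.
 */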

static ssize_t show_dynamic_id(struct device *dev,
			       struct device_attribute *attr,
			       char *buf)
{
	struct i915_oa_config *oa_config =
		container_of(attr, typeof(*oa_config), sysfs_metric_id);

	return sprintf(buf, "%d\n", oa_config->id);
}

static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
					 struct i915_oa_config *oa_config)
{
	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
	oa_config->sysfs_metric_id.attr.name = "id";
	oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
	oa_config->sysfs_metric_id.show = show_dynamic_id;
	oa_config->sysfs_metric_id.store = NULL;

	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
	oa_config->attrs[1] = NULL;

	oa_config->sysfs_metric.name = oa_config->uuid;
	oa_config->sysfs_metric.attrs = oa_config->attrs;

	return sysfs_create_group(dev_priv->perf.metrics_kobj,
				  &oa_config->sysfs_metric);
}
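
/*
 * Illustrative note (not from the original source): the group created above
 * lands under the device's metrics/ directory, so userspace can map a config
 * UUID to the ID expected by DRM_I915_PERF_PROP_OA_METRICS_SET with
 * something like (card number and UUID are examples):
 *
 *	$ cat /sys/class/drm/card0/metrics/<uuid>/id
 *	2
 */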

/**
 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
 * @dev: drm device
 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from
 *        userspace (unvalidated)
 * @file: drm file
 *
 * Validates the submitted OA registers to be saved into a new OA config that
 * can then be used for programming the OA unit and its NOA network.
 *
 * Returns: A new allocated config number to be used with the perf open ioctl
 * or a negative error code on failure.
 */
int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_perf_oa_config *args = data;
	struct i915_oa_config *oa_config, *tmp;
	int err, id;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (!dev_priv->perf.metrics_kobj) {
		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to add i915 OA config\n");
		return -EACCES;
	}

	if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
	    (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
	    (!args->flex_regs_ptr || !args->n_flex_regs)) {
		DRM_DEBUG("No OA registers given\n");
		return -EINVAL;
	}

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config) {
		DRM_DEBUG("Failed to allocate memory for the OA config\n");
		return -ENOMEM;
	}

	atomic_set(&oa_config->ref_count, 1);

	if (!uuid_is_valid(args->uuid)) {
		DRM_DEBUG("Invalid uuid format for OA config\n");
		err = -EINVAL;
		goto reg_err;
	}

	/* Last character in oa_config->uuid will be 0 because oa_config is
	 * kzalloc'd.
	 */
	memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));

	oa_config->mux_regs_len = args->n_mux_regs;
	oa_config->mux_regs =
		alloc_oa_regs(dev_priv,
			      dev_priv->perf.oa.ops.is_valid_mux_reg,
			      u64_to_user_ptr(args->mux_regs_ptr),
			      args->n_mux_regs);

	if (IS_ERR(oa_config->mux_regs)) {
		DRM_DEBUG("Failed to create OA config for mux_regs\n");
		err = PTR_ERR(oa_config->mux_regs);
		goto reg_err;
	}

	oa_config->b_counter_regs_len = args->n_boolean_regs;
	oa_config->b_counter_regs =
		alloc_oa_regs(dev_priv,
			      dev_priv->perf.oa.ops.is_valid_b_counter_reg,
			      u64_to_user_ptr(args->boolean_regs_ptr),
			      args->n_boolean_regs);

	if (IS_ERR(oa_config->b_counter_regs)) {
		DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
		err = PTR_ERR(oa_config->b_counter_regs);
		goto reg_err;
	}

	if (INTEL_GEN(dev_priv) < 8) {
		if (args->n_flex_regs != 0) {
			err = -EINVAL;
			goto reg_err;
		}
	} else {
		oa_config->flex_regs_len = args->n_flex_regs;
		oa_config->flex_regs =
			alloc_oa_regs(dev_priv,
				      dev_priv->perf.oa.ops.is_valid_flex_reg,
				      u64_to_user_ptr(args->flex_regs_ptr),
				      args->n_flex_regs);

		if (IS_ERR(oa_config->flex_regs)) {
			DRM_DEBUG("Failed to create OA config for flex_regs\n");
			err = PTR_ERR(oa_config->flex_regs);
			goto reg_err;
		}
	}

	err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (err)
		goto reg_err;

	/* We shouldn't have too many configs, so this iteration shouldn't be
	 * too costly.
	 */
	idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, oa_config->uuid)) {
			DRM_DEBUG("OA config already exists with this uuid\n");
			err = -EADDRINUSE;
			goto sysfs_err;
		}
	}

	err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config);
	if (err) {
		DRM_DEBUG("Failed to create sysfs entry for OA config\n");
		goto sysfs_err;
	}

	/* Config id 0 is invalid, id 1 is reserved for the kernel's stored
	 * test config.
	 */
	oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr,
				  oa_config, 2,
				  0, GFP_KERNEL);
	if (oa_config->id < 0) {
		DRM_DEBUG("Failed to allocate an id for the OA config\n");
		err = oa_config->id;
		goto sysfs_err;
	}

	mutex_unlock(&dev_priv->perf.metrics_lock);

	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);

	return oa_config->id;

sysfs_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
reg_err:
	put_oa_config(dev_priv, oa_config);
	DRM_DEBUG("Failed to add new OA config\n");
	return err;
}
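
/*
 * Illustrative only (not part of the driver): a minimal userspace sketch of
 * registering a config, assuming mux_regs[] / b_counter_regs[] arrays of u32
 * (address, value) pairs and a freshly generated UUID string:
 *
 *	struct drm_i915_perf_oa_config config = { 0 };
 *
 *	memcpy(config.uuid, "01234567-0123-0123-0123-0123456789ab",
 *	       sizeof(config.uuid));
 *	config.n_mux_regs = sizeof(mux_regs) / (2 * sizeof(uint32_t));
 *	config.mux_regs_ptr = (uintptr_t)mux_regs;
 *	config.n_boolean_regs = sizeof(b_counter_regs) / (2 * sizeof(uint32_t));
 *	config.boolean_regs_ptr = (uintptr_t)b_counter_regs;
 *
 *	int config_id = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG,
 *				 &config);
 *
 * The returned id is what DRM_I915_PERF_PROP_OA_METRICS_SET expects.
 */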

/**
 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
 * @dev: drm device
 * @data: ioctl data (pointer to u64 integer) copied from userspace
 * @file: drm file
 *
 * Configs can be removed while being used; they will stop appearing in sysfs
 * and their content will be freed when the stream using the config is closed.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 *arg = data;
	struct i915_oa_config *oa_config;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
		return -EACCES;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		goto lock_err;

	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
	if (!oa_config) {
		DRM_DEBUG("Failed to remove unknown OA config\n");
		ret = -ENOENT;
		goto config_err;
	}

	GEM_BUG_ON(*arg != oa_config->id);

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &oa_config->sysfs_metric);

	idr_remove(&dev_priv->perf.metrics_idr, *arg);

	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);

	put_oa_config(dev_priv, oa_config);

config_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
lock_err:
	return ret;
}
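
/*
 * Illustrative only (not part of the driver): removal takes a pointer to the
 * u64 config id returned by the add ioctl, e.g.:
 *
 *	uint64_t id = config_id;
 *	drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &id);
 */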

static struct ctl_table oa_table[] = {
	{
	 .procname = "perf_stream_paranoid",
	 .data = &i915_perf_stream_paranoid,
	 .maxlen = sizeof(i915_perf_stream_paranoid),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,
	 .extra2 = &one,
	 },
	{
	 .procname = "oa_max_sample_rate",
	 .data = &i915_oa_max_sample_rate,
	 .maxlen = sizeof(i915_oa_max_sample_rate),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,
	 .extra2 = &oa_sample_rate_hard_limit,
	 },
	{}
};

static struct ctl_table i915_root[] = {
	{
	 .procname = "i915",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = oa_table,
	 },
	{}
};

static struct ctl_table dev_root[] = {
	{
	 .procname = "dev",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = i915_root,
	 },
	{}
};
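
/*
 * Illustrative note (not from the original source): the nested tables above
 * register under /proc/sys, giving the two knobs referenced throughout this
 * file, e.g.:
 *
 *	$ sysctl dev.i915.perf_stream_paranoid
 *	dev.i915.perf_stream_paranoid = 1
 *	# sysctl -w dev.i915.oa_max_sample_rate=100000
 */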

/**
 * i915_perf_init - initialize i915-perf state on module load
 * @dev_priv: i915 device instance
 *
 * Initializes i915-perf state without exposing anything to userspace.
 *
 * Note: i915-perf initialization is split into an 'init' and 'register'
 * phase, with i915_perf_register() exposing state to userspace.
 */
void i915_perf_init(struct drm_i915_private *dev_priv)
{
	if (IS_HASWELL(dev_priv)) {
		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
			gen7_is_valid_b_counter_addr;
		dev_priv->perf.oa.ops.is_valid_mux_reg =
			hsw_is_valid_mux_addr;
		dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
		dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
		dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
		dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
		dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
		dev_priv->perf.oa.ops.read = gen7_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read =
			gen7_oa_hw_tail_read;

		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
	} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		/* Note that although we could theoretically also support the
		 * legacy ringbuffer mode on BDW (and earlier iterations of
		 * this driver, before upstreaming, did this) it didn't seem
		 * worth the complexity to maintain now that BDW+ enable
		 * execlist mode by default.
		 */
		dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;

		dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer;
		dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
		dev_priv->perf.oa.ops.read = gen8_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

		if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv)) {
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen8_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			if (IS_CHERRYVIEW(dev_priv)) {
				dev_priv->perf.oa.ops.is_valid_mux_reg =
					chv_is_valid_mux_addr;
			}

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;

			if (IS_GEN8(dev_priv)) {
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
			} else {
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
			}
		} else if (IS_GEN(dev_priv, 10, 11)) {
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen10_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set;

			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
		}
	}

	if (dev_priv->perf.oa.ops.enable_metric_set) {
		hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
		init_waitqueue_head(&dev_priv->perf.oa.poll_wq);

		INIT_LIST_HEAD(&dev_priv->perf.streams);
		mutex_init(&dev_priv->perf.lock);
		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);

		oa_sample_rate_hard_limit = 1000 *
			(INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);

		mutex_init(&dev_priv->perf.metrics_lock);
		idr_init(&dev_priv->perf.metrics_idr);

		dev_priv->perf.initialized = true;
	}
}
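
/*
 * Illustrative note (not from the original source): the hard limit computed
 * in i915_perf_init() pins oa_max_sample_rate to half the command stream
 * timestamp frequency; e.g. assuming a hypothetical
 * cs_timestamp_frequency_khz of 12000, the sysctl could be raised to at most
 * 1000 * (12000 / 2) = 6000000 samples/s.
 */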

static int destroy_config(int id, void *p, void *data)
{
	struct drm_i915_private *dev_priv = data;
	struct i915_oa_config *oa_config = p;

	put_oa_config(dev_priv, oa_config);

	return 0;
}

/**
 * i915_perf_fini - Counterpart to i915_perf_init()
 * @dev_priv: i915 device instance
 */
void i915_perf_fini(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.initialized)
		return;

	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
	idr_destroy(&dev_priv->perf.metrics_idr);

	unregister_sysctl_table(dev_priv->perf.sysctl_header);

	memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));

	dev_priv->perf.initialized = false;
}