main.c

/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/delay.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "cmd.h"

#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");

static char mlx5_version[] =
	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
	DRIVER_VERSION "\n";

enum {
	MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
};

static enum rdma_link_layer
mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
	switch (port_type_cap) {
	case MLX5_CAP_PORT_TYPE_IB:
		return IB_LINK_LAYER_INFINIBAND;
	case MLX5_CAP_PORT_TYPE_ETH:
		return IB_LINK_LAYER_ETHERNET;
	default:
		return IB_LINK_LAYER_UNSPECIFIED;
	}
}

static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);

	return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}

static int get_port_state(struct ib_device *ibdev,
			  u8 port_num,
			  enum ib_port_state *state)
{
	struct ib_port_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	ret = mlx5_ib_query_port(ibdev, port_num, &attr);
	if (!ret)
		*state = attr.state;
	return ret;
}
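
/*
 * Netdev notifier for the RoCE port: cache the netdev that belongs to
 * this device on register/unregister, and on link changes dispatch
 * IB_EVENT_PORT_ACTIVE / IB_EVENT_PORT_ERR when the observed port state
 * actually changes.
 */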
static int mlx5_netdev_event(struct notifier_block *this,
			     unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
						 roce.nb);

	switch (event) {
	case NETDEV_REGISTER:
	case NETDEV_UNREGISTER:
		write_lock(&ibdev->roce.netdev_lock);
		if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
			ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ?
					     NULL : ndev;
		write_unlock(&ibdev->roce.netdev_lock);
		break;

	case NETDEV_CHANGE:
	case NETDEV_UP:
	case NETDEV_DOWN: {
		struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
		struct net_device *upper = NULL;

		if (lag_ndev) {
			upper = netdev_master_upper_dev_get(lag_ndev);
			dev_put(lag_ndev);
		}

		if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
		    && ibdev->ib_active) {
			struct ib_event ibev = { };
			enum ib_port_state port_state;

			if (get_port_state(&ibdev->ib_dev, 1, &port_state))
				return NOTIFY_DONE;

			if (ibdev->roce.last_port_state == port_state)
				return NOTIFY_DONE;

			ibdev->roce.last_port_state = port_state;
			ibev.device = &ibdev->ib_dev;
			if (port_state == IB_PORT_DOWN)
				ibev.event = IB_EVENT_PORT_ERR;
			else if (port_state == IB_PORT_ACTIVE)
				ibev.event = IB_EVENT_PORT_ACTIVE;
			else
				return NOTIFY_DONE;

			ibev.element.port_num = 1;
			ib_dispatch_event(&ibev);
		}
		break;
	}

	default:
		break;
	}

	return NOTIFY_DONE;
}

static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
					     u8 port_num)
{
	struct mlx5_ib_dev *ibdev = to_mdev(device);
	struct net_device *ndev;

	ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
	if (ndev)
		return ndev;

	/* Ensure ndev does not disappear before we invoke dev_hold()
	 */
	read_lock(&ibdev->roce.netdev_lock);
	ndev = ibdev->roce.netdev;
	if (ndev)
		dev_hold(ndev);
	read_unlock(&ibdev->roce.netdev_lock);

	return ndev;
}
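
/*
 * Map the operational Ethernet protocol (eth_proto_oper) reported by the
 * device to the IB speed/width pair expected by the RDMA core, e.g.
 * 25GbE -> EDR x1 and 100GbE -> EDR x4.
 */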
static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
				    u8 *active_width)
{
	switch (eth_proto_oper) {
	case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
	case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
	case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
	case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_SDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_ER):
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_QDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
	case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
	case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_EDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
	case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
	case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
	case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4):
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_QDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
	case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
	case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_HDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_FDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
	case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
	case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
	case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_EDR;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
				struct ib_port_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct net_device *ndev, *upper;
	enum ib_mtu ndev_ib_mtu;
	u16 qkey_viol_cntr;
	u32 eth_prot_oper;
	int err;

	/* Possible bad flows are checked before filling out props so in case
	 * of an error it will still be zeroed out.
	 */
	err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num);
	if (err)
		return err;

	translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
				 &props->active_width);

	props->port_cap_flags |= IB_PORT_CM_SUP;
	props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;

	props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
					   roce_address_table_size);
	props->max_mtu = IB_MTU_4096;
	props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
	props->pkey_tbl_len = 1;
	props->state = IB_PORT_DOWN;
	props->phys_state = 3;

	mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
	props->qkey_viol_cntr = qkey_viol_cntr;

	ndev = mlx5_ib_get_netdev(device, port_num);
	if (!ndev)
		return 0;

	if (mlx5_lag_is_active(dev->mdev)) {
		rcu_read_lock();
		upper = netdev_master_upper_dev_get_rcu(ndev);
		if (upper) {
			dev_put(ndev);
			ndev = upper;
			dev_hold(ndev);
		}
		rcu_read_unlock();
	}

	if (netif_running(ndev) && netif_carrier_ok(ndev)) {
		props->state = IB_PORT_ACTIVE;
		props->phys_state = 5;
	}

	ndev_ib_mtu = iboe_get_mtu(ndev->mtu);

	dev_put(ndev);

	props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
	return 0;
}
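
/*
 * Program one entry of the hardware RoCE GID table: derive the RoCE
 * version and L3 type from the GID attributes and hand the MAC and VLAN
 * of the associated netdev to firmware.
 */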
static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
			 unsigned int index, const union ib_gid *gid,
			 const struct ib_gid_attr *attr)
{
	enum ib_gid_type gid_type = IB_GID_TYPE_IB;
	u8 roce_version = 0;
	u8 roce_l3_type = 0;
	bool vlan = false;
	u8 mac[ETH_ALEN];
	u16 vlan_id = 0;

	if (gid) {
		gid_type = attr->gid_type;
		ether_addr_copy(mac, attr->ndev->dev_addr);

		if (is_vlan_dev(attr->ndev)) {
			vlan = true;
			vlan_id = vlan_dev_vlan_id(attr->ndev);
		}
	}

	switch (gid_type) {
	case IB_GID_TYPE_IB:
		roce_version = MLX5_ROCE_VERSION_1;
		break;
	case IB_GID_TYPE_ROCE_UDP_ENCAP:
		roce_version = MLX5_ROCE_VERSION_2;
		if (ipv6_addr_v4mapped((void *)gid))
			roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
		else
			roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
		break;

	default:
		mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
	}

	return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
				      roce_l3_type, gid->raw, mac, vlan,
				      vlan_id);
}

static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
			   unsigned int index, const union ib_gid *gid,
			   const struct ib_gid_attr *attr,
			   __always_unused void **context)
{
	return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
}

static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
			   unsigned int index, __always_unused void **context)
{
	return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
}

__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
			       int index)
{
	struct ib_gid_attr attr;
	union ib_gid gid;

	if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
		return 0;

	if (!attr.ndev)
		return 0;

	dev_put(attr.ndev);

	if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
		return 0;

	return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
}

int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
			   int index, enum ib_gid_type *gid_type)
{
	struct ib_gid_attr attr;
	union ib_gid gid;
	int ret;

	ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
	if (ret)
		return ret;

	if (!attr.ndev)
		return -ENODEV;

	dev_put(attr.ndev);

	*gid_type = attr.gid_type;

	return 0;
}

static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
		return !MLX5_CAP_GEN(dev->mdev, ib_virt);
	return 0;
}

enum {
	MLX5_VPORT_ACCESS_METHOD_MAD,
	MLX5_VPORT_ACCESS_METHOD_HCA,
	MLX5_VPORT_ACCESS_METHOD_NIC,
};

static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
	if (mlx5_use_mad_ifc(to_mdev(ibdev)))
		return MLX5_VPORT_ACCESS_METHOD_MAD;

	if (mlx5_ib_port_link_layer(ibdev, 1) ==
	    IB_LINK_LAYER_ETHERNET)
		return MLX5_VPORT_ACCESS_METHOD_NIC;

	return MLX5_VPORT_ACCESS_METHOD_HCA;
}
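
/*
 * Advertise IB_ATOMIC_HCA only if the device supports 8-byte compare &
 * swap and fetch & add and can respond in host endianness; otherwise
 * report IB_ATOMIC_NONE.
 */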
static void get_atomic_caps(struct mlx5_ib_dev *dev,
			    struct ib_device_attr *props)
{
	u8 tmp;
	u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
	u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
	u8 atomic_req_8B_endianness_mode =
		MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);

	/* Check if HW supports 8 bytes standard atomic operations and capable
	 * of host endianness respond
	 */
	tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
	if (((atomic_operations & tmp) == tmp) &&
	    (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
	    (atomic_req_8B_endianness_mode)) {
		props->atomic_cap = IB_ATOMIC_HCA;
	} else {
		props->atomic_cap = IB_ATOMIC_NONE;
	}
}

static int mlx5_query_system_image_guid(struct ib_device *ibdev,
					__be64 *sys_image_guid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	u64 tmp;
	int err;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_system_image_guid(ibdev,
							    sys_image_guid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
		break;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
		break;

	default:
		return -EINVAL;
	}

	if (!err)
		*sys_image_guid = cpu_to_be64(tmp);

	return err;
}

static int mlx5_query_max_pkeys(struct ib_device *ibdev,
				u16 *max_pkeys)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
						pkey_table_size));
		return 0;

	default:
		return -EINVAL;
	}
}

static int mlx5_query_vendor_id(struct ib_device *ibdev,
				u32 *vendor_id)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_core_query_vendor_id(dev->mdev, vendor_id);

	default:
		return -EINVAL;
	}
}

static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
				__be64 *node_guid)
{
	u64 tmp;
	int err;

	switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_node_guid(dev, node_guid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
		break;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
		break;

	default:
		return -EINVAL;
	}

	if (!err)
		*node_guid = cpu_to_be64(tmp);

	return err;
}

struct mlx5_reg_node_desc {
	u8 desc[IB_DEVICE_NODE_DESC_MAX];
};

static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
	struct mlx5_reg_node_desc in;

	if (mlx5_use_mad_ifc(dev))
		return mlx5_query_mad_ifc_node_desc(dev, node_desc);

	memset(&in, 0, sizeof(in));

	return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
				    sizeof(struct mlx5_reg_node_desc),
				    MLX5_REG_NODE_DESC, 0, 0);
}
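
/*
 * Fill struct ib_device_attr from the device capabilities. When the
 * caller passed a user response buffer (uhw), also report the
 * mlx5-specific capabilities (TSO, RSS, CQE compression, packet pacing,
 * multi-packet WQEs, SW parsing); response_length only grows for fields
 * that fit in the buffer the caller supplied.
 */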
static int mlx5_ib_query_device(struct ib_device *ibdev,
				struct ib_device_attr *props,
				struct ib_udata *uhw)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int err = -ENOMEM;
	int max_sq_desc;
	int max_rq_sg;
	int max_sq_sg;
	u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
	struct mlx5_ib_query_device_resp resp = {};
	size_t resp_len;
	u64 max_tso;

	resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
	if (uhw->outlen && uhw->outlen < resp_len)
		return -EINVAL;
	else
		resp.response_length = resp_len;

	if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
		return -EINVAL;

	memset(props, 0, sizeof(*props));
	err = mlx5_query_system_image_guid(ibdev,
					   &props->sys_image_guid);
	if (err)
		return err;

	err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
	if (err)
		return err;

	err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
	if (err)
		return err;

	props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
		(fw_rev_min(dev->mdev) << 16) |
		fw_rev_sub(dev->mdev);
	props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
		IB_DEVICE_PORT_ACTIVE_EVENT |
		IB_DEVICE_SYS_IMAGE_GUID |
		IB_DEVICE_RC_RNR_NAK_GEN;

	if (MLX5_CAP_GEN(mdev, pkv))
		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, qkv))
		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, apm))
		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
	if (MLX5_CAP_GEN(mdev, xrc))
		props->device_cap_flags |= IB_DEVICE_XRC;
	if (MLX5_CAP_GEN(mdev, imaicl)) {
		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
					   IB_DEVICE_MEM_WINDOW_TYPE_2B;
		props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
		/* We support 'Gappy' memory registration too */
		props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
	}
	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
	if (MLX5_CAP_GEN(mdev, sho)) {
		props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
		/* At this stage no support for signature handover */
		props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
				      IB_PROT_T10DIF_TYPE_2 |
				      IB_PROT_T10DIF_TYPE_3;
		props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
				       IB_GUARD_T10DIF_CSUM;
	}
	if (MLX5_CAP_GEN(mdev, block_lb_mc))
		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

	if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
		if (MLX5_CAP_ETH(mdev, csum_cap)) {
			/* Legacy bit to support old userspace libraries */
			props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
			props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
		}

		if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
			props->raw_packet_caps |=
				IB_RAW_PACKET_CAP_CVLAN_STRIPPING;

		if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
			max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
			if (max_tso) {
				resp.tso_caps.max_tso = 1 << max_tso;
				resp.tso_caps.supported_qpts |=
					1 << IB_QPT_RAW_PACKET;
				resp.response_length += sizeof(resp.tso_caps);
			}
		}

		if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
			resp.rss_caps.rx_hash_function =
				MLX5_RX_HASH_FUNC_TOEPLITZ;
			resp.rss_caps.rx_hash_fields_mask =
				MLX5_RX_HASH_SRC_IPV4 |
				MLX5_RX_HASH_DST_IPV4 |
				MLX5_RX_HASH_SRC_IPV6 |
				MLX5_RX_HASH_DST_IPV6 |
				MLX5_RX_HASH_SRC_PORT_TCP |
				MLX5_RX_HASH_DST_PORT_TCP |
				MLX5_RX_HASH_SRC_PORT_UDP |
				MLX5_RX_HASH_DST_PORT_UDP;
			resp.response_length += sizeof(resp.rss_caps);
		}
	} else {
		if (field_avail(typeof(resp), tso_caps, uhw->outlen))
			resp.response_length += sizeof(resp.tso_caps);
		if (field_avail(typeof(resp), rss_caps, uhw->outlen))
			resp.response_length += sizeof(resp.rss_caps);
	}

	if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
		props->device_cap_flags |= IB_DEVICE_UD_TSO;
	}

	if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
	    MLX5_CAP_GEN(dev->mdev, general_notification_event))
		props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;

	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
	    MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;

	if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
	    MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
		/* Legacy bit to support old userspace libraries */
		props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
		props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
	}

	if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;

	props->vendor_part_id = mdev->pdev->device;
	props->hw_ver = mdev->pdev->revision;

	props->max_mr_size = ~0ull;
	props->page_size_cap = ~(min_page_size - 1);
	props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
	props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
	max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
		    sizeof(struct mlx5_wqe_data_seg);
	max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
	max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
		     sizeof(struct mlx5_wqe_raddr_seg)) /
		     sizeof(struct mlx5_wqe_data_seg);
	props->max_sge = min(max_rq_sg, max_sq_sg);
	props->max_sge_rd = MLX5_MAX_SGE_RD;
	props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
	props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
	props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
	props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
	props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
	props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
	props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
	props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
	props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
	props->max_srq_sge = max_rq_sg - 1;
	props->max_fast_reg_page_list_len =
		1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
	get_atomic_caps(dev, props);
	props->masked_atomic_cap = IB_ATOMIC_NONE;
	props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
	props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
					   props->max_mcast_grp;
	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
	props->max_ah = INT_MAX;
	props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
	props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (MLX5_CAP_GEN(mdev, pg))
		props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
	props->odp_caps = dev->odp_caps;
#endif

	if (MLX5_CAP_GEN(mdev, cd))
		props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;

	if (!mlx5_core_is_pf(mdev))
		props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;

	if (mlx5_ib_port_link_layer(ibdev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		props->rss_caps.max_rwq_indirection_tables =
			1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
		props->rss_caps.max_rwq_indirection_table_size =
			1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size);
		props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
		props->max_wq_type_rq =
			1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
	}

	if (MLX5_CAP_GEN(mdev, tag_matching)) {
		props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
		props->tm_caps.max_num_tags =
			(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
		props->tm_caps.flags = IB_TM_CAP_RC;
		props->tm_caps.max_ops =
			1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
		props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
	}

	if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
		resp.cqe_comp_caps.max_num =
			MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
			MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
		resp.cqe_comp_caps.supported_format =
			MLX5_IB_CQE_RES_FORMAT_HASH |
			MLX5_IB_CQE_RES_FORMAT_CSUM;
		resp.response_length += sizeof(resp.cqe_comp_caps);
	}

	if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) {
		if (MLX5_CAP_QOS(mdev, packet_pacing) &&
		    MLX5_CAP_GEN(mdev, qos)) {
			resp.packet_pacing_caps.qp_rate_limit_max =
				MLX5_CAP_QOS(mdev, packet_pacing_max_rate);
			resp.packet_pacing_caps.qp_rate_limit_min =
				MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
			resp.packet_pacing_caps.supported_qpts |=
				1 << IB_QPT_RAW_PACKET;
		}
		resp.response_length += sizeof(resp.packet_pacing_caps);
	}

	if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
			uhw->outlen)) {
		if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
			resp.mlx5_ib_support_multi_pkt_send_wqes =
				MLX5_IB_ALLOW_MPW;

		if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
			resp.mlx5_ib_support_multi_pkt_send_wqes |=
				MLX5_IB_SUPPORT_EMPW;

		resp.response_length +=
			sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
	}

	if (field_avail(typeof(resp), reserved, uhw->outlen))
		resp.response_length += sizeof(resp.reserved);

	if (field_avail(typeof(resp), sw_parsing_caps,
			uhw->outlen)) {
		resp.response_length += sizeof(resp.sw_parsing_caps);
		if (MLX5_CAP_ETH(mdev, swp)) {
			resp.sw_parsing_caps.sw_parsing_offloads |=
				MLX5_IB_SW_PARSING;

			if (MLX5_CAP_ETH(mdev, swp_csum))
				resp.sw_parsing_caps.sw_parsing_offloads |=
					MLX5_IB_SW_PARSING_CSUM;

			if (MLX5_CAP_ETH(mdev, swp_lso))
				resp.sw_parsing_caps.sw_parsing_offloads |=
					MLX5_IB_SW_PARSING_LSO;

			if (resp.sw_parsing_caps.sw_parsing_offloads)
				resp.sw_parsing_caps.supported_qpts =
					BIT(IB_QPT_RAW_PACKET);
		}
	}

	if (uhw->outlen) {
		err = ib_copy_to_udata(uhw, &resp, resp.response_length);

		if (err)
			return err;
	}

	return 0;
}
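
/* Link width as reported by the device, one bit per supported width. */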
enum mlx5_ib_width {
	MLX5_IB_WIDTH_1X = 1 << 0,
	MLX5_IB_WIDTH_2X = 1 << 1,
	MLX5_IB_WIDTH_4X = 1 << 2,
	MLX5_IB_WIDTH_8X = 1 << 3,
	MLX5_IB_WIDTH_12X = 1 << 4
};

static int translate_active_width(struct ib_device *ibdev, u8 active_width,
				  u8 *ib_width)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	int err = 0;

	if (active_width & MLX5_IB_WIDTH_1X) {
		*ib_width = IB_WIDTH_1X;
	} else if (active_width & MLX5_IB_WIDTH_2X) {
		mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
			    (int)active_width);
		err = -EINVAL;
	} else if (active_width & MLX5_IB_WIDTH_4X) {
		*ib_width = IB_WIDTH_4X;
	} else if (active_width & MLX5_IB_WIDTH_8X) {
		*ib_width = IB_WIDTH_8X;
	} else if (active_width & MLX5_IB_WIDTH_12X) {
		*ib_width = IB_WIDTH_12X;
	} else {
		mlx5_ib_dbg(dev, "Invalid active_width %d\n",
			    (int)active_width);
		err = -EINVAL;
	}

	return err;
}

static int mlx5_mtu_to_ib_mtu(int mtu)
{
	switch (mtu) {
	case 256: return 1;
	case 512: return 2;
	case 1024: return 3;
	case 2048: return 4;
	case 4096: return 5;
	default:
		pr_warn("invalid mtu\n");
		return -1;
	}
}

enum ib_max_vl_num {
	__IB_MAX_VL_0 = 1,
	__IB_MAX_VL_0_1 = 2,
	__IB_MAX_VL_0_3 = 3,
	__IB_MAX_VL_0_7 = 4,
	__IB_MAX_VL_0_14 = 5,
};

enum mlx5_vl_hw_cap {
	MLX5_VL_HW_0 = 1,
	MLX5_VL_HW_0_1 = 2,
	MLX5_VL_HW_0_2 = 3,
	MLX5_VL_HW_0_3 = 4,
	MLX5_VL_HW_0_4 = 5,
	MLX5_VL_HW_0_5 = 6,
	MLX5_VL_HW_0_6 = 7,
	MLX5_VL_HW_0_7 = 8,
	MLX5_VL_HW_0_14 = 15
};

static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
				u8 *max_vl_num)
{
	switch (vl_hw_cap) {
	case MLX5_VL_HW_0:
		*max_vl_num = __IB_MAX_VL_0;
		break;
	case MLX5_VL_HW_0_1:
		*max_vl_num = __IB_MAX_VL_0_1;
		break;
	case MLX5_VL_HW_0_3:
		*max_vl_num = __IB_MAX_VL_0_3;
		break;
	case MLX5_VL_HW_0_7:
		*max_vl_num = __IB_MAX_VL_0_7;
		break;
	case MLX5_VL_HW_0_14:
		*max_vl_num = __IB_MAX_VL_0_14;
		break;

	default:
		return -EINVAL;
	}

	return 0;
}
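
/*
 * Query port attributes through the HCA vport context (native IB path):
 * LID/SM information, capability mask and table sizes, plus the active
 * link width/speed, MTUs and the VL capability translated to the IB
 * enumerations above.
 */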
static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
			       struct ib_port_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_hca_vport_context *rep;
	u16 max_mtu;
	u16 oper_mtu;
	int err;
	u8 ib_link_width_oper;
	u8 vl_hw_cap;

	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	/* props being zeroed by the caller, avoid zeroing it here */

	err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
	if (err)
		goto out;

	props->lid = rep->lid;
	props->lmc = rep->lmc;
	props->sm_lid = rep->sm_lid;
	props->sm_sl = rep->sm_sl;
	props->state = rep->vport_state;
	props->phys_state = rep->port_physical_state;
	props->port_cap_flags = rep->cap_mask1;
	props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
	props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
	props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
	props->bad_pkey_cntr = rep->pkey_violation_counter;
	props->qkey_viol_cntr = rep->qkey_violation_counter;
	props->subnet_timeout = rep->subnet_timeout;
	props->init_type_reply = rep->init_type_reply;
	props->grh_required = rep->grh_required;

	err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
	if (err)
		goto out;

	err = translate_active_width(ibdev, ib_link_width_oper,
				     &props->active_width);
	if (err)
		goto out;
	err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
	if (err)
		goto out;

	mlx5_query_port_max_mtu(mdev, &max_mtu, port);

	props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);

	mlx5_query_port_oper_mtu(mdev, &oper_mtu, port);

	props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu);

	err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port);
	if (err)
		goto out;

	err = translate_max_vl_num(ibdev, vl_hw_cap,
				   &props->max_vl_num);
out:
	kfree(rep);
	return err;
}

int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
		       struct ib_port_attr *props)
{
	unsigned int count;
	int ret;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		ret = mlx5_query_mad_ifc_port(ibdev, port, props);
		break;

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		ret = mlx5_query_hca_port(ibdev, port, props);
		break;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		ret = mlx5_query_port_roce(ibdev, port, props);
		break;

	default:
		ret = -EINVAL;
	}

	if (!ret && props) {
		count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev);
		props->gid_tbl_len -= count;
	}
	return ret;
}

static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			     union ib_gid *gid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid);

	default:
		return -EINVAL;
	}
}

static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			      u16 *pkey)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
						 pkey);
	default:
		return -EINVAL;
	}
}

static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
				 struct ib_device_modify *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_reg_node_desc in;
	struct mlx5_reg_node_desc out;
	int err;

	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
		return -EOPNOTSUPP;

	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
		return 0;

	/*
	 * If possible, pass node desc to FW, so it can generate
	 * a 144 trap.  If cmd fails, just ignore.
	 */
	memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
	err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
	if (err)
		return err;

	memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);

	return err;
}
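
/*
 * Update the port capability bits through the HCA vport context and
 * reject any bit that cap_mask1_perm marks as not changeable.
 */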
static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
				u32 value)
{
	struct mlx5_hca_vport_context ctx = {};
	int err;

	err = mlx5_query_hca_vport_context(dev->mdev, 0,
					   port_num, 0, &ctx);
	if (err)
		return err;

	if (~ctx.cap_mask1_perm & mask) {
		mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
			     mask, ctx.cap_mask1_perm);
		return -EINVAL;
	}

	ctx.cap_mask1 = value;
	ctx.cap_mask1_perm = mask;
	err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
						 port_num, 0, &ctx);

	return err;
}

static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
			       struct ib_port_modify *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct ib_port_attr attr;
	u32 tmp;
	int err;
	u32 change_mask;
	u32 value;
	bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
		      IB_LINK_LAYER_INFINIBAND);

	/* CM layer calls ib_modify_port() regardless of the link layer. For
	 * Ethernet ports, qkey violation and Port capabilities are meaningless.
	 */
	if (!is_ib)
		return 0;

	if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
		change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
		value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
		return set_port_caps_atomic(dev, port, change_mask, value);
	}

	mutex_lock(&dev->cap_mask_mutex);

	err = ib_query_port(ibdev, port, &attr);
	if (err)
		goto out;

	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
		~props->clr_port_cap_mask;

	err = mlx5_set_port_caps(dev->mdev, port, tmp);

out:
	mutex_unlock(&dev->cap_mask_mutex);
	return err;
}

static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
{
	mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
		    caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
}
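
/*
 * Work out how many UAR system pages are needed for the number of
 * blue-flame registers the user requested. The request is rounded up to
 * whole system pages and written back into req->total_num_bfregs.
 */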
static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
			     struct mlx5_ib_alloc_ucontext_req_v2 *req,
			     u32 *num_sys_pages)
{
	int uars_per_sys_page;
	int bfregs_per_sys_page;
	int ref_bfregs = req->total_num_bfregs;

	if (req->total_num_bfregs == 0)
		return -EINVAL;

	BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
	BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);

	if (req->total_num_bfregs > MLX5_MAX_BFREGS)
		return -ENOMEM;

	uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
	bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
	req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
	*num_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;

	if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
		return -EINVAL;

	mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n",
		    MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
		    lib_uar_4k ? "yes" : "no", ref_bfregs,
		    req->total_num_bfregs, *num_sys_pages);

	return 0;
}

static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
{
	struct mlx5_bfreg_info *bfregi;
	int err;
	int i;

	bfregi = &context->bfregi;
	for (i = 0; i < bfregi->num_sys_pages; i++) {
		err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
		if (err)
			goto error;

		mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
	}

	return 0;

error:
	for (--i; i >= 0; i--)
		if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
			mlx5_ib_warn(dev, "failed to free uar %d\n", i);

	return err;
}

static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
{
	struct mlx5_bfreg_info *bfregi;
	int err;
	int i;

	bfregi = &context->bfregi;
	for (i = 0; i < bfregi->num_sys_pages; i++) {
		err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
		if (err) {
			mlx5_ib_warn(dev, "failed to free uar %d\n", i);
			return err;
		}
	}

	return 0;
}
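
/*
 * Transport domain allocation also drives NIC loopback on Ethernet
 * devices that can disable local loopback: loopback is enabled once a
 * second user transport domain exists and disabled again when the count
 * drops below two.
 */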
static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
{
	int err;

	err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
	if (err)
		return err;

	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
	    !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
		return err;

	mutex_lock(&dev->lb_mutex);
	dev->user_td++;

	if (dev->user_td == 2)
		err = mlx5_nic_vport_update_local_lb(dev->mdev, true);

	mutex_unlock(&dev->lb_mutex);
	return err;
}

static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
{
	mlx5_core_dealloc_transport_domain(dev->mdev, tdn);

	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
	    !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
		return;

	mutex_lock(&dev->lb_mutex);
	dev->user_td--;

	if (dev->user_td < 2)
		mlx5_nic_vport_update_local_lb(dev->mdev, false);

	mutex_unlock(&dev->lb_mutex);
}
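
/*
 * Create a user context: validate the v0/v2 request, size the bfreg/UAR
 * layout via calc_total_bfregs(), allocate the UAR pages, and report the
 * resulting limits back to userspace in the response.
 */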
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
						  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_alloc_ucontext_req_v2 req = {};
	struct mlx5_ib_alloc_ucontext_resp resp = {};
	struct mlx5_ib_ucontext *context;
	struct mlx5_bfreg_info *bfregi;
	int ver;
	int err;
	size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
				     max_cqe_version);
	bool lib_uar_4k;

	if (!dev->ib_active)
		return ERR_PTR(-EAGAIN);

	if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
		ver = 0;
	else if (udata->inlen >= min_req_v2)
		ver = 2;
	else
		return ERR_PTR(-EINVAL);

	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
	if (err)
		return ERR_PTR(err);

	if (req.flags)
		return ERR_PTR(-EINVAL);

	if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
		return ERR_PTR(-EOPNOTSUPP);

	req.total_num_bfregs = ALIGN(req.total_num_bfregs,
				     MLX5_NON_FP_BFREGS_PER_UAR);
	if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
		return ERR_PTR(-EINVAL);

	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
	resp.cache_line_size = cache_line_size();
	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
	resp.cqe_version = min_t(__u8,
				 (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
				 req.max_cqe_version);
	resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
				MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
	resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
				MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
	resp.response_length = min(offsetof(typeof(resp), response_length) +
				   sizeof(resp.response_length), udata->outlen);

	context = kzalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return ERR_PTR(-ENOMEM);

	lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
	bfregi = &context->bfregi;

	/* updates req->total_num_bfregs */
	err = calc_total_bfregs(dev, lib_uar_4k, &req, &bfregi->num_sys_pages);
	if (err)
		goto out_ctx;

	mutex_init(&bfregi->lock);
	bfregi->lib_uar_4k = lib_uar_4k;
	bfregi->count = kcalloc(req.total_num_bfregs, sizeof(*bfregi->count),
				GFP_KERNEL);
	if (!bfregi->count) {
		err = -ENOMEM;
		goto out_ctx;
	}

	bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
				    sizeof(*bfregi->sys_pages),
				    GFP_KERNEL);
	if (!bfregi->sys_pages) {
		err = -ENOMEM;
		goto out_count;
	}

	err = allocate_uars(dev, context);
	if (err)
		goto out_sys_pages;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif

	context->upd_xlt_page = __get_free_page(GFP_KERNEL);
	if (!context->upd_xlt_page) {
		err = -ENOMEM;
		goto out_uars;
	}
	mutex_init(&context->upd_xlt_page_mutex);
  1180. if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
  1181. err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
  1182. if (err)
  1183. goto out_page;
  1184. }
  1185. INIT_LIST_HEAD(&context->vma_private_list);
  1186. INIT_LIST_HEAD(&context->db_page_list);
  1187. mutex_init(&context->db_page_mutex);
  1188. resp.tot_bfregs = req.total_num_bfregs;
  1189. resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
  1190. if (field_avail(typeof(resp), cqe_version, udata->outlen))
  1191. resp.response_length += sizeof(resp.cqe_version);
  1192. if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
  1193. resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
  1194. MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
  1195. resp.response_length += sizeof(resp.cmds_supp_uhw);
  1196. }
  1197. if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
  1198. if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
  1199. mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
  1200. resp.eth_min_inline++;
  1201. }
  1202. resp.response_length += sizeof(resp.eth_min_inline);
  1203. }
  1204. /*
  1205. * We don't want to expose information from the PCI bar that is located
  1206. * after 4096 bytes, so if the arch only supports larger pages, let's
  1207. * pretend we don't support reading the HCA's core clock. This is also
  1208. * forced by mmap function.
  1209. */
  1210. if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
  1211. if (PAGE_SIZE <= 4096) {
  1212. resp.comp_mask |=
  1213. MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
  1214. resp.hca_core_clock_offset =
  1215. offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
  1216. }
  1217. resp.response_length += sizeof(resp.hca_core_clock_offset) +
  1218. sizeof(resp.reserved2);
  1219. }
  1220. if (field_avail(typeof(resp), log_uar_size, udata->outlen))
  1221. resp.response_length += sizeof(resp.log_uar_size);
  1222. if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
  1223. resp.response_length += sizeof(resp.num_uars_per_page);
  1224. err = ib_copy_to_udata(udata, &resp, resp.response_length);
  1225. if (err)
  1226. goto out_td;
  1227. bfregi->ver = ver;
  1228. bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
  1229. context->cqe_version = resp.cqe_version;
  1230. context->lib_caps = req.lib_caps;
  1231. print_lib_caps(dev, context->lib_caps);
  1232. return &context->ibucontext;
  1233. out_td:
  1234. if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
  1235. mlx5_ib_dealloc_transport_domain(dev, context->tdn);
  1236. out_page:
  1237. free_page(context->upd_xlt_page);
  1238. out_uars:
  1239. deallocate_uars(dev, context);
  1240. out_sys_pages:
  1241. kfree(bfregi->sys_pages);
  1242. out_count:
  1243. kfree(bfregi->count);
  1244. out_ctx:
  1245. kfree(context);
  1246. return ERR_PTR(err);
  1247. }
  1248. static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
  1249. {
  1250. struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
  1251. struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
  1252. struct mlx5_bfreg_info *bfregi;
  1253. bfregi = &context->bfregi;
  1254. if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
  1255. mlx5_ib_dealloc_transport_domain(dev, context->tdn);
  1256. free_page(context->upd_xlt_page);
  1257. deallocate_uars(dev, context);
  1258. kfree(bfregi->sys_pages);
  1259. kfree(bfregi->count);
  1260. kfree(context);
  1261. return 0;
  1262. }
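/*
 * Translate a driver UAR index into the PFN of the corresponding page in
 * BAR 0 of the device, accounting for several 4K firmware UARs sharing one
 * system page when uar_4k is in use.
 */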
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
                                 struct mlx5_bfreg_info *bfregi,
                                 int idx)
{
        int fw_uars_per_page;

        fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;

        return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) +
                bfregi->sys_pages[idx] / fw_uars_per_page;
}
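/*
 * The mmap offset encodes a command in the bits at and above
 * MLX5_IB_MMAP_CMD_SHIFT, and a command-specific argument (for UAR mappings,
 * the UAR index) in the bits below it.
 */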
static int get_command(unsigned long offset)
{
        return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}

static int get_arg(unsigned long offset)
{
        return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}

static int get_index(unsigned long offset)
{
        return get_arg(offset);
}
static void mlx5_ib_vma_open(struct vm_area_struct *area)
{
        /* vma_open is called when a new VMA is created on top of our VMA.
         * This happens through either an mremap flow or split_vma (usually
         * due to mlock, madvise, munmap, etc.). We do not support cloning the
         * VMA, as it is strongly tied to the hardware. Therefore we set the
         * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
         * calling us again and trying to do incorrect actions. We assume that
         * the original VMA is exactly a single page, so no "splitting"
         * operations will be applied to it.
         */
        area->vm_ops = NULL;
}

static void mlx5_ib_vma_close(struct vm_area_struct *area)
{
        struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;

        /* All VMAs opened on a FD are guaranteed to be closed before the file
         * itself is closed, so no synchronization with the regular closing
         * flow (e.g. mlx5_ib_dealloc_ucontext) is needed. We do, however,
         * need to synchronize with mlx5_ib_disassociate_ucontext(), which
         * accesses the VMA.
         * The close operation is usually called under mm->mmap_sem, except
         * when the process is exiting; the exiting case is handled explicitly
         * as part of mlx5_ib_disassociate_ucontext().
         */
        mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;

        /* Clear the VMA pointer in the driver's private data to avoid racing
         * with mlx5_ib_disassociate_ucontext().
         */
        mlx5_ib_vma_priv_data->vma = NULL;
        list_del(&mlx5_ib_vma_priv_data->list);
        kfree(mlx5_ib_vma_priv_data);
}
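/*
 * mlx5_ib_set_vma_data() below registers a VMA with the ucontext: it is
 * remembered on vma_private_list and given mlx5_ib_vm_ops, so that
 * mlx5_ib_vma_close() and mlx5_ib_disassociate_ucontext() can find and tear
 * the mapping down later.
 */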
static const struct vm_operations_struct mlx5_ib_vm_ops = {
        .open = mlx5_ib_vma_open,
        .close = mlx5_ib_vma_close
};

static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
                                struct mlx5_ib_ucontext *ctx)
{
        struct mlx5_ib_vma_private_data *vma_prv;
        struct list_head *vma_head = &ctx->vma_private_list;

        vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
        if (!vma_prv)
                return -ENOMEM;

        vma_prv->vma = vma;
        vma->vm_private_data = vma_prv;
        vma->vm_ops = &mlx5_ib_vm_ops;

        list_add(&vma_prv->list, vma_head);

        return 0;
}
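/*
 * Detach a ucontext from its owning user process: zap every mapping tracked
 * on vma_private_list so userspace can no longer touch the device pages, and
 * clear vm_ops so the VMAs stop calling back into the driver.
 */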
static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
        int ret;
        struct vm_area_struct *vma;
        struct mlx5_ib_vma_private_data *vma_private, *n;
        struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
        struct task_struct *owning_process = NULL;
        struct mm_struct *owning_mm = NULL;

        owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
        if (!owning_process)
                return;

        owning_mm = get_task_mm(owning_process);
        if (!owning_mm) {
                pr_info("no mm, disassociate ucontext is pending task termination\n");
                while (1) {
                        put_task_struct(owning_process);
                        usleep_range(1000, 2000);
                        owning_process = get_pid_task(ibcontext->tgid,
                                                      PIDTYPE_PID);
                        if (!owning_process ||
                            owning_process->state == TASK_DEAD) {
                                pr_info("disassociate ucontext done, task was terminated\n");
                                /* If the task was found dead, we still need
                                 * to release its task struct.
                                 */
                                if (owning_process)
                                        put_task_struct(owning_process);
                                return;
                        }
                }
        }

        /* Protect against a race with mlx5_ib_vma_close(), which removes
         * entries from vma_private_list.
         */
        down_write(&owning_mm->mmap_sem);
        list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
                                 list) {
                vma = vma_private->vma;
                ret = zap_vma_ptes(vma, vma->vm_start,
                                   PAGE_SIZE);
                WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
                /* The context is going to be destroyed, so the VMA must not
                 * access our ops any more.
                 */
                vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
                vma->vm_ops = NULL;
                list_del(&vma_private->list);
                kfree(vma_private);
        }
        up_write(&owning_mm->mmap_sem);
        mmput(owning_mm);
        put_task_struct(owning_process);
}
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
{
        switch (cmd) {
        case MLX5_IB_MMAP_WC_PAGE:
                return "WC";
        case MLX5_IB_MMAP_REGULAR_PAGE:
                return "best effort WC";
        case MLX5_IB_MMAP_NC_PAGE:
                return "NC";
        default:
                return NULL;
        }
}
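/*
 * Map a single UAR page into userspace: write-combining for
 * MLX5_IB_MMAP_WC_PAGE/REGULAR_PAGE (where the architecture allows it) and
 * non-cached for MLX5_IB_MMAP_NC_PAGE.
 */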
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
                    struct vm_area_struct *vma,
                    struct mlx5_ib_ucontext *context)
{
        struct mlx5_bfreg_info *bfregi = &context->bfregi;
        int err;
        unsigned long idx;
        phys_addr_t pfn, pa;
        pgprot_t prot;
        int uars_per_page;

        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;

        uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
        idx = get_index(vma->vm_pgoff);
        if (idx % uars_per_page ||
            idx * uars_per_page >= bfregi->num_sys_pages) {
                mlx5_ib_warn(dev, "invalid uar index %lu\n", idx);
                return -EINVAL;
        }

        switch (cmd) {
        case MLX5_IB_MMAP_WC_PAGE:
/* Some architectures don't support WC memory */
#if defined(CONFIG_X86)
                if (!pat_enabled())
                        return -EPERM;
#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
                return -EPERM;
#endif
                /* fall through */
        case MLX5_IB_MMAP_REGULAR_PAGE:
                /* For MLX5_IB_MMAP_REGULAR_PAGE, make a best effort to get WC */
                prot = pgprot_writecombine(vma->vm_page_prot);
                break;
        case MLX5_IB_MMAP_NC_PAGE:
                prot = pgprot_noncached(vma->vm_page_prot);
                break;
        default:
                return -EINVAL;
        }

        pfn = uar_index2pfn(dev, bfregi, idx);
        mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);

        vma->vm_page_prot = prot;
        err = io_remap_pfn_range(vma, vma->vm_start, pfn,
                                 PAGE_SIZE, vma->vm_page_prot);
        if (err) {
                mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
                            err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
                return -EAGAIN;
        }

        pa = pfn << PAGE_SHIFT;
        mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
                    vma->vm_start, &pa);

        return mlx5_ib_set_vma_data(vma, context);
}
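/*
 * Top-level mmap handler: dispatch on the command encoded in the page offset,
 * either mapping a UAR page or exposing the HCA core clock page read-only.
 */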
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
        struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
        struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
        unsigned long command;
        phys_addr_t pfn;

        command = get_command(vma->vm_pgoff);
        switch (command) {
        case MLX5_IB_MMAP_WC_PAGE:
        case MLX5_IB_MMAP_NC_PAGE:
        case MLX5_IB_MMAP_REGULAR_PAGE:
                return uar_mmap(dev, command, vma, context);

        case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
                return -ENOSYS;

        case MLX5_IB_MMAP_CORE_CLOCK:
                if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                        return -EINVAL;

                if (vma->vm_flags & VM_WRITE)
                        return -EPERM;

                /* Don't expose to user-space information it shouldn't have */
                if (PAGE_SIZE > 4096)
                        return -EOPNOTSUPP;

                vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
                pfn = (dev->mdev->iseg_base +
                       offsetof(struct mlx5_init_seg, internal_timer_h)) >>
                        PAGE_SHIFT;
                if (io_remap_pfn_range(vma, vma->vm_start, pfn,
                                       PAGE_SIZE, vma->vm_page_prot))
                        return -EAGAIN;

                mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
                            vma->vm_start,
                            (unsigned long long)pfn << PAGE_SHIFT);
                break;

        default:
                return -EINVAL;
        }

        return 0;
}
  1495. static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
  1496. struct ib_ucontext *context,
  1497. struct ib_udata *udata)
  1498. {
  1499. struct mlx5_ib_alloc_pd_resp resp;
  1500. struct mlx5_ib_pd *pd;
  1501. int err;
  1502. pd = kmalloc(sizeof(*pd), GFP_KERNEL);
  1503. if (!pd)
  1504. return ERR_PTR(-ENOMEM);
  1505. err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
  1506. if (err) {
  1507. kfree(pd);
  1508. return ERR_PTR(err);
  1509. }
  1510. if (context) {
  1511. resp.pdn = pd->pdn;
  1512. if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
  1513. mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
  1514. kfree(pd);
  1515. return ERR_PTR(-EFAULT);
  1516. }
  1517. }
  1518. return &pd->ibpd;
  1519. }
  1520. static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
  1521. {
  1522. struct mlx5_ib_dev *mdev = to_mdev(pd->device);
  1523. struct mlx5_ib_pd *mpd = to_mpd(pd);
  1524. mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
  1525. kfree(mpd);
  1526. return 0;
  1527. }
  1528. enum {
  1529. MATCH_CRITERIA_ENABLE_OUTER_BIT,
  1530. MATCH_CRITERIA_ENABLE_MISC_BIT,
  1531. MATCH_CRITERIA_ENABLE_INNER_BIT
  1532. };
  1533. #define HEADER_IS_ZERO(match_criteria, headers) \
  1534. !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
  1535. 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
  1536. static u8 get_match_criteria_enable(u32 *match_criteria)
  1537. {
  1538. u8 match_criteria_enable;
  1539. match_criteria_enable =
  1540. (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
  1541. MATCH_CRITERIA_ENABLE_OUTER_BIT;
  1542. match_criteria_enable |=
  1543. (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
  1544. MATCH_CRITERIA_ENABLE_MISC_BIT;
  1545. match_criteria_enable |=
  1546. (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
  1547. MATCH_CRITERIA_ENABLE_INNER_BIT;
  1548. return match_criteria_enable;
  1549. }
  1550. static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
  1551. {
  1552. MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
  1553. MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
  1554. }
  1555. static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
  1556. bool inner)
  1557. {
  1558. if (inner) {
  1559. MLX5_SET(fte_match_set_misc,
  1560. misc_c, inner_ipv6_flow_label, mask);
  1561. MLX5_SET(fte_match_set_misc,
  1562. misc_v, inner_ipv6_flow_label, val);
  1563. } else {
  1564. MLX5_SET(fte_match_set_misc,
  1565. misc_c, outer_ipv6_flow_label, mask);
  1566. MLX5_SET(fte_match_set_misc,
  1567. misc_v, outer_ipv6_flow_label, val);
  1568. }
  1569. }
  1570. static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
  1571. {
  1572. MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
  1573. MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
  1574. MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
  1575. MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
  1576. }
  1577. #define LAST_ETH_FIELD vlan_tag
  1578. #define LAST_IB_FIELD sl
  1579. #define LAST_IPV4_FIELD tos
  1580. #define LAST_IPV6_FIELD traffic_class
  1581. #define LAST_TCP_UDP_FIELD src_port
  1582. #define LAST_TUNNEL_FIELD tunnel_id
  1583. #define LAST_FLOW_TAG_FIELD tag_id
  1584. #define LAST_DROP_FIELD size
  1585. /* Field is the last supported field */
  1586. #define FIELDS_NOT_SUPPORTED(filter, field)\
  1587. memchr_inv((void *)&filter.field +\
  1588. sizeof(filter.field), 0,\
  1589. sizeof(filter) -\
  1590. offsetof(typeof(filter), field) -\
  1591. sizeof(filter.field))
  1592. #define IPV4_VERSION 4
  1593. #define IPV6_VERSION 6
  1594. static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
  1595. u32 *match_v, const union ib_flow_spec *ib_spec,
  1596. u32 *tag_id, bool *is_drop)
  1597. {
  1598. void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
  1599. misc_parameters);
  1600. void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
  1601. misc_parameters);
  1602. void *headers_c;
  1603. void *headers_v;
  1604. int match_ipv;
  1605. if (ib_spec->type & IB_FLOW_SPEC_INNER) {
  1606. headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
  1607. inner_headers);
  1608. headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
  1609. inner_headers);
  1610. match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
  1611. ft_field_support.inner_ip_version);
  1612. } else {
  1613. headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
  1614. outer_headers);
  1615. headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
  1616. outer_headers);
  1617. match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
  1618. ft_field_support.outer_ip_version);
  1619. }
  1620. switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
  1621. case IB_FLOW_SPEC_ETH:
  1622. if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
  1623. return -EOPNOTSUPP;
  1624. ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1625. dmac_47_16),
  1626. ib_spec->eth.mask.dst_mac);
  1627. ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1628. dmac_47_16),
  1629. ib_spec->eth.val.dst_mac);
  1630. ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1631. smac_47_16),
  1632. ib_spec->eth.mask.src_mac);
  1633. ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1634. smac_47_16),
  1635. ib_spec->eth.val.src_mac);
  1636. if (ib_spec->eth.mask.vlan_tag) {
  1637. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1638. cvlan_tag, 1);
  1639. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1640. cvlan_tag, 1);
  1641. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1642. first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
  1643. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1644. first_vid, ntohs(ib_spec->eth.val.vlan_tag));
  1645. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1646. first_cfi,
  1647. ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
  1648. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1649. first_cfi,
  1650. ntohs(ib_spec->eth.val.vlan_tag) >> 12);
  1651. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1652. first_prio,
  1653. ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
  1654. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1655. first_prio,
  1656. ntohs(ib_spec->eth.val.vlan_tag) >> 13);
  1657. }
  1658. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1659. ethertype, ntohs(ib_spec->eth.mask.ether_type));
  1660. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1661. ethertype, ntohs(ib_spec->eth.val.ether_type));
  1662. break;
  1663. case IB_FLOW_SPEC_IPV4:
  1664. if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
  1665. return -EOPNOTSUPP;
  1666. if (match_ipv) {
  1667. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1668. ip_version, 0xf);
  1669. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1670. ip_version, IPV4_VERSION);
  1671. } else {
  1672. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1673. ethertype, 0xffff);
  1674. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1675. ethertype, ETH_P_IP);
  1676. }
  1677. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1678. src_ipv4_src_ipv6.ipv4_layout.ipv4),
  1679. &ib_spec->ipv4.mask.src_ip,
  1680. sizeof(ib_spec->ipv4.mask.src_ip));
  1681. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1682. src_ipv4_src_ipv6.ipv4_layout.ipv4),
  1683. &ib_spec->ipv4.val.src_ip,
  1684. sizeof(ib_spec->ipv4.val.src_ip));
  1685. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1686. dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
  1687. &ib_spec->ipv4.mask.dst_ip,
  1688. sizeof(ib_spec->ipv4.mask.dst_ip));
  1689. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1690. dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
  1691. &ib_spec->ipv4.val.dst_ip,
  1692. sizeof(ib_spec->ipv4.val.dst_ip));
  1693. set_tos(headers_c, headers_v,
  1694. ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
  1695. set_proto(headers_c, headers_v,
  1696. ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
  1697. break;
  1698. case IB_FLOW_SPEC_IPV6:
  1699. if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
  1700. return -EOPNOTSUPP;
  1701. if (match_ipv) {
  1702. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1703. ip_version, 0xf);
  1704. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1705. ip_version, IPV6_VERSION);
  1706. } else {
  1707. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1708. ethertype, 0xffff);
  1709. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1710. ethertype, ETH_P_IPV6);
  1711. }
  1712. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1713. src_ipv4_src_ipv6.ipv6_layout.ipv6),
  1714. &ib_spec->ipv6.mask.src_ip,
  1715. sizeof(ib_spec->ipv6.mask.src_ip));
  1716. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1717. src_ipv4_src_ipv6.ipv6_layout.ipv6),
  1718. &ib_spec->ipv6.val.src_ip,
  1719. sizeof(ib_spec->ipv6.val.src_ip));
  1720. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1721. dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
  1722. &ib_spec->ipv6.mask.dst_ip,
  1723. sizeof(ib_spec->ipv6.mask.dst_ip));
  1724. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1725. dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
  1726. &ib_spec->ipv6.val.dst_ip,
  1727. sizeof(ib_spec->ipv6.val.dst_ip));
  1728. set_tos(headers_c, headers_v,
  1729. ib_spec->ipv6.mask.traffic_class,
  1730. ib_spec->ipv6.val.traffic_class);
  1731. set_proto(headers_c, headers_v,
  1732. ib_spec->ipv6.mask.next_hdr,
  1733. ib_spec->ipv6.val.next_hdr);
  1734. set_flow_label(misc_params_c, misc_params_v,
  1735. ntohl(ib_spec->ipv6.mask.flow_label),
  1736. ntohl(ib_spec->ipv6.val.flow_label),
  1737. ib_spec->type & IB_FLOW_SPEC_INNER);
  1738. break;
  1739. case IB_FLOW_SPEC_TCP:
  1740. if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
  1741. LAST_TCP_UDP_FIELD))
  1742. return -EOPNOTSUPP;
  1743. MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
  1744. 0xff);
  1745. MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
  1746. IPPROTO_TCP);
  1747. MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
  1748. ntohs(ib_spec->tcp_udp.mask.src_port));
  1749. MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
  1750. ntohs(ib_spec->tcp_udp.val.src_port));
  1751. MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
  1752. ntohs(ib_spec->tcp_udp.mask.dst_port));
  1753. MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
  1754. ntohs(ib_spec->tcp_udp.val.dst_port));
  1755. break;
  1756. case IB_FLOW_SPEC_UDP:
  1757. if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
  1758. LAST_TCP_UDP_FIELD))
  1759. return -EOPNOTSUPP;
  1760. MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
  1761. 0xff);
  1762. MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
  1763. IPPROTO_UDP);
  1764. MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
  1765. ntohs(ib_spec->tcp_udp.mask.src_port));
  1766. MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
  1767. ntohs(ib_spec->tcp_udp.val.src_port));
  1768. MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
  1769. ntohs(ib_spec->tcp_udp.mask.dst_port));
  1770. MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
  1771. ntohs(ib_spec->tcp_udp.val.dst_port));
  1772. break;
  1773. case IB_FLOW_SPEC_VXLAN_TUNNEL:
  1774. if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
  1775. LAST_TUNNEL_FIELD))
  1776. return -EOPNOTSUPP;
  1777. MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
  1778. ntohl(ib_spec->tunnel.mask.tunnel_id));
  1779. MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
  1780. ntohl(ib_spec->tunnel.val.tunnel_id));
  1781. break;
  1782. case IB_FLOW_SPEC_ACTION_TAG:
  1783. if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
  1784. LAST_FLOW_TAG_FIELD))
  1785. return -EOPNOTSUPP;
  1786. if (ib_spec->flow_tag.tag_id >= BIT(24))
  1787. return -EINVAL;
  1788. *tag_id = ib_spec->flow_tag.tag_id;
  1789. break;
  1790. case IB_FLOW_SPEC_ACTION_DROP:
  1791. if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
  1792. LAST_DROP_FIELD))
  1793. return -EOPNOTSUPP;
  1794. *is_drop = true;
  1795. break;
  1796. default:
  1797. return -EINVAL;
  1798. }
  1799. return 0;
  1800. }
  1801. /* If a flow could catch both multicast and unicast packets,
  1802. * it won't fall into the multicast flow steering table and this rule
  1803. * could steal other multicast packets.
  1804. */
  1805. static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
  1806. {
  1807. union ib_flow_spec *flow_spec;
  1808. if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
  1809. ib_attr->num_of_specs < 1)
  1810. return false;
  1811. flow_spec = (union ib_flow_spec *)(ib_attr + 1);
  1812. if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
  1813. struct ib_flow_spec_ipv4 *ipv4_spec;
  1814. ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
  1815. if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
  1816. return true;
  1817. return false;
  1818. }
  1819. if (flow_spec->type == IB_FLOW_SPEC_ETH) {
  1820. struct ib_flow_spec_eth *eth_spec;
  1821. eth_spec = (struct ib_flow_spec_eth *)flow_spec;
  1822. return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
  1823. is_multicast_ether_addr(eth_spec->val.dst_mac);
  1824. }
  1825. return false;
  1826. }
  1827. static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
  1828. const struct ib_flow_attr *flow_attr,
  1829. bool check_inner)
  1830. {
  1831. union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
  1832. int match_ipv = check_inner ?
  1833. MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
  1834. ft_field_support.inner_ip_version) :
  1835. MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
  1836. ft_field_support.outer_ip_version);
  1837. int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
  1838. bool ipv4_spec_valid, ipv6_spec_valid;
  1839. unsigned int ip_spec_type = 0;
  1840. bool has_ethertype = false;
  1841. unsigned int spec_index;
  1842. bool mask_valid = true;
  1843. u16 eth_type = 0;
  1844. bool type_valid;
  1845. /* Validate that ethertype is correct */
  1846. for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
  1847. if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
  1848. ib_spec->eth.mask.ether_type) {
  1849. mask_valid = (ib_spec->eth.mask.ether_type ==
  1850. htons(0xffff));
  1851. has_ethertype = true;
  1852. eth_type = ntohs(ib_spec->eth.val.ether_type);
  1853. } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
  1854. (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
  1855. ip_spec_type = ib_spec->type;
  1856. }
  1857. ib_spec = (void *)ib_spec + ib_spec->size;
  1858. }
  1859. type_valid = (!has_ethertype) || (!ip_spec_type);
  1860. if (!type_valid && mask_valid) {
  1861. ipv4_spec_valid = (eth_type == ETH_P_IP) &&
  1862. (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
  1863. ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
  1864. (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
  1865. type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
  1866. (((eth_type == ETH_P_MPLS_UC) ||
  1867. (eth_type == ETH_P_MPLS_MC)) && match_ipv);
  1868. }
  1869. return type_valid;
  1870. }
  1871. static bool is_valid_attr(struct mlx5_core_dev *mdev,
  1872. const struct ib_flow_attr *flow_attr)
  1873. {
  1874. return is_valid_ethertype(mdev, flow_attr, false) &&
  1875. is_valid_ethertype(mdev, flow_attr, true);
  1876. }
  1877. static void put_flow_table(struct mlx5_ib_dev *dev,
  1878. struct mlx5_ib_flow_prio *prio, bool ft_added)
  1879. {
  1880. prio->refcount -= !!ft_added;
  1881. if (!prio->refcount) {
  1882. mlx5_destroy_flow_table(prio->flow_table);
  1883. prio->flow_table = NULL;
  1884. }
  1885. }
  1886. static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
  1887. {
  1888. struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
  1889. struct mlx5_ib_flow_handler *handler = container_of(flow_id,
  1890. struct mlx5_ib_flow_handler,
  1891. ibflow);
  1892. struct mlx5_ib_flow_handler *iter, *tmp;
  1893. mutex_lock(&dev->flow_db.lock);
  1894. list_for_each_entry_safe(iter, tmp, &handler->list, list) {
  1895. mlx5_del_flow_rules(iter->rule);
  1896. put_flow_table(dev, iter->prio, true);
  1897. list_del(&iter->list);
  1898. kfree(iter);
  1899. }
  1900. mlx5_del_flow_rules(handler->rule);
  1901. put_flow_table(dev, handler->prio, true);
  1902. mutex_unlock(&dev->flow_db.lock);
  1903. kfree(handler);
  1904. return 0;
  1905. }
  1906. static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
  1907. {
  1908. priority *= 2;
  1909. if (!dont_trap)
  1910. priority++;
  1911. return priority;
  1912. }
  1913. enum flow_table_type {
  1914. MLX5_IB_FT_RX,
  1915. MLX5_IB_FT_TX
  1916. };
  1917. #define MLX5_FS_MAX_TYPES 6
  1918. #define MLX5_FS_MAX_ENTRIES BIT(16)
  1919. static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
  1920. struct ib_flow_attr *flow_attr,
  1921. enum flow_table_type ft_type)
  1922. {
  1923. bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
  1924. struct mlx5_flow_namespace *ns = NULL;
  1925. struct mlx5_ib_flow_prio *prio;
  1926. struct mlx5_flow_table *ft;
  1927. int max_table_size;
  1928. int num_entries;
  1929. int num_groups;
  1930. int priority;
  1931. int err = 0;
  1932. max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
  1933. log_max_ft_size));
  1934. if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
  1935. if (flow_is_multicast_only(flow_attr) &&
  1936. !dont_trap)
  1937. priority = MLX5_IB_FLOW_MCAST_PRIO;
  1938. else
  1939. priority = ib_prio_to_core_prio(flow_attr->priority,
  1940. dont_trap);
  1941. ns = mlx5_get_flow_namespace(dev->mdev,
  1942. MLX5_FLOW_NAMESPACE_BYPASS);
  1943. num_entries = MLX5_FS_MAX_ENTRIES;
  1944. num_groups = MLX5_FS_MAX_TYPES;
  1945. prio = &dev->flow_db.prios[priority];
  1946. } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
  1947. flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
  1948. ns = mlx5_get_flow_namespace(dev->mdev,
  1949. MLX5_FLOW_NAMESPACE_LEFTOVERS);
  1950. build_leftovers_ft_param(&priority,
  1951. &num_entries,
  1952. &num_groups);
  1953. prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
  1954. } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
  1955. if (!MLX5_CAP_FLOWTABLE(dev->mdev,
  1956. allow_sniffer_and_nic_rx_shared_tir))
  1957. return ERR_PTR(-ENOTSUPP);
  1958. ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
  1959. MLX5_FLOW_NAMESPACE_SNIFFER_RX :
  1960. MLX5_FLOW_NAMESPACE_SNIFFER_TX);
  1961. prio = &dev->flow_db.sniffer[ft_type];
  1962. priority = 0;
  1963. num_entries = 1;
  1964. num_groups = 1;
  1965. }
  1966. if (!ns)
  1967. return ERR_PTR(-ENOTSUPP);
  1968. if (num_entries > max_table_size)
  1969. return ERR_PTR(-ENOMEM);
  1970. ft = prio->flow_table;
  1971. if (!ft) {
  1972. ft = mlx5_create_auto_grouped_flow_table(ns, priority,
  1973. num_entries,
  1974. num_groups,
  1975. 0, 0);
  1976. if (!IS_ERR(ft)) {
  1977. prio->refcount = 0;
  1978. prio->flow_table = ft;
  1979. } else {
  1980. err = PTR_ERR(ft);
  1981. }
  1982. }
  1983. return err ? ERR_PTR(err) : prio;
  1984. }
  1985. static void set_underlay_qp(struct mlx5_ib_dev *dev,
  1986. struct mlx5_flow_spec *spec,
  1987. u32 underlay_qpn)
  1988. {
  1989. void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
  1990. spec->match_criteria,
  1991. misc_parameters);
  1992. void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
  1993. misc_parameters);
  1994. if (underlay_qpn &&
  1995. MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
  1996. ft_field_support.bth_dst_qp)) {
  1997. MLX5_SET(fte_match_set_misc,
  1998. misc_params_v, bth_dst_qp, underlay_qpn);
  1999. MLX5_SET(fte_match_set_misc,
  2000. misc_params_c, bth_dst_qp, 0xffffff);
  2001. }
  2002. }
  2003. static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
  2004. struct mlx5_ib_flow_prio *ft_prio,
  2005. const struct ib_flow_attr *flow_attr,
  2006. struct mlx5_flow_destination *dst,
  2007. u32 underlay_qpn)
  2008. {
  2009. struct mlx5_flow_table *ft = ft_prio->flow_table;
  2010. struct mlx5_ib_flow_handler *handler;
  2011. struct mlx5_flow_act flow_act = {0};
  2012. struct mlx5_flow_spec *spec;
  2013. struct mlx5_flow_destination *rule_dst = dst;
  2014. const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
  2015. unsigned int spec_index;
  2016. u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
  2017. bool is_drop = false;
  2018. int err = 0;
  2019. int dest_num = 1;
  2020. if (!is_valid_attr(dev->mdev, flow_attr))
  2021. return ERR_PTR(-EINVAL);
  2022. spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
  2023. handler = kzalloc(sizeof(*handler), GFP_KERNEL);
  2024. if (!handler || !spec) {
  2025. err = -ENOMEM;
  2026. goto free;
  2027. }
  2028. INIT_LIST_HEAD(&handler->list);
  2029. for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
  2030. err = parse_flow_attr(dev->mdev, spec->match_criteria,
  2031. spec->match_value,
  2032. ib_flow, &flow_tag, &is_drop);
  2033. if (err < 0)
  2034. goto free;
  2035. ib_flow += ((union ib_flow_spec *)ib_flow)->size;
  2036. }
  2037. if (!flow_is_multicast_only(flow_attr))
  2038. set_underlay_qp(dev, spec, underlay_qpn);
  2039. spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
  2040. if (is_drop) {
  2041. flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
  2042. rule_dst = NULL;
  2043. dest_num = 0;
  2044. } else {
  2045. flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
  2046. MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
  2047. }
  2048. if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
  2049. (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
  2050. flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
  2051. mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
  2052. flow_tag, flow_attr->type);
  2053. err = -EINVAL;
  2054. goto free;
  2055. }
  2056. flow_act.flow_tag = flow_tag;
  2057. handler->rule = mlx5_add_flow_rules(ft, spec,
  2058. &flow_act,
  2059. rule_dst, dest_num);
  2060. if (IS_ERR(handler->rule)) {
  2061. err = PTR_ERR(handler->rule);
  2062. goto free;
  2063. }
  2064. ft_prio->refcount++;
  2065. handler->prio = ft_prio;
  2066. ft_prio->flow_table = ft;
  2067. free:
  2068. if (err)
  2069. kfree(handler);
  2070. kvfree(spec);
  2071. return err ? ERR_PTR(err) : handler;
  2072. }
  2073. static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
  2074. struct mlx5_ib_flow_prio *ft_prio,
  2075. const struct ib_flow_attr *flow_attr,
  2076. struct mlx5_flow_destination *dst)
  2077. {
  2078. return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
  2079. }
  2080. static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
  2081. struct mlx5_ib_flow_prio *ft_prio,
  2082. struct ib_flow_attr *flow_attr,
  2083. struct mlx5_flow_destination *dst)
  2084. {
  2085. struct mlx5_ib_flow_handler *handler_dst = NULL;
  2086. struct mlx5_ib_flow_handler *handler = NULL;
  2087. handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
  2088. if (!IS_ERR(handler)) {
  2089. handler_dst = create_flow_rule(dev, ft_prio,
  2090. flow_attr, dst);
  2091. if (IS_ERR(handler_dst)) {
  2092. mlx5_del_flow_rules(handler->rule);
  2093. ft_prio->refcount--;
  2094. kfree(handler);
  2095. handler = handler_dst;
  2096. } else {
  2097. list_add(&handler_dst->list, &handler->list);
  2098. }
  2099. }
  2100. return handler;
  2101. }
  2102. enum {
  2103. LEFTOVERS_MC,
  2104. LEFTOVERS_UC,
  2105. };
  2106. static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
  2107. struct mlx5_ib_flow_prio *ft_prio,
  2108. struct ib_flow_attr *flow_attr,
  2109. struct mlx5_flow_destination *dst)
  2110. {
  2111. struct mlx5_ib_flow_handler *handler_ucast = NULL;
  2112. struct mlx5_ib_flow_handler *handler = NULL;
  2113. static struct {
  2114. struct ib_flow_attr flow_attr;
  2115. struct ib_flow_spec_eth eth_flow;
  2116. } leftovers_specs[] = {
  2117. [LEFTOVERS_MC] = {
  2118. .flow_attr = {
  2119. .num_of_specs = 1,
  2120. .size = sizeof(leftovers_specs[0])
  2121. },
  2122. .eth_flow = {
  2123. .type = IB_FLOW_SPEC_ETH,
  2124. .size = sizeof(struct ib_flow_spec_eth),
  2125. .mask = {.dst_mac = {0x1} },
  2126. .val = {.dst_mac = {0x1} }
  2127. }
  2128. },
  2129. [LEFTOVERS_UC] = {
  2130. .flow_attr = {
  2131. .num_of_specs = 1,
  2132. .size = sizeof(leftovers_specs[0])
  2133. },
  2134. .eth_flow = {
  2135. .type = IB_FLOW_SPEC_ETH,
  2136. .size = sizeof(struct ib_flow_spec_eth),
  2137. .mask = {.dst_mac = {0x1} },
  2138. .val = {.dst_mac = {} }
  2139. }
  2140. }
  2141. };
  2142. handler = create_flow_rule(dev, ft_prio,
  2143. &leftovers_specs[LEFTOVERS_MC].flow_attr,
  2144. dst);
  2145. if (!IS_ERR(handler) &&
  2146. flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
  2147. handler_ucast = create_flow_rule(dev, ft_prio,
  2148. &leftovers_specs[LEFTOVERS_UC].flow_attr,
  2149. dst);
  2150. if (IS_ERR(handler_ucast)) {
  2151. mlx5_del_flow_rules(handler->rule);
  2152. ft_prio->refcount--;
  2153. kfree(handler);
  2154. handler = handler_ucast;
  2155. } else {
  2156. list_add(&handler_ucast->list, &handler->list);
  2157. }
  2158. }
  2159. return handler;
  2160. }
  2161. static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
  2162. struct mlx5_ib_flow_prio *ft_rx,
  2163. struct mlx5_ib_flow_prio *ft_tx,
  2164. struct mlx5_flow_destination *dst)
  2165. {
  2166. struct mlx5_ib_flow_handler *handler_rx;
  2167. struct mlx5_ib_flow_handler *handler_tx;
  2168. int err;
  2169. static const struct ib_flow_attr flow_attr = {
  2170. .num_of_specs = 0,
  2171. .size = sizeof(flow_attr)
  2172. };
  2173. handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
  2174. if (IS_ERR(handler_rx)) {
  2175. err = PTR_ERR(handler_rx);
  2176. goto err;
  2177. }
  2178. handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
  2179. if (IS_ERR(handler_tx)) {
  2180. err = PTR_ERR(handler_tx);
  2181. goto err_tx;
  2182. }
  2183. list_add(&handler_tx->list, &handler_rx->list);
  2184. return handler_rx;
  2185. err_tx:
  2186. mlx5_del_flow_rules(handler_rx->rule);
  2187. ft_rx->refcount--;
  2188. kfree(handler_rx);
  2189. err:
  2190. return ERR_PTR(err);
  2191. }
  2192. static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
  2193. struct ib_flow_attr *flow_attr,
  2194. int domain)
  2195. {
  2196. struct mlx5_ib_dev *dev = to_mdev(qp->device);
  2197. struct mlx5_ib_qp *mqp = to_mqp(qp);
  2198. struct mlx5_ib_flow_handler *handler = NULL;
  2199. struct mlx5_flow_destination *dst = NULL;
  2200. struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
  2201. struct mlx5_ib_flow_prio *ft_prio;
  2202. int err;
  2203. int underlay_qpn;
  2204. if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
  2205. return ERR_PTR(-ENOMEM);
  2206. if (domain != IB_FLOW_DOMAIN_USER ||
  2207. flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
  2208. (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
  2209. return ERR_PTR(-EINVAL);
  2210. dst = kzalloc(sizeof(*dst), GFP_KERNEL);
  2211. if (!dst)
  2212. return ERR_PTR(-ENOMEM);
  2213. mutex_lock(&dev->flow_db.lock);
  2214. ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
  2215. if (IS_ERR(ft_prio)) {
  2216. err = PTR_ERR(ft_prio);
  2217. goto unlock;
  2218. }
  2219. if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
  2220. ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
  2221. if (IS_ERR(ft_prio_tx)) {
  2222. err = PTR_ERR(ft_prio_tx);
  2223. ft_prio_tx = NULL;
  2224. goto destroy_ft;
  2225. }
  2226. }
  2227. dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
  2228. if (mqp->flags & MLX5_IB_QP_RSS)
  2229. dst->tir_num = mqp->rss_qp.tirn;
  2230. else
  2231. dst->tir_num = mqp->raw_packet_qp.rq.tirn;
  2232. if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
  2233. if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
  2234. handler = create_dont_trap_rule(dev, ft_prio,
  2235. flow_attr, dst);
  2236. } else {
  2237. underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
  2238. mqp->underlay_qpn : 0;
  2239. handler = _create_flow_rule(dev, ft_prio, flow_attr,
  2240. dst, underlay_qpn);
  2241. }
  2242. } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
  2243. flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
  2244. handler = create_leftovers_rule(dev, ft_prio, flow_attr,
  2245. dst);
  2246. } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
  2247. handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
  2248. } else {
  2249. err = -EINVAL;
  2250. goto destroy_ft;
  2251. }
  2252. if (IS_ERR(handler)) {
  2253. err = PTR_ERR(handler);
  2254. handler = NULL;
  2255. goto destroy_ft;
  2256. }
  2257. mutex_unlock(&dev->flow_db.lock);
  2258. kfree(dst);
  2259. return &handler->ibflow;
  2260. destroy_ft:
  2261. put_flow_table(dev, ft_prio, false);
  2262. if (ft_prio_tx)
  2263. put_flow_table(dev, ft_prio_tx, false);
  2264. unlock:
  2265. mutex_unlock(&dev->flow_db.lock);
  2266. kfree(dst);
  2267. kfree(handler);
  2268. return ERR_PTR(err);
  2269. }
  2270. static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
  2271. {
  2272. struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
  2273. struct mlx5_ib_qp *mqp = to_mqp(ibqp);
  2274. int err;
  2275. if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
  2276. mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
  2277. return -EOPNOTSUPP;
  2278. }
  2279. err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
  2280. if (err)
  2281. mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
  2282. ibqp->qp_num, gid->raw);
  2283. return err;
  2284. }
  2285. static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
  2286. {
  2287. struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
  2288. int err;
  2289. err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
  2290. if (err)
  2291. mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
  2292. ibqp->qp_num, gid->raw);
  2293. return err;
  2294. }
  2295. static int init_node_data(struct mlx5_ib_dev *dev)
  2296. {
  2297. int err;
  2298. err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
  2299. if (err)
  2300. return err;
  2301. dev->mdev->rev_id = dev->mdev->pdev->revision;
  2302. return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
  2303. }
  2304. static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
  2305. char *buf)
  2306. {
  2307. struct mlx5_ib_dev *dev =
  2308. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2309. return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
  2310. }
  2311. static ssize_t show_reg_pages(struct device *device,
  2312. struct device_attribute *attr, char *buf)
  2313. {
  2314. struct mlx5_ib_dev *dev =
  2315. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2316. return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
  2317. }
  2318. static ssize_t show_hca(struct device *device, struct device_attribute *attr,
  2319. char *buf)
  2320. {
  2321. struct mlx5_ib_dev *dev =
  2322. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2323. return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
  2324. }
  2325. static ssize_t show_rev(struct device *device, struct device_attribute *attr,
  2326. char *buf)
  2327. {
  2328. struct mlx5_ib_dev *dev =
  2329. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2330. return sprintf(buf, "%x\n", dev->mdev->rev_id);
  2331. }
  2332. static ssize_t show_board(struct device *device, struct device_attribute *attr,
  2333. char *buf)
  2334. {
  2335. struct mlx5_ib_dev *dev =
  2336. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2337. return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
  2338. dev->mdev->board_id);
  2339. }
  2340. static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
  2341. static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
  2342. static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
  2343. static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
  2344. static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
  2345. static struct device_attribute *mlx5_class_attributes[] = {
  2346. &dev_attr_hw_rev,
  2347. &dev_attr_hca_type,
  2348. &dev_attr_board_id,
  2349. &dev_attr_fw_pages,
  2350. &dev_attr_reg_pages,
  2351. };
  2352. static void pkey_change_handler(struct work_struct *work)
  2353. {
  2354. struct mlx5_ib_port_resources *ports =
  2355. container_of(work, struct mlx5_ib_port_resources,
  2356. pkey_change_work);
  2357. mutex_lock(&ports->devr->mutex);
  2358. mlx5_ib_gsi_pkey_change(ports->gsi);
  2359. mutex_unlock(&ports->devr->mutex);
  2360. }
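/*
 * On a fatal device error, walk every QP on this ibdev and trigger the
 * completion callback of each CQ attached to a queue with outstanding work,
 * so that pending completions are flushed back to their consumers.
 */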
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
        struct mlx5_ib_qp *mqp;
        struct mlx5_ib_cq *send_mcq, *recv_mcq;
        struct mlx5_core_cq *mcq;
        struct list_head cq_armed_list;
        unsigned long flags_qp;
        unsigned long flags_cq;
        unsigned long flags;

        INIT_LIST_HEAD(&cq_armed_list);

        /* Walk the QP list of this ibdev; the lock synchronizes against QP
         * create/destroy.
         */
        spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
        list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
                spin_lock_irqsave(&mqp->sq.lock, flags_qp);
                if (mqp->sq.tail != mqp->sq.head) {
                        send_mcq = to_mcq(mqp->ibqp.send_cq);
                        spin_lock_irqsave(&send_mcq->lock, flags_cq);
                        if (send_mcq->mcq.comp &&
                            mqp->ibqp.send_cq->comp_handler) {
                                if (!send_mcq->mcq.reset_notify_added) {
                                        send_mcq->mcq.reset_notify_added = 1;
                                        list_add_tail(&send_mcq->mcq.reset_notify,
                                                      &cq_armed_list);
                                }
                        }
                        spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
                }
                spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
                spin_lock_irqsave(&mqp->rq.lock, flags_qp);
                /* no handling is needed for SRQ */
                if (!mqp->ibqp.srq) {
                        if (mqp->rq.tail != mqp->rq.head) {
                                recv_mcq = to_mcq(mqp->ibqp.recv_cq);
                                spin_lock_irqsave(&recv_mcq->lock, flags_cq);
                                if (recv_mcq->mcq.comp &&
                                    mqp->ibqp.recv_cq->comp_handler) {
                                        if (!recv_mcq->mcq.reset_notify_added) {
                                                recv_mcq->mcq.reset_notify_added = 1;
                                                list_add_tail(&recv_mcq->mcq.reset_notify,
                                                              &cq_armed_list);
                                        }
                                }
                                spin_unlock_irqrestore(&recv_mcq->lock,
                                                       flags_cq);
                        }
                }
                spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
        }
        /* At this point, taking and releasing the locks above guarantees that
         * all in-flight posted work has been observed. Now notify every CQ
         * collected on the list.
         */
        list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
                mcq->comp(mcq);
        }
        spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
  2417. static void delay_drop_handler(struct work_struct *work)
  2418. {
  2419. int err;
  2420. struct mlx5_ib_delay_drop *delay_drop =
  2421. container_of(work, struct mlx5_ib_delay_drop,
  2422. delay_drop_work);
  2423. atomic_inc(&delay_drop->events_cnt);
  2424. mutex_lock(&delay_drop->lock);
  2425. err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
  2426. delay_drop->timeout);
  2427. if (err) {
  2428. mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
  2429. delay_drop->timeout);
  2430. delay_drop->activate = false;
  2431. }
  2432. mutex_unlock(&delay_drop->lock);
  2433. }
  2434. static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
  2435. enum mlx5_dev_event event, unsigned long param)
  2436. {
  2437. struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
  2438. struct ib_event ibev;
  2439. bool fatal = false;
  2440. u8 port = 0;
  2441. switch (event) {
  2442. case MLX5_DEV_EVENT_SYS_ERROR:
  2443. ibev.event = IB_EVENT_DEVICE_FATAL;
  2444. mlx5_ib_handle_internal_error(ibdev);
  2445. fatal = true;
  2446. break;
  2447. case MLX5_DEV_EVENT_PORT_UP:
  2448. case MLX5_DEV_EVENT_PORT_DOWN:
  2449. case MLX5_DEV_EVENT_PORT_INITIALIZED:
  2450. port = (u8)param;
  2451. /* In RoCE, port up/down events are handled in
  2452. * mlx5_netdev_event().
  2453. */
  2454. if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
  2455. IB_LINK_LAYER_ETHERNET)
  2456. return;
  2457. ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ?
  2458. IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
  2459. break;
  2460. case MLX5_DEV_EVENT_LID_CHANGE:
  2461. ibev.event = IB_EVENT_LID_CHANGE;
  2462. port = (u8)param;
  2463. break;
  2464. case MLX5_DEV_EVENT_PKEY_CHANGE:
  2465. ibev.event = IB_EVENT_PKEY_CHANGE;
  2466. port = (u8)param;
  2467. schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
  2468. break;
  2469. case MLX5_DEV_EVENT_GUID_CHANGE:
  2470. ibev.event = IB_EVENT_GID_CHANGE;
  2471. port = (u8)param;
  2472. break;
  2473. case MLX5_DEV_EVENT_CLIENT_REREG:
  2474. ibev.event = IB_EVENT_CLIENT_REREGISTER;
  2475. port = (u8)param;
  2476. break;
  2477. case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
  2478. schedule_work(&ibdev->delay_drop.delay_drop_work);
  2479. goto out;
  2480. default:
  2481. goto out;
  2482. }
  2483. ibev.device = &ibdev->ib_dev;
  2484. ibev.element.port_num = port;
  2485. if (port < 1 || port > ibdev->num_ports) {
  2486. mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
  2487. goto out;
  2488. }
  2489. if (ibdev->ib_active)
  2490. ib_dispatch_event(&ibev);
  2491. if (fatal)
  2492. ibdev->ib_active = false;
  2493. out:
  2494. return;
  2495. }
  2496. static int set_has_smi_cap(struct mlx5_ib_dev *dev)
  2497. {
  2498. struct mlx5_hca_vport_context vport_ctx;
  2499. int err;
  2500. int port;
  2501. for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
  2502. dev->mdev->port_caps[port - 1].has_smi = false;
  2503. if (MLX5_CAP_GEN(dev->mdev, port_type) ==
  2504. MLX5_CAP_PORT_TYPE_IB) {
  2505. if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
  2506. err = mlx5_query_hca_vport_context(dev->mdev, 0,
  2507. port, 0,
  2508. &vport_ctx);
  2509. if (err) {
  2510. mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
  2511. port, err);
  2512. return err;
  2513. }
  2514. dev->mdev->port_caps[port - 1].has_smi =
  2515. vport_ctx.has_smi;
  2516. } else {
  2517. dev->mdev->port_caps[port - 1].has_smi = true;
  2518. }
  2519. }
  2520. }
  2521. return 0;
  2522. }
  2523. static void get_ext_port_caps(struct mlx5_ib_dev *dev)
  2524. {
  2525. int port;
  2526. for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
  2527. mlx5_query_ext_port_caps(dev, port);
  2528. }
  2529. static int get_port_caps(struct mlx5_ib_dev *dev)
  2530. {
  2531. struct ib_device_attr *dprops = NULL;
  2532. struct ib_port_attr *pprops = NULL;
  2533. int err = -ENOMEM;
  2534. int port;
  2535. struct ib_udata uhw = {.inlen = 0, .outlen = 0};
  2536. pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
  2537. if (!pprops)
  2538. goto out;
  2539. dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
  2540. if (!dprops)
  2541. goto out;
  2542. err = set_has_smi_cap(dev);
  2543. if (err)
  2544. goto out;
  2545. err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
  2546. if (err) {
  2547. mlx5_ib_warn(dev, "query_device failed %d\n", err);
  2548. goto out;
  2549. }
  2550. for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
  2551. memset(pprops, 0, sizeof(*pprops));
  2552. err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
  2553. if (err) {
  2554. mlx5_ib_warn(dev, "query_port %d failed %d\n",
  2555. port, err);
  2556. break;
  2557. }
  2558. dev->mdev->port_caps[port - 1].pkey_table_len =
  2559. dprops->max_pkeys;
  2560. dev->mdev->port_caps[port - 1].gid_table_len =
  2561. pprops->gid_tbl_len;
  2562. mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
  2563. dprops->max_pkeys, pprops->gid_tbl_len);
  2564. }
  2565. out:
  2566. kfree(pprops);
  2567. kfree(dprops);
  2568. return err;
  2569. }
  2570. static void destroy_umrc_res(struct mlx5_ib_dev *dev)
  2571. {
  2572. int err;
  2573. err = mlx5_mr_cache_cleanup(dev);
  2574. if (err)
  2575. mlx5_ib_warn(dev, "mr cache cleanup failed\n");
  2576. mlx5_ib_destroy_qp(dev->umrc.qp);
  2577. ib_free_cq(dev->umrc.cq);
  2578. ib_dealloc_pd(dev->umrc.pd);
  2579. }
  2580. enum {
  2581. MAX_UMR_WR = 128,
  2582. };
static int create_umr_res(struct mlx5_ib_dev *dev)
{
	struct ib_qp_init_attr *init_attr = NULL;
	struct ib_qp_attr *attr = NULL;
	struct ib_pd *pd;
	struct ib_cq *cq;
	struct ib_qp *qp;
	int ret;

	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
	init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
	if (!attr || !init_attr) {
		ret = -ENOMEM;
		goto error_0;
	}

	pd = ib_alloc_pd(&dev->ib_dev, 0);
	if (IS_ERR(pd)) {
		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
		ret = PTR_ERR(pd);
		goto error_0;
	}

	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(cq)) {
		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
		ret = PTR_ERR(cq);
		goto error_2;
	}

	init_attr->send_cq = cq;
	init_attr->recv_cq = cq;
	init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
	init_attr->cap.max_send_wr = MAX_UMR_WR;
	init_attr->cap.max_send_sge = 1;
	init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
	init_attr->port_num = 1;
	qp = mlx5_ib_create_qp(pd, init_attr, NULL);
	if (IS_ERR(qp)) {
		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
		ret = PTR_ERR(qp);
		goto error_3;
	}
	qp->device = &dev->ib_dev;
	qp->real_qp = qp;
	qp->uobject = NULL;
	qp->qp_type = MLX5_IB_QPT_REG_UMR;

	attr->qp_state = IB_QPS_INIT;
	attr->port_num = 1;
	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
				IB_QP_PORT, NULL);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
		goto error_4;
	}

	memset(attr, 0, sizeof(*attr));
	attr->qp_state = IB_QPS_RTR;
	attr->path_mtu = IB_MTU_256;

	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
		goto error_4;
	}

	memset(attr, 0, sizeof(*attr));
	attr->qp_state = IB_QPS_RTS;
	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
		goto error_4;
	}

	dev->umrc.qp = qp;
	dev->umrc.cq = cq;
	dev->umrc.pd = pd;

	sema_init(&dev->umrc.sem, MAX_UMR_WR);
	ret = mlx5_mr_cache_init(dev);
	if (ret) {
		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
		goto error_4;
	}

	kfree(attr);
	kfree(init_attr);

	return 0;

error_4:
	mlx5_ib_destroy_qp(qp);

error_3:
	ib_free_cq(cq);

error_2:
	ib_dealloc_pd(pd);

error_0:
	kfree(attr);
	kfree(init_attr);
	return ret;
}

static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
{
	switch (umr_fence_cap) {
	case MLX5_CAP_UMR_FENCE_NONE:
		return MLX5_FENCE_MODE_NONE;
	case MLX5_CAP_UMR_FENCE_SMALL:
		return MLX5_FENCE_MODE_INITIATOR_SMALL;
	default:
		return MLX5_FENCE_MODE_STRONG_ORDERING;
	}
}
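
/*
 * Create the driver-internal verbs objects shared by kernel consumers
 * of this device: a PD (p0), a 1-CQE CQ (c0), two XRC domains (x0, x1),
 * an XRC SRQ (s0) and a basic SRQ (s1).  A per-port work item is also
 * set up to handle P_Key change events.
 */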
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
	struct ib_srq_init_attr attr;
	struct mlx5_ib_dev *dev;
	struct ib_cq_init_attr cq_attr = {.cqe = 1};
	int port;
	int ret = 0;

	dev = container_of(devr, struct mlx5_ib_dev, devr);
	mutex_init(&devr->mutex);

	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->p0)) {
		ret = PTR_ERR(devr->p0);
		goto error0;
	}
	devr->p0->device = &dev->ib_dev;
	devr->p0->uobject = NULL;
	atomic_set(&devr->p0->usecnt, 0);

	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
	if (IS_ERR(devr->c0)) {
		ret = PTR_ERR(devr->c0);
		goto error1;
	}
	devr->c0->device = &dev->ib_dev;
	devr->c0->uobject = NULL;
	devr->c0->comp_handler = NULL;
	devr->c0->event_handler = NULL;
	devr->c0->cq_context = NULL;
	atomic_set(&devr->c0->usecnt, 0);

	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x0)) {
		ret = PTR_ERR(devr->x0);
		goto error2;
	}
	devr->x0->device = &dev->ib_dev;
	devr->x0->inode = NULL;
	atomic_set(&devr->x0->usecnt, 0);
	mutex_init(&devr->x0->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x1)) {
		ret = PTR_ERR(devr->x1);
		goto error3;
	}
	devr->x1->device = &dev->ib_dev;
	devr->x1->inode = NULL;
	atomic_set(&devr->x1->usecnt, 0);
	mutex_init(&devr->x1->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_XRC;
	attr.ext.cq = devr->c0;
	attr.ext.xrc.xrcd = devr->x0;

	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s0)) {
		ret = PTR_ERR(devr->s0);
		goto error4;
	}
	devr->s0->device = &dev->ib_dev;
	devr->s0->pd = devr->p0;
	devr->s0->uobject = NULL;
	devr->s0->event_handler = NULL;
	devr->s0->srq_context = NULL;
	devr->s0->srq_type = IB_SRQT_XRC;
	devr->s0->ext.xrc.xrcd = devr->x0;
	devr->s0->ext.cq = devr->c0;
	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
	atomic_inc(&devr->s0->ext.cq->usecnt);
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s0->usecnt, 0);

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_BASIC;
	devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s1)) {
		ret = PTR_ERR(devr->s1);
		goto error5;
	}
	devr->s1->device = &dev->ib_dev;
	devr->s1->pd = devr->p0;
	devr->s1->uobject = NULL;
	devr->s1->event_handler = NULL;
	devr->s1->srq_context = NULL;
	devr->s1->srq_type = IB_SRQT_BASIC;
	devr->s1->ext.cq = devr->c0;
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s1->usecnt, 0);

	for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
		INIT_WORK(&devr->ports[port].pkey_change_work,
			  pkey_change_handler);
		devr->ports[port].devr = devr;
	}

	return 0;

error5:
	mlx5_ib_destroy_srq(devr->s0);
error4:
	mlx5_ib_dealloc_xrcd(devr->x1);
error3:
	mlx5_ib_dealloc_xrcd(devr->x0);
error2:
	mlx5_ib_destroy_cq(devr->c0);
error1:
	mlx5_ib_dealloc_pd(devr->p0);
error0:
	return ret;
}

static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
	struct mlx5_ib_dev *dev =
		container_of(devr, struct mlx5_ib_dev, devr);
	int port;

	mlx5_ib_destroy_srq(devr->s1);
	mlx5_ib_destroy_srq(devr->s0);
	mlx5_ib_dealloc_xrcd(devr->x0);
	mlx5_ib_dealloc_xrcd(devr->x1);
	mlx5_ib_destroy_cq(devr->c0);
	mlx5_ib_dealloc_pd(devr->p0);

	/* Make sure no change P_Key work items are still executing */
	for (port = 0; port < dev->num_ports; ++port)
		cancel_work_sync(&devr->ports[port].pkey_change_work);
}
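
/*
 * Report the rdma-core port capabilities: plain IB when the link layer
 * is InfiniBand, otherwise raw packet plus RoCE v1/v2 flags depending
 * on the reported RoCE L3 types and version capabilities.
 */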
static u32 get_core_cap_flags(struct ib_device *ibdev)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
	u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
	u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
	u32 ret = 0;

	if (ll == IB_LINK_LAYER_INFINIBAND)
		return RDMA_CORE_PORT_IBA_IB;

	ret = RDMA_CORE_PORT_RAW_PACKET;

	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
		return ret;

	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
		return ret;

	if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
		ret |= RDMA_CORE_PORT_IBA_ROCE;

	if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
		ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	return ret;
}

static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
			       struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
	int err;

	immutable->core_cap_flags = get_core_cap_flags(ibdev);

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = get_core_cap_flags(ibdev);
	if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
		immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
	struct mlx5_ib_dev *dev =
		container_of(ibdev, struct mlx5_ib_dev, ib_dev);

	snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d",
		 fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev),
		 fw_rev_sub(dev->mdev));
}
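
/*
 * RoCE LAG support: when bonding is active, create the vport LAG
 * context and a LAG demux flow table so received traffic can be
 * steered to the right IB port; mlx5_eth_lag_cleanup() tears both
 * down again.
 */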
static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
								 MLX5_FLOW_NAMESPACE_LAG);
	struct mlx5_flow_table *ft;
	int err;

	if (!ns || !mlx5_lag_is_active(mdev))
		return 0;

	err = mlx5_cmd_create_vport_lag(mdev);
	if (err)
		return err;

	ft = mlx5_create_lag_demux_flow_table(ns, 0, 0);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_destroy_vport_lag;
	}

	dev->flow_db.lag_demux_ft = ft;
	return 0;

err_destroy_vport_lag:
	mlx5_cmd_destroy_vport_lag(mdev);
	return err;
}

static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
	struct mlx5_core_dev *mdev = dev->mdev;

	if (dev->flow_db.lag_demux_ft) {
		mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
		dev->flow_db.lag_demux_ft = NULL;

		mlx5_cmd_destroy_vport_lag(mdev);
	}
}

static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev)
{
	int err;

	dev->roce.nb.notifier_call = mlx5_netdev_event;
	err = register_netdevice_notifier(&dev->roce.nb);
	if (err) {
		dev->roce.nb.notifier_call = NULL;
		return err;
	}

	return 0;
}

static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev)
{
	if (dev->roce.nb.notifier_call) {
		unregister_netdevice_notifier(&dev->roce.nb);
		dev->roce.nb.notifier_call = NULL;
	}
}

static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{
	int err;

	err = mlx5_add_netdev_notifier(dev);
	if (err)
		return err;

	if (MLX5_CAP_GEN(dev->mdev, roce)) {
		err = mlx5_nic_vport_enable_roce(dev->mdev);
		if (err)
			goto err_unregister_netdevice_notifier;
	}

	err = mlx5_eth_lag_init(dev);
	if (err)
		goto err_disable_roce;

	return 0;

err_disable_roce:
	if (MLX5_CAP_GEN(dev->mdev, roce))
		mlx5_nic_vport_disable_roce(dev->mdev);

err_unregister_netdevice_notifier:
	mlx5_remove_netdev_notifier(dev);
	return err;
}

static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
	mlx5_eth_lag_cleanup(dev);
	if (MLX5_CAP_GEN(dev->mdev, roce))
		mlx5_nic_vport_disable_roce(dev->mdev);
}
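
/*
 * Hardware counter descriptions.  Each entry maps a counter name to its
 * byte offset inside the firmware query output: Q counters come from
 * QUERY_Q_COUNTER and congestion counters from QUERY_CONG_STATISTICS.
 * Which groups are exposed depends on the device capabilities checked
 * in __mlx5_ib_alloc_counters().
 */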
struct mlx5_ib_counter {
	const char *name;
	size_t offset;
};

#define INIT_Q_COUNTER(_name) \
	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}

static const struct mlx5_ib_counter basic_q_cnts[] = {
	INIT_Q_COUNTER(rx_write_requests),
	INIT_Q_COUNTER(rx_read_requests),
	INIT_Q_COUNTER(rx_atomic_requests),
	INIT_Q_COUNTER(out_of_buffer),
};

static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
	INIT_Q_COUNTER(out_of_sequence),
};

static const struct mlx5_ib_counter retrans_q_cnts[] = {
	INIT_Q_COUNTER(duplicate_request),
	INIT_Q_COUNTER(rnr_nak_retry_err),
	INIT_Q_COUNTER(packet_seq_err),
	INIT_Q_COUNTER(implied_nak_seq_err),
	INIT_Q_COUNTER(local_ack_timeout_err),
};

#define INIT_CONG_COUNTER(_name) \
	{ .name = #_name, .offset = \
		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}

static const struct mlx5_ib_counter cong_cnts[] = {
	INIT_CONG_COUNTER(rp_cnp_ignored),
	INIT_CONG_COUNTER(rp_cnp_handled),
	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
	INIT_CONG_COUNTER(np_cnp_sent),
};

static const struct mlx5_ib_counter extended_err_cnts[] = {
	INIT_Q_COUNTER(resp_local_length_error),
	INIT_Q_COUNTER(resp_cqe_error),
	INIT_Q_COUNTER(req_cqe_error),
	INIT_Q_COUNTER(req_remote_invalid_request),
	INIT_Q_COUNTER(req_remote_access_errors),
	INIT_Q_COUNTER(resp_remote_access_errors),
	INIT_Q_COUNTER(resp_cqe_flush_error),
	INIT_Q_COUNTER(req_cqe_flush_error),
};

static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_ports; i++) {
		mlx5_core_dealloc_q_counter(dev->mdev,
					    dev->port[i].cnts.set_id);
		kfree(dev->port[i].cnts.names);
		kfree(dev->port[i].cnts.offsets);
	}
}

static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_counters *cnts)
{
	u32 num_counters;

	num_counters = ARRAY_SIZE(basic_q_cnts);

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);

	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
		num_counters += ARRAY_SIZE(retrans_q_cnts);

	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
		num_counters += ARRAY_SIZE(extended_err_cnts);

	cnts->num_q_counters = num_counters;

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
		num_counters += ARRAY_SIZE(cong_cnts);
	}

	cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
	if (!cnts->names)
		return -ENOMEM;

	cnts->offsets = kcalloc(num_counters,
				sizeof(cnts->offsets), GFP_KERNEL);
	if (!cnts->offsets)
		goto err_names;

	return 0;

err_names:
	kfree(cnts->names);
	return -ENOMEM;
}

static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
				  const char **names,
				  size_t *offsets)
{
	int i;
	int j = 0;

	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
		names[j] = basic_q_cnts[i].name;
		offsets[j] = basic_q_cnts[i].offset;
	}

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
			names[j] = out_of_seq_q_cnts[i].name;
			offsets[j] = out_of_seq_q_cnts[i].offset;
		}
	}

	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
			names[j] = retrans_q_cnts[i].name;
			offsets[j] = retrans_q_cnts[i].offset;
		}
	}

	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
		for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
			names[j] = extended_err_cnts[i].name;
			offsets[j] = extended_err_cnts[i].offset;
		}
	}

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
			names[j] = cong_cnts[i].name;
			offsets[j] = cong_cnts[i].offset;
		}
	}
}

static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
	int i;
	int ret;

	for (i = 0; i < dev->num_ports; i++) {
		struct mlx5_ib_port *port = &dev->port[i];

		ret = mlx5_core_alloc_q_counter(dev->mdev,
						&port->cnts.set_id);
		if (ret) {
			mlx5_ib_warn(dev,
				     "couldn't allocate queue counter for port %d, err %d\n",
				     i + 1, ret);
			goto dealloc_counters;
		}

		ret = __mlx5_ib_alloc_counters(dev, &port->cnts);
		if (ret)
			goto dealloc_counters;

		mlx5_ib_fill_counters(dev, port->cnts.names,
				      port->cnts.offsets);
	}

	return 0;

dealloc_counters:
	while (--i >= 0)
		mlx5_core_dealloc_q_counter(dev->mdev,
					    dev->port[i].cnts.set_id);

	return ret;
}
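
/*
 * ib_core hardware-stats hooks.  alloc_hw_stats sizes the stats
 * structure from the per-port counter tables; get_hw_stats fills it by
 * reading the 32-bit Q counters and, when cc_query_allowed is set, the
 * 64-bit congestion counters, and returns the number of counters read.
 */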
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
						    u8 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_port *port = &dev->port[port_num - 1];

	/* We support only per port stats */
	if (port_num == 0)
		return NULL;

	return rdma_alloc_hw_stats_struct(port->cnts.names,
					  port->cnts.num_q_counters +
					  port->cnts.num_cong_counters,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_port *port,
				    struct rdma_hw_stats *stats)
{
	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
	void *out;
	__be32 val;
	int ret, i;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	ret = mlx5_core_query_q_counter(dev->mdev,
					port->cnts.set_id, 0,
					out, outlen);
	if (ret)
		goto free;

	for (i = 0; i < port->cnts.num_q_counters; i++) {
		val = *(__be32 *)(out + port->cnts.offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}

free:
	kvfree(out);
	return ret;
}

static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
				       struct mlx5_ib_port *port,
				       struct rdma_hw_stats *stats)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	void *out;
	int ret, i;
	int offset = port->cnts.num_q_counters;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
	if (ret)
		goto free;

	for (i = 0; i < port->cnts.num_cong_counters; i++) {
		stats->value[i + offset] =
			be64_to_cpup((__be64 *)(out +
				     port->cnts.offsets[i + offset]));
	}

free:
	kvfree(out);
	return ret;
}

static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
				struct rdma_hw_stats *stats,
				u8 port_num, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_port *port = &dev->port[port_num - 1];
	int ret, num_counters;

	if (!stats)
		return -EINVAL;

	ret = mlx5_ib_query_q_counters(dev, port, stats);
	if (ret)
		return ret;

	num_counters = port->cnts.num_q_counters;

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		ret = mlx5_ib_query_cong_counters(dev, port, stats);
		if (ret)
			return ret;
		num_counters += port->cnts.num_cong_counters;
	}

	return num_counters;
}

static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
{
	return mlx5_rdma_netdev_free(netdev);
}

static struct net_device *
mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
			  u8 port_num,
			  enum rdma_netdev_t type,
			  const char *name,
			  unsigned char name_assign_type,
			  void (*setup)(struct net_device *))
{
	struct net_device *netdev;
	struct rdma_netdev *rn;

	if (type != RDMA_NETDEV_IPOIB)
		return ERR_PTR(-EOPNOTSUPP);

	netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
					name, setup);
	if (likely(!IS_ERR_OR_NULL(netdev))) {
		rn = netdev_priv(netdev);
		rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
	}
	return netdev;
}

static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!dev->delay_drop.dbg)
		return;

	debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
	kfree(dev->delay_drop.dbg);
	dev->delay_drop.dbg = NULL;
}

static void cancel_delay_drop(struct mlx5_ib_dev *dev)
{
	if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
		return;

	cancel_work_sync(&dev->delay_drop.delay_drop_work);
	delay_drop_debugfs_cleanup(dev);
}
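
/*
 * Delay-drop debugfs interface: "timeout" exposes the delay-drop
 * timeout in microseconds (writes are rounded up to a multiple of 100
 * and clamped), while "num_timeout_events" and "num_rqs" are read-only
 * atomic counters.  The directory lives under the mlx5 debugfs root
 * and is optional; init failures only produce a warning.
 */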
static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
				       size_t count, loff_t *pos)
{
	struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
	char lbuf[20];
	int len;

	len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout);
	return simple_read_from_buffer(buf, count, pos, lbuf, len);
}

static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
					size_t count, loff_t *pos)
{
	struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
	u32 timeout;
	u32 var;

	if (kstrtouint_from_user(buf, count, 0, &var))
		return -EFAULT;

	timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS *
			1000);
	if (timeout != var)
		mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
			    timeout);

	delay_drop->timeout = timeout;

	return count;
}

static const struct file_operations fops_delay_drop_timeout = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= delay_drop_timeout_write,
	.read	= delay_drop_timeout_read,
};

static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_ib_dbg_delay_drop *dbg;

	if (!mlx5_debugfs_root)
		return 0;

	dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
	if (!dbg)
		return -ENOMEM;

	dev->delay_drop.dbg = dbg;

	dbg->dir_debugfs =
		debugfs_create_dir("delay_drop",
				   dev->mdev->priv.dbg_root);
	if (!dbg->dir_debugfs)
		goto out_debugfs;

	dbg->events_cnt_debugfs =
		debugfs_create_atomic_t("num_timeout_events", 0400,
					dbg->dir_debugfs,
					&dev->delay_drop.events_cnt);
	if (!dbg->events_cnt_debugfs)
		goto out_debugfs;

	dbg->rqs_cnt_debugfs =
		debugfs_create_atomic_t("num_rqs", 0400,
					dbg->dir_debugfs,
					&dev->delay_drop.rqs_cnt);
	if (!dbg->rqs_cnt_debugfs)
		goto out_debugfs;

	dbg->timeout_debugfs =
		debugfs_create_file("timeout", 0600,
				    dbg->dir_debugfs,
				    &dev->delay_drop,
				    &fops_delay_drop_timeout);
	if (!dbg->timeout_debugfs)
		goto out_debugfs;

	return 0;

out_debugfs:
	delay_drop_debugfs_cleanup(dev);
	return -ENOMEM;
}

static void init_delay_drop(struct mlx5_ib_dev *dev)
{
	if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
		return;

	mutex_init(&dev->delay_drop.lock);
	dev->delay_drop.dev = dev;
	dev->delay_drop.activate = false;
	dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
	INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
	atomic_set(&dev->delay_drop.rqs_cnt, 0);
	atomic_set(&dev->delay_drop.events_cnt, 0);

	if (delay_drop_debugfs_init(dev))
		mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
}

static const struct cpumask *
mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	return mlx5_get_vector_affinity(dev->mdev, comp_vector);
}
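
/*
 * Probe path.  mlx5_ib_add() allocates the ib_device, caches port
 * capabilities, wires up the verbs callbacks and uverbs command masks,
 * enables Ethernet/RoCE support when the link layer is Ethernet, then
 * creates device resources, counters, debugfs entries, UAR/bfreg
 * mappings and the UMR QP before registering with the IB core and
 * creating the sysfs attributes.  The error labels unwind in reverse
 * order.
 */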
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
	struct mlx5_ib_dev *dev;
	enum rdma_link_layer ll;
	int port_type_cap;
	const char *name;
	int err;
	int i;

	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);

	printk_once(KERN_INFO "%s", mlx5_version);

	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev)
		return NULL;

	dev->mdev = mdev;

	dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
			    GFP_KERNEL);
	if (!dev->port)
		goto err_dealloc;

	rwlock_init(&dev->roce.netdev_lock);
	err = get_port_caps(dev);
	if (err)
		goto err_free_port;

	if (mlx5_use_mad_ifc(dev))
		get_ext_port_caps(dev);

	if (!mlx5_lag_is_active(mdev))
		name = "mlx5_%d";
	else
		name = "mlx5_bond_%d";

	strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
	dev->ib_dev.owner = THIS_MODULE;
	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
	dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
	dev->ib_dev.phys_port_cnt = dev->num_ports;
	dev->ib_dev.num_comp_vectors =
		dev->mdev->priv.eq_table.num_comp_vectors;
	dev->ib_dev.dev.parent = &mdev->pdev->dev;

	dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_REREG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
	dev->ib_dev.uverbs_ex_cmd_mask =
		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP);

	dev->ib_dev.query_device = mlx5_ib_query_device;
	dev->ib_dev.query_port = mlx5_ib_query_port;
	dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
	if (ll == IB_LINK_LAYER_ETHERNET)
		dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
	dev->ib_dev.query_gid = mlx5_ib_query_gid;
	dev->ib_dev.add_gid = mlx5_ib_add_gid;
	dev->ib_dev.del_gid = mlx5_ib_del_gid;
	dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
	dev->ib_dev.modify_device = mlx5_ib_modify_device;
	dev->ib_dev.modify_port = mlx5_ib_modify_port;
	dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
	dev->ib_dev.mmap = mlx5_ib_mmap;
	dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
	dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
	dev->ib_dev.create_ah = mlx5_ib_create_ah;
	dev->ib_dev.query_ah = mlx5_ib_query_ah;
	dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
	dev->ib_dev.create_srq = mlx5_ib_create_srq;
	dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
	dev->ib_dev.query_srq = mlx5_ib_query_srq;
	dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
	dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
	dev->ib_dev.create_qp = mlx5_ib_create_qp;
	dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
	dev->ib_dev.query_qp = mlx5_ib_query_qp;
	dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
	dev->ib_dev.post_send = mlx5_ib_post_send;
	dev->ib_dev.post_recv = mlx5_ib_post_recv;
	dev->ib_dev.create_cq = mlx5_ib_create_cq;
	dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
	dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
	dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
	dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
	dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
	dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
	dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
	dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
	dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
	dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
	dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
	dev->ib_dev.process_mad = mlx5_ib_process_mad;
	dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
	dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
	dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
	dev->ib_dev.get_port_immutable = mlx5_port_immutable;
	dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
	dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
		dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;

	if (mlx5_core_is_pf(mdev)) {
		dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
		dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
		dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
		dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
	}

	dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;

	mlx5_ib_internal_fill_odp_caps(dev);

	dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));

	if (MLX5_CAP_GEN(mdev, imaicl)) {
		dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
		dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
	}

	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
		dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
		dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
	}

	if (MLX5_CAP_GEN(mdev, xrc)) {
		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
	}

	dev->ib_dev.create_flow = mlx5_ib_create_flow;
	dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
	dev->ib_dev.uverbs_ex_cmd_mask |=
		(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
		(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		dev->ib_dev.create_wq = mlx5_ib_create_wq;
		dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
		dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
		dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
		dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
		dev->ib_dev.uverbs_ex_cmd_mask |=
			(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
			(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
			(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
	}
	err = init_node_data(dev);
	if (err)
		goto err_free_port;

	mutex_init(&dev->flow_db.lock);
	mutex_init(&dev->cap_mask_mutex);
	INIT_LIST_HEAD(&dev->qp_list);
	spin_lock_init(&dev->reset_flow_resource_lock);

	if (ll == IB_LINK_LAYER_ETHERNET) {
		err = mlx5_enable_eth(dev);
		if (err)
			goto err_free_port;
		dev->roce.last_port_state = IB_PORT_DOWN;
	}

	err = create_dev_resources(&dev->devr);
	if (err)
		goto err_disable_eth;

	err = mlx5_ib_odp_init_one(dev);
	if (err)
		goto err_rsrc;

	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
		err = mlx5_ib_alloc_counters(dev);
		if (err)
			goto err_odp;
	}

	err = mlx5_ib_init_cong_debugfs(dev);
	if (err)
		goto err_cnt;

	dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
	if (!dev->mdev->priv.uar)
		goto err_cong;

	err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
	if (err)
		goto err_uar_page;

	err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
	if (err)
		goto err_bfreg;

	err = ib_register_device(&dev->ib_dev, NULL);
	if (err)
		goto err_fp_bfreg;

	err = create_umr_res(dev);
	if (err)
		goto err_dev;

	init_delay_drop(dev);

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		err = device_create_file(&dev->ib_dev.dev,
					 mlx5_class_attributes[i]);
		if (err)
			goto err_delay_drop;
	}

	if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
	    MLX5_CAP_GEN(mdev, disable_local_lb))
		mutex_init(&dev->lb_mutex);

	dev->ib_active = true;

	return dev;

err_delay_drop:
	cancel_delay_drop(dev);
	destroy_umrc_res(dev);

err_dev:
	ib_unregister_device(&dev->ib_dev);

err_fp_bfreg:
	mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);

err_bfreg:
	mlx5_free_bfreg(dev->mdev, &dev->bfreg);

err_uar_page:
	mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);

err_cong:
	mlx5_ib_cleanup_cong_debugfs(dev);
err_cnt:
	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		mlx5_ib_dealloc_counters(dev);

err_odp:
	mlx5_ib_odp_remove_one(dev);

err_rsrc:
	destroy_dev_resources(&dev->devr);

err_disable_eth:
	if (ll == IB_LINK_LAYER_ETHERNET) {
		mlx5_disable_eth(dev);
		mlx5_remove_netdev_notifier(dev);
	}

err_free_port:
	kfree(dev->port);

err_dealloc:
	ib_dealloc_device((struct ib_device *)dev);

	return NULL;
}
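
/*
 * Remove path: undo everything done in mlx5_ib_add() in reverse order,
 * starting with the delay-drop machinery and the netdev notifier and
 * ending with freeing the port array and the ib_device itself.
 */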
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
	struct mlx5_ib_dev *dev = context;
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);

	cancel_delay_drop(dev);
	mlx5_remove_netdev_notifier(dev);
	ib_unregister_device(&dev->ib_dev);
	mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
	mlx5_free_bfreg(dev->mdev, &dev->bfreg);
	mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
	mlx5_ib_cleanup_cong_debugfs(dev);
	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		mlx5_ib_dealloc_counters(dev);
	destroy_umrc_res(dev);
	mlx5_ib_odp_remove_one(dev);
	destroy_dev_resources(&dev->devr);
	if (ll == IB_LINK_LAYER_ETHERNET)
		mlx5_disable_eth(dev);
	kfree(dev->port);
	ib_dealloc_device(&dev->ib_dev);
}

static struct mlx5_interface mlx5_ib_interface = {
	.add = mlx5_ib_add,
	.remove = mlx5_ib_remove,
	.event = mlx5_ib_event,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	.pfault = mlx5_ib_pfault,
#endif
	.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};

static int __init mlx5_ib_init(void)
{
	int err;

	mlx5_ib_odp_init();

	err = mlx5_register_interface(&mlx5_ib_interface);

	return err;
}

static void __exit mlx5_ib_cleanup(void)
{
	mlx5_unregister_interface(&mlx5_ib_interface);
}

module_init(mlx5_ib_init);
module_exit(mlx5_ib_cleanup);