main.c 114 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511
661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542
055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943
294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383
238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305
  1. /*
  2. * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  3. *
  4. * This software is available to you under a choice of one of two
  5. * licenses. You may choose to be licensed under the terms of the GNU
  6. * General Public License (GPL) Version 2, available from the file
  7. * COPYING in the main directory of this source tree, or the
  8. * OpenIB.org BSD license below:
  9. *
  10. * Redistribution and use in source and binary forms, with or
  11. * without modification, are permitted provided that the following
  12. * conditions are met:
  13. *
  14. * - Redistributions of source code must retain the above
  15. * copyright notice, this list of conditions and the following
  16. * disclaimer.
  17. *
  18. * - Redistributions in binary form must reproduce the above
  19. * copyright notice, this list of conditions and the following
  20. * disclaimer in the documentation and/or other materials
  21. * provided with the distribution.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30. * SOFTWARE.
  31. */
  32. #include <linux/debugfs.h>
  33. #include <linux/highmem.h>
  34. #include <linux/module.h>
  35. #include <linux/init.h>
  36. #include <linux/errno.h>
  37. #include <linux/pci.h>
  38. #include <linux/dma-mapping.h>
  39. #include <linux/slab.h>
  40. #if defined(CONFIG_X86)
  41. #include <asm/pat.h>
  42. #endif
  43. #include <linux/sched.h>
  44. #include <linux/sched/mm.h>
  45. #include <linux/sched/task.h>
  46. #include <linux/delay.h>
  47. #include <rdma/ib_user_verbs.h>
  48. #include <rdma/ib_addr.h>
  49. #include <rdma/ib_cache.h>
  50. #include <linux/mlx5/port.h>
  51. #include <linux/mlx5/vport.h>
  52. #include <linux/list.h>
  53. #include <rdma/ib_smi.h>
  54. #include <rdma/ib_umem.h>
  55. #include <linux/in.h>
  56. #include <linux/etherdevice.h>
  57. #include <linux/mlx5/fs.h>
  58. #include <linux/mlx5/vport.h>
  59. #include "mlx5_ib.h"
  60. #include "cmd.h"
  61. #include <linux/mlx5/vport.h>
  62. #define DRIVER_NAME "mlx5_ib"
  63. #define DRIVER_VERSION "5.0-0"
  64. MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
  65. MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
  66. MODULE_LICENSE("Dual BSD/GPL");
  67. static char mlx5_version[] =
  68. DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
  69. DRIVER_VERSION "\n";
  70. enum {
  71. MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
  72. };
  73. static enum rdma_link_layer
  74. mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
  75. {
  76. switch (port_type_cap) {
  77. case MLX5_CAP_PORT_TYPE_IB:
  78. return IB_LINK_LAYER_INFINIBAND;
  79. case MLX5_CAP_PORT_TYPE_ETH:
  80. return IB_LINK_LAYER_ETHERNET;
  81. default:
  82. return IB_LINK_LAYER_UNSPECIFIED;
  83. }
  84. }
  85. static enum rdma_link_layer
  86. mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
  87. {
  88. struct mlx5_ib_dev *dev = to_mdev(device);
  89. int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
  90. return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
  91. }
  92. static int get_port_state(struct ib_device *ibdev,
  93. u8 port_num,
  94. enum ib_port_state *state)
  95. {
  96. struct ib_port_attr attr;
  97. int ret;
  98. memset(&attr, 0, sizeof(attr));
  99. ret = mlx5_ib_query_port(ibdev, port_num, &attr);
  100. if (!ret)
  101. *state = attr.state;
  102. return ret;
  103. }
  104. static int mlx5_netdev_event(struct notifier_block *this,
  105. unsigned long event, void *ptr)
  106. {
  107. struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
  108. struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
  109. roce.nb);
  110. switch (event) {
  111. case NETDEV_REGISTER:
  112. case NETDEV_UNREGISTER:
  113. write_lock(&ibdev->roce.netdev_lock);
  114. if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
  115. ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ?
  116. NULL : ndev;
  117. write_unlock(&ibdev->roce.netdev_lock);
  118. break;
  119. case NETDEV_CHANGE:
  120. case NETDEV_UP:
  121. case NETDEV_DOWN: {
  122. struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
  123. struct net_device *upper = NULL;
  124. if (lag_ndev) {
  125. upper = netdev_master_upper_dev_get(lag_ndev);
  126. dev_put(lag_ndev);
  127. }
  128. if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
  129. && ibdev->ib_active) {
  130. struct ib_event ibev = { };
  131. enum ib_port_state port_state;
  132. if (get_port_state(&ibdev->ib_dev, 1, &port_state))
  133. return NOTIFY_DONE;
  134. if (ibdev->roce.last_port_state == port_state)
  135. return NOTIFY_DONE;
  136. ibdev->roce.last_port_state = port_state;
  137. ibev.device = &ibdev->ib_dev;
  138. if (port_state == IB_PORT_DOWN)
  139. ibev.event = IB_EVENT_PORT_ERR;
  140. else if (port_state == IB_PORT_ACTIVE)
  141. ibev.event = IB_EVENT_PORT_ACTIVE;
  142. else
  143. return NOTIFY_DONE;
  144. ibev.element.port_num = 1;
  145. ib_dispatch_event(&ibev);
  146. }
  147. break;
  148. }
  149. default:
  150. break;
  151. }
  152. return NOTIFY_DONE;
  153. }
  154. static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
  155. u8 port_num)
  156. {
  157. struct mlx5_ib_dev *ibdev = to_mdev(device);
  158. struct net_device *ndev;
  159. ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
  160. if (ndev)
  161. return ndev;
  162. /* Ensure ndev does not disappear before we invoke dev_hold()
  163. */
  164. read_lock(&ibdev->roce.netdev_lock);
  165. ndev = ibdev->roce.netdev;
  166. if (ndev)
  167. dev_hold(ndev);
  168. read_unlock(&ibdev->roce.netdev_lock);
  169. return ndev;
  170. }
  171. static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
  172. u8 *active_width)
  173. {
  174. switch (eth_proto_oper) {
  175. case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
  176. case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
  177. case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
  178. case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
  179. *active_width = IB_WIDTH_1X;
  180. *active_speed = IB_SPEED_SDR;
  181. break;
  182. case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
  183. case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
  184. case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
  185. case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
  186. case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
  187. case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
  188. case MLX5E_PROT_MASK(MLX5E_10GBASE_ER):
  189. *active_width = IB_WIDTH_1X;
  190. *active_speed = IB_SPEED_QDR;
  191. break;
  192. case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
  193. case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
  194. case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
  195. *active_width = IB_WIDTH_1X;
  196. *active_speed = IB_SPEED_EDR;
  197. break;
  198. case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
  199. case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
  200. case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
  201. case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4):
  202. *active_width = IB_WIDTH_4X;
  203. *active_speed = IB_SPEED_QDR;
  204. break;
  205. case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
  206. case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
  207. case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
  208. *active_width = IB_WIDTH_1X;
  209. *active_speed = IB_SPEED_HDR;
  210. break;
  211. case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
  212. *active_width = IB_WIDTH_4X;
  213. *active_speed = IB_SPEED_FDR;
  214. break;
  215. case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
  216. case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
  217. case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
  218. case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
  219. *active_width = IB_WIDTH_4X;
  220. *active_speed = IB_SPEED_EDR;
  221. break;
  222. default:
  223. return -EINVAL;
  224. }
  225. return 0;
  226. }
  227. static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
  228. struct ib_port_attr *props)
  229. {
  230. struct mlx5_ib_dev *dev = to_mdev(device);
  231. struct mlx5_core_dev *mdev = dev->mdev;
  232. struct net_device *ndev, *upper;
  233. enum ib_mtu ndev_ib_mtu;
  234. u16 qkey_viol_cntr;
  235. u32 eth_prot_oper;
  236. int err;
  237. /* Possible bad flows are checked before filling out props so in case
  238. * of an error it will still be zeroed out.
  239. */
  240. err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num);
  241. if (err)
  242. return err;
  243. translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
  244. &props->active_width);
  245. props->port_cap_flags |= IB_PORT_CM_SUP;
  246. props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
  247. props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
  248. roce_address_table_size);
  249. props->max_mtu = IB_MTU_4096;
  250. props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
  251. props->pkey_tbl_len = 1;
  252. props->state = IB_PORT_DOWN;
  253. props->phys_state = 3;
  254. mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
  255. props->qkey_viol_cntr = qkey_viol_cntr;
  256. ndev = mlx5_ib_get_netdev(device, port_num);
  257. if (!ndev)
  258. return 0;
  259. if (mlx5_lag_is_active(dev->mdev)) {
  260. rcu_read_lock();
  261. upper = netdev_master_upper_dev_get_rcu(ndev);
  262. if (upper) {
  263. dev_put(ndev);
  264. ndev = upper;
  265. dev_hold(ndev);
  266. }
  267. rcu_read_unlock();
  268. }
  269. if (netif_running(ndev) && netif_carrier_ok(ndev)) {
  270. props->state = IB_PORT_ACTIVE;
  271. props->phys_state = 5;
  272. }
  273. ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
  274. dev_put(ndev);
  275. props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
  276. return 0;
  277. }
  278. static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
  279. unsigned int index, const union ib_gid *gid,
  280. const struct ib_gid_attr *attr)
  281. {
  282. enum ib_gid_type gid_type = IB_GID_TYPE_IB;
  283. u8 roce_version = 0;
  284. u8 roce_l3_type = 0;
  285. bool vlan = false;
  286. u8 mac[ETH_ALEN];
  287. u16 vlan_id = 0;
  288. if (gid) {
  289. gid_type = attr->gid_type;
  290. ether_addr_copy(mac, attr->ndev->dev_addr);
  291. if (is_vlan_dev(attr->ndev)) {
  292. vlan = true;
  293. vlan_id = vlan_dev_vlan_id(attr->ndev);
  294. }
  295. }
  296. switch (gid_type) {
  297. case IB_GID_TYPE_IB:
  298. roce_version = MLX5_ROCE_VERSION_1;
  299. break;
  300. case IB_GID_TYPE_ROCE_UDP_ENCAP:
  301. roce_version = MLX5_ROCE_VERSION_2;
  302. if (ipv6_addr_v4mapped((void *)gid))
  303. roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
  304. else
  305. roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
  306. break;
  307. default:
  308. mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
  309. }
  310. return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
  311. roce_l3_type, gid->raw, mac, vlan,
  312. vlan_id);
  313. }
  314. static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
  315. unsigned int index, const union ib_gid *gid,
  316. const struct ib_gid_attr *attr,
  317. __always_unused void **context)
  318. {
  319. return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
  320. }
  321. static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
  322. unsigned int index, __always_unused void **context)
  323. {
  324. return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
  325. }
  326. __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
  327. int index)
  328. {
  329. struct ib_gid_attr attr;
  330. union ib_gid gid;
  331. if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
  332. return 0;
  333. if (!attr.ndev)
  334. return 0;
  335. dev_put(attr.ndev);
  336. if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
  337. return 0;
  338. return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
  339. }
  340. int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
  341. int index, enum ib_gid_type *gid_type)
  342. {
  343. struct ib_gid_attr attr;
  344. union ib_gid gid;
  345. int ret;
  346. ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
  347. if (ret)
  348. return ret;
  349. if (!attr.ndev)
  350. return -ENODEV;
  351. dev_put(attr.ndev);
  352. *gid_type = attr.gid_type;
  353. return 0;
  354. }
  355. static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
  356. {
  357. if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
  358. return !MLX5_CAP_GEN(dev->mdev, ib_virt);
  359. return 0;
  360. }
  361. enum {
  362. MLX5_VPORT_ACCESS_METHOD_MAD,
  363. MLX5_VPORT_ACCESS_METHOD_HCA,
  364. MLX5_VPORT_ACCESS_METHOD_NIC,
  365. };
  366. static int mlx5_get_vport_access_method(struct ib_device *ibdev)
  367. {
  368. if (mlx5_use_mad_ifc(to_mdev(ibdev)))
  369. return MLX5_VPORT_ACCESS_METHOD_MAD;
  370. if (mlx5_ib_port_link_layer(ibdev, 1) ==
  371. IB_LINK_LAYER_ETHERNET)
  372. return MLX5_VPORT_ACCESS_METHOD_NIC;
  373. return MLX5_VPORT_ACCESS_METHOD_HCA;
  374. }
  375. static void get_atomic_caps(struct mlx5_ib_dev *dev,
  376. struct ib_device_attr *props)
  377. {
  378. u8 tmp;
  379. u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
  380. u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
  381. u8 atomic_req_8B_endianness_mode =
  382. MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);
  383. /* Check if HW supports 8 bytes standard atomic operations and capable
  384. * of host endianness respond
  385. */
  386. tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
  387. if (((atomic_operations & tmp) == tmp) &&
  388. (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
  389. (atomic_req_8B_endianness_mode)) {
  390. props->atomic_cap = IB_ATOMIC_HCA;
  391. } else {
  392. props->atomic_cap = IB_ATOMIC_NONE;
  393. }
  394. }
  395. static int mlx5_query_system_image_guid(struct ib_device *ibdev,
  396. __be64 *sys_image_guid)
  397. {
  398. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  399. struct mlx5_core_dev *mdev = dev->mdev;
  400. u64 tmp;
  401. int err;
  402. switch (mlx5_get_vport_access_method(ibdev)) {
  403. case MLX5_VPORT_ACCESS_METHOD_MAD:
  404. return mlx5_query_mad_ifc_system_image_guid(ibdev,
  405. sys_image_guid);
  406. case MLX5_VPORT_ACCESS_METHOD_HCA:
  407. err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
  408. break;
  409. case MLX5_VPORT_ACCESS_METHOD_NIC:
  410. err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
  411. break;
  412. default:
  413. return -EINVAL;
  414. }
  415. if (!err)
  416. *sys_image_guid = cpu_to_be64(tmp);
  417. return err;
  418. }
  419. static int mlx5_query_max_pkeys(struct ib_device *ibdev,
  420. u16 *max_pkeys)
  421. {
  422. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  423. struct mlx5_core_dev *mdev = dev->mdev;
  424. switch (mlx5_get_vport_access_method(ibdev)) {
  425. case MLX5_VPORT_ACCESS_METHOD_MAD:
  426. return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);
  427. case MLX5_VPORT_ACCESS_METHOD_HCA:
  428. case MLX5_VPORT_ACCESS_METHOD_NIC:
  429. *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
  430. pkey_table_size));
  431. return 0;
  432. default:
  433. return -EINVAL;
  434. }
  435. }
  436. static int mlx5_query_vendor_id(struct ib_device *ibdev,
  437. u32 *vendor_id)
  438. {
  439. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  440. switch (mlx5_get_vport_access_method(ibdev)) {
  441. case MLX5_VPORT_ACCESS_METHOD_MAD:
  442. return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);
  443. case MLX5_VPORT_ACCESS_METHOD_HCA:
  444. case MLX5_VPORT_ACCESS_METHOD_NIC:
  445. return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
  446. default:
  447. return -EINVAL;
  448. }
  449. }
  450. static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
  451. __be64 *node_guid)
  452. {
  453. u64 tmp;
  454. int err;
  455. switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
  456. case MLX5_VPORT_ACCESS_METHOD_MAD:
  457. return mlx5_query_mad_ifc_node_guid(dev, node_guid);
  458. case MLX5_VPORT_ACCESS_METHOD_HCA:
  459. err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
  460. break;
  461. case MLX5_VPORT_ACCESS_METHOD_NIC:
  462. err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
  463. break;
  464. default:
  465. return -EINVAL;
  466. }
  467. if (!err)
  468. *node_guid = cpu_to_be64(tmp);
  469. return err;
  470. }
  471. struct mlx5_reg_node_desc {
  472. u8 desc[IB_DEVICE_NODE_DESC_MAX];
  473. };
  474. static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
  475. {
  476. struct mlx5_reg_node_desc in;
  477. if (mlx5_use_mad_ifc(dev))
  478. return mlx5_query_mad_ifc_node_desc(dev, node_desc);
  479. memset(&in, 0, sizeof(in));
  480. return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
  481. sizeof(struct mlx5_reg_node_desc),
  482. MLX5_REG_NODE_DESC, 0, 0);
  483. }
  484. static int mlx5_ib_query_device(struct ib_device *ibdev,
  485. struct ib_device_attr *props,
  486. struct ib_udata *uhw)
  487. {
  488. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  489. struct mlx5_core_dev *mdev = dev->mdev;
  490. int err = -ENOMEM;
  491. int max_sq_desc;
  492. int max_rq_sg;
  493. int max_sq_sg;
  494. u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
  495. struct mlx5_ib_query_device_resp resp = {};
  496. size_t resp_len;
  497. u64 max_tso;
  498. resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
  499. if (uhw->outlen && uhw->outlen < resp_len)
  500. return -EINVAL;
  501. else
  502. resp.response_length = resp_len;
  503. if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
  504. return -EINVAL;
  505. memset(props, 0, sizeof(*props));
  506. err = mlx5_query_system_image_guid(ibdev,
  507. &props->sys_image_guid);
  508. if (err)
  509. return err;
  510. err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
  511. if (err)
  512. return err;
  513. err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
  514. if (err)
  515. return err;
  516. props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
  517. (fw_rev_min(dev->mdev) << 16) |
  518. fw_rev_sub(dev->mdev);
  519. props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
  520. IB_DEVICE_PORT_ACTIVE_EVENT |
  521. IB_DEVICE_SYS_IMAGE_GUID |
  522. IB_DEVICE_RC_RNR_NAK_GEN;
  523. if (MLX5_CAP_GEN(mdev, pkv))
  524. props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
  525. if (MLX5_CAP_GEN(mdev, qkv))
  526. props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
  527. if (MLX5_CAP_GEN(mdev, apm))
  528. props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
  529. if (MLX5_CAP_GEN(mdev, xrc))
  530. props->device_cap_flags |= IB_DEVICE_XRC;
  531. if (MLX5_CAP_GEN(mdev, imaicl)) {
  532. props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
  533. IB_DEVICE_MEM_WINDOW_TYPE_2B;
  534. props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
  535. /* We support 'Gappy' memory registration too */
  536. props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
  537. }
  538. props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
  539. if (MLX5_CAP_GEN(mdev, sho)) {
  540. props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
  541. /* At this stage no support for signature handover */
  542. props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
  543. IB_PROT_T10DIF_TYPE_2 |
  544. IB_PROT_T10DIF_TYPE_3;
  545. props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
  546. IB_GUARD_T10DIF_CSUM;
  547. }
  548. if (MLX5_CAP_GEN(mdev, block_lb_mc))
  549. props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
  550. if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
  551. if (MLX5_CAP_ETH(mdev, csum_cap)) {
  552. /* Legacy bit to support old userspace libraries */
  553. props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
  554. props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
  555. }
  556. if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
  557. props->raw_packet_caps |=
  558. IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
  559. if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
  560. max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
  561. if (max_tso) {
  562. resp.tso_caps.max_tso = 1 << max_tso;
  563. resp.tso_caps.supported_qpts |=
  564. 1 << IB_QPT_RAW_PACKET;
  565. resp.response_length += sizeof(resp.tso_caps);
  566. }
  567. }
  568. if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
  569. resp.rss_caps.rx_hash_function =
  570. MLX5_RX_HASH_FUNC_TOEPLITZ;
  571. resp.rss_caps.rx_hash_fields_mask =
  572. MLX5_RX_HASH_SRC_IPV4 |
  573. MLX5_RX_HASH_DST_IPV4 |
  574. MLX5_RX_HASH_SRC_IPV6 |
  575. MLX5_RX_HASH_DST_IPV6 |
  576. MLX5_RX_HASH_SRC_PORT_TCP |
  577. MLX5_RX_HASH_DST_PORT_TCP |
  578. MLX5_RX_HASH_SRC_PORT_UDP |
  579. MLX5_RX_HASH_DST_PORT_UDP;
  580. resp.response_length += sizeof(resp.rss_caps);
  581. }
  582. } else {
  583. if (field_avail(typeof(resp), tso_caps, uhw->outlen))
  584. resp.response_length += sizeof(resp.tso_caps);
  585. if (field_avail(typeof(resp), rss_caps, uhw->outlen))
  586. resp.response_length += sizeof(resp.rss_caps);
  587. }
  588. if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
  589. props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
  590. props->device_cap_flags |= IB_DEVICE_UD_TSO;
  591. }
  592. if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
  593. MLX5_CAP_GEN(dev->mdev, general_notification_event))
  594. props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
  595. if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
  596. MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
  597. props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
  598. if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
  599. MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
  600. /* Legacy bit to support old userspace libraries */
  601. props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
  602. props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
  603. }
  604. if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
  605. props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
  606. if (MLX5_CAP_GEN(mdev, end_pad))
  607. props->device_cap_flags |= IB_DEVICE_PCI_WRITE_END_PADDING;
  608. props->vendor_part_id = mdev->pdev->device;
  609. props->hw_ver = mdev->pdev->revision;
  610. props->max_mr_size = ~0ull;
  611. props->page_size_cap = ~(min_page_size - 1);
  612. props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
  613. props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
  614. max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
  615. sizeof(struct mlx5_wqe_data_seg);
  616. max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
  617. max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
  618. sizeof(struct mlx5_wqe_raddr_seg)) /
  619. sizeof(struct mlx5_wqe_data_seg);
  620. props->max_sge = min(max_rq_sg, max_sq_sg);
  621. props->max_sge_rd = MLX5_MAX_SGE_RD;
  622. props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
  623. props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
  624. props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
  625. props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
  626. props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
  627. props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
  628. props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
  629. props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
  630. props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
  631. props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
  632. props->max_srq_sge = max_rq_sg - 1;
  633. props->max_fast_reg_page_list_len =
  634. 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
  635. get_atomic_caps(dev, props);
  636. props->masked_atomic_cap = IB_ATOMIC_NONE;
  637. props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
  638. props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
  639. props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
  640. props->max_mcast_grp;
  641. props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
  642. props->max_ah = INT_MAX;
  643. props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
  644. props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
  645. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  646. if (MLX5_CAP_GEN(mdev, pg))
  647. props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
  648. props->odp_caps = dev->odp_caps;
  649. #endif
  650. if (MLX5_CAP_GEN(mdev, cd))
  651. props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
  652. if (!mlx5_core_is_pf(mdev))
  653. props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
  654. if (mlx5_ib_port_link_layer(ibdev, 1) ==
  655. IB_LINK_LAYER_ETHERNET) {
  656. props->rss_caps.max_rwq_indirection_tables =
  657. 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
  658. props->rss_caps.max_rwq_indirection_table_size =
  659. 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size);
  660. props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
  661. props->max_wq_type_rq =
  662. 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
  663. }
  664. if (MLX5_CAP_GEN(mdev, tag_matching)) {
  665. props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
  666. props->tm_caps.max_num_tags =
  667. (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
  668. props->tm_caps.flags = IB_TM_CAP_RC;
  669. props->tm_caps.max_ops =
  670. 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
  671. props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
  672. }
  673. if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) {
  674. props->cq_caps.max_cq_moderation_count =
  675. MLX5_MAX_CQ_COUNT;
  676. props->cq_caps.max_cq_moderation_period =
  677. MLX5_MAX_CQ_PERIOD;
  678. }
  679. if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
  680. resp.cqe_comp_caps.max_num =
  681. MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
  682. MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
  683. resp.cqe_comp_caps.supported_format =
  684. MLX5_IB_CQE_RES_FORMAT_HASH |
  685. MLX5_IB_CQE_RES_FORMAT_CSUM;
  686. resp.response_length += sizeof(resp.cqe_comp_caps);
  687. }
  688. if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) {
  689. if (MLX5_CAP_QOS(mdev, packet_pacing) &&
  690. MLX5_CAP_GEN(mdev, qos)) {
  691. resp.packet_pacing_caps.qp_rate_limit_max =
  692. MLX5_CAP_QOS(mdev, packet_pacing_max_rate);
  693. resp.packet_pacing_caps.qp_rate_limit_min =
  694. MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
  695. resp.packet_pacing_caps.supported_qpts |=
  696. 1 << IB_QPT_RAW_PACKET;
  697. }
  698. resp.response_length += sizeof(resp.packet_pacing_caps);
  699. }
  700. if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
  701. uhw->outlen)) {
  702. if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
  703. resp.mlx5_ib_support_multi_pkt_send_wqes =
  704. MLX5_IB_ALLOW_MPW;
  705. if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
  706. resp.mlx5_ib_support_multi_pkt_send_wqes |=
  707. MLX5_IB_SUPPORT_EMPW;
  708. resp.response_length +=
  709. sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
  710. }
  711. if (field_avail(typeof(resp), flags, uhw->outlen)) {
  712. resp.response_length += sizeof(resp.flags);
  713. if (MLX5_CAP_GEN(mdev, cqe_compression_128))
  714. resp.flags |=
  715. MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP;
  716. if (MLX5_CAP_GEN(mdev, cqe_128_always))
  717. resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
  718. }
  719. if (field_avail(typeof(resp), sw_parsing_caps,
  720. uhw->outlen)) {
  721. resp.response_length += sizeof(resp.sw_parsing_caps);
  722. if (MLX5_CAP_ETH(mdev, swp)) {
  723. resp.sw_parsing_caps.sw_parsing_offloads |=
  724. MLX5_IB_SW_PARSING;
  725. if (MLX5_CAP_ETH(mdev, swp_csum))
  726. resp.sw_parsing_caps.sw_parsing_offloads |=
  727. MLX5_IB_SW_PARSING_CSUM;
  728. if (MLX5_CAP_ETH(mdev, swp_lso))
  729. resp.sw_parsing_caps.sw_parsing_offloads |=
  730. MLX5_IB_SW_PARSING_LSO;
  731. if (resp.sw_parsing_caps.sw_parsing_offloads)
  732. resp.sw_parsing_caps.supported_qpts =
  733. BIT(IB_QPT_RAW_PACKET);
  734. }
  735. }
  736. if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen)) {
  737. resp.response_length += sizeof(resp.striding_rq_caps);
  738. if (MLX5_CAP_GEN(mdev, striding_rq)) {
  739. resp.striding_rq_caps.min_single_stride_log_num_of_bytes =
  740. MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
  741. resp.striding_rq_caps.max_single_stride_log_num_of_bytes =
  742. MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
  743. resp.striding_rq_caps.min_single_wqe_log_num_of_strides =
  744. MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
  745. resp.striding_rq_caps.max_single_wqe_log_num_of_strides =
  746. MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
  747. resp.striding_rq_caps.supported_qpts =
  748. BIT(IB_QPT_RAW_PACKET);
  749. }
  750. }
  751. if (field_avail(typeof(resp), tunnel_offloads_caps,
  752. uhw->outlen)) {
  753. resp.response_length += sizeof(resp.tunnel_offloads_caps);
  754. if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan))
  755. resp.tunnel_offloads_caps |=
  756. MLX5_IB_TUNNELED_OFFLOADS_VXLAN;
  757. if (MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx))
  758. resp.tunnel_offloads_caps |=
  759. MLX5_IB_TUNNELED_OFFLOADS_GENEVE;
  760. if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
  761. resp.tunnel_offloads_caps |=
  762. MLX5_IB_TUNNELED_OFFLOADS_GRE;
  763. }
  764. if (uhw->outlen) {
  765. err = ib_copy_to_udata(uhw, &resp, resp.response_length);
  766. if (err)
  767. return err;
  768. }
  769. return 0;
  770. }
  771. enum mlx5_ib_width {
  772. MLX5_IB_WIDTH_1X = 1 << 0,
  773. MLX5_IB_WIDTH_2X = 1 << 1,
  774. MLX5_IB_WIDTH_4X = 1 << 2,
  775. MLX5_IB_WIDTH_8X = 1 << 3,
  776. MLX5_IB_WIDTH_12X = 1 << 4
  777. };
  778. static int translate_active_width(struct ib_device *ibdev, u8 active_width,
  779. u8 *ib_width)
  780. {
  781. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  782. int err = 0;
  783. if (active_width & MLX5_IB_WIDTH_1X) {
  784. *ib_width = IB_WIDTH_1X;
  785. } else if (active_width & MLX5_IB_WIDTH_2X) {
  786. mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
  787. (int)active_width);
  788. err = -EINVAL;
  789. } else if (active_width & MLX5_IB_WIDTH_4X) {
  790. *ib_width = IB_WIDTH_4X;
  791. } else if (active_width & MLX5_IB_WIDTH_8X) {
  792. *ib_width = IB_WIDTH_8X;
  793. } else if (active_width & MLX5_IB_WIDTH_12X) {
  794. *ib_width = IB_WIDTH_12X;
  795. } else {
  796. mlx5_ib_dbg(dev, "Invalid active_width %d\n",
  797. (int)active_width);
  798. err = -EINVAL;
  799. }
  800. return err;
  801. }
  802. static int mlx5_mtu_to_ib_mtu(int mtu)
  803. {
  804. switch (mtu) {
  805. case 256: return 1;
  806. case 512: return 2;
  807. case 1024: return 3;
  808. case 2048: return 4;
  809. case 4096: return 5;
  810. default:
  811. pr_warn("invalid mtu\n");
  812. return -1;
  813. }
  814. }
  815. enum ib_max_vl_num {
  816. __IB_MAX_VL_0 = 1,
  817. __IB_MAX_VL_0_1 = 2,
  818. __IB_MAX_VL_0_3 = 3,
  819. __IB_MAX_VL_0_7 = 4,
  820. __IB_MAX_VL_0_14 = 5,
  821. };
  822. enum mlx5_vl_hw_cap {
  823. MLX5_VL_HW_0 = 1,
  824. MLX5_VL_HW_0_1 = 2,
  825. MLX5_VL_HW_0_2 = 3,
  826. MLX5_VL_HW_0_3 = 4,
  827. MLX5_VL_HW_0_4 = 5,
  828. MLX5_VL_HW_0_5 = 6,
  829. MLX5_VL_HW_0_6 = 7,
  830. MLX5_VL_HW_0_7 = 8,
  831. MLX5_VL_HW_0_14 = 15
  832. };
  833. static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
  834. u8 *max_vl_num)
  835. {
  836. switch (vl_hw_cap) {
  837. case MLX5_VL_HW_0:
  838. *max_vl_num = __IB_MAX_VL_0;
  839. break;
  840. case MLX5_VL_HW_0_1:
  841. *max_vl_num = __IB_MAX_VL_0_1;
  842. break;
  843. case MLX5_VL_HW_0_3:
  844. *max_vl_num = __IB_MAX_VL_0_3;
  845. break;
  846. case MLX5_VL_HW_0_7:
  847. *max_vl_num = __IB_MAX_VL_0_7;
  848. break;
  849. case MLX5_VL_HW_0_14:
  850. *max_vl_num = __IB_MAX_VL_0_14;
  851. break;
  852. default:
  853. return -EINVAL;
  854. }
  855. return 0;
  856. }
  857. static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
  858. struct ib_port_attr *props)
  859. {
  860. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  861. struct mlx5_core_dev *mdev = dev->mdev;
  862. struct mlx5_hca_vport_context *rep;
  863. u16 max_mtu;
  864. u16 oper_mtu;
  865. int err;
  866. u8 ib_link_width_oper;
  867. u8 vl_hw_cap;
  868. rep = kzalloc(sizeof(*rep), GFP_KERNEL);
  869. if (!rep) {
  870. err = -ENOMEM;
  871. goto out;
  872. }
  873. /* props being zeroed by the caller, avoid zeroing it here */
  874. err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
  875. if (err)
  876. goto out;
  877. props->lid = rep->lid;
  878. props->lmc = rep->lmc;
  879. props->sm_lid = rep->sm_lid;
  880. props->sm_sl = rep->sm_sl;
  881. props->state = rep->vport_state;
  882. props->phys_state = rep->port_physical_state;
  883. props->port_cap_flags = rep->cap_mask1;
  884. props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
  885. props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
  886. props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
  887. props->bad_pkey_cntr = rep->pkey_violation_counter;
  888. props->qkey_viol_cntr = rep->qkey_violation_counter;
  889. props->subnet_timeout = rep->subnet_timeout;
  890. props->init_type_reply = rep->init_type_reply;
  891. props->grh_required = rep->grh_required;
  892. err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
  893. if (err)
  894. goto out;
  895. err = translate_active_width(ibdev, ib_link_width_oper,
  896. &props->active_width);
  897. if (err)
  898. goto out;
  899. err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
  900. if (err)
  901. goto out;
  902. mlx5_query_port_max_mtu(mdev, &max_mtu, port);
  903. props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
  904. mlx5_query_port_oper_mtu(mdev, &oper_mtu, port);
  905. props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu);
  906. err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port);
  907. if (err)
  908. goto out;
  909. err = translate_max_vl_num(ibdev, vl_hw_cap,
  910. &props->max_vl_num);
  911. out:
  912. kfree(rep);
  913. return err;
  914. }
  915. int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
  916. struct ib_port_attr *props)
  917. {
  918. unsigned int count;
  919. int ret;
  920. switch (mlx5_get_vport_access_method(ibdev)) {
  921. case MLX5_VPORT_ACCESS_METHOD_MAD:
  922. ret = mlx5_query_mad_ifc_port(ibdev, port, props);
  923. break;
  924. case MLX5_VPORT_ACCESS_METHOD_HCA:
  925. ret = mlx5_query_hca_port(ibdev, port, props);
  926. break;
  927. case MLX5_VPORT_ACCESS_METHOD_NIC:
  928. ret = mlx5_query_port_roce(ibdev, port, props);
  929. break;
  930. default:
  931. ret = -EINVAL;
  932. }
  933. if (!ret && props) {
  934. count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev);
  935. props->gid_tbl_len -= count;
  936. }
  937. return ret;
  938. }
  939. static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
  940. union ib_gid *gid)
  941. {
  942. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  943. struct mlx5_core_dev *mdev = dev->mdev;
  944. switch (mlx5_get_vport_access_method(ibdev)) {
  945. case MLX5_VPORT_ACCESS_METHOD_MAD:
  946. return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);
  947. case MLX5_VPORT_ACCESS_METHOD_HCA:
  948. return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid);
  949. default:
  950. return -EINVAL;
  951. }
  952. }
  953. static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
  954. u16 *pkey)
  955. {
  956. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  957. struct mlx5_core_dev *mdev = dev->mdev;
  958. switch (mlx5_get_vport_access_method(ibdev)) {
  959. case MLX5_VPORT_ACCESS_METHOD_MAD:
  960. return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);
  961. case MLX5_VPORT_ACCESS_METHOD_HCA:
  962. case MLX5_VPORT_ACCESS_METHOD_NIC:
  963. return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
  964. pkey);
  965. default:
  966. return -EINVAL;
  967. }
  968. }
  969. static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
  970. struct ib_device_modify *props)
  971. {
  972. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  973. struct mlx5_reg_node_desc in;
  974. struct mlx5_reg_node_desc out;
  975. int err;
  976. if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
  977. return -EOPNOTSUPP;
  978. if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
  979. return 0;
  980. /*
  981. * If possible, pass node desc to FW, so it can generate
  982. * a 144 trap. If cmd fails, just ignore.
  983. */
  984. memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
  985. err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
  986. sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
  987. if (err)
  988. return err;
  989. memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
  990. return err;
  991. }
  992. static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
  993. u32 value)
  994. {
  995. struct mlx5_hca_vport_context ctx = {};
  996. int err;
  997. err = mlx5_query_hca_vport_context(dev->mdev, 0,
  998. port_num, 0, &ctx);
  999. if (err)
  1000. return err;
  1001. if (~ctx.cap_mask1_perm & mask) {
  1002. mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
  1003. mask, ctx.cap_mask1_perm);
  1004. return -EINVAL;
  1005. }
  1006. ctx.cap_mask1 = value;
  1007. ctx.cap_mask1_perm = mask;
  1008. err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
  1009. port_num, 0, &ctx);
  1010. return err;
  1011. }
  1012. static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
  1013. struct ib_port_modify *props)
  1014. {
  1015. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  1016. struct ib_port_attr attr;
  1017. u32 tmp;
  1018. int err;
  1019. u32 change_mask;
  1020. u32 value;
  1021. bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
  1022. IB_LINK_LAYER_INFINIBAND);
  1023. /* CM layer calls ib_modify_port() regardless of the link layer. For
  1024. * Ethernet ports, qkey violation and Port capabilities are meaningless.
  1025. */
  1026. if (!is_ib)
  1027. return 0;
  1028. if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
  1029. change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
  1030. value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
  1031. return set_port_caps_atomic(dev, port, change_mask, value);
  1032. }
  1033. mutex_lock(&dev->cap_mask_mutex);
  1034. err = ib_query_port(ibdev, port, &attr);
  1035. if (err)
  1036. goto out;
  1037. tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
  1038. ~props->clr_port_cap_mask;
  1039. err = mlx5_set_port_caps(dev->mdev, port, tmp);
  1040. out:
  1041. mutex_unlock(&dev->cap_mask_mutex);
  1042. return err;
  1043. }
  1044. static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
  1045. {
  1046. mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
  1047. caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
  1048. }
  1049. static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
  1050. struct mlx5_ib_alloc_ucontext_req_v2 *req,
  1051. u32 *num_sys_pages)
  1052. {
  1053. int uars_per_sys_page;
  1054. int bfregs_per_sys_page;
  1055. int ref_bfregs = req->total_num_bfregs;
  1056. if (req->total_num_bfregs == 0)
  1057. return -EINVAL;
  1058. BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
  1059. BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);
  1060. if (req->total_num_bfregs > MLX5_MAX_BFREGS)
  1061. return -ENOMEM;
  1062. uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
  1063. bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
  1064. req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
  1065. *num_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
  1066. if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
  1067. return -EINVAL;
  1068. mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n",
  1069. MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
  1070. lib_uar_4k ? "yes" : "no", ref_bfregs,
  1071. req->total_num_bfregs, *num_sys_pages);
  1072. return 0;
  1073. }
  1074. static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
  1075. {
  1076. struct mlx5_bfreg_info *bfregi;
  1077. int err;
  1078. int i;
  1079. bfregi = &context->bfregi;
  1080. for (i = 0; i < bfregi->num_sys_pages; i++) {
  1081. err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
  1082. if (err)
  1083. goto error;
  1084. mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
  1085. }
  1086. return 0;
  1087. error:
  1088. for (--i; i >= 0; i--)
  1089. if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
  1090. mlx5_ib_warn(dev, "failed to free uar %d\n", i);
  1091. return err;
  1092. }
  1093. static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
  1094. {
  1095. struct mlx5_bfreg_info *bfregi;
  1096. int err;
  1097. int i;
  1098. bfregi = &context->bfregi;
  1099. for (i = 0; i < bfregi->num_sys_pages; i++) {
  1100. err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
  1101. if (err) {
  1102. mlx5_ib_warn(dev, "failed to free uar %d\n", i);
  1103. return err;
  1104. }
  1105. }
  1106. return 0;
  1107. }
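/*
 * Allocate a transport domain for a user context. On Ethernet ports that
 * can disable local loopback, loopback is re-enabled once a second user
 * transport domain is allocated.
 */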
  1108. static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
  1109. {
  1110. int err;
  1111. err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
  1112. if (err)
  1113. return err;
  1114. if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
  1115. !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
  1116. return err;
  1117. mutex_lock(&dev->lb_mutex);
  1118. dev->user_td++;
  1119. if (dev->user_td == 2)
  1120. err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
  1121. mutex_unlock(&dev->lb_mutex);
  1122. return err;
  1123. }
  1124. static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
  1125. {
  1126. mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
  1127. if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
  1128. !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
  1129. return;
  1130. mutex_lock(&dev->lb_mutex);
  1131. dev->user_td--;
  1132. if (dev->user_td < 2)
  1133. mlx5_nic_vport_update_local_lb(dev->mdev, false);
  1134. mutex_unlock(&dev->lb_mutex);
  1135. }
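/*
 * Allocate a verbs user context: negotiate the uverbs ABI version, size and
 * allocate the UAR/bfreg pools, optionally allocate a transport domain, and
 * report the device limits back through the response structure, trimmed to
 * the length the caller can accept.
 */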
  1136. static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
  1137. struct ib_udata *udata)
  1138. {
  1139. struct mlx5_ib_dev *dev = to_mdev(ibdev);
  1140. struct mlx5_ib_alloc_ucontext_req_v2 req = {};
  1141. struct mlx5_ib_alloc_ucontext_resp resp = {};
  1142. struct mlx5_ib_ucontext *context;
  1143. struct mlx5_bfreg_info *bfregi;
  1144. int ver;
  1145. int err;
  1146. size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
  1147. max_cqe_version);
  1148. bool lib_uar_4k;
  1149. if (!dev->ib_active)
  1150. return ERR_PTR(-EAGAIN);
  1151. if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
  1152. ver = 0;
  1153. else if (udata->inlen >= min_req_v2)
  1154. ver = 2;
  1155. else
  1156. return ERR_PTR(-EINVAL);
  1157. err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
  1158. if (err)
  1159. return ERR_PTR(err);
  1160. if (req.flags)
  1161. return ERR_PTR(-EINVAL);
  1162. if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
  1163. return ERR_PTR(-EOPNOTSUPP);
  1164. req.total_num_bfregs = ALIGN(req.total_num_bfregs,
  1165. MLX5_NON_FP_BFREGS_PER_UAR);
  1166. if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
  1167. return ERR_PTR(-EINVAL);
  1168. resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
  1169. if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
  1170. resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
  1171. resp.cache_line_size = cache_line_size();
  1172. resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
  1173. resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
  1174. resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
  1175. resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
  1176. resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
  1177. resp.cqe_version = min_t(__u8,
  1178. (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
  1179. req.max_cqe_version);
  1180. resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
  1181. MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
  1182. resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
  1183. MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
  1184. resp.response_length = min(offsetof(typeof(resp), response_length) +
  1185. sizeof(resp.response_length), udata->outlen);
  1186. context = kzalloc(sizeof(*context), GFP_KERNEL);
  1187. if (!context)
  1188. return ERR_PTR(-ENOMEM);
  1189. lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
  1190. bfregi = &context->bfregi;
  1191. /* updates req->total_num_bfregs */
  1192. err = calc_total_bfregs(dev, lib_uar_4k, &req, &bfregi->num_sys_pages);
  1193. if (err)
  1194. goto out_ctx;
  1195. mutex_init(&bfregi->lock);
  1196. bfregi->lib_uar_4k = lib_uar_4k;
  1197. bfregi->count = kcalloc(req.total_num_bfregs, sizeof(*bfregi->count),
  1198. GFP_KERNEL);
  1199. if (!bfregi->count) {
  1200. err = -ENOMEM;
  1201. goto out_ctx;
  1202. }
  1203. bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
  1204. sizeof(*bfregi->sys_pages),
  1205. GFP_KERNEL);
  1206. if (!bfregi->sys_pages) {
  1207. err = -ENOMEM;
  1208. goto out_count;
  1209. }
  1210. err = allocate_uars(dev, context);
  1211. if (err)
  1212. goto out_sys_pages;
  1213. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  1214. context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
  1215. #endif
  1216. context->upd_xlt_page = __get_free_page(GFP_KERNEL);
  1217. if (!context->upd_xlt_page) {
  1218. err = -ENOMEM;
  1219. goto out_uars;
  1220. }
  1221. mutex_init(&context->upd_xlt_page_mutex);
  1222. if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
  1223. err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
  1224. if (err)
  1225. goto out_page;
  1226. }
  1227. INIT_LIST_HEAD(&context->vma_private_list);
  1228. INIT_LIST_HEAD(&context->db_page_list);
  1229. mutex_init(&context->db_page_mutex);
  1230. resp.tot_bfregs = req.total_num_bfregs;
  1231. resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
  1232. if (field_avail(typeof(resp), cqe_version, udata->outlen))
  1233. resp.response_length += sizeof(resp.cqe_version);
  1234. if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
  1235. resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
  1236. MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
  1237. resp.response_length += sizeof(resp.cmds_supp_uhw);
  1238. }
  1239. if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
  1240. if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
  1241. mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
  1242. resp.eth_min_inline++;
  1243. }
  1244. resp.response_length += sizeof(resp.eth_min_inline);
  1245. }
/*
 * We don't want to expose information from the PCI bar that is located
 * after 4096 bytes, so if the arch only supports larger pages, pretend
 * we don't support reading the HCA's core clock. This is also enforced
 * by the mmap handler.
 */
  1252. if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
  1253. if (PAGE_SIZE <= 4096) {
  1254. resp.comp_mask |=
  1255. MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
  1256. resp.hca_core_clock_offset =
  1257. offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
  1258. }
  1259. resp.response_length += sizeof(resp.hca_core_clock_offset) +
  1260. sizeof(resp.reserved2);
  1261. }
  1262. if (field_avail(typeof(resp), log_uar_size, udata->outlen))
  1263. resp.response_length += sizeof(resp.log_uar_size);
  1264. if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
  1265. resp.response_length += sizeof(resp.num_uars_per_page);
  1266. err = ib_copy_to_udata(udata, &resp, resp.response_length);
  1267. if (err)
  1268. goto out_td;
  1269. bfregi->ver = ver;
  1270. bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
  1271. context->cqe_version = resp.cqe_version;
  1272. context->lib_caps = req.lib_caps;
  1273. print_lib_caps(dev, context->lib_caps);
  1274. return &context->ibucontext;
  1275. out_td:
  1276. if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
  1277. mlx5_ib_dealloc_transport_domain(dev, context->tdn);
  1278. out_page:
  1279. free_page(context->upd_xlt_page);
  1280. out_uars:
  1281. deallocate_uars(dev, context);
  1282. out_sys_pages:
  1283. kfree(bfregi->sys_pages);
  1284. out_count:
  1285. kfree(bfregi->count);
  1286. out_ctx:
  1287. kfree(context);
  1288. return ERR_PTR(err);
  1289. }
  1290. static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
  1291. {
  1292. struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
  1293. struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
  1294. struct mlx5_bfreg_info *bfregi;
  1295. bfregi = &context->bfregi;
  1296. if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
  1297. mlx5_ib_dealloc_transport_domain(dev, context->tdn);
  1298. free_page(context->upd_xlt_page);
  1299. deallocate_uars(dev, context);
  1300. kfree(bfregi->sys_pages);
  1301. kfree(bfregi->count);
  1302. kfree(context);
  1303. return 0;
  1304. }
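/*
 * Translate a UAR index into the PFN of the matching page in BAR 0, taking
 * 4KB-UAR mode (several UARs per system page) into account.
 */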
  1305. static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
  1306. struct mlx5_bfreg_info *bfregi,
  1307. int idx)
  1308. {
  1309. int fw_uars_per_page;
  1310. fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
  1311. return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) +
  1312. bfregi->sys_pages[idx] / fw_uars_per_page;
  1313. }
  1314. static int get_command(unsigned long offset)
  1315. {
  1316. return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
  1317. }
  1318. static int get_arg(unsigned long offset)
  1319. {
  1320. return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
  1321. }
  1322. static int get_index(unsigned long offset)
  1323. {
  1324. return get_arg(offset);
  1325. }
  1326. static void mlx5_ib_vma_open(struct vm_area_struct *area)
  1327. {
/* vma_open is called when a new VMA is created on top of our VMA. This
 * happens through either the mremap flow or split_vma (usually due to
 * mlock, madvise, munmap, etc.). We do not support cloning the VMA, as
 * it is strongly tied to the hardware. Therefore we set the vm_ops of
 * the newly created/cloned VMA to NULL, to prevent it from calling us
 * again and performing incorrect actions. We assume that the original
 * VMA is exactly one page in size, so no "splitting" operation will
 * ever happen to it.
 */
  1337. area->vm_ops = NULL;
  1338. }
  1339. static void mlx5_ib_vma_close(struct vm_area_struct *area)
  1340. {
  1341. struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
/* It's guaranteed that all VMAs opened on a FD are closed before the
 * file itself is closed, so no synchronization with the regular close
 * flow (e.g. mlx5_ib_dealloc_ucontext) is needed.
 * However, access to the VMA must be synchronized with
 * mlx5_ib_disassociate_ucontext.
 * The close operation is usually called under mm->mmap_sem, except when
 * the process is exiting.
 * The exiting case is handled explicitly as part of
 * mlx5_ib_disassociate_ucontext.
 */
  1352. mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
/* Set the VMA context pointer to NULL in the mlx5_ib driver's private
 * data to protect against a race condition in
 * mlx5_ib_disassociate_ucontext().
 */
  1357. mlx5_ib_vma_priv_data->vma = NULL;
  1358. list_del(&mlx5_ib_vma_priv_data->list);
  1359. kfree(mlx5_ib_vma_priv_data);
  1360. }
  1361. static const struct vm_operations_struct mlx5_ib_vm_ops = {
  1362. .open = mlx5_ib_vma_open,
  1363. .close = mlx5_ib_vma_close
  1364. };
  1365. static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
  1366. struct mlx5_ib_ucontext *ctx)
  1367. {
  1368. struct mlx5_ib_vma_private_data *vma_prv;
  1369. struct list_head *vma_head = &ctx->vma_private_list;
  1370. vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
  1371. if (!vma_prv)
  1372. return -ENOMEM;
  1373. vma_prv->vma = vma;
  1374. vma->vm_private_data = vma_prv;
  1375. vma->vm_ops = &mlx5_ib_vm_ops;
  1376. list_add(&vma_prv->list, vma_head);
  1377. return 0;
  1378. }
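/*
 * Detach a ucontext from its owning process (typically on device removal
 * while user mappings still exist): zap the PTEs of every VMA mapped
 * through this context and clear our vm_ops so a later munmap of the dead
 * mappings does not call back into the driver.
 */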
  1379. static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
  1380. {
  1381. int ret;
  1382. struct vm_area_struct *vma;
  1383. struct mlx5_ib_vma_private_data *vma_private, *n;
  1384. struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
  1385. struct task_struct *owning_process = NULL;
  1386. struct mm_struct *owning_mm = NULL;
  1387. owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
  1388. if (!owning_process)
  1389. return;
  1390. owning_mm = get_task_mm(owning_process);
  1391. if (!owning_mm) {
  1392. pr_info("no mm, disassociate ucontext is pending task termination\n");
  1393. while (1) {
  1394. put_task_struct(owning_process);
  1395. usleep_range(1000, 2000);
  1396. owning_process = get_pid_task(ibcontext->tgid,
  1397. PIDTYPE_PID);
  1398. if (!owning_process ||
  1399. owning_process->state == TASK_DEAD) {
  1400. pr_info("disassociate ucontext done, task was terminated\n");
/* If the task was dead, we still need to release
 * the task struct.
 */
  1404. if (owning_process)
  1405. put_task_struct(owning_process);
  1406. return;
  1407. }
  1408. }
  1409. }
/* Protect against a race with the VMAs being closed concurrently by
 * mlx5_ib_vma_close.
 */
  1413. down_write(&owning_mm->mmap_sem);
  1414. list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
  1415. list) {
  1416. vma = vma_private->vma;
  1417. ret = zap_vma_ptes(vma, vma->vm_start,
  1418. PAGE_SIZE);
  1419. WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
/* The context is going to be destroyed, so the VMA
 * ops must not be accessed any more.
 */
  1423. vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
  1424. vma->vm_ops = NULL;
  1425. list_del(&vma_private->list);
  1426. kfree(vma_private);
  1427. }
  1428. up_write(&owning_mm->mmap_sem);
  1429. mmput(owning_mm);
  1430. put_task_struct(owning_process);
  1431. }
  1432. static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
  1433. {
  1434. switch (cmd) {
  1435. case MLX5_IB_MMAP_WC_PAGE:
  1436. return "WC";
  1437. case MLX5_IB_MMAP_REGULAR_PAGE:
  1438. return "best effort WC";
  1439. case MLX5_IB_MMAP_NC_PAGE:
  1440. return "NC";
  1441. default:
  1442. return NULL;
  1443. }
  1444. }
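/*
 * Map a single UAR page into user space. WC mappings require architecture
 * support, MLX5_IB_MMAP_REGULAR_PAGE falls back to best-effort write
 * combining, and NC pages are mapped non-cached.
 */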
  1445. static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
  1446. struct vm_area_struct *vma,
  1447. struct mlx5_ib_ucontext *context)
  1448. {
  1449. struct mlx5_bfreg_info *bfregi = &context->bfregi;
  1450. int err;
  1451. unsigned long idx;
  1452. phys_addr_t pfn, pa;
  1453. pgprot_t prot;
  1454. int uars_per_page;
  1455. if (vma->vm_end - vma->vm_start != PAGE_SIZE)
  1456. return -EINVAL;
  1457. uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
  1458. idx = get_index(vma->vm_pgoff);
  1459. if (idx % uars_per_page ||
  1460. idx * uars_per_page >= bfregi->num_sys_pages) {
  1461. mlx5_ib_warn(dev, "invalid uar index %lu\n", idx);
  1462. return -EINVAL;
  1463. }
  1464. switch (cmd) {
  1465. case MLX5_IB_MMAP_WC_PAGE:
  1466. /* Some architectures don't support WC memory */
  1467. #if defined(CONFIG_X86)
  1468. if (!pat_enabled())
  1469. return -EPERM;
  1470. #elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
  1471. return -EPERM;
  1472. #endif
  1473. /* fall through */
  1474. case MLX5_IB_MMAP_REGULAR_PAGE:
/* For MLX5_IB_MMAP_REGULAR_PAGE, make a best effort to map the page as WC */
  1476. prot = pgprot_writecombine(vma->vm_page_prot);
  1477. break;
  1478. case MLX5_IB_MMAP_NC_PAGE:
  1479. prot = pgprot_noncached(vma->vm_page_prot);
  1480. break;
  1481. default:
  1482. return -EINVAL;
  1483. }
  1484. pfn = uar_index2pfn(dev, bfregi, idx);
  1485. mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
  1486. vma->vm_page_prot = prot;
  1487. err = io_remap_pfn_range(vma, vma->vm_start, pfn,
  1488. PAGE_SIZE, vma->vm_page_prot);
  1489. if (err) {
  1490. mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
  1491. err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
  1492. return -EAGAIN;
  1493. }
  1494. pa = pfn << PAGE_SHIFT;
  1495. mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
  1496. vma->vm_start, &pa);
  1497. return mlx5_ib_set_vma_data(vma, context);
  1498. }
  1499. static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
  1500. {
  1501. struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
  1502. struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
  1503. unsigned long command;
  1504. phys_addr_t pfn;
  1505. command = get_command(vma->vm_pgoff);
  1506. switch (command) {
  1507. case MLX5_IB_MMAP_WC_PAGE:
  1508. case MLX5_IB_MMAP_NC_PAGE:
  1509. case MLX5_IB_MMAP_REGULAR_PAGE:
  1510. return uar_mmap(dev, command, vma, context);
  1511. case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
  1512. return -ENOSYS;
  1513. case MLX5_IB_MMAP_CORE_CLOCK:
  1514. if (vma->vm_end - vma->vm_start != PAGE_SIZE)
  1515. return -EINVAL;
  1516. if (vma->vm_flags & VM_WRITE)
  1517. return -EPERM;
  1518. /* Don't expose to user-space information it shouldn't have */
  1519. if (PAGE_SIZE > 4096)
  1520. return -EOPNOTSUPP;
  1521. vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  1522. pfn = (dev->mdev->iseg_base +
  1523. offsetof(struct mlx5_init_seg, internal_timer_h)) >>
  1524. PAGE_SHIFT;
  1525. if (io_remap_pfn_range(vma, vma->vm_start, pfn,
  1526. PAGE_SIZE, vma->vm_page_prot))
  1527. return -EAGAIN;
  1528. mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
  1529. vma->vm_start,
  1530. (unsigned long long)pfn << PAGE_SHIFT);
  1531. break;
  1532. default:
  1533. return -EINVAL;
  1534. }
  1535. return 0;
  1536. }
  1537. static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
  1538. struct ib_ucontext *context,
  1539. struct ib_udata *udata)
  1540. {
  1541. struct mlx5_ib_alloc_pd_resp resp;
  1542. struct mlx5_ib_pd *pd;
  1543. int err;
  1544. pd = kmalloc(sizeof(*pd), GFP_KERNEL);
  1545. if (!pd)
  1546. return ERR_PTR(-ENOMEM);
  1547. err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
  1548. if (err) {
  1549. kfree(pd);
  1550. return ERR_PTR(err);
  1551. }
  1552. if (context) {
  1553. resp.pdn = pd->pdn;
  1554. if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
  1555. mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
  1556. kfree(pd);
  1557. return ERR_PTR(-EFAULT);
  1558. }
  1559. }
  1560. return &pd->ibpd;
  1561. }
  1562. static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
  1563. {
  1564. struct mlx5_ib_dev *mdev = to_mdev(pd->device);
  1565. struct mlx5_ib_pd *mpd = to_mpd(pd);
  1566. mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
  1567. kfree(mpd);
  1568. return 0;
  1569. }
  1570. enum {
  1571. MATCH_CRITERIA_ENABLE_OUTER_BIT,
  1572. MATCH_CRITERIA_ENABLE_MISC_BIT,
  1573. MATCH_CRITERIA_ENABLE_INNER_BIT
  1574. };
  1575. #define HEADER_IS_ZERO(match_criteria, headers) \
  1576. !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
  1577. 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
  1578. static u8 get_match_criteria_enable(u32 *match_criteria)
  1579. {
  1580. u8 match_criteria_enable;
  1581. match_criteria_enable =
  1582. (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
  1583. MATCH_CRITERIA_ENABLE_OUTER_BIT;
  1584. match_criteria_enable |=
  1585. (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
  1586. MATCH_CRITERIA_ENABLE_MISC_BIT;
  1587. match_criteria_enable |=
  1588. (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
  1589. MATCH_CRITERIA_ENABLE_INNER_BIT;
  1590. return match_criteria_enable;
  1591. }
  1592. static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
  1593. {
  1594. MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
  1595. MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
  1596. }
  1597. static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
  1598. bool inner)
  1599. {
  1600. if (inner) {
  1601. MLX5_SET(fte_match_set_misc,
  1602. misc_c, inner_ipv6_flow_label, mask);
  1603. MLX5_SET(fte_match_set_misc,
  1604. misc_v, inner_ipv6_flow_label, val);
  1605. } else {
  1606. MLX5_SET(fte_match_set_misc,
  1607. misc_c, outer_ipv6_flow_label, mask);
  1608. MLX5_SET(fte_match_set_misc,
  1609. misc_v, outer_ipv6_flow_label, val);
  1610. }
  1611. }
  1612. static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
  1613. {
  1614. MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
  1615. MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
  1616. MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
  1617. MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
  1618. }
  1619. #define LAST_ETH_FIELD vlan_tag
  1620. #define LAST_IB_FIELD sl
  1621. #define LAST_IPV4_FIELD tos
  1622. #define LAST_IPV6_FIELD traffic_class
  1623. #define LAST_TCP_UDP_FIELD src_port
  1624. #define LAST_TUNNEL_FIELD tunnel_id
  1625. #define LAST_FLOW_TAG_FIELD tag_id
  1626. #define LAST_DROP_FIELD size
/* 'field' is the last field in 'filter' that the driver supports; the
 * macro checks that everything after it is zero.
 */
  1628. #define FIELDS_NOT_SUPPORTED(filter, field)\
  1629. memchr_inv((void *)&filter.field +\
  1630. sizeof(filter.field), 0,\
  1631. sizeof(filter) -\
  1632. offsetof(typeof(filter), field) -\
  1633. sizeof(filter.field))
  1634. #define IPV4_VERSION 4
  1635. #define IPV6_VERSION 6
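/*
 * Translate a single ib_flow_spec into the device match criteria and match
 * value buffers. ACTION_TAG and ACTION_DROP specs are returned through
 * tag_id and is_drop instead. Returns -EOPNOTSUPP for fields the driver
 * cannot offload.
 */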
  1636. static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
  1637. u32 *match_v, const union ib_flow_spec *ib_spec,
  1638. u32 *tag_id, bool *is_drop)
  1639. {
  1640. void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
  1641. misc_parameters);
  1642. void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
  1643. misc_parameters);
  1644. void *headers_c;
  1645. void *headers_v;
  1646. int match_ipv;
  1647. if (ib_spec->type & IB_FLOW_SPEC_INNER) {
  1648. headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
  1649. inner_headers);
  1650. headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
  1651. inner_headers);
  1652. match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
  1653. ft_field_support.inner_ip_version);
  1654. } else {
  1655. headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
  1656. outer_headers);
  1657. headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
  1658. outer_headers);
  1659. match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
  1660. ft_field_support.outer_ip_version);
  1661. }
  1662. switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
  1663. case IB_FLOW_SPEC_ETH:
  1664. if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
  1665. return -EOPNOTSUPP;
  1666. ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1667. dmac_47_16),
  1668. ib_spec->eth.mask.dst_mac);
  1669. ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1670. dmac_47_16),
  1671. ib_spec->eth.val.dst_mac);
  1672. ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1673. smac_47_16),
  1674. ib_spec->eth.mask.src_mac);
  1675. ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1676. smac_47_16),
  1677. ib_spec->eth.val.src_mac);
  1678. if (ib_spec->eth.mask.vlan_tag) {
  1679. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1680. cvlan_tag, 1);
  1681. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1682. cvlan_tag, 1);
  1683. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1684. first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
  1685. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1686. first_vid, ntohs(ib_spec->eth.val.vlan_tag));
  1687. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1688. first_cfi,
  1689. ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
  1690. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1691. first_cfi,
  1692. ntohs(ib_spec->eth.val.vlan_tag) >> 12);
  1693. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1694. first_prio,
  1695. ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
  1696. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1697. first_prio,
  1698. ntohs(ib_spec->eth.val.vlan_tag) >> 13);
  1699. }
  1700. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1701. ethertype, ntohs(ib_spec->eth.mask.ether_type));
  1702. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1703. ethertype, ntohs(ib_spec->eth.val.ether_type));
  1704. break;
  1705. case IB_FLOW_SPEC_IPV4:
  1706. if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
  1707. return -EOPNOTSUPP;
  1708. if (match_ipv) {
  1709. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1710. ip_version, 0xf);
  1711. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1712. ip_version, IPV4_VERSION);
  1713. } else {
  1714. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1715. ethertype, 0xffff);
  1716. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1717. ethertype, ETH_P_IP);
  1718. }
  1719. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1720. src_ipv4_src_ipv6.ipv4_layout.ipv4),
  1721. &ib_spec->ipv4.mask.src_ip,
  1722. sizeof(ib_spec->ipv4.mask.src_ip));
  1723. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1724. src_ipv4_src_ipv6.ipv4_layout.ipv4),
  1725. &ib_spec->ipv4.val.src_ip,
  1726. sizeof(ib_spec->ipv4.val.src_ip));
  1727. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1728. dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
  1729. &ib_spec->ipv4.mask.dst_ip,
  1730. sizeof(ib_spec->ipv4.mask.dst_ip));
  1731. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1732. dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
  1733. &ib_spec->ipv4.val.dst_ip,
  1734. sizeof(ib_spec->ipv4.val.dst_ip));
  1735. set_tos(headers_c, headers_v,
  1736. ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
  1737. set_proto(headers_c, headers_v,
  1738. ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
  1739. break;
  1740. case IB_FLOW_SPEC_IPV6:
  1741. if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
  1742. return -EOPNOTSUPP;
  1743. if (match_ipv) {
  1744. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1745. ip_version, 0xf);
  1746. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1747. ip_version, IPV6_VERSION);
  1748. } else {
  1749. MLX5_SET(fte_match_set_lyr_2_4, headers_c,
  1750. ethertype, 0xffff);
  1751. MLX5_SET(fte_match_set_lyr_2_4, headers_v,
  1752. ethertype, ETH_P_IPV6);
  1753. }
  1754. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1755. src_ipv4_src_ipv6.ipv6_layout.ipv6),
  1756. &ib_spec->ipv6.mask.src_ip,
  1757. sizeof(ib_spec->ipv6.mask.src_ip));
  1758. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1759. src_ipv4_src_ipv6.ipv6_layout.ipv6),
  1760. &ib_spec->ipv6.val.src_ip,
  1761. sizeof(ib_spec->ipv6.val.src_ip));
  1762. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
  1763. dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
  1764. &ib_spec->ipv6.mask.dst_ip,
  1765. sizeof(ib_spec->ipv6.mask.dst_ip));
  1766. memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
  1767. dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
  1768. &ib_spec->ipv6.val.dst_ip,
  1769. sizeof(ib_spec->ipv6.val.dst_ip));
  1770. set_tos(headers_c, headers_v,
  1771. ib_spec->ipv6.mask.traffic_class,
  1772. ib_spec->ipv6.val.traffic_class);
  1773. set_proto(headers_c, headers_v,
  1774. ib_spec->ipv6.mask.next_hdr,
  1775. ib_spec->ipv6.val.next_hdr);
  1776. set_flow_label(misc_params_c, misc_params_v,
  1777. ntohl(ib_spec->ipv6.mask.flow_label),
  1778. ntohl(ib_spec->ipv6.val.flow_label),
  1779. ib_spec->type & IB_FLOW_SPEC_INNER);
  1780. break;
  1781. case IB_FLOW_SPEC_TCP:
  1782. if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
  1783. LAST_TCP_UDP_FIELD))
  1784. return -EOPNOTSUPP;
  1785. MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
  1786. 0xff);
  1787. MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
  1788. IPPROTO_TCP);
  1789. MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
  1790. ntohs(ib_spec->tcp_udp.mask.src_port));
  1791. MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
  1792. ntohs(ib_spec->tcp_udp.val.src_port));
  1793. MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
  1794. ntohs(ib_spec->tcp_udp.mask.dst_port));
  1795. MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
  1796. ntohs(ib_spec->tcp_udp.val.dst_port));
  1797. break;
  1798. case IB_FLOW_SPEC_UDP:
  1799. if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
  1800. LAST_TCP_UDP_FIELD))
  1801. return -EOPNOTSUPP;
  1802. MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
  1803. 0xff);
  1804. MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
  1805. IPPROTO_UDP);
  1806. MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
  1807. ntohs(ib_spec->tcp_udp.mask.src_port));
  1808. MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
  1809. ntohs(ib_spec->tcp_udp.val.src_port));
  1810. MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
  1811. ntohs(ib_spec->tcp_udp.mask.dst_port));
  1812. MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
  1813. ntohs(ib_spec->tcp_udp.val.dst_port));
  1814. break;
  1815. case IB_FLOW_SPEC_VXLAN_TUNNEL:
  1816. if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
  1817. LAST_TUNNEL_FIELD))
  1818. return -EOPNOTSUPP;
  1819. MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
  1820. ntohl(ib_spec->tunnel.mask.tunnel_id));
  1821. MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
  1822. ntohl(ib_spec->tunnel.val.tunnel_id));
  1823. break;
  1824. case IB_FLOW_SPEC_ACTION_TAG:
  1825. if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
  1826. LAST_FLOW_TAG_FIELD))
  1827. return -EOPNOTSUPP;
  1828. if (ib_spec->flow_tag.tag_id >= BIT(24))
  1829. return -EINVAL;
  1830. *tag_id = ib_spec->flow_tag.tag_id;
  1831. break;
  1832. case IB_FLOW_SPEC_ACTION_DROP:
  1833. if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
  1834. LAST_DROP_FIELD))
  1835. return -EOPNOTSUPP;
  1836. *is_drop = true;
  1837. break;
  1838. default:
  1839. return -EINVAL;
  1840. }
  1841. return 0;
  1842. }
/* If a flow could match both multicast and unicast packets, it must not
 * be placed in the multicast flow steering table, since such a rule
 * could steal packets that belong to other multicast flows.
 */
  1847. static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
  1848. {
  1849. union ib_flow_spec *flow_spec;
  1850. if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
  1851. ib_attr->num_of_specs < 1)
  1852. return false;
  1853. flow_spec = (union ib_flow_spec *)(ib_attr + 1);
  1854. if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
  1855. struct ib_flow_spec_ipv4 *ipv4_spec;
  1856. ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
  1857. if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
  1858. return true;
  1859. return false;
  1860. }
  1861. if (flow_spec->type == IB_FLOW_SPEC_ETH) {
  1862. struct ib_flow_spec_eth *eth_spec;
  1863. eth_spec = (struct ib_flow_spec_eth *)flow_spec;
  1864. return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
  1865. is_multicast_ether_addr(eth_spec->val.dst_mac);
  1866. }
  1867. return false;
  1868. }
  1869. static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
  1870. const struct ib_flow_attr *flow_attr,
  1871. bool check_inner)
  1872. {
  1873. union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
  1874. int match_ipv = check_inner ?
  1875. MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
  1876. ft_field_support.inner_ip_version) :
  1877. MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
  1878. ft_field_support.outer_ip_version);
  1879. int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
  1880. bool ipv4_spec_valid, ipv6_spec_valid;
  1881. unsigned int ip_spec_type = 0;
  1882. bool has_ethertype = false;
  1883. unsigned int spec_index;
  1884. bool mask_valid = true;
  1885. u16 eth_type = 0;
  1886. bool type_valid;
  1887. /* Validate that ethertype is correct */
  1888. for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
  1889. if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
  1890. ib_spec->eth.mask.ether_type) {
  1891. mask_valid = (ib_spec->eth.mask.ether_type ==
  1892. htons(0xffff));
  1893. has_ethertype = true;
  1894. eth_type = ntohs(ib_spec->eth.val.ether_type);
  1895. } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
  1896. (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
  1897. ip_spec_type = ib_spec->type;
  1898. }
  1899. ib_spec = (void *)ib_spec + ib_spec->size;
  1900. }
  1901. type_valid = (!has_ethertype) || (!ip_spec_type);
  1902. if (!type_valid && mask_valid) {
  1903. ipv4_spec_valid = (eth_type == ETH_P_IP) &&
  1904. (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
  1905. ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
  1906. (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
  1907. type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
  1908. (((eth_type == ETH_P_MPLS_UC) ||
  1909. (eth_type == ETH_P_MPLS_MC)) && match_ipv);
  1910. }
  1911. return type_valid;
  1912. }
  1913. static bool is_valid_attr(struct mlx5_core_dev *mdev,
  1914. const struct ib_flow_attr *flow_attr)
  1915. {
  1916. return is_valid_ethertype(mdev, flow_attr, false) &&
  1917. is_valid_ethertype(mdev, flow_attr, true);
  1918. }
  1919. static void put_flow_table(struct mlx5_ib_dev *dev,
  1920. struct mlx5_ib_flow_prio *prio, bool ft_added)
  1921. {
  1922. prio->refcount -= !!ft_added;
  1923. if (!prio->refcount) {
  1924. mlx5_destroy_flow_table(prio->flow_table);
  1925. prio->flow_table = NULL;
  1926. }
  1927. }
  1928. static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
  1929. {
  1930. struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
  1931. struct mlx5_ib_flow_handler *handler = container_of(flow_id,
  1932. struct mlx5_ib_flow_handler,
  1933. ibflow);
  1934. struct mlx5_ib_flow_handler *iter, *tmp;
  1935. mutex_lock(&dev->flow_db.lock);
  1936. list_for_each_entry_safe(iter, tmp, &handler->list, list) {
  1937. mlx5_del_flow_rules(iter->rule);
  1938. put_flow_table(dev, iter->prio, true);
  1939. list_del(&iter->list);
  1940. kfree(iter);
  1941. }
  1942. mlx5_del_flow_rules(handler->rule);
  1943. put_flow_table(dev, handler->prio, true);
  1944. mutex_unlock(&dev->flow_db.lock);
  1945. kfree(handler);
  1946. return 0;
  1947. }
  1948. static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
  1949. {
  1950. priority *= 2;
  1951. if (!dont_trap)
  1952. priority++;
  1953. return priority;
  1954. }
  1955. enum flow_table_type {
  1956. MLX5_IB_FT_RX,
  1957. MLX5_IB_FT_TX
  1958. };
  1959. #define MLX5_FS_MAX_TYPES 6
  1960. #define MLX5_FS_MAX_ENTRIES BIT(16)
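/*
 * Pick (and lazily create) the flow table backing a flow attribute: bypass
 * priorities for normal rules, the leftovers namespace for default
 * multicast/unicast rules, and the sniffer RX/TX namespaces for sniffer
 * rules.
 */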
  1961. static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
  1962. struct ib_flow_attr *flow_attr,
  1963. enum flow_table_type ft_type)
  1964. {
  1965. bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
  1966. struct mlx5_flow_namespace *ns = NULL;
  1967. struct mlx5_ib_flow_prio *prio;
  1968. struct mlx5_flow_table *ft;
  1969. int max_table_size;
  1970. int num_entries;
  1971. int num_groups;
  1972. int priority;
  1973. int err = 0;
  1974. max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
  1975. log_max_ft_size));
  1976. if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
  1977. if (flow_is_multicast_only(flow_attr) &&
  1978. !dont_trap)
  1979. priority = MLX5_IB_FLOW_MCAST_PRIO;
  1980. else
  1981. priority = ib_prio_to_core_prio(flow_attr->priority,
  1982. dont_trap);
  1983. ns = mlx5_get_flow_namespace(dev->mdev,
  1984. MLX5_FLOW_NAMESPACE_BYPASS);
  1985. num_entries = MLX5_FS_MAX_ENTRIES;
  1986. num_groups = MLX5_FS_MAX_TYPES;
  1987. prio = &dev->flow_db.prios[priority];
  1988. } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
  1989. flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
  1990. ns = mlx5_get_flow_namespace(dev->mdev,
  1991. MLX5_FLOW_NAMESPACE_LEFTOVERS);
  1992. build_leftovers_ft_param(&priority,
  1993. &num_entries,
  1994. &num_groups);
  1995. prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
  1996. } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
  1997. if (!MLX5_CAP_FLOWTABLE(dev->mdev,
  1998. allow_sniffer_and_nic_rx_shared_tir))
  1999. return ERR_PTR(-ENOTSUPP);
  2000. ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
  2001. MLX5_FLOW_NAMESPACE_SNIFFER_RX :
  2002. MLX5_FLOW_NAMESPACE_SNIFFER_TX);
  2003. prio = &dev->flow_db.sniffer[ft_type];
  2004. priority = 0;
  2005. num_entries = 1;
  2006. num_groups = 1;
  2007. }
  2008. if (!ns)
  2009. return ERR_PTR(-ENOTSUPP);
  2010. if (num_entries > max_table_size)
  2011. return ERR_PTR(-ENOMEM);
  2012. ft = prio->flow_table;
  2013. if (!ft) {
  2014. ft = mlx5_create_auto_grouped_flow_table(ns, priority,
  2015. num_entries,
  2016. num_groups,
  2017. 0, 0);
  2018. if (!IS_ERR(ft)) {
  2019. prio->refcount = 0;
  2020. prio->flow_table = ft;
  2021. } else {
  2022. err = PTR_ERR(ft);
  2023. }
  2024. }
  2025. return err ? ERR_PTR(err) : prio;
  2026. }
  2027. static void set_underlay_qp(struct mlx5_ib_dev *dev,
  2028. struct mlx5_flow_spec *spec,
  2029. u32 underlay_qpn)
  2030. {
  2031. void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
  2032. spec->match_criteria,
  2033. misc_parameters);
  2034. void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
  2035. misc_parameters);
  2036. if (underlay_qpn &&
  2037. MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
  2038. ft_field_support.bth_dst_qp)) {
  2039. MLX5_SET(fte_match_set_misc,
  2040. misc_params_v, bth_dst_qp, underlay_qpn);
  2041. MLX5_SET(fte_match_set_misc,
  2042. misc_params_c, bth_dst_qp, 0xffffff);
  2043. }
  2044. }
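/*
 * Build and install a single flow rule: parse all specs into a match
 * specification, add the underlay QPN match where applicable, and add the
 * rule with either a forward or a drop action and the requested flow tag.
 */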
  2045. static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
  2046. struct mlx5_ib_flow_prio *ft_prio,
  2047. const struct ib_flow_attr *flow_attr,
  2048. struct mlx5_flow_destination *dst,
  2049. u32 underlay_qpn)
  2050. {
  2051. struct mlx5_flow_table *ft = ft_prio->flow_table;
  2052. struct mlx5_ib_flow_handler *handler;
  2053. struct mlx5_flow_act flow_act = {0};
  2054. struct mlx5_flow_spec *spec;
  2055. struct mlx5_flow_destination *rule_dst = dst;
  2056. const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
  2057. unsigned int spec_index;
  2058. u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
  2059. bool is_drop = false;
  2060. int err = 0;
  2061. int dest_num = 1;
  2062. if (!is_valid_attr(dev->mdev, flow_attr))
  2063. return ERR_PTR(-EINVAL);
  2064. spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
  2065. handler = kzalloc(sizeof(*handler), GFP_KERNEL);
  2066. if (!handler || !spec) {
  2067. err = -ENOMEM;
  2068. goto free;
  2069. }
  2070. INIT_LIST_HEAD(&handler->list);
  2071. for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
  2072. err = parse_flow_attr(dev->mdev, spec->match_criteria,
  2073. spec->match_value,
  2074. ib_flow, &flow_tag, &is_drop);
  2075. if (err < 0)
  2076. goto free;
  2077. ib_flow += ((union ib_flow_spec *)ib_flow)->size;
  2078. }
  2079. if (!flow_is_multicast_only(flow_attr))
  2080. set_underlay_qp(dev, spec, underlay_qpn);
  2081. spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
  2082. if (is_drop) {
  2083. flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
  2084. rule_dst = NULL;
  2085. dest_num = 0;
  2086. } else {
  2087. flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
  2088. MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
  2089. }
  2090. if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
  2091. (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
  2092. flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
  2093. mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
  2094. flow_tag, flow_attr->type);
  2095. err = -EINVAL;
  2096. goto free;
  2097. }
  2098. flow_act.flow_tag = flow_tag;
  2099. handler->rule = mlx5_add_flow_rules(ft, spec,
  2100. &flow_act,
  2101. rule_dst, dest_num);
  2102. if (IS_ERR(handler->rule)) {
  2103. err = PTR_ERR(handler->rule);
  2104. goto free;
  2105. }
  2106. ft_prio->refcount++;
  2107. handler->prio = ft_prio;
  2108. ft_prio->flow_table = ft;
  2109. free:
  2110. if (err)
  2111. kfree(handler);
  2112. kvfree(spec);
  2113. return err ? ERR_PTR(err) : handler;
  2114. }
  2115. static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
  2116. struct mlx5_ib_flow_prio *ft_prio,
  2117. const struct ib_flow_attr *flow_attr,
  2118. struct mlx5_flow_destination *dst)
  2119. {
  2120. return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
  2121. }
  2122. static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
  2123. struct mlx5_ib_flow_prio *ft_prio,
  2124. struct ib_flow_attr *flow_attr,
  2125. struct mlx5_flow_destination *dst)
  2126. {
  2127. struct mlx5_ib_flow_handler *handler_dst = NULL;
  2128. struct mlx5_ib_flow_handler *handler = NULL;
  2129. handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
  2130. if (!IS_ERR(handler)) {
  2131. handler_dst = create_flow_rule(dev, ft_prio,
  2132. flow_attr, dst);
  2133. if (IS_ERR(handler_dst)) {
  2134. mlx5_del_flow_rules(handler->rule);
  2135. ft_prio->refcount--;
  2136. kfree(handler);
  2137. handler = handler_dst;
  2138. } else {
  2139. list_add(&handler_dst->list, &handler->list);
  2140. }
  2141. }
  2142. return handler;
  2143. }
  2144. enum {
  2145. LEFTOVERS_MC,
  2146. LEFTOVERS_UC,
  2147. };
  2148. static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
  2149. struct mlx5_ib_flow_prio *ft_prio,
  2150. struct ib_flow_attr *flow_attr,
  2151. struct mlx5_flow_destination *dst)
  2152. {
  2153. struct mlx5_ib_flow_handler *handler_ucast = NULL;
  2154. struct mlx5_ib_flow_handler *handler = NULL;
  2155. static struct {
  2156. struct ib_flow_attr flow_attr;
  2157. struct ib_flow_spec_eth eth_flow;
  2158. } leftovers_specs[] = {
  2159. [LEFTOVERS_MC] = {
  2160. .flow_attr = {
  2161. .num_of_specs = 1,
  2162. .size = sizeof(leftovers_specs[0])
  2163. },
  2164. .eth_flow = {
  2165. .type = IB_FLOW_SPEC_ETH,
  2166. .size = sizeof(struct ib_flow_spec_eth),
  2167. .mask = {.dst_mac = {0x1} },
  2168. .val = {.dst_mac = {0x1} }
  2169. }
  2170. },
  2171. [LEFTOVERS_UC] = {
  2172. .flow_attr = {
  2173. .num_of_specs = 1,
  2174. .size = sizeof(leftovers_specs[0])
  2175. },
  2176. .eth_flow = {
  2177. .type = IB_FLOW_SPEC_ETH,
  2178. .size = sizeof(struct ib_flow_spec_eth),
  2179. .mask = {.dst_mac = {0x1} },
  2180. .val = {.dst_mac = {} }
  2181. }
  2182. }
  2183. };
  2184. handler = create_flow_rule(dev, ft_prio,
  2185. &leftovers_specs[LEFTOVERS_MC].flow_attr,
  2186. dst);
  2187. if (!IS_ERR(handler) &&
  2188. flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
  2189. handler_ucast = create_flow_rule(dev, ft_prio,
  2190. &leftovers_specs[LEFTOVERS_UC].flow_attr,
  2191. dst);
  2192. if (IS_ERR(handler_ucast)) {
  2193. mlx5_del_flow_rules(handler->rule);
  2194. ft_prio->refcount--;
  2195. kfree(handler);
  2196. handler = handler_ucast;
  2197. } else {
  2198. list_add(&handler_ucast->list, &handler->list);
  2199. }
  2200. }
  2201. return handler;
  2202. }
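/*
 * Install a pair of catch-all rules (no match criteria) on the sniffer RX
 * and TX tables, both forwarding to the same destination. The TX handler is
 * chained onto the RX handler's list so both are torn down together.
 */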
  2203. static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
  2204. struct mlx5_ib_flow_prio *ft_rx,
  2205. struct mlx5_ib_flow_prio *ft_tx,
  2206. struct mlx5_flow_destination *dst)
  2207. {
  2208. struct mlx5_ib_flow_handler *handler_rx;
  2209. struct mlx5_ib_flow_handler *handler_tx;
  2210. int err;
  2211. static const struct ib_flow_attr flow_attr = {
  2212. .num_of_specs = 0,
  2213. .size = sizeof(flow_attr)
  2214. };
  2215. handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
  2216. if (IS_ERR(handler_rx)) {
  2217. err = PTR_ERR(handler_rx);
  2218. goto err;
  2219. }
  2220. handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
  2221. if (IS_ERR(handler_tx)) {
  2222. err = PTR_ERR(handler_tx);
  2223. goto err_tx;
  2224. }
  2225. list_add(&handler_tx->list, &handler_rx->list);
  2226. return handler_rx;
  2227. err_tx:
  2228. mlx5_del_flow_rules(handler_rx->rule);
  2229. ft_rx->refcount--;
  2230. kfree(handler_rx);
  2231. err:
  2232. return ERR_PTR(err);
  2233. }
  2234. static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
  2235. struct ib_flow_attr *flow_attr,
  2236. int domain)
  2237. {
  2238. struct mlx5_ib_dev *dev = to_mdev(qp->device);
  2239. struct mlx5_ib_qp *mqp = to_mqp(qp);
  2240. struct mlx5_ib_flow_handler *handler = NULL;
  2241. struct mlx5_flow_destination *dst = NULL;
  2242. struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
  2243. struct mlx5_ib_flow_prio *ft_prio;
  2244. int err;
  2245. int underlay_qpn;
  2246. if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
  2247. return ERR_PTR(-ENOMEM);
  2248. if (domain != IB_FLOW_DOMAIN_USER ||
  2249. flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
  2250. (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
  2251. return ERR_PTR(-EINVAL);
  2252. dst = kzalloc(sizeof(*dst), GFP_KERNEL);
  2253. if (!dst)
  2254. return ERR_PTR(-ENOMEM);
  2255. mutex_lock(&dev->flow_db.lock);
  2256. ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
  2257. if (IS_ERR(ft_prio)) {
  2258. err = PTR_ERR(ft_prio);
  2259. goto unlock;
  2260. }
  2261. if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
  2262. ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
  2263. if (IS_ERR(ft_prio_tx)) {
  2264. err = PTR_ERR(ft_prio_tx);
  2265. ft_prio_tx = NULL;
  2266. goto destroy_ft;
  2267. }
  2268. }
  2269. dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
  2270. if (mqp->flags & MLX5_IB_QP_RSS)
  2271. dst->tir_num = mqp->rss_qp.tirn;
  2272. else
  2273. dst->tir_num = mqp->raw_packet_qp.rq.tirn;
  2274. if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
  2275. if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
  2276. handler = create_dont_trap_rule(dev, ft_prio,
  2277. flow_attr, dst);
  2278. } else {
  2279. underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
  2280. mqp->underlay_qpn : 0;
  2281. handler = _create_flow_rule(dev, ft_prio, flow_attr,
  2282. dst, underlay_qpn);
  2283. }
  2284. } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
  2285. flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
  2286. handler = create_leftovers_rule(dev, ft_prio, flow_attr,
  2287. dst);
  2288. } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
  2289. handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
  2290. } else {
  2291. err = -EINVAL;
  2292. goto destroy_ft;
  2293. }
  2294. if (IS_ERR(handler)) {
  2295. err = PTR_ERR(handler);
  2296. handler = NULL;
  2297. goto destroy_ft;
  2298. }
  2299. mutex_unlock(&dev->flow_db.lock);
  2300. kfree(dst);
  2301. return &handler->ibflow;
  2302. destroy_ft:
  2303. put_flow_table(dev, ft_prio, false);
  2304. if (ft_prio_tx)
  2305. put_flow_table(dev, ft_prio_tx, false);
  2306. unlock:
  2307. mutex_unlock(&dev->flow_db.lock);
  2308. kfree(dst);
  2309. kfree(handler);
  2310. return ERR_PTR(err);
  2311. }
  2312. static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
  2313. {
  2314. struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
  2315. struct mlx5_ib_qp *mqp = to_mqp(ibqp);
  2316. int err;
  2317. if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
  2318. mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
  2319. return -EOPNOTSUPP;
  2320. }
  2321. err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
  2322. if (err)
  2323. mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
  2324. ibqp->qp_num, gid->raw);
  2325. return err;
  2326. }
  2327. static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
  2328. {
  2329. struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
  2330. int err;
  2331. err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
  2332. if (err)
  2333. mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
  2334. ibqp->qp_num, gid->raw);
  2335. return err;
  2336. }
  2337. static int init_node_data(struct mlx5_ib_dev *dev)
  2338. {
  2339. int err;
  2340. err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
  2341. if (err)
  2342. return err;
  2343. dev->mdev->rev_id = dev->mdev->pdev->revision;
  2344. return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
  2345. }
  2346. static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
  2347. char *buf)
  2348. {
  2349. struct mlx5_ib_dev *dev =
  2350. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2351. return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
  2352. }
  2353. static ssize_t show_reg_pages(struct device *device,
  2354. struct device_attribute *attr, char *buf)
  2355. {
  2356. struct mlx5_ib_dev *dev =
  2357. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2358. return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
  2359. }
  2360. static ssize_t show_hca(struct device *device, struct device_attribute *attr,
  2361. char *buf)
  2362. {
  2363. struct mlx5_ib_dev *dev =
  2364. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2365. return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
  2366. }
  2367. static ssize_t show_rev(struct device *device, struct device_attribute *attr,
  2368. char *buf)
  2369. {
  2370. struct mlx5_ib_dev *dev =
  2371. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2372. return sprintf(buf, "%x\n", dev->mdev->rev_id);
  2373. }
  2374. static ssize_t show_board(struct device *device, struct device_attribute *attr,
  2375. char *buf)
  2376. {
  2377. struct mlx5_ib_dev *dev =
  2378. container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  2379. return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
  2380. dev->mdev->board_id);
  2381. }
  2382. static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
  2383. static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
  2384. static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
  2385. static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
  2386. static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
  2387. static struct device_attribute *mlx5_class_attributes[] = {
  2388. &dev_attr_hw_rev,
  2389. &dev_attr_hca_type,
  2390. &dev_attr_board_id,
  2391. &dev_attr_fw_pages,
  2392. &dev_attr_reg_pages,
  2393. };
  2394. static void pkey_change_handler(struct work_struct *work)
  2395. {
  2396. struct mlx5_ib_port_resources *ports =
  2397. container_of(work, struct mlx5_ib_port_resources,
  2398. pkey_change_work);
  2399. mutex_lock(&ports->devr->mutex);
  2400. mlx5_ib_gsi_pkey_change(ports->gsi);
  2401. mutex_unlock(&ports->devr->mutex);
  2402. }
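/*
 * On a fatal device error, walk every QP on this device, collect the CQs
 * that still have outstanding work, and invoke their completion handlers so
 * consumers wake up and can observe the error state.
 */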
  2403. static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
  2404. {
  2405. struct mlx5_ib_qp *mqp;
  2406. struct mlx5_ib_cq *send_mcq, *recv_mcq;
  2407. struct mlx5_core_cq *mcq;
  2408. struct list_head cq_armed_list;
  2409. unsigned long flags_qp;
  2410. unsigned long flags_cq;
  2411. unsigned long flags;
  2412. INIT_LIST_HEAD(&cq_armed_list);
/* Go over the QP list residing on this ibdev, synchronized with QP create/destroy. */
  2414. spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
  2415. list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
  2416. spin_lock_irqsave(&mqp->sq.lock, flags_qp);
  2417. if (mqp->sq.tail != mqp->sq.head) {
  2418. send_mcq = to_mcq(mqp->ibqp.send_cq);
  2419. spin_lock_irqsave(&send_mcq->lock, flags_cq);
  2420. if (send_mcq->mcq.comp &&
  2421. mqp->ibqp.send_cq->comp_handler) {
  2422. if (!send_mcq->mcq.reset_notify_added) {
  2423. send_mcq->mcq.reset_notify_added = 1;
  2424. list_add_tail(&send_mcq->mcq.reset_notify,
  2425. &cq_armed_list);
  2426. }
  2427. }
  2428. spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
  2429. }
  2430. spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
  2431. spin_lock_irqsave(&mqp->rq.lock, flags_qp);
  2432. /* no handling is needed for SRQ */
  2433. if (!mqp->ibqp.srq) {
  2434. if (mqp->rq.tail != mqp->rq.head) {
  2435. recv_mcq = to_mcq(mqp->ibqp.recv_cq);
  2436. spin_lock_irqsave(&recv_mcq->lock, flags_cq);
  2437. if (recv_mcq->mcq.comp &&
  2438. mqp->ibqp.recv_cq->comp_handler) {
  2439. if (!recv_mcq->mcq.reset_notify_added) {
  2440. recv_mcq->mcq.reset_notify_added = 1;
  2441. list_add_tail(&recv_mcq->mcq.reset_notify,
  2442. &cq_armed_list);
  2443. }
  2444. }
  2445. spin_unlock_irqrestore(&recv_mcq->lock,
  2446. flags_cq);
  2447. }
  2448. }
  2449. spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
  2450. }
/* At this point, any in-flight post_send/post_recv has been serialized by
 * the lock/unlock cycles above. Now invoke the completion handlers of the
 * CQs collected in cq_armed_list.
 */
  2454. list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
  2455. mcq->comp(mcq);
  2456. }
  2457. spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
  2458. }
  2459. static void delay_drop_handler(struct work_struct *work)
  2460. {
  2461. int err;
  2462. struct mlx5_ib_delay_drop *delay_drop =
  2463. container_of(work, struct mlx5_ib_delay_drop,
  2464. delay_drop_work);
  2465. atomic_inc(&delay_drop->events_cnt);
  2466. mutex_lock(&delay_drop->lock);
  2467. err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
  2468. delay_drop->timeout);
  2469. if (err) {
  2470. mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
  2471. delay_drop->timeout);
  2472. delay_drop->activate = false;
  2473. }
  2474. mutex_unlock(&delay_drop->lock);
  2475. }
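/*
 * mlx5 core event callback: translate core device events into IB events and
 * dispatch them to registered consumers. Port state events on Ethernet
 * ports are ignored here because RoCE handles them through netdev
 * notifications.
 */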
  2476. static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
  2477. enum mlx5_dev_event event, unsigned long param)
  2478. {
  2479. struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
  2480. struct ib_event ibev;
  2481. bool fatal = false;
  2482. u8 port = 0;
  2483. switch (event) {
  2484. case MLX5_DEV_EVENT_SYS_ERROR:
  2485. ibev.event = IB_EVENT_DEVICE_FATAL;
  2486. mlx5_ib_handle_internal_error(ibdev);
  2487. fatal = true;
  2488. break;
  2489. case MLX5_DEV_EVENT_PORT_UP:
  2490. case MLX5_DEV_EVENT_PORT_DOWN:
  2491. case MLX5_DEV_EVENT_PORT_INITIALIZED:
  2492. port = (u8)param;
  2493. /* In RoCE, port up/down events are handled in
  2494. * mlx5_netdev_event().
  2495. */
  2496. if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
  2497. IB_LINK_LAYER_ETHERNET)
  2498. return;
  2499. ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ?
  2500. IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
  2501. break;
  2502. case MLX5_DEV_EVENT_LID_CHANGE:
  2503. ibev.event = IB_EVENT_LID_CHANGE;
  2504. port = (u8)param;
  2505. break;
  2506. case MLX5_DEV_EVENT_PKEY_CHANGE:
  2507. ibev.event = IB_EVENT_PKEY_CHANGE;
  2508. port = (u8)param;
  2509. schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
  2510. break;
  2511. case MLX5_DEV_EVENT_GUID_CHANGE:
  2512. ibev.event = IB_EVENT_GID_CHANGE;
  2513. port = (u8)param;
  2514. break;
  2515. case MLX5_DEV_EVENT_CLIENT_REREG:
  2516. ibev.event = IB_EVENT_CLIENT_REREGISTER;
  2517. port = (u8)param;
  2518. break;
  2519. case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
  2520. schedule_work(&ibdev->delay_drop.delay_drop_work);
  2521. goto out;
  2522. default:
  2523. goto out;
  2524. }
  2525. ibev.device = &ibdev->ib_dev;
  2526. ibev.element.port_num = port;
  2527. if (port < 1 || port > ibdev->num_ports) {
  2528. mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
  2529. goto out;
  2530. }
  2531. if (ibdev->ib_active)
  2532. ib_dispatch_event(&ibev);
  2533. if (fatal)
  2534. ibdev->ib_active = false;
  2535. out:
  2536. return;
  2537. }
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
{
        struct mlx5_hca_vport_context vport_ctx;
        int err;
        int port;

        for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
                dev->mdev->port_caps[port - 1].has_smi = false;
                if (MLX5_CAP_GEN(dev->mdev, port_type) ==
                    MLX5_CAP_PORT_TYPE_IB) {
                        if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
                                err = mlx5_query_hca_vport_context(dev->mdev, 0,
                                                                   port, 0,
                                                                   &vport_ctx);
                                if (err) {
                                        mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
                                                    port, err);
                                        return err;
                                }
                                dev->mdev->port_caps[port - 1].has_smi =
                                        vport_ctx.has_smi;
                        } else {
                                dev->mdev->port_caps[port - 1].has_smi = true;
                        }
                }
        }
        return 0;
}

static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
        int port;

        for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
                mlx5_query_ext_port_caps(dev, port);
}

static int get_port_caps(struct mlx5_ib_dev *dev)
{
        struct ib_device_attr *dprops = NULL;
        struct ib_port_attr *pprops = NULL;
        int err = -ENOMEM;
        int port;
        struct ib_udata uhw = {.inlen = 0, .outlen = 0};

        pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
        if (!pprops)
                goto out;

        dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
        if (!dprops)
                goto out;

        err = set_has_smi_cap(dev);
        if (err)
                goto out;

        err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
        if (err) {
                mlx5_ib_warn(dev, "query_device failed %d\n", err);
                goto out;
        }

        for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
                memset(pprops, 0, sizeof(*pprops));
                err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
                if (err) {
                        mlx5_ib_warn(dev, "query_port %d failed %d\n",
                                     port, err);
                        break;
                }
                dev->mdev->port_caps[port - 1].pkey_table_len =
                        dprops->max_pkeys;
                dev->mdev->port_caps[port - 1].gid_table_len =
                        pprops->gid_tbl_len;
                mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
                            dprops->max_pkeys, pprops->gid_tbl_len);
        }

out:
        kfree(pprops);
        kfree(dprops);

        return err;
}

static void destroy_umrc_res(struct mlx5_ib_dev *dev)
{
        int err;

        err = mlx5_mr_cache_cleanup(dev);
        if (err)
                mlx5_ib_warn(dev, "mr cache cleanup failed\n");

        mlx5_ib_destroy_qp(dev->umrc.qp);
        ib_free_cq(dev->umrc.cq);
        ib_dealloc_pd(dev->umrc.pd);
}

enum {
        MAX_UMR_WR = 128,
};

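/*
 * Create the resources that back UMR (user memory registration) work
 * requests: a dedicated PD, CQ and a MLX5_IB_QPT_REG_UMR QP driven through
 * INIT/RTR/RTS, plus the MR cache that uses them.
 */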
static int create_umr_res(struct mlx5_ib_dev *dev)
{
        struct ib_qp_init_attr *init_attr = NULL;
        struct ib_qp_attr *attr = NULL;
        struct ib_pd *pd;
        struct ib_cq *cq;
        struct ib_qp *qp;
        int ret;

        attr = kzalloc(sizeof(*attr), GFP_KERNEL);
        init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
        if (!attr || !init_attr) {
                ret = -ENOMEM;
                goto error_0;
        }

        pd = ib_alloc_pd(&dev->ib_dev, 0);
        if (IS_ERR(pd)) {
                mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
                ret = PTR_ERR(pd);
                goto error_0;
        }

        cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
        if (IS_ERR(cq)) {
                mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
                ret = PTR_ERR(cq);
                goto error_2;
        }

        init_attr->send_cq = cq;
        init_attr->recv_cq = cq;
        init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
        init_attr->cap.max_send_wr = MAX_UMR_WR;
        init_attr->cap.max_send_sge = 1;
        init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
        init_attr->port_num = 1;
        qp = mlx5_ib_create_qp(pd, init_attr, NULL);
        if (IS_ERR(qp)) {
                mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
                ret = PTR_ERR(qp);
                goto error_3;
        }
        qp->device = &dev->ib_dev;
        qp->real_qp = qp;
        qp->uobject = NULL;
        qp->qp_type = MLX5_IB_QPT_REG_UMR;
        qp->send_cq = init_attr->send_cq;
        qp->recv_cq = init_attr->recv_cq;

        attr->qp_state = IB_QPS_INIT;
        attr->port_num = 1;
        ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
                                IB_QP_PORT, NULL);
        if (ret) {
                mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
                goto error_4;
        }

        memset(attr, 0, sizeof(*attr));
        attr->qp_state = IB_QPS_RTR;
        attr->path_mtu = IB_MTU_256;

        ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
        if (ret) {
                mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
                goto error_4;
        }

        memset(attr, 0, sizeof(*attr));
        attr->qp_state = IB_QPS_RTS;
        ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
        if (ret) {
                mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
                goto error_4;
        }

        dev->umrc.qp = qp;
        dev->umrc.cq = cq;
        dev->umrc.pd = pd;

        sema_init(&dev->umrc.sem, MAX_UMR_WR);
        ret = mlx5_mr_cache_init(dev);
        if (ret) {
                mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
                goto error_4;
        }

        kfree(attr);
        kfree(init_attr);

        return 0;

error_4:
        mlx5_ib_destroy_qp(qp);

error_3:
        ib_free_cq(cq);

error_2:
        ib_dealloc_pd(pd);

error_0:
        kfree(attr);
        kfree(init_attr);
        return ret;
}

static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
{
        switch (umr_fence_cap) {
        case MLX5_CAP_UMR_FENCE_NONE:
                return MLX5_FENCE_MODE_NONE;
        case MLX5_CAP_UMR_FENCE_SMALL:
                return MLX5_FENCE_MODE_INITIATOR_SMALL;
        default:
                return MLX5_FENCE_MODE_STRONG_ORDERING;
        }
}

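/*
 * Allocate the device-global verbs objects used internally by the driver:
 * PD p0, CQ c0, XRC domains x0/x1 and SRQs s0/s1, and initialize the
 * per-port P_Key change work items.
 */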
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
        struct ib_srq_init_attr attr;
        struct mlx5_ib_dev *dev;
        struct ib_cq_init_attr cq_attr = {.cqe = 1};
        int port;
        int ret = 0;

        dev = container_of(devr, struct mlx5_ib_dev, devr);
        mutex_init(&devr->mutex);

        devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
        if (IS_ERR(devr->p0)) {
                ret = PTR_ERR(devr->p0);
                goto error0;
        }
        devr->p0->device = &dev->ib_dev;
        devr->p0->uobject = NULL;
        atomic_set(&devr->p0->usecnt, 0);

        devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
        if (IS_ERR(devr->c0)) {
                ret = PTR_ERR(devr->c0);
                goto error1;
        }
        devr->c0->device = &dev->ib_dev;
        devr->c0->uobject = NULL;
        devr->c0->comp_handler = NULL;
        devr->c0->event_handler = NULL;
        devr->c0->cq_context = NULL;
        atomic_set(&devr->c0->usecnt, 0);

        devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
        if (IS_ERR(devr->x0)) {
                ret = PTR_ERR(devr->x0);
                goto error2;
        }
        devr->x0->device = &dev->ib_dev;
        devr->x0->inode = NULL;
        atomic_set(&devr->x0->usecnt, 0);
        mutex_init(&devr->x0->tgt_qp_mutex);
        INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

        devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
        if (IS_ERR(devr->x1)) {
                ret = PTR_ERR(devr->x1);
                goto error3;
        }
        devr->x1->device = &dev->ib_dev;
        devr->x1->inode = NULL;
        atomic_set(&devr->x1->usecnt, 0);
        mutex_init(&devr->x1->tgt_qp_mutex);
        INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

        memset(&attr, 0, sizeof(attr));
        attr.attr.max_sge = 1;
        attr.attr.max_wr = 1;
        attr.srq_type = IB_SRQT_XRC;
        attr.ext.cq = devr->c0;
        attr.ext.xrc.xrcd = devr->x0;

        devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
        if (IS_ERR(devr->s0)) {
                ret = PTR_ERR(devr->s0);
                goto error4;
        }
        devr->s0->device = &dev->ib_dev;
        devr->s0->pd = devr->p0;
        devr->s0->uobject = NULL;
        devr->s0->event_handler = NULL;
        devr->s0->srq_context = NULL;
        devr->s0->srq_type = IB_SRQT_XRC;
        devr->s0->ext.xrc.xrcd = devr->x0;
        devr->s0->ext.cq = devr->c0;
        atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
        atomic_inc(&devr->s0->ext.cq->usecnt);
        atomic_inc(&devr->p0->usecnt);
        atomic_set(&devr->s0->usecnt, 0);

        memset(&attr, 0, sizeof(attr));
        attr.attr.max_sge = 1;
        attr.attr.max_wr = 1;
        attr.srq_type = IB_SRQT_BASIC;
        devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
        if (IS_ERR(devr->s1)) {
                ret = PTR_ERR(devr->s1);
                goto error5;
        }
        devr->s1->device = &dev->ib_dev;
        devr->s1->pd = devr->p0;
        devr->s1->uobject = NULL;
        devr->s1->event_handler = NULL;
        devr->s1->srq_context = NULL;
        devr->s1->srq_type = IB_SRQT_BASIC;
        devr->s1->ext.cq = devr->c0;
        atomic_inc(&devr->p0->usecnt);
        atomic_set(&devr->s1->usecnt, 0);

        for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
                INIT_WORK(&devr->ports[port].pkey_change_work,
                          pkey_change_handler);
                devr->ports[port].devr = devr;
        }

        return 0;

error5:
        mlx5_ib_destroy_srq(devr->s0);
error4:
        mlx5_ib_dealloc_xrcd(devr->x1);
error3:
        mlx5_ib_dealloc_xrcd(devr->x0);
error2:
        mlx5_ib_destroy_cq(devr->c0);
error1:
        mlx5_ib_dealloc_pd(devr->p0);
error0:
        return ret;
}

static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
        struct mlx5_ib_dev *dev =
                container_of(devr, struct mlx5_ib_dev, devr);
        int port;

        mlx5_ib_destroy_srq(devr->s1);
        mlx5_ib_destroy_srq(devr->s0);
        mlx5_ib_dealloc_xrcd(devr->x0);
        mlx5_ib_dealloc_xrcd(devr->x1);
        mlx5_ib_destroy_cq(devr->c0);
        mlx5_ib_dealloc_pd(devr->p0);

        /* Make sure no change P_Key work items are still executing */
        for (port = 0; port < dev->num_ports; ++port)
                cancel_work_sync(&devr->ports[port].pkey_change_work);
}

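/*
 * Port capability flags for the RDMA core: plain IB on an InfiniBand link
 * layer, otherwise raw packet plus RoCE v1/v2 flags derived from the
 * reported l3_type and roce_version capabilities.
 */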
static u32 get_core_cap_flags(struct ib_device *ibdev)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
        u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
        u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
        u32 ret = 0;

        if (ll == IB_LINK_LAYER_INFINIBAND)
                return RDMA_CORE_PORT_IBA_IB;

        ret = RDMA_CORE_PORT_RAW_PACKET;

        if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
                return ret;

        if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
                return ret;

        if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
                ret |= RDMA_CORE_PORT_IBA_ROCE;

        if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
                ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

        return ret;
}

static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
                               struct ib_port_immutable *immutable)
{
        struct ib_port_attr attr;
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
        int err;

        immutable->core_cap_flags = get_core_cap_flags(ibdev);

        err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;

        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
        immutable->core_cap_flags = get_core_cap_flags(ibdev);
        if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
                immutable->max_mad_size = IB_MGMT_MAD_SIZE;

        return 0;
}

static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
        struct mlx5_ib_dev *dev =
                container_of(ibdev, struct mlx5_ib_dev, ib_dev);

        snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d",
                 fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev),
                 fw_rev_sub(dev->mdev));
}

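/*
 * RoCE LAG support: when the core device is part of an active LAG, create
 * the vport LAG objects and the demux flow table used to steer traffic
 * between the bonded ports.
 */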
static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_core_dev *mdev = dev->mdev;
        struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
                                                                 MLX5_FLOW_NAMESPACE_LAG);
        struct mlx5_flow_table *ft;
        int err;

        if (!ns || !mlx5_lag_is_active(mdev))
                return 0;

        err = mlx5_cmd_create_vport_lag(mdev);
        if (err)
                return err;

        ft = mlx5_create_lag_demux_flow_table(ns, 0, 0);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                goto err_destroy_vport_lag;
        }

        dev->flow_db.lag_demux_ft = ft;
        return 0;

err_destroy_vport_lag:
        mlx5_cmd_destroy_vport_lag(mdev);
        return err;
}

static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
        struct mlx5_core_dev *mdev = dev->mdev;

        if (dev->flow_db.lag_demux_ft) {
                mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
                dev->flow_db.lag_demux_ft = NULL;

                mlx5_cmd_destroy_vport_lag(mdev);
        }
}

static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev)
{
        int err;

        dev->roce.nb.notifier_call = mlx5_netdev_event;
        err = register_netdevice_notifier(&dev->roce.nb);
        if (err) {
                dev->roce.nb.notifier_call = NULL;
                return err;
        }

        return 0;
}

static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev)
{
        if (dev->roce.nb.notifier_call) {
                unregister_netdevice_notifier(&dev->roce.nb);
                dev->roce.nb.notifier_call = NULL;
        }
}

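/*
 * Ethernet/RoCE bring-up: register the netdev notifier, enable RoCE on the
 * NIC vport when the capability is present, then initialize LAG. Teardown
 * is the reverse, in mlx5_disable_eth().
 */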
static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{
        int err;

        err = mlx5_add_netdev_notifier(dev);
        if (err)
                return err;

        if (MLX5_CAP_GEN(dev->mdev, roce)) {
                err = mlx5_nic_vport_enable_roce(dev->mdev);
                if (err)
                        goto err_unregister_netdevice_notifier;
        }

        err = mlx5_eth_lag_init(dev);
        if (err)
                goto err_disable_roce;

        return 0;

err_disable_roce:
        if (MLX5_CAP_GEN(dev->mdev, roce))
                mlx5_nic_vport_disable_roce(dev->mdev);

err_unregister_netdevice_notifier:
        mlx5_remove_netdev_notifier(dev);
        return err;
}

static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
        mlx5_eth_lag_cleanup(dev);
        if (MLX5_CAP_GEN(dev->mdev, roce))
                mlx5_nic_vport_disable_roce(dev->mdev);
}

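/*
 * Hardware counter descriptors: each entry maps a counter name to its byte
 * offset in the corresponding query command output (query_q_counter or
 * query_cong_statistics).
 */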
struct mlx5_ib_counter {
        const char *name;
        size_t offset;
};

#define INIT_Q_COUNTER(_name) \
        { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}

static const struct mlx5_ib_counter basic_q_cnts[] = {
        INIT_Q_COUNTER(rx_write_requests),
        INIT_Q_COUNTER(rx_read_requests),
        INIT_Q_COUNTER(rx_atomic_requests),
        INIT_Q_COUNTER(out_of_buffer),
};

static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
        INIT_Q_COUNTER(out_of_sequence),
};

static const struct mlx5_ib_counter retrans_q_cnts[] = {
        INIT_Q_COUNTER(duplicate_request),
        INIT_Q_COUNTER(rnr_nak_retry_err),
        INIT_Q_COUNTER(packet_seq_err),
        INIT_Q_COUNTER(implied_nak_seq_err),
        INIT_Q_COUNTER(local_ack_timeout_err),
};

#define INIT_CONG_COUNTER(_name) \
        { .name = #_name, .offset = \
                MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}

static const struct mlx5_ib_counter cong_cnts[] = {
        INIT_CONG_COUNTER(rp_cnp_ignored),
        INIT_CONG_COUNTER(rp_cnp_handled),
        INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
        INIT_CONG_COUNTER(np_cnp_sent),
};

static const struct mlx5_ib_counter extended_err_cnts[] = {
        INIT_Q_COUNTER(resp_local_length_error),
        INIT_Q_COUNTER(resp_cqe_error),
        INIT_Q_COUNTER(req_cqe_error),
        INIT_Q_COUNTER(req_remote_invalid_request),
        INIT_Q_COUNTER(req_remote_access_errors),
        INIT_Q_COUNTER(resp_remote_access_errors),
        INIT_Q_COUNTER(resp_cqe_flush_error),
        INIT_Q_COUNTER(req_cqe_flush_error),
};

static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
        unsigned int i;

        for (i = 0; i < dev->num_ports; i++) {
                mlx5_core_dealloc_q_counter(dev->mdev,
                                            dev->port[i].cnts.set_id);
                kfree(dev->port[i].cnts.names);
                kfree(dev->port[i].cnts.offsets);
        }
}

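/*
 * Size and allocate the per-port counter name/offset arrays according to
 * the optional q-counter and congestion-counter capabilities, then fill
 * them from the descriptor tables above.
 */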
static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
                                    struct mlx5_ib_counters *cnts)
{
        u32 num_counters;

        num_counters = ARRAY_SIZE(basic_q_cnts);

        if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
                num_counters += ARRAY_SIZE(out_of_seq_q_cnts);

        if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
                num_counters += ARRAY_SIZE(retrans_q_cnts);

        if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
                num_counters += ARRAY_SIZE(extended_err_cnts);

        cnts->num_q_counters = num_counters;

        if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
                cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
                num_counters += ARRAY_SIZE(cong_cnts);
        }

        cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
        if (!cnts->names)
                return -ENOMEM;

        cnts->offsets = kcalloc(num_counters,
                                sizeof(cnts->offsets), GFP_KERNEL);
        if (!cnts->offsets)
                goto err_names;

        return 0;

err_names:
        kfree(cnts->names);
        return -ENOMEM;
}

static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
                                  const char **names,
                                  size_t *offsets)
{
        int i;
        int j = 0;

        for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
                names[j] = basic_q_cnts[i].name;
                offsets[j] = basic_q_cnts[i].offset;
        }

        if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
                for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
                        names[j] = out_of_seq_q_cnts[i].name;
                        offsets[j] = out_of_seq_q_cnts[i].offset;
                }
        }

        if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
                for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
                        names[j] = retrans_q_cnts[i].name;
                        offsets[j] = retrans_q_cnts[i].offset;
                }
        }

        if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
                for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
                        names[j] = extended_err_cnts[i].name;
                        offsets[j] = extended_err_cnts[i].offset;
                }
        }

        if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
                for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
                        names[j] = cong_cnts[i].name;
                        offsets[j] = cong_cnts[i].offset;
                }
        }
}

static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
        int i;
        int ret;

        for (i = 0; i < dev->num_ports; i++) {
                struct mlx5_ib_port *port = &dev->port[i];

                ret = mlx5_core_alloc_q_counter(dev->mdev,
                                                &port->cnts.set_id);
                if (ret) {
                        mlx5_ib_warn(dev,
                                     "couldn't allocate queue counter for port %d, err %d\n",
                                     i + 1, ret);
                        goto dealloc_counters;
                }

                ret = __mlx5_ib_alloc_counters(dev, &port->cnts);
                if (ret)
                        goto dealloc_counters;

                mlx5_ib_fill_counters(dev, port->cnts.names,
                                      port->cnts.offsets);
        }

        return 0;

dealloc_counters:
        while (--i >= 0)
                mlx5_core_dealloc_q_counter(dev->mdev,
                                            dev->port[i].cnts.set_id);

        return ret;
}

static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
                                                    u8 port_num)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_port *port = &dev->port[port_num - 1];

        /* We support only per port stats */
        if (port_num == 0)
                return NULL;

        return rdma_alloc_hw_stats_struct(port->cnts.names,
                                          port->cnts.num_q_counters +
                                          port->cnts.num_cong_counters,
                                          RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
                                    struct mlx5_ib_port *port,
                                    struct rdma_hw_stats *stats)
{
        int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
        void *out;
        __be32 val;
        int ret, i;

        out = kvzalloc(outlen, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        ret = mlx5_core_query_q_counter(dev->mdev,
                                        port->cnts.set_id, 0,
                                        out, outlen);
        if (ret)
                goto free;

        for (i = 0; i < port->cnts.num_q_counters; i++) {
                val = *(__be32 *)(out + port->cnts.offsets[i]);
                stats->value[i] = (u64)be32_to_cpu(val);
        }

free:
        kvfree(out);
        return ret;
}

static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
                                       struct mlx5_ib_port *port,
                                       struct rdma_hw_stats *stats)
{
        int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
        void *out;
        int ret, i;
        int offset = port->cnts.num_q_counters;

        out = kvzalloc(outlen, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
        if (ret)
                goto free;

        for (i = 0; i < port->cnts.num_cong_counters; i++) {
                stats->value[i + offset] =
                        be64_to_cpup((__be64 *)(out +
                                     port->cnts.offsets[i + offset]));
        }

free:
        kvfree(out);
        return ret;
}

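/*
 * rdma_hw_stats callback: fill the stats array from the port q-counter and,
 * when congestion counters are supported, from the congestion statistics;
 * returns the number of counters filled.
 */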
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
                                struct rdma_hw_stats *stats,
                                u8 port_num, int index)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_port *port = &dev->port[port_num - 1];
        int ret, num_counters;

        if (!stats)
                return -EINVAL;

        ret = mlx5_ib_query_q_counters(dev, port, stats);
        if (ret)
                return ret;

        num_counters = port->cnts.num_q_counters;

        if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
                ret = mlx5_ib_query_cong_counters(dev, port, stats);
                if (ret)
                        return ret;
                num_counters += port->cnts.num_cong_counters;
        }

        return num_counters;
}

static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
{
        return mlx5_rdma_netdev_free(netdev);
}

static struct net_device *
mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
                          u8 port_num,
                          enum rdma_netdev_t type,
                          const char *name,
                          unsigned char name_assign_type,
                          void (*setup)(struct net_device *))
{
        struct net_device *netdev;
        struct rdma_netdev *rn;

        if (type != RDMA_NETDEV_IPOIB)
                return ERR_PTR(-EOPNOTSUPP);

        netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
                                        name, setup);
        if (likely(!IS_ERR_OR_NULL(netdev))) {
                rn = netdev_priv(netdev);
                rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
        }
        return netdev;
}

static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
        if (!dev->delay_drop.dbg)
                return;

        debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
        kfree(dev->delay_drop.dbg);
        dev->delay_drop.dbg = NULL;
}

static void cancel_delay_drop(struct mlx5_ib_dev *dev)
{
        if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
                return;

        cancel_work_sync(&dev->delay_drop.delay_drop_work);
        delay_drop_debugfs_cleanup(dev);
}

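/*
 * debugfs "timeout" file for delay drop: reads return the current timeout
 * in microseconds; writes are rounded up to a multiple of 100 usec and
 * clamped to the maximum supported value.
 */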
static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
                                       size_t count, loff_t *pos)
{
        struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
        char lbuf[20];
        int len;

        len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout);
        return simple_read_from_buffer(buf, count, pos, lbuf, len);
}

static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
                                        size_t count, loff_t *pos)
{
        struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
        u32 timeout;
        u32 var;

        if (kstrtouint_from_user(buf, count, 0, &var))
                return -EFAULT;

        timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS *
                        1000);
        if (timeout != var)
                mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
                            timeout);

        delay_drop->timeout = timeout;

        return count;
}

static const struct file_operations fops_delay_drop_timeout = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = delay_drop_timeout_write,
        .read   = delay_drop_timeout_read,
};

static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_ib_dbg_delay_drop *dbg;

        if (!mlx5_debugfs_root)
                return 0;

        dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
        if (!dbg)
                return -ENOMEM;

        dev->delay_drop.dbg = dbg;

        dbg->dir_debugfs =
                debugfs_create_dir("delay_drop",
                                   dev->mdev->priv.dbg_root);
        if (!dbg->dir_debugfs)
                goto out_debugfs;

        dbg->events_cnt_debugfs =
                debugfs_create_atomic_t("num_timeout_events", 0400,
                                        dbg->dir_debugfs,
                                        &dev->delay_drop.events_cnt);
        if (!dbg->events_cnt_debugfs)
                goto out_debugfs;

        dbg->rqs_cnt_debugfs =
                debugfs_create_atomic_t("num_rqs", 0400,
                                        dbg->dir_debugfs,
                                        &dev->delay_drop.rqs_cnt);
        if (!dbg->rqs_cnt_debugfs)
                goto out_debugfs;

        dbg->timeout_debugfs =
                debugfs_create_file("timeout", 0600,
                                    dbg->dir_debugfs,
                                    &dev->delay_drop,
                                    &fops_delay_drop_timeout);
        if (!dbg->timeout_debugfs)
                goto out_debugfs;

        return 0;

out_debugfs:
        delay_drop_debugfs_cleanup(dev);
        return -ENOMEM;
}

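/*
 * Set up delay-drop state for devices advertising
 * IB_RAW_PACKET_CAP_DELAY_DROP; a debugfs setup failure only triggers a
 * warning.
 */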
static void init_delay_drop(struct mlx5_ib_dev *dev)
{
        if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
                return;

        mutex_init(&dev->delay_drop.lock);
        dev->delay_drop.dev = dev;
        dev->delay_drop.activate = false;
        dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
        INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
        atomic_set(&dev->delay_drop.rqs_cnt, 0);
        atomic_set(&dev->delay_drop.events_cnt, 0);

        if (delay_drop_debugfs_init(dev))
                mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
}

static const struct cpumask *
mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);

        return mlx5_get_vector_affinity(dev->mdev, comp_vector);
}

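/*
 * Probe path for a new mlx5 core device: allocate the IB device, fill in
 * the verbs ops and uverbs command masks, bring up RoCE when the link
 * layer is Ethernet, create device resources, counters, UAR pages and
 * bfregs, register with the IB core, and finally create the UMR resources
 * and sysfs attributes.
 */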
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
        struct mlx5_ib_dev *dev;
        enum rdma_link_layer ll;
        int port_type_cap;
        const char *name;
        int err;
        int i;

        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);

        printk_once(KERN_INFO "%s", mlx5_version);

        dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
        if (!dev)
                return NULL;

        dev->mdev = mdev;

        dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
                            GFP_KERNEL);
        if (!dev->port)
                goto err_dealloc;

        rwlock_init(&dev->roce.netdev_lock);
        err = get_port_caps(dev);
        if (err)
                goto err_free_port;

        if (mlx5_use_mad_ifc(dev))
                get_ext_port_caps(dev);

        if (!mlx5_lag_is_active(mdev))
                name = "mlx5_%d";
        else
                name = "mlx5_bond_%d";

        strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
        dev->ib_dev.owner = THIS_MODULE;
        dev->ib_dev.node_type = RDMA_NODE_IB_CA;
        dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
        dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
        dev->ib_dev.phys_port_cnt = dev->num_ports;
        dev->ib_dev.num_comp_vectors =
                dev->mdev->priv.eq_table.num_comp_vectors;
        dev->ib_dev.dev.parent = &mdev->pdev->dev;

        dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
        dev->ib_dev.uverbs_cmd_mask =
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
                (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
                (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
                (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
                (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
                (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
                (1ull << IB_USER_VERBS_CMD_REG_MR) |
                (1ull << IB_USER_VERBS_CMD_REREG_MR) |
                (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
                (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
                (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
                (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
                (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
                (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
                (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
                (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
                (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
                (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
                (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
                (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
                (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
                (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
                (1ull << IB_USER_VERBS_CMD_OPEN_QP);
        dev->ib_dev.uverbs_ex_cmd_mask =
                (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
                (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
                (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
                (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) |
                (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);

        dev->ib_dev.query_device = mlx5_ib_query_device;
        dev->ib_dev.query_port = mlx5_ib_query_port;
        dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
        if (ll == IB_LINK_LAYER_ETHERNET)
                dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
        dev->ib_dev.query_gid = mlx5_ib_query_gid;
        dev->ib_dev.add_gid = mlx5_ib_add_gid;
        dev->ib_dev.del_gid = mlx5_ib_del_gid;
        dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
        dev->ib_dev.modify_device = mlx5_ib_modify_device;
        dev->ib_dev.modify_port = mlx5_ib_modify_port;
        dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
        dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
        dev->ib_dev.mmap = mlx5_ib_mmap;
        dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
        dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
        dev->ib_dev.create_ah = mlx5_ib_create_ah;
        dev->ib_dev.query_ah = mlx5_ib_query_ah;
        dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
        dev->ib_dev.create_srq = mlx5_ib_create_srq;
        dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
        dev->ib_dev.query_srq = mlx5_ib_query_srq;
        dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
        dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
        dev->ib_dev.create_qp = mlx5_ib_create_qp;
        dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
        dev->ib_dev.query_qp = mlx5_ib_query_qp;
        dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
        dev->ib_dev.post_send = mlx5_ib_post_send;
        dev->ib_dev.post_recv = mlx5_ib_post_recv;
        dev->ib_dev.create_cq = mlx5_ib_create_cq;
        dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
        dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
        dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
        dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
        dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
        dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
        dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
        dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
        dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
        dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
        dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
        dev->ib_dev.process_mad = mlx5_ib_process_mad;
        dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
        dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
        dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
        dev->ib_dev.get_port_immutable = mlx5_port_immutable;
        dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
        dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
        if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
                dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;

        if (mlx5_core_is_pf(mdev)) {
                dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
                dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
                dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
                dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
        }

        dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;

        mlx5_ib_internal_fill_odp_caps(dev);

        dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));

        if (MLX5_CAP_GEN(mdev, imaicl)) {
                dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
                dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
                dev->ib_dev.uverbs_cmd_mask |=
                        (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
                        (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
        }

        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
                dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
                dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
        }

        if (MLX5_CAP_GEN(mdev, xrc)) {
                dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
                dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
                dev->ib_dev.uverbs_cmd_mask |=
                        (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
                        (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
        }

        dev->ib_dev.create_flow = mlx5_ib_create_flow;
        dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
        dev->ib_dev.uverbs_ex_cmd_mask |=
                (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
                (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);

        if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
            IB_LINK_LAYER_ETHERNET) {
                dev->ib_dev.create_wq = mlx5_ib_create_wq;
                dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
                dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
                dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
                dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
                dev->ib_dev.uverbs_ex_cmd_mask |=
                        (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
                        (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
                        (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
                        (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
                        (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
        }
        err = init_node_data(dev);
        if (err)
                goto err_free_port;

        mutex_init(&dev->flow_db.lock);
        mutex_init(&dev->cap_mask_mutex);
        INIT_LIST_HEAD(&dev->qp_list);
        spin_lock_init(&dev->reset_flow_resource_lock);

        if (ll == IB_LINK_LAYER_ETHERNET) {
                err = mlx5_enable_eth(dev);
                if (err)
                        goto err_free_port;
                dev->roce.last_port_state = IB_PORT_DOWN;
        }

        err = create_dev_resources(&dev->devr);
        if (err)
                goto err_disable_eth;

        err = mlx5_ib_odp_init_one(dev);
        if (err)
                goto err_rsrc;

        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
                err = mlx5_ib_alloc_counters(dev);
                if (err)
                        goto err_odp;
        }

        err = mlx5_ib_init_cong_debugfs(dev);
        if (err)
                goto err_cnt;

        dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
        if (!dev->mdev->priv.uar)
                goto err_cong;

        err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
        if (err)
                goto err_uar_page;

        err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
        if (err)
                goto err_bfreg;

        err = ib_register_device(&dev->ib_dev, NULL);
        if (err)
                goto err_fp_bfreg;

        err = create_umr_res(dev);
        if (err)
                goto err_dev;

        init_delay_drop(dev);

        for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
                err = device_create_file(&dev->ib_dev.dev,
                                         mlx5_class_attributes[i]);
                if (err)
                        goto err_delay_drop;
        }

        if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
            MLX5_CAP_GEN(mdev, disable_local_lb))
                mutex_init(&dev->lb_mutex);

        dev->ib_active = true;

        return dev;

err_delay_drop:
        cancel_delay_drop(dev);
        destroy_umrc_res(dev);

err_dev:
        ib_unregister_device(&dev->ib_dev);

err_fp_bfreg:
        mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);

err_bfreg:
        mlx5_free_bfreg(dev->mdev, &dev->bfreg);

err_uar_page:
        mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);

err_cong:
        mlx5_ib_cleanup_cong_debugfs(dev);
err_cnt:
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
                mlx5_ib_dealloc_counters(dev);

err_odp:
        mlx5_ib_odp_remove_one(dev);

err_rsrc:
        destroy_dev_resources(&dev->devr);

err_disable_eth:
        if (ll == IB_LINK_LAYER_ETHERNET) {
                mlx5_disable_eth(dev);
                mlx5_remove_netdev_notifier(dev);
        }

err_free_port:
        kfree(dev->port);

err_dealloc:
        ib_dealloc_device((struct ib_device *)dev);

        return NULL;
}

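/*
 * Teardown mirror of mlx5_ib_add(): release everything in roughly the
 * reverse order of creation.
 */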
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
        struct mlx5_ib_dev *dev = context;
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);

        cancel_delay_drop(dev);
        mlx5_remove_netdev_notifier(dev);
        ib_unregister_device(&dev->ib_dev);
        mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
        mlx5_free_bfreg(dev->mdev, &dev->bfreg);
        mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
        mlx5_ib_cleanup_cong_debugfs(dev);
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
                mlx5_ib_dealloc_counters(dev);
        destroy_umrc_res(dev);
        mlx5_ib_odp_remove_one(dev);
        destroy_dev_resources(&dev->devr);
        if (ll == IB_LINK_LAYER_ETHERNET)
                mlx5_disable_eth(dev);
        kfree(dev->port);
        ib_dealloc_device(&dev->ib_dev);
}

static struct mlx5_interface mlx5_ib_interface = {
        .add            = mlx5_ib_add,
        .remove         = mlx5_ib_remove,
        .event          = mlx5_ib_event,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        .pfault         = mlx5_ib_pfault,
#endif
        .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
};

static int __init mlx5_ib_init(void)
{
        int err;

        mlx5_ib_odp_init();

        err = mlx5_register_interface(&mlx5_ib_interface);

        return err;
}

static void __exit mlx5_ib_cleanup(void)
{
        mlx5_unregister_interface(&mlx5_ib_interface);
}

module_init(mlx5_ib_init);
module_exit(mlx5_ib_cleanup);