verifier.c

/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};
/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
 * types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 */
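/* Purely as an illustrative sketch (not part of the original comment): a
 * program like the one above would typically continue with a NULL check
 * before touching the looked-up value, e.g.
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),       // if R0 == NULL, skip the load
 *    BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), // here R0 is PTR_TO_MAP_VALUE
 *    BPF_EXIT_INSN(),
 * In the fall-through (non-NULL) branch check_cond_jmp_op() has upgraded R0 to
 * PTR_TO_MAP_VALUE, so the load is accepted; dereferencing R0 without the
 * check would be rejected because R0 could still be NULL.
 */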
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
};
#define BPF_COMPLEXITY_LIMIT_INSNS	131072
#define BPF_COMPLEXITY_LIMIT_STACK	1024

#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)

struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	int regno;
	int access_size;
};
static DEFINE_MUTEX(bpf_verifier_lock);

void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
		       va_list args)
{
	unsigned int n;

	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);

	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
		  "verifier log line truncated - local buffer too short\n");

	n = min(log->len_total - log->len_used - 1, n);
	log->kbuf[n] = '\0';

	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
		log->len_used += n;
	else
		log->ubuf = NULL;
}

/* log_level controls verbosity level of eBPF verifier.
 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 * so the user can figure out what's wrong with the program
 */
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}

static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_PACKET ||
	       type == PTR_TO_PACKET_META;
}
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
	[NOT_INIT]		= "?",
	[SCALAR_VALUE]		= "inv",
	[PTR_TO_CTX]		= "ctx",
	[CONST_PTR_TO_MAP]	= "map_ptr",
	[PTR_TO_MAP_VALUE]	= "map_value",
	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
	[PTR_TO_STACK]		= "fp",
	[PTR_TO_PACKET]		= "pkt",
	[PTR_TO_PACKET_META]	= "pkt_meta",
	[PTR_TO_PACKET_END]	= "pkt_end",
};
static void print_liveness(struct bpf_verifier_env *env,
			   enum bpf_reg_liveness live)
{
	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
		verbose(env, "_");
	if (live & REG_LIVE_READ)
		verbose(env, "r");
	if (live & REG_LIVE_WRITTEN)
		verbose(env, "w");
}

static struct bpf_func_state *func(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[reg->frameno];
}
static void print_verifier_state(struct bpf_verifier_env *env,
				 const struct bpf_func_state *state)
{
	const struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	if (state->frameno)
		verbose(env, " frame%d:", state->frameno);
	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		verbose(env, " R%d", i);
		print_liveness(env, reg->live);
		verbose(env, "=%s", reg_type_str[t]);
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* reg->off should be 0 for SCALAR_VALUE */
			verbose(env, "%lld", reg->var_off.value + reg->off);
			if (t == PTR_TO_STACK)
				verbose(env, ",call_%d", func(env, reg)->callsite);
		} else {
			verbose(env, "(id=%d", reg->id);
			if (t != SCALAR_VALUE)
				verbose(env, ",off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose(env, ",r=%d", reg->range);
			else if (t == CONST_PTR_TO_MAP ||
				 t == PTR_TO_MAP_VALUE ||
				 t == PTR_TO_MAP_VALUE_OR_NULL)
				verbose(env, ",ks=%d,vs=%d",
					reg->map_ptr->key_size,
					reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose(env, ",imm=%llx", reg->var_off.value);
			} else {
				if (reg->smin_value != reg->umin_value &&
				    reg->smin_value != S64_MIN)
					verbose(env, ",smin_value=%lld",
						(long long)reg->smin_value);
				if (reg->smax_value != reg->umax_value &&
				    reg->smax_value != S64_MAX)
					verbose(env, ",smax_value=%lld",
						(long long)reg->smax_value);
				if (reg->umin_value != 0)
					verbose(env, ",umin_value=%llu",
						(unsigned long long)reg->umin_value);
				if (reg->umax_value != U64_MAX)
					verbose(env, ",umax_value=%llu",
						(unsigned long long)reg->umax_value);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose(env, ",var_off=%s", tn_buf);
				}
			}
			verbose(env, ")");
		}
	}
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].slot_type[0] == STACK_SPILL) {
			verbose(env, " fp%d",
				(-i - 1) * BPF_REG_SIZE);
			print_liveness(env, state->stack[i].spilled_ptr.live);
			verbose(env, "=%s",
				reg_type_str[state->stack[i].spilled_ptr.type]);
		}
		if (state->stack[i].slot_type[0] == STACK_ZERO)
			verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
	}
	verbose(env, "\n");
}
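/* Illustrative only (not part of the original source): for a program that has
 * only its context pointer and frame pointer set up, the line printed by
 * print_verifier_state() might look roughly like
 *    R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
 * i.e. R1 is PTR_TO_CTX and R10 is the frame pointer of the main function
 * (callsite -1); which extra bound fields appear depends on the register state.
 */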
static int copy_stack_state(struct bpf_func_state *dst,
			    const struct bpf_func_state *src)
{
	if (!src->stack)
		return 0;
	if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
		/* internal bug, make state invalid to reject the program */
		memset(dst, 0, sizeof(*dst));
		return -EFAULT;
	}
	memcpy(dst->stack, src->stack,
	       sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
	return 0;
}
/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 * make it consume minimal amount of memory. check_stack_write() accesses from
 * the program call into realloc_func_state() to grow the stack size.
 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 * which this function copies over. It points to previous bpf_verifier_state
 * which is never reallocated.
 */
static int realloc_func_state(struct bpf_func_state *state, int size,
			      bool copy_old)
{
	u32 old_size = state->allocated_stack;
	struct bpf_stack_state *new_stack;
	int slot = size / BPF_REG_SIZE;

	if (size <= old_size || !size) {
		if (copy_old)
			return 0;
		state->allocated_stack = slot * BPF_REG_SIZE;
		if (!size && old_size) {
			kfree(state->stack);
			state->stack = NULL;
		}
		return 0;
	}
	new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
				  GFP_KERNEL);
	if (!new_stack)
		return -ENOMEM;
	if (copy_old) {
		if (state->stack)
			memcpy(new_stack, state->stack,
			       sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
		memset(new_stack + old_size / BPF_REG_SIZE, 0,
		       sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
	}
	state->allocated_stack = slot * BPF_REG_SIZE;
	kfree(state->stack);
	state->stack = new_stack;
	return 0;
}
static void free_func_state(struct bpf_func_state *state)
{
	if (!state)
		return;
	kfree(state->stack);
	kfree(state);
}

static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	if (free_self)
		kfree(state);
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	int err;

	err = realloc_func_state(dst, src->allocated_stack, false);
	if (err)
		return err;
	memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
	return copy_stack_state(dst, src);
}
static int copy_verifier_state(struct bpf_verifier_state *dst_state,
			       const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	/* if dst has more stack frames than the src frame, free them */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	dst_state->curframe = src->curframe;
	dst_state->parent = src->parent;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		goto err;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
		verbose(env, "BPF program is too complex\n");
		goto err;
	}
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL));
	return NULL;
}
#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};

static void __mark_reg_not_init(struct bpf_reg_state *reg);

/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->id = 0;
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;
}

/* Mark the 'variable offset' part of a register as zero. This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

static void __mark_reg_const_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->off = 0;
	reg->type = SCALAR_VALUE;
}

static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_known_zero(regs + regno);
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	return reg->type == which &&
	       reg->id == 0 &&
	       reg->off == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}
/* Attempts to improve min/max values based on var_off information */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);
}
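/* Illustrative example (not part of the original source): a tnum stores known
 * bits in 'value' and unknown bits in 'mask'. If, say, var_off is
 * {.value = 0x10, .mask = 0x0f}, the register can only hold 0x10..0x1f, so the
 * code above raises umin_value to at least 0x10 (all known bits) and lowers
 * umax_value to at most 0x1f (known bits plus every unknown bit set).
 */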
/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->smin_value >= 0 || reg->smax_value < 0) {
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
		return;
	}
	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s64)reg->umax_value >= 0) {
		/* Positive.  We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->smin_value = reg->umin_value;
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
	} else if ((s64)reg->umin_value < 0) {
		/* Negative.  We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value;
	}
}

/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_intersect(reg->var_off,
				      tnum_range(reg->umin_value,
						 reg->umax_value));
}
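/* Illustrative example (not part of the original source): with
 * umin_value == 16 and umax_value == 31, tnum_range() yields
 * {.value = 0x10, .mask = 0x0f} (bit 4 known set, low four bits unknown);
 * intersecting that with the existing var_off can only mark additional bits
 * as known, never lose information.
 */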
/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(struct bpf_reg_state *reg)
{
	reg->type = SCALAR_VALUE;
	reg->id = 0;
	reg->off = 0;
	reg->var_off = tnum_unknown;
	reg->frameno = 0;
	__mark_reg_unbounded(reg);
}

static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_unknown(regs + regno);
}

static void __mark_reg_not_init(struct bpf_reg_state *reg)
{
	__mark_reg_unknown(reg);
	reg->type = NOT_INIT;
}

static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_not_init(regs + regno);
}
static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		mark_reg_not_init(env, regs, i);
		regs[i].live = REG_LIVE_NONE;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);
	regs[BPF_REG_FP].frameno = state->frameno;

	/* 1st arg to a function */
	regs[BPF_REG_1].type = PTR_TO_CTX;
	mark_reg_known_zero(env, regs, BPF_REG_1);
}

#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	state->frameno = frameno;
	state->subprogno = subprogno;
	init_reg_state(env, state);
}

enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};
static int cmp_subprogs(const void *a, const void *b)
{
	return *(int *)a - *(int *)b;
}

static int find_subprog(struct bpf_verifier_env *env, int off)
{
	u32 *p;

	p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
		    sizeof(env->subprog_starts[0]), cmp_subprogs);
	if (!p)
		return -ENOENT;
	return p - env->subprog_starts;
}

static int add_subprog(struct bpf_verifier_env *env, int off)
{
	int insn_cnt = env->prog->len;
	int ret;

	if (off >= insn_cnt || off < 0) {
		verbose(env, "call to invalid destination\n");
		return -EINVAL;
	}
	ret = find_subprog(env, off);
	if (ret >= 0)
		return 0;
	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
		verbose(env, "too many subprograms\n");
		return -E2BIG;
	}
	env->subprog_starts[env->subprog_cnt++] = off;
	sort(env->subprog_starts, env->subprog_cnt,
	     sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
	return 0;
}
static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* determine subprog starts. The end is one before the next starts */
	for (i = 0; i < insn_cnt; i++) {
		if (insn[i].code != (BPF_JMP | BPF_CALL))
			continue;
		if (insn[i].src_reg != BPF_PSEUDO_CALL)
			continue;
		if (!env->allow_ptr_leaks) {
			verbose(env, "function calls to other bpf functions are allowed for root only\n");
			return -EPERM;
		}
		if (bpf_prog_is_dev_bound(env->prog->aux)) {
			verbose(env, "function calls in offloaded programs are not supported yet\n");
			return -EINVAL;
		}
		ret = add_subprog(env, i + insn[i].imm + 1);
		if (ret < 0)
			return ret;
	}

	if (env->log.level > 1)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);

	/* now check that all jumps are within the same subprog */
	subprog_start = 0;
	if (env->subprog_cnt == cur_subprog)
		subprog_end = insn_cnt;
	else
		subprog_end = env->subprog_starts[cur_subprog++];
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		if (BPF_CLASS(code) != BPF_JMP)
			goto next;
		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
			goto next;
		off = i + insn[i].off + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			subprog_start = subprog_end;
			if (env->subprog_cnt == cur_subprog)
				subprog_end = insn_cnt;
			else
				subprog_end = env->subprog_starts[cur_subprog++];
		}
	}
	return 0;
}
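/* Illustrative sketch (not part of the original source): a minimal program
 * with one bpf-to-bpf call that check_subprogs() splits into two subprograms,
 * using the usual insn macros from <linux/filter.h>:
 *    0: BPF_MOV64_IMM(BPF_REG_1, 1),
 *    1: BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 0, 1), // target = 1 + 1 + 1 = 3
 *    2: BPF_EXIT_INSN(),                     // last insn of subprog 0
 *    3: BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 *    4: BPF_EXIT_INSN(),                     // last insn of subprog 1
 * add_subprog(env, 3) records insn 3 as a subprogram start, and the second
 * loop above then rejects any jump that would cross the 2/3 boundary.
 */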
static
struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
				       const struct bpf_verifier_state *state,
				       struct bpf_verifier_state *parent,
				       u32 regno)
{
	struct bpf_verifier_state *tmp = NULL;

	/* 'parent' could be a state of caller and
	 * 'state' could be a state of callee. In such case
	 * parent->curframe < state->curframe
	 * and it's ok for r1 - r5 registers
	 *
	 * 'parent' could be a callee's state after it bpf_exit-ed.
	 * In such case parent->curframe > state->curframe
	 * and it's ok for r0 only
	 */
	if (parent->curframe == state->curframe ||
	    (parent->curframe < state->curframe &&
	     regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
	    (parent->curframe > state->curframe &&
	     regno == BPF_REG_0))
		return parent;

	if (parent->curframe > state->curframe &&
	    regno >= BPF_REG_6) {
		/* for callee saved regs we have to skip the whole chain
		 * of states that belong to callee and mark as LIVE_READ
		 * the registers before the call
		 */
		tmp = parent;
		while (tmp && tmp->curframe != state->curframe) {
			tmp = tmp->parent;
		}
		if (!tmp)
			goto bug;
		parent = tmp;
	} else {
		goto bug;
	}
	return parent;
bug:
	verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
	verbose(env, "regno %d parent frame %d current frame %d\n",
		regno, parent->curframe, state->curframe);
	return NULL;
}
static int mark_reg_read(struct bpf_verifier_env *env,
			 const struct bpf_verifier_state *state,
			 struct bpf_verifier_state *parent,
			 u32 regno)
{
	bool writes = parent == state->parent; /* Observe write marks */

	if (regno == BPF_REG_FP)
		/* We don't need to worry about FP liveness because it's read-only */
		return 0;

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
		if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
			break;
		parent = skip_callee(env, state, parent, regno);
		if (!parent)
			return -EFAULT;
		/* ... then we depend on parent's value */
		parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
		state = parent;
		parent = state->parent;
		writes = true;
	}
	return 0;
}
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum reg_arg_type t)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs;

	if (regno >= MAX_BPF_REG) {
		verbose(env, "R%d is invalid\n", regno);
		return -EINVAL;
	}

	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (regs[regno].type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		return mark_reg_read(env, vstate, vstate->parent, regno);
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		regs[regno].live |= REG_LIVE_WRITTEN;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}
static bool is_spillable_regtype(enum bpf_reg_type type)
{
	switch (type) {
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MAP_VALUE_OR_NULL:
	case PTR_TO_STACK:
	case PTR_TO_CTX:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
	case CONST_PTR_TO_MAP:
		return true;
	default:
		return false;
	}
}

/* Does this register contain a constant zero? */
static bool register_is_null(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
}
  887. /* check_stack_read/write functions track spill/fill of registers,
  888. * stack boundary and alignment are checked in check_mem_access()
  889. */
  890. static int check_stack_write(struct bpf_verifier_env *env,
  891. struct bpf_func_state *state, /* func where register points to */
  892. int off, int size, int value_regno)
  893. {
  894. struct bpf_func_state *cur; /* state of the current function */
  895. int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
  896. enum bpf_reg_type type;
  897. err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
  898. true);
  899. if (err)
  900. return err;
  901. /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
  902. * so it's aligned access and [off, off + size) are within stack limits
  903. */
  904. if (!env->allow_ptr_leaks &&
  905. state->stack[spi].slot_type[0] == STACK_SPILL &&
  906. size != BPF_REG_SIZE) {
  907. verbose(env, "attempt to corrupt spilled pointer on stack\n");
  908. return -EACCES;
  909. }
  910. cur = env->cur_state->frame[env->cur_state->curframe];
  911. if (value_regno >= 0 &&
  912. is_spillable_regtype((type = cur->regs[value_regno].type))) {
  913. /* register containing pointer is being spilled into stack */
  914. if (size != BPF_REG_SIZE) {
  915. verbose(env, "invalid size of register spill\n");
  916. return -EACCES;
  917. }
  918. if (state != cur && type == PTR_TO_STACK) {
  919. verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
  920. return -EINVAL;
  921. }
  922. /* save register state */
  923. state->stack[spi].spilled_ptr = cur->regs[value_regno];
  924. state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
  925. for (i = 0; i < BPF_REG_SIZE; i++)
  926. state->stack[spi].slot_type[i] = STACK_SPILL;
  927. } else {
  928. u8 type = STACK_MISC;
  929. /* regular write of data into stack */
  930. state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
931. /* only mark the slot as written if all 8 bytes were written,
932. * otherwise read propagation may incorrectly stop too soon
933. * when stack slots are partially written.
934. * This heuristic means that read propagation will be
935. * conservative, since it will add REG_LIVE_READ marks
936. * to stack slots all the way to the first state when a program
937. * writes and then reads less than 8 bytes.
  938. */
  939. if (size == BPF_REG_SIZE)
  940. state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
  941. /* when we zero initialize stack slots mark them as such */
  942. if (value_regno >= 0 &&
  943. register_is_null(&cur->regs[value_regno]))
  944. type = STACK_ZERO;
  945. for (i = 0; i < size; i++)
  946. state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
  947. type;
  948. }
  949. return 0;
  950. }
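/* Example of the partial-write heuristic above (purely illustrative):
 * after
 *   r1 = 0;
 *   *(u32 *)(r10 - 8) = r1;
 * the four written bytes become STACK_ZERO but the slot is not marked
 * REG_LIVE_WRITTEN, so a later read of fp-8 keeps propagating
 * REG_LIVE_READ into parent states. A full write
 *   *(u64 *)(r10 - 8) = r1;
 * marks all eight bytes STACK_ZERO and sets REG_LIVE_WRITTEN, which
 * screens reads in this state from its parents. A store of an immediate,
 * e.g. *(u32 *)(r10 - 8) = 0, has value_regno == -1 and therefore marks
 * the bytes STACK_MISC rather than STACK_ZERO.
 */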
  951. /* registers of every function are unique and mark_reg_read() propagates
  952. * the liveness in the following cases:
  953. * - from callee into caller for R1 - R5 that were used as arguments
954. * - from caller into callee for R0 that is used as the result of the call
  955. * - from caller to the same caller skipping states of the callee for R6 - R9,
  956. * since R6 - R9 are callee saved by implicit function prologue and
  957. * caller's R6 != callee's R6, so when we propagate liveness up to
  958. * parent states we need to skip callee states for R6 - R9.
  959. *
  960. * stack slot marking is different, since stacks of caller and callee are
  961. * accessible in both (since caller can pass a pointer to caller's stack to
  962. * callee which can pass it to another function), hence mark_stack_slot_read()
963. * has to propagate the stack liveness to all parent states at the given frame number.
  964. * Consider code:
  965. * f1() {
  966. * ptr = fp - 8;
  967. * *ptr = ctx;
  968. * call f2 {
  969. * .. = *ptr;
  970. * }
  971. * .. = *ptr;
  972. * }
973. * The first *ptr is reading from f1's stack and mark_stack_slot_read() has
974. * to mark liveness at f1's frame and not f2's frame.
975. * The second *ptr is also reading from f1's stack and mark_stack_slot_read() has
976. * to propagate liveness to f2 states at f1's frame level and further into
977. * f1 states at f1's frame level until a write into that stack slot is seen.
  978. */
  979. static void mark_stack_slot_read(struct bpf_verifier_env *env,
  980. const struct bpf_verifier_state *state,
  981. struct bpf_verifier_state *parent,
  982. int slot, int frameno)
  983. {
  984. bool writes = parent == state->parent; /* Observe write marks */
  985. while (parent) {
  986. if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
987. /* since the REG_LIVE_WRITTEN mark is only set for a full 8-byte
988. * write, the read marks are conservative and the parent
989. * state may not even have the stack allocated. In that case
990. * end the propagation, since the loop has reached the beginning
991. * of the function
  992. */
  993. break;
  994. /* if read wasn't screened by an earlier write ... */
  995. if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
  996. break;
  997. /* ... then we depend on parent's value */
  998. parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
  999. state = parent;
  1000. parent = state->parent;
  1001. writes = true;
  1002. }
  1003. }
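/* Illustration of the walk above for a chain of states S0 -> S1 -> S2
 * (S0 oldest, S2 current), all at the same frame number: if S2 itself
 * carries REG_LIVE_WRITTEN for the slot, the read is screened right away
 * and nothing is marked. Otherwise S1 gets REG_LIVE_READ; the walk then
 * stops at S1 if S1 has REG_LIVE_WRITTEN for the slot, and also stops as
 * soon as a parent state does not even have that part of the stack
 * allocated. Otherwise the mark is propagated to S0 as well.
 */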
  1004. static int check_stack_read(struct bpf_verifier_env *env,
  1005. struct bpf_func_state *reg_state /* func where register points to */,
  1006. int off, int size, int value_regno)
  1007. {
  1008. struct bpf_verifier_state *vstate = env->cur_state;
  1009. struct bpf_func_state *state = vstate->frame[vstate->curframe];
  1010. int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
  1011. u8 *stype;
  1012. if (reg_state->allocated_stack <= slot) {
  1013. verbose(env, "invalid read from stack off %d+0 size %d\n",
  1014. off, size);
  1015. return -EACCES;
  1016. }
  1017. stype = reg_state->stack[spi].slot_type;
  1018. if (stype[0] == STACK_SPILL) {
  1019. if (size != BPF_REG_SIZE) {
  1020. verbose(env, "invalid size of register spill\n");
  1021. return -EACCES;
  1022. }
  1023. for (i = 1; i < BPF_REG_SIZE; i++) {
  1024. if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
  1025. verbose(env, "corrupted spill memory\n");
  1026. return -EACCES;
  1027. }
  1028. }
  1029. if (value_regno >= 0) {
  1030. /* restore register state from stack */
  1031. state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
  1032. /* mark reg as written since spilled pointer state likely
  1033. * has its liveness marks cleared by is_state_visited()
  1034. * which resets stack/reg liveness for state transitions
  1035. */
  1036. state->regs[value_regno].live |= REG_LIVE_WRITTEN;
  1037. }
  1038. mark_stack_slot_read(env, vstate, vstate->parent, spi,
  1039. reg_state->frameno);
  1040. return 0;
  1041. } else {
  1042. int zeros = 0;
  1043. for (i = 0; i < size; i++) {
  1044. if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
  1045. continue;
  1046. if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
  1047. zeros++;
  1048. continue;
  1049. }
  1050. verbose(env, "invalid read from stack off %d+%d size %d\n",
  1051. off, i, size);
  1052. return -EACCES;
  1053. }
  1054. mark_stack_slot_read(env, vstate, vstate->parent, spi,
  1055. reg_state->frameno);
  1056. if (value_regno >= 0) {
  1057. if (zeros == size) {
  1058. /* any size read into register is zero extended,
  1059. * so the whole register == const_zero
  1060. */
  1061. __mark_reg_const_zero(&state->regs[value_regno]);
  1062. } else {
  1063. /* have read misc data from the stack */
  1064. mark_reg_unknown(env, state->regs, value_regno);
  1065. }
  1066. state->regs[value_regno].live |= REG_LIVE_WRITTEN;
  1067. }
  1068. return 0;
  1069. }
  1070. }
  1071. /* check read/write into map element returned by bpf_map_lookup_elem() */
  1072. static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
  1073. int size, bool zero_size_allowed)
  1074. {
  1075. struct bpf_reg_state *regs = cur_regs(env);
  1076. struct bpf_map *map = regs[regno].map_ptr;
  1077. if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
  1078. off + size > map->value_size) {
  1079. verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
  1080. map->value_size, off, size);
  1081. return -EACCES;
  1082. }
  1083. return 0;
  1084. }
  1085. /* check read/write into a map element with possible variable offset */
  1086. static int check_map_access(struct bpf_verifier_env *env, u32 regno,
  1087. int off, int size, bool zero_size_allowed)
  1088. {
  1089. struct bpf_verifier_state *vstate = env->cur_state;
  1090. struct bpf_func_state *state = vstate->frame[vstate->curframe];
  1091. struct bpf_reg_state *reg = &state->regs[regno];
  1092. int err;
  1093. /* We may have adjusted the register to this map value, so we
  1094. * need to try adding each of min_value and max_value to off
  1095. * to make sure our theoretical access will be safe.
  1096. */
  1097. if (env->log.level)
  1098. print_verifier_state(env, state);
  1099. /* The minimum value is only important with signed
  1100. * comparisons where we can't assume the floor of a
1101. * value is 0. If we are using signed variables for our
1102. * indexes, we need to make sure that whatever we use
1103. * has a known floor within our range.
  1104. */
  1105. if (reg->smin_value < 0) {
  1106. verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
  1107. regno);
  1108. return -EACCES;
  1109. }
  1110. err = __check_map_access(env, regno, reg->smin_value + off, size,
  1111. zero_size_allowed);
  1112. if (err) {
  1113. verbose(env, "R%d min value is outside of the array range\n",
  1114. regno);
  1115. return err;
  1116. }
  1117. /* If we haven't set a max value then we need to bail since we can't be
  1118. * sure we won't do bad things.
  1119. * If reg->umax_value + off could overflow, treat that as unbounded too.
  1120. */
  1121. if (reg->umax_value >= BPF_MAX_VAR_OFF) {
  1122. verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
  1123. regno);
  1124. return -EACCES;
  1125. }
  1126. err = __check_map_access(env, regno, reg->umax_value + off, size,
  1127. zero_size_allowed);
  1128. if (err)
  1129. verbose(env, "R%d max value is outside of the array range\n",
  1130. regno);
  1131. return err;
  1132. }
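/* Worked example of the two probes above (numbers are only illustrative):
 * with map->value_size == 16 and a 4-byte access at value + idx, where
 * idx has smin_value == 0 and umax_value == 12, both probes pass
 * (0 + 4 <= 16 and 12 + 4 <= 16). If idx could be as large as 13 the
 * max-side probe fails (13 + 4 > 16) and the access is rejected, and a
 * negative smin_value is rejected before any probe is made.
 */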
  1133. #define MAX_PACKET_OFF 0xffff
  1134. static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
  1135. const struct bpf_call_arg_meta *meta,
  1136. enum bpf_access_type t)
  1137. {
  1138. switch (env->prog->type) {
  1139. case BPF_PROG_TYPE_LWT_IN:
  1140. case BPF_PROG_TYPE_LWT_OUT:
  1141. /* dst_input() and dst_output() can't write for now */
  1142. if (t == BPF_WRITE)
  1143. return false;
  1144. /* fallthrough */
  1145. case BPF_PROG_TYPE_SCHED_CLS:
  1146. case BPF_PROG_TYPE_SCHED_ACT:
  1147. case BPF_PROG_TYPE_XDP:
  1148. case BPF_PROG_TYPE_LWT_XMIT:
  1149. case BPF_PROG_TYPE_SK_SKB:
  1150. case BPF_PROG_TYPE_SK_MSG:
  1151. if (meta)
  1152. return meta->pkt_access;
  1153. env->seen_direct_write = true;
  1154. return true;
  1155. default:
  1156. return false;
  1157. }
  1158. }
  1159. static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
  1160. int off, int size, bool zero_size_allowed)
  1161. {
  1162. struct bpf_reg_state *regs = cur_regs(env);
  1163. struct bpf_reg_state *reg = &regs[regno];
  1164. if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
  1165. (u64)off + size > reg->range) {
  1166. verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
  1167. off, size, regno, reg->id, reg->off, reg->range);
  1168. return -EACCES;
  1169. }
  1170. return 0;
  1171. }
  1172. static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
  1173. int size, bool zero_size_allowed)
  1174. {
  1175. struct bpf_reg_state *regs = cur_regs(env);
  1176. struct bpf_reg_state *reg = &regs[regno];
  1177. int err;
  1178. /* We may have added a variable offset to the packet pointer; but any
  1179. * reg->range we have comes after that. We are only checking the fixed
  1180. * offset.
  1181. */
  1182. /* We don't allow negative numbers, because we aren't tracking enough
  1183. * detail to prove they're safe.
  1184. */
  1185. if (reg->smin_value < 0) {
  1186. verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
  1187. regno);
  1188. return -EACCES;
  1189. }
  1190. err = __check_packet_access(env, regno, off, size, zero_size_allowed);
  1191. if (err) {
  1192. verbose(env, "R%d offset is outside of the packet\n", regno);
  1193. return err;
  1194. }
  1195. return err;
  1196. }
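/* Example: after a bounds check such as
 *   if (data + 14 > data_end) goto drop;
 * the packet pointer's range is 14, so a 2-byte access at offset 12 is
 * fine (12 + 2 <= 14) while a 4-byte access at offset 12 is rejected
 * (12 + 4 > 14). As noted above, only the fixed offset is checked here;
 * reg->range was established after any variable offset was added.
 */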
  1197. /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
  1198. static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
  1199. enum bpf_access_type t, enum bpf_reg_type *reg_type)
  1200. {
  1201. struct bpf_insn_access_aux info = {
  1202. .reg_type = *reg_type,
  1203. };
  1204. if (env->ops->is_valid_access &&
  1205. env->ops->is_valid_access(off, size, t, env->prog, &info)) {
1206. /* A non-zero info.ctx_field_size indicates that this field is a
  1207. * candidate for later verifier transformation to load the whole
  1208. * field and then apply a mask when accessed with a narrower
  1209. * access than actual ctx access size. A zero info.ctx_field_size
  1210. * will only allow for whole field access and rejects any other
  1211. * type of narrower access.
  1212. */
  1213. *reg_type = info.reg_type;
  1214. env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
  1215. /* remember the offset of last byte accessed in ctx */
  1216. if (env->prog->aux->max_ctx_offset < off + size)
  1217. env->prog->aux->max_ctx_offset = off + size;
  1218. return 0;
  1219. }
  1220. verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
  1221. return -EACCES;
  1222. }
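/* Example of the narrow-access handling above: if a program type allows
 * a 1-byte load from a 4-byte context field (say a u32 field of
 * struct __sk_buff), its is_valid_access() callback reports
 * info.ctx_field_size == 4; the load is accepted here and later rewritten
 * to load the whole 4-byte field and mask out the unrequested bytes.
 * With ctx_field_size == 0 only a full-width load of that field would be
 * accepted.
 */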
  1223. static bool __is_pointer_value(bool allow_ptr_leaks,
  1224. const struct bpf_reg_state *reg)
  1225. {
  1226. if (allow_ptr_leaks)
  1227. return false;
  1228. return reg->type != SCALAR_VALUE;
  1229. }
  1230. static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
  1231. {
  1232. return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
  1233. }
  1234. static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
  1235. {
  1236. const struct bpf_reg_state *reg = cur_regs(env) + regno;
  1237. return reg->type == PTR_TO_CTX;
  1238. }
  1239. static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
  1240. {
  1241. const struct bpf_reg_state *reg = cur_regs(env) + regno;
  1242. return type_is_pkt_pointer(reg->type);
  1243. }
  1244. static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
  1245. const struct bpf_reg_state *reg,
  1246. int off, int size, bool strict)
  1247. {
  1248. struct tnum reg_off;
  1249. int ip_align;
  1250. /* Byte size accesses are always allowed. */
  1251. if (!strict || size == 1)
  1252. return 0;
  1253. /* For platforms that do not have a Kconfig enabling
  1254. * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
  1255. * NET_IP_ALIGN is universally set to '2'. And on platforms
  1256. * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
  1257. * to this code only in strict mode where we want to emulate
  1258. * the NET_IP_ALIGN==2 checking. Therefore use an
  1259. * unconditional IP align value of '2'.
  1260. */
  1261. ip_align = 2;
  1262. reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
  1263. if (!tnum_is_aligned(reg_off, size)) {
  1264. char tn_buf[48];
  1265. tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  1266. verbose(env,
  1267. "misaligned packet access off %d+%s+%d+%d size %d\n",
  1268. ip_align, tn_buf, reg->off, off, size);
  1269. return -EACCES;
  1270. }
  1271. return 0;
  1272. }
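/* Worked example of the check above, in strict mode with a constant
 * var_off and reg->off == 0: a 4-byte load at packet offset 12 computes
 * 2 + 0 + 12 = 14, which is not 4-byte aligned, so it is rejected, while
 * a 4-byte load at offset 26 (e.g. the IPv4 source address behind a
 * 14-byte Ethernet header) computes 2 + 0 + 26 = 28 and is allowed.
 */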
  1273. static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
  1274. const struct bpf_reg_state *reg,
  1275. const char *pointer_desc,
  1276. int off, int size, bool strict)
  1277. {
  1278. struct tnum reg_off;
  1279. /* Byte size accesses are always allowed. */
  1280. if (!strict || size == 1)
  1281. return 0;
  1282. reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
  1283. if (!tnum_is_aligned(reg_off, size)) {
  1284. char tn_buf[48];
  1285. tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  1286. verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
  1287. pointer_desc, tn_buf, reg->off, off, size);
  1288. return -EACCES;
  1289. }
  1290. return 0;
  1291. }
  1292. static int check_ptr_alignment(struct bpf_verifier_env *env,
  1293. const struct bpf_reg_state *reg, int off,
  1294. int size, bool strict_alignment_once)
  1295. {
  1296. bool strict = env->strict_alignment || strict_alignment_once;
  1297. const char *pointer_desc = "";
  1298. switch (reg->type) {
  1299. case PTR_TO_PACKET:
  1300. case PTR_TO_PACKET_META:
  1301. /* Special case, because of NET_IP_ALIGN. Given metadata sits
  1302. * right in front, treat it the very same way.
  1303. */
  1304. return check_pkt_ptr_alignment(env, reg, off, size, strict);
  1305. case PTR_TO_MAP_VALUE:
  1306. pointer_desc = "value ";
  1307. break;
  1308. case PTR_TO_CTX:
  1309. pointer_desc = "context ";
  1310. break;
  1311. case PTR_TO_STACK:
  1312. pointer_desc = "stack ";
  1313. /* The stack spill tracking logic in check_stack_write()
  1314. * and check_stack_read() relies on stack accesses being
  1315. * aligned.
  1316. */
  1317. strict = true;
  1318. break;
  1319. default:
  1320. break;
  1321. }
  1322. return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
  1323. strict);
  1324. }
  1325. static int update_stack_depth(struct bpf_verifier_env *env,
  1326. const struct bpf_func_state *func,
  1327. int off)
  1328. {
  1329. u16 stack = env->subprog_stack_depth[func->subprogno];
  1330. if (stack >= -off)
  1331. return 0;
  1332. /* update known max for given subprogram */
  1333. env->subprog_stack_depth[func->subprogno] = -off;
  1334. return 0;
  1335. }
1336. /* starting from the main bpf function, walk all instructions of the function
1337. * and recursively walk all callees that the given function can call.
  1338. * Ignore jump and exit insns.
  1339. * Since recursion is prevented by check_cfg() this algorithm
  1340. * only needs a local stack of MAX_CALL_FRAMES to remember callsites
  1341. */
  1342. static int check_max_stack_depth(struct bpf_verifier_env *env)
  1343. {
  1344. int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
  1345. struct bpf_insn *insn = env->prog->insnsi;
  1346. int insn_cnt = env->prog->len;
  1347. int ret_insn[MAX_CALL_FRAMES];
  1348. int ret_prog[MAX_CALL_FRAMES];
  1349. process_func:
1350. /* round up to 32 bytes, since this is the granularity
1351. * of the interpreter stack size
  1352. */
  1353. depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
  1354. if (depth > MAX_BPF_STACK) {
  1355. verbose(env, "combined stack size of %d calls is %d. Too large\n",
  1356. frame + 1, depth);
  1357. return -EACCES;
  1358. }
  1359. continue_func:
  1360. if (env->subprog_cnt == subprog)
  1361. subprog_end = insn_cnt;
  1362. else
  1363. subprog_end = env->subprog_starts[subprog];
  1364. for (; i < subprog_end; i++) {
  1365. if (insn[i].code != (BPF_JMP | BPF_CALL))
  1366. continue;
  1367. if (insn[i].src_reg != BPF_PSEUDO_CALL)
  1368. continue;
  1369. /* remember insn and function to return to */
  1370. ret_insn[frame] = i + 1;
  1371. ret_prog[frame] = subprog;
  1372. /* find the callee */
  1373. i = i + insn[i].imm + 1;
  1374. subprog = find_subprog(env, i);
  1375. if (subprog < 0) {
  1376. WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
  1377. i);
  1378. return -EFAULT;
  1379. }
  1380. subprog++;
  1381. frame++;
  1382. if (frame >= MAX_CALL_FRAMES) {
  1383. WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
  1384. return -EFAULT;
  1385. }
  1386. goto process_func;
  1387. }
  1388. /* end of for() loop means the last insn of the 'subprog'
  1389. * was reached. Doesn't matter whether it was JA or EXIT
  1390. */
  1391. if (frame == 0)
  1392. return 0;
  1393. depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
  1394. frame--;
  1395. i = ret_insn[frame];
  1396. subprog = ret_prog[frame];
  1397. goto continue_func;
  1398. }
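/* Example of the accounting above (sizes are only illustrative): if the
 * main function uses 256 bytes of stack, calls subprog A which uses 64,
 * and A calls subprog B which uses 224, the combined depth of that chain
 * is 256 + 64 + 224 = 544 > MAX_BPF_STACK (512) and the program is
 * rejected even though each function individually fits. A subprog that
 * uses only a few bytes still counts as 32 because of the round_up().
 */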
  1399. #ifndef CONFIG_BPF_JIT_ALWAYS_ON
  1400. static int get_callee_stack_depth(struct bpf_verifier_env *env,
  1401. const struct bpf_insn *insn, int idx)
  1402. {
  1403. int start = idx + insn->imm + 1, subprog;
  1404. subprog = find_subprog(env, start);
  1405. if (subprog < 0) {
  1406. WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
  1407. start);
  1408. return -EFAULT;
  1409. }
  1410. subprog++;
  1411. return env->subprog_stack_depth[subprog];
  1412. }
  1413. #endif
  1414. /* truncate register to smaller size (in bytes)
  1415. * must be called with size < BPF_REG_SIZE
  1416. */
  1417. static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
  1418. {
  1419. u64 mask;
  1420. /* clear high bits in bit representation */
  1421. reg->var_off = tnum_cast(reg->var_off, size);
  1422. /* fix arithmetic bounds */
  1423. mask = ((u64)1 << (size * 8)) - 1;
  1424. if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
  1425. reg->umin_value &= mask;
  1426. reg->umax_value &= mask;
  1427. } else {
  1428. reg->umin_value = 0;
  1429. reg->umax_value = mask;
  1430. }
  1431. reg->smin_value = reg->umin_value;
  1432. reg->smax_value = reg->umax_value;
  1433. }
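/* Example of the truncation above with size == 1 (mask == 0xff): a
 * register known to be in [0x120, 0x130] keeps tight bounds because the
 * high bits agree, so the range becomes [0x20, 0x30]. A register in
 * [0xf0, 0x110] crosses a 256-byte boundary, so the bounds collapse to
 * [0, 0xff]. In both cases the signed bounds are then set from the new
 * unsigned ones.
 */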
  1434. /* check whether memory at (regno + off) is accessible for t = (read | write)
1435. * if t==write, value_regno is the register whose value is stored into memory
1436. * if t==read, value_regno is the register that will receive the value from memory
  1437. * if t==write && value_regno==-1, some unknown value is stored into memory
  1438. * if t==read && value_regno==-1, don't care what we read from memory
  1439. */
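/* For example (illustrative only):
 *   r0 = *(u32 *)(r2 + 8)    maps to regno=2, off=8, BPF_W, BPF_READ,
 *                            value_regno=0
 *   *(u64 *)(r10 - 16) = r3  maps to regno=10, off=-16, BPF_DW, BPF_WRITE,
 *                            value_regno=3
 *   *(u32 *)(r2 + 0) = 42    is a BPF_ST with an immediate, so
 *                            value_regno=-1
 */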
  1440. static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
  1441. int off, int bpf_size, enum bpf_access_type t,
  1442. int value_regno, bool strict_alignment_once)
  1443. {
  1444. struct bpf_reg_state *regs = cur_regs(env);
  1445. struct bpf_reg_state *reg = regs + regno;
  1446. struct bpf_func_state *state;
  1447. int size, err = 0;
  1448. size = bpf_size_to_bytes(bpf_size);
  1449. if (size < 0)
  1450. return size;
  1451. /* alignment checks will add in reg->off themselves */
  1452. err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
  1453. if (err)
  1454. return err;
  1455. /* for access checks, reg->off is just part of off */
  1456. off += reg->off;
  1457. if (reg->type == PTR_TO_MAP_VALUE) {
  1458. if (t == BPF_WRITE && value_regno >= 0 &&
  1459. is_pointer_value(env, value_regno)) {
  1460. verbose(env, "R%d leaks addr into map\n", value_regno);
  1461. return -EACCES;
  1462. }
  1463. err = check_map_access(env, regno, off, size, false);
  1464. if (!err && t == BPF_READ && value_regno >= 0)
  1465. mark_reg_unknown(env, regs, value_regno);
  1466. } else if (reg->type == PTR_TO_CTX) {
  1467. enum bpf_reg_type reg_type = SCALAR_VALUE;
  1468. if (t == BPF_WRITE && value_regno >= 0 &&
  1469. is_pointer_value(env, value_regno)) {
  1470. verbose(env, "R%d leaks addr into ctx\n", value_regno);
  1471. return -EACCES;
  1472. }
  1473. /* ctx accesses must be at a fixed offset, so that we can
1474. * determine what type of data was returned.
  1475. */
  1476. if (reg->off) {
  1477. verbose(env,
  1478. "dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
  1479. regno, reg->off, off - reg->off);
  1480. return -EACCES;
  1481. }
  1482. if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
  1483. char tn_buf[48];
  1484. tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  1485. verbose(env,
  1486. "variable ctx access var_off=%s off=%d size=%d",
  1487. tn_buf, off, size);
  1488. return -EACCES;
  1489. }
  1490. err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
  1491. if (!err && t == BPF_READ && value_regno >= 0) {
  1492. /* ctx access returns either a scalar, or a
  1493. * PTR_TO_PACKET[_META,_END]. In the latter
  1494. * case, we know the offset is zero.
  1495. */
  1496. if (reg_type == SCALAR_VALUE)
  1497. mark_reg_unknown(env, regs, value_regno);
  1498. else
  1499. mark_reg_known_zero(env, regs,
  1500. value_regno);
  1501. regs[value_regno].id = 0;
  1502. regs[value_regno].off = 0;
  1503. regs[value_regno].range = 0;
  1504. regs[value_regno].type = reg_type;
  1505. }
  1506. } else if (reg->type == PTR_TO_STACK) {
  1507. /* stack accesses must be at a fixed offset, so that we can
1508. * determine what type of data was returned.
  1509. * See check_stack_read().
  1510. */
  1511. if (!tnum_is_const(reg->var_off)) {
  1512. char tn_buf[48];
  1513. tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  1514. verbose(env, "variable stack access var_off=%s off=%d size=%d",
  1515. tn_buf, off, size);
  1516. return -EACCES;
  1517. }
  1518. off += reg->var_off.value;
  1519. if (off >= 0 || off < -MAX_BPF_STACK) {
  1520. verbose(env, "invalid stack off=%d size=%d\n", off,
  1521. size);
  1522. return -EACCES;
  1523. }
  1524. state = func(env, reg);
  1525. err = update_stack_depth(env, state, off);
  1526. if (err)
  1527. return err;
  1528. if (t == BPF_WRITE)
  1529. err = check_stack_write(env, state, off, size,
  1530. value_regno);
  1531. else
  1532. err = check_stack_read(env, state, off, size,
  1533. value_regno);
  1534. } else if (reg_is_pkt_pointer(reg)) {
  1535. if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
  1536. verbose(env, "cannot write into packet\n");
  1537. return -EACCES;
  1538. }
  1539. if (t == BPF_WRITE && value_regno >= 0 &&
  1540. is_pointer_value(env, value_regno)) {
  1541. verbose(env, "R%d leaks addr into packet\n",
  1542. value_regno);
  1543. return -EACCES;
  1544. }
  1545. err = check_packet_access(env, regno, off, size, false);
  1546. if (!err && t == BPF_READ && value_regno >= 0)
  1547. mark_reg_unknown(env, regs, value_regno);
  1548. } else {
  1549. verbose(env, "R%d invalid mem access '%s'\n", regno,
  1550. reg_type_str[reg->type]);
  1551. return -EACCES;
  1552. }
  1553. if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
  1554. regs[value_regno].type == SCALAR_VALUE) {
  1555. /* b/h/w load zero-extends, mark upper bits as known 0 */
  1556. coerce_reg_to_size(&regs[value_regno], size);
  1557. }
  1558. return err;
  1559. }
  1560. static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
  1561. {
  1562. int err;
  1563. if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
  1564. insn->imm != 0) {
  1565. verbose(env, "BPF_XADD uses reserved fields\n");
  1566. return -EINVAL;
  1567. }
  1568. /* check src1 operand */
  1569. err = check_reg_arg(env, insn->src_reg, SRC_OP);
  1570. if (err)
  1571. return err;
  1572. /* check src2 operand */
  1573. err = check_reg_arg(env, insn->dst_reg, SRC_OP);
  1574. if (err)
  1575. return err;
  1576. if (is_pointer_value(env, insn->src_reg)) {
  1577. verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
  1578. return -EACCES;
  1579. }
  1580. if (is_ctx_reg(env, insn->dst_reg) ||
  1581. is_pkt_reg(env, insn->dst_reg)) {
  1582. verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
  1583. insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
  1584. "context" : "packet");
  1585. return -EACCES;
  1586. }
  1587. /* check whether atomic_add can read the memory */
  1588. err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
  1589. BPF_SIZE(insn->code), BPF_READ, -1, true);
  1590. if (err)
  1591. return err;
  1592. /* check whether atomic_add can write into the same memory */
  1593. return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
  1594. BPF_SIZE(insn->code), BPF_WRITE, -1, true);
  1595. }
1596. /* when register 'regno' is passed into a function that will read 'access_size'
1597. * bytes from that pointer, make sure that it's within the stack boundary
1598. * and that all elements of the stack are initialized.
  1599. * Unlike most pointer bounds-checking functions, this one doesn't take an
  1600. * 'off' argument, so it has to add in reg->off itself.
  1601. */
  1602. static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
  1603. int access_size, bool zero_size_allowed,
  1604. struct bpf_call_arg_meta *meta)
  1605. {
  1606. struct bpf_reg_state *reg = cur_regs(env) + regno;
  1607. struct bpf_func_state *state = func(env, reg);
  1608. int off, i, slot, spi;
  1609. if (reg->type != PTR_TO_STACK) {
  1610. /* Allow zero-byte read from NULL, regardless of pointer type */
  1611. if (zero_size_allowed && access_size == 0 &&
  1612. register_is_null(reg))
  1613. return 0;
  1614. verbose(env, "R%d type=%s expected=%s\n", regno,
  1615. reg_type_str[reg->type],
  1616. reg_type_str[PTR_TO_STACK]);
  1617. return -EACCES;
  1618. }
  1619. /* Only allow fixed-offset stack reads */
  1620. if (!tnum_is_const(reg->var_off)) {
  1621. char tn_buf[48];
  1622. tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  1623. verbose(env, "invalid variable stack read R%d var_off=%s\n",
  1624. regno, tn_buf);
  1625. return -EACCES;
  1626. }
  1627. off = reg->off + reg->var_off.value;
  1628. if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
  1629. access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
  1630. verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
  1631. regno, off, access_size);
  1632. return -EACCES;
  1633. }
  1634. if (meta && meta->raw_mode) {
  1635. meta->access_size = access_size;
  1636. meta->regno = regno;
  1637. return 0;
  1638. }
  1639. for (i = 0; i < access_size; i++) {
  1640. u8 *stype;
  1641. slot = -(off + i) - 1;
  1642. spi = slot / BPF_REG_SIZE;
  1643. if (state->allocated_stack <= slot)
  1644. goto err;
  1645. stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
  1646. if (*stype == STACK_MISC)
  1647. goto mark;
  1648. if (*stype == STACK_ZERO) {
  1649. /* helper can write anything into the stack */
  1650. *stype = STACK_MISC;
  1651. goto mark;
  1652. }
  1653. err:
  1654. verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
  1655. off, i, access_size);
  1656. return -EACCES;
  1657. mark:
  1658. /* reading any byte out of 8-byte 'spill_slot' will cause
  1659. * the whole slot to be marked as 'read'
  1660. */
  1661. mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
  1662. spi, state->frameno);
  1663. }
  1664. return update_stack_depth(env, state, off);
  1665. }
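/* Typical path into the check above (illustrative): a map lookup with an
 * 8-byte key reaches here with access_size == key_size:
 *   r1 = map;
 *   r3 = 0;
 *   *(u64 *)(r10 - 8) = r3;
 *   r2 = r10;
 *   r2 += -8;
 *   call bpf_map_lookup_elem;
 * The store to fp-8 leaves the slot bytes STACK_ZERO, so the loop above
 * accepts them (and downgrades them to STACK_MISC). Without that store
 * the bytes are neither STACK_MISC nor STACK_ZERO and the program is
 * rejected with "invalid indirect read from stack".
 */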
  1666. static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
  1667. int access_size, bool zero_size_allowed,
  1668. struct bpf_call_arg_meta *meta)
  1669. {
  1670. struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
  1671. switch (reg->type) {
  1672. case PTR_TO_PACKET:
  1673. case PTR_TO_PACKET_META:
  1674. return check_packet_access(env, regno, reg->off, access_size,
  1675. zero_size_allowed);
  1676. case PTR_TO_MAP_VALUE:
  1677. return check_map_access(env, regno, reg->off, access_size,
  1678. zero_size_allowed);
  1679. default: /* scalar_value|ptr_to_stack or invalid ptr */
  1680. return check_stack_boundary(env, regno, access_size,
  1681. zero_size_allowed, meta);
  1682. }
  1683. }
  1684. static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
  1685. {
  1686. return type == ARG_PTR_TO_MEM ||
  1687. type == ARG_PTR_TO_MEM_OR_NULL ||
  1688. type == ARG_PTR_TO_UNINIT_MEM;
  1689. }
  1690. static bool arg_type_is_mem_size(enum bpf_arg_type type)
  1691. {
  1692. return type == ARG_CONST_SIZE ||
  1693. type == ARG_CONST_SIZE_OR_ZERO;
  1694. }
  1695. static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
  1696. enum bpf_arg_type arg_type,
  1697. struct bpf_call_arg_meta *meta)
  1698. {
  1699. struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
  1700. enum bpf_reg_type expected_type, type = reg->type;
  1701. int err = 0;
  1702. if (arg_type == ARG_DONTCARE)
  1703. return 0;
  1704. err = check_reg_arg(env, regno, SRC_OP);
  1705. if (err)
  1706. return err;
  1707. if (arg_type == ARG_ANYTHING) {
  1708. if (is_pointer_value(env, regno)) {
  1709. verbose(env, "R%d leaks addr into helper function\n",
  1710. regno);
  1711. return -EACCES;
  1712. }
  1713. return 0;
  1714. }
  1715. if (type_is_pkt_pointer(type) &&
  1716. !may_access_direct_pkt_data(env, meta, BPF_READ)) {
  1717. verbose(env, "helper access to the packet is not allowed\n");
  1718. return -EACCES;
  1719. }
  1720. if (arg_type == ARG_PTR_TO_MAP_KEY ||
  1721. arg_type == ARG_PTR_TO_MAP_VALUE) {
  1722. expected_type = PTR_TO_STACK;
  1723. if (!type_is_pkt_pointer(type) &&
  1724. type != expected_type)
  1725. goto err_type;
  1726. } else if (arg_type == ARG_CONST_SIZE ||
  1727. arg_type == ARG_CONST_SIZE_OR_ZERO) {
  1728. expected_type = SCALAR_VALUE;
  1729. if (type != expected_type)
  1730. goto err_type;
  1731. } else if (arg_type == ARG_CONST_MAP_PTR) {
  1732. expected_type = CONST_PTR_TO_MAP;
  1733. if (type != expected_type)
  1734. goto err_type;
  1735. } else if (arg_type == ARG_PTR_TO_CTX) {
  1736. expected_type = PTR_TO_CTX;
  1737. if (type != expected_type)
  1738. goto err_type;
  1739. } else if (arg_type_is_mem_ptr(arg_type)) {
  1740. expected_type = PTR_TO_STACK;
1741. /* One exception here. In case the function allows NULL to be
1742. * passed in as the argument, it's a SCALAR_VALUE type. Final test
  1743. * happens during stack boundary checking.
  1744. */
  1745. if (register_is_null(reg) &&
  1746. arg_type == ARG_PTR_TO_MEM_OR_NULL)
  1747. /* final test in check_stack_boundary() */;
  1748. else if (!type_is_pkt_pointer(type) &&
  1749. type != PTR_TO_MAP_VALUE &&
  1750. type != expected_type)
  1751. goto err_type;
  1752. meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
  1753. } else {
  1754. verbose(env, "unsupported arg_type %d\n", arg_type);
  1755. return -EFAULT;
  1756. }
  1757. if (arg_type == ARG_CONST_MAP_PTR) {
  1758. /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
  1759. meta->map_ptr = reg->map_ptr;
  1760. } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
  1761. /* bpf_map_xxx(..., map_ptr, ..., key) call:
  1762. * check that [key, key + map->key_size) are within
  1763. * stack limits and initialized
  1764. */
  1765. if (!meta->map_ptr) {
1766. /* in the function declaration map_ptr must come before
1767. * map_key, so that it's verified and known before
1768. * we have to check map_key here. Otherwise it means
1769. * that the kernel subsystem misconfigured the verifier
  1770. */
  1771. verbose(env, "invalid map_ptr to access map->key\n");
  1772. return -EACCES;
  1773. }
  1774. if (type_is_pkt_pointer(type))
  1775. err = check_packet_access(env, regno, reg->off,
  1776. meta->map_ptr->key_size,
  1777. false);
  1778. else
  1779. err = check_stack_boundary(env, regno,
  1780. meta->map_ptr->key_size,
  1781. false, NULL);
  1782. } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
  1783. /* bpf_map_xxx(..., map_ptr, ..., value) call:
  1784. * check [value, value + map->value_size) validity
  1785. */
  1786. if (!meta->map_ptr) {
  1787. /* kernel subsystem misconfigured verifier */
  1788. verbose(env, "invalid map_ptr to access map->value\n");
  1789. return -EACCES;
  1790. }
  1791. if (type_is_pkt_pointer(type))
  1792. err = check_packet_access(env, regno, reg->off,
  1793. meta->map_ptr->value_size,
  1794. false);
  1795. else
  1796. err = check_stack_boundary(env, regno,
  1797. meta->map_ptr->value_size,
  1798. false, NULL);
  1799. } else if (arg_type_is_mem_size(arg_type)) {
  1800. bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
  1801. /* The register is SCALAR_VALUE; the access check
  1802. * happens using its boundaries.
  1803. */
  1804. if (!tnum_is_const(reg->var_off))
  1805. /* For unprivileged variable accesses, disable raw
  1806. * mode so that the program is required to
  1807. * initialize all the memory that the helper could
  1808. * just partially fill up.
  1809. */
  1810. meta = NULL;
  1811. if (reg->smin_value < 0) {
  1812. verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
  1813. regno);
  1814. return -EACCES;
  1815. }
  1816. if (reg->umin_value == 0) {
  1817. err = check_helper_mem_access(env, regno - 1, 0,
  1818. zero_size_allowed,
  1819. meta);
  1820. if (err)
  1821. return err;
  1822. }
  1823. if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
  1824. verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
  1825. regno);
  1826. return -EACCES;
  1827. }
  1828. err = check_helper_mem_access(env, regno - 1,
  1829. reg->umax_value,
  1830. zero_size_allowed, meta);
  1831. }
  1832. return err;
  1833. err_type:
  1834. verbose(env, "R%d type=%s expected=%s\n", regno,
  1835. reg_type_str[type], reg_type_str[expected_type]);
  1836. return -EACCES;
  1837. }
  1838. static int check_map_func_compatibility(struct bpf_verifier_env *env,
  1839. struct bpf_map *map, int func_id)
  1840. {
  1841. if (!map)
  1842. return 0;
  1843. /* We need a two way check, first is from map perspective ... */
  1844. switch (map->map_type) {
  1845. case BPF_MAP_TYPE_PROG_ARRAY:
  1846. if (func_id != BPF_FUNC_tail_call)
  1847. goto error;
  1848. break;
  1849. case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
  1850. if (func_id != BPF_FUNC_perf_event_read &&
  1851. func_id != BPF_FUNC_perf_event_output &&
  1852. func_id != BPF_FUNC_perf_event_read_value)
  1853. goto error;
  1854. break;
  1855. case BPF_MAP_TYPE_STACK_TRACE:
  1856. if (func_id != BPF_FUNC_get_stackid)
  1857. goto error;
  1858. break;
  1859. case BPF_MAP_TYPE_CGROUP_ARRAY:
  1860. if (func_id != BPF_FUNC_skb_under_cgroup &&
  1861. func_id != BPF_FUNC_current_task_under_cgroup)
  1862. goto error;
  1863. break;
1864. /* devmap returns a pointer to a live net_device ifindex that we cannot
1865. * allow to be modified from the bpf side. So do not allow lookup of
1866. * elements for now.
  1867. */
  1868. case BPF_MAP_TYPE_DEVMAP:
  1869. if (func_id != BPF_FUNC_redirect_map)
  1870. goto error;
  1871. break;
  1872. /* Restrict bpf side of cpumap, open when use-cases appear */
  1873. case BPF_MAP_TYPE_CPUMAP:
  1874. if (func_id != BPF_FUNC_redirect_map)
  1875. goto error;
  1876. break;
  1877. case BPF_MAP_TYPE_ARRAY_OF_MAPS:
  1878. case BPF_MAP_TYPE_HASH_OF_MAPS:
  1879. if (func_id != BPF_FUNC_map_lookup_elem)
  1880. goto error;
  1881. break;
  1882. case BPF_MAP_TYPE_SOCKMAP:
  1883. if (func_id != BPF_FUNC_sk_redirect_map &&
  1884. func_id != BPF_FUNC_sock_map_update &&
  1885. func_id != BPF_FUNC_map_delete_elem &&
  1886. func_id != BPF_FUNC_msg_redirect_map)
  1887. goto error;
  1888. break;
  1889. default:
  1890. break;
  1891. }
  1892. /* ... and second from the function itself. */
  1893. switch (func_id) {
  1894. case BPF_FUNC_tail_call:
  1895. if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
  1896. goto error;
  1897. if (env->subprog_cnt) {
  1898. verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
  1899. return -EINVAL;
  1900. }
  1901. break;
  1902. case BPF_FUNC_perf_event_read:
  1903. case BPF_FUNC_perf_event_output:
  1904. case BPF_FUNC_perf_event_read_value:
  1905. if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
  1906. goto error;
  1907. break;
  1908. case BPF_FUNC_get_stackid:
  1909. if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
  1910. goto error;
  1911. break;
  1912. case BPF_FUNC_current_task_under_cgroup:
  1913. case BPF_FUNC_skb_under_cgroup:
  1914. if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
  1915. goto error;
  1916. break;
  1917. case BPF_FUNC_redirect_map:
  1918. if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
  1919. map->map_type != BPF_MAP_TYPE_CPUMAP)
  1920. goto error;
  1921. break;
  1922. case BPF_FUNC_sk_redirect_map:
  1923. case BPF_FUNC_msg_redirect_map:
  1924. if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
  1925. goto error;
  1926. break;
  1927. case BPF_FUNC_sock_map_update:
  1928. if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
  1929. goto error;
  1930. break;
  1931. default:
  1932. break;
  1933. }
  1934. return 0;
  1935. error:
  1936. verbose(env, "cannot pass map_type %d into func %s#%d\n",
  1937. map->map_type, func_id_name(func_id), func_id);
  1938. return -EINVAL;
  1939. }
  1940. static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
  1941. {
  1942. int count = 0;
  1943. if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
  1944. count++;
  1945. if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
  1946. count++;
  1947. if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
  1948. count++;
  1949. if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
  1950. count++;
  1951. if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
  1952. count++;
  1953. /* We only support one arg being in raw mode at the moment,
  1954. * which is sufficient for the helper functions we have
  1955. * right now.
  1956. */
  1957. return count <= 1;
  1958. }
  1959. static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
  1960. enum bpf_arg_type arg_next)
  1961. {
  1962. return (arg_type_is_mem_ptr(arg_curr) &&
  1963. !arg_type_is_mem_size(arg_next)) ||
  1964. (!arg_type_is_mem_ptr(arg_curr) &&
  1965. arg_type_is_mem_size(arg_next));
  1966. }
  1967. static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
  1968. {
  1969. /* bpf_xxx(..., buf, len) call will access 'len'
  1970. * bytes from memory 'buf'. Both arg types need
  1971. * to be paired, so make sure there's no buggy
  1972. * helper function specification.
  1973. */
  1974. if (arg_type_is_mem_size(fn->arg1_type) ||
  1975. arg_type_is_mem_ptr(fn->arg5_type) ||
  1976. check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
  1977. check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
  1978. check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
  1979. check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
  1980. return false;
  1981. return true;
  1982. }
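/* Example of the pairing rule above: a proto like bpf_probe_read()'s,
 * where arg1 is ARG_PTR_TO_UNINIT_MEM and arg2 is ARG_CONST_SIZE, is
 * accepted because the memory pointer is immediately followed by its
 * size. A (hypothetical) proto declaring ARG_PTR_TO_MEM as arg1 but
 * ARG_ANYTHING as arg2 would be rejected by check_args_pair_invalid().
 */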
  1983. static int check_func_proto(const struct bpf_func_proto *fn)
  1984. {
  1985. return check_raw_mode_ok(fn) &&
  1986. check_arg_pair_ok(fn) ? 0 : -EINVAL;
  1987. }
  1988. /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
  1989. * are now invalid, so turn them into unknown SCALAR_VALUE.
  1990. */
  1991. static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
  1992. struct bpf_func_state *state)
  1993. {
  1994. struct bpf_reg_state *regs = state->regs, *reg;
  1995. int i;
  1996. for (i = 0; i < MAX_BPF_REG; i++)
  1997. if (reg_is_pkt_pointer_any(&regs[i]))
  1998. mark_reg_unknown(env, regs, i);
  1999. for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
  2000. if (state->stack[i].slot_type[0] != STACK_SPILL)
  2001. continue;
  2002. reg = &state->stack[i].spilled_ptr;
  2003. if (reg_is_pkt_pointer_any(reg))
  2004. __mark_reg_unknown(reg);
  2005. }
  2006. }
  2007. static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
  2008. {
  2009. struct bpf_verifier_state *vstate = env->cur_state;
  2010. int i;
  2011. for (i = 0; i <= vstate->curframe; i++)
  2012. __clear_all_pkt_pointers(env, vstate->frame[i]);
  2013. }
  2014. static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
  2015. int *insn_idx)
  2016. {
  2017. struct bpf_verifier_state *state = env->cur_state;
  2018. struct bpf_func_state *caller, *callee;
  2019. int i, subprog, target_insn;
  2020. if (state->curframe + 1 >= MAX_CALL_FRAMES) {
  2021. verbose(env, "the call stack of %d frames is too deep\n",
  2022. state->curframe + 2);
  2023. return -E2BIG;
  2024. }
  2025. target_insn = *insn_idx + insn->imm;
  2026. subprog = find_subprog(env, target_insn + 1);
  2027. if (subprog < 0) {
  2028. verbose(env, "verifier bug. No program starts at insn %d\n",
  2029. target_insn + 1);
  2030. return -EFAULT;
  2031. }
  2032. caller = state->frame[state->curframe];
  2033. if (state->frame[state->curframe + 1]) {
  2034. verbose(env, "verifier bug. Frame %d already allocated\n",
  2035. state->curframe + 1);
  2036. return -EFAULT;
  2037. }
  2038. callee = kzalloc(sizeof(*callee), GFP_KERNEL);
  2039. if (!callee)
  2040. return -ENOMEM;
  2041. state->frame[state->curframe + 1] = callee;
  2042. /* callee cannot access r0, r6 - r9 for reading and has to write
  2043. * into its own stack before reading from it.
  2044. * callee can read/write into caller's stack
  2045. */
  2046. init_func_state(env, callee,
  2047. /* remember the callsite, it will be used by bpf_exit */
  2048. *insn_idx /* callsite */,
  2049. state->curframe + 1 /* frameno within this callchain */,
  2050. subprog + 1 /* subprog number within this prog */);
  2051. /* copy r1 - r5 args that callee can access */
  2052. for (i = BPF_REG_1; i <= BPF_REG_5; i++)
  2053. callee->regs[i] = caller->regs[i];
2054. /* after the call, registers r0 - r5 are scratched */
  2055. for (i = 0; i < CALLER_SAVED_REGS; i++) {
  2056. mark_reg_not_init(env, caller->regs, caller_saved[i]);
  2057. check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
  2058. }
  2059. /* only increment it after check_reg_arg() finished */
  2060. state->curframe++;
  2061. /* and go analyze first insn of the callee */
  2062. *insn_idx = target_insn;
  2063. if (env->log.level) {
  2064. verbose(env, "caller:\n");
  2065. print_verifier_state(env, caller);
  2066. verbose(env, "callee:\n");
  2067. print_verifier_state(env, callee);
  2068. }
  2069. return 0;
  2070. }
  2071. static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
  2072. {
  2073. struct bpf_verifier_state *state = env->cur_state;
  2074. struct bpf_func_state *caller, *callee;
  2075. struct bpf_reg_state *r0;
  2076. callee = state->frame[state->curframe];
  2077. r0 = &callee->regs[BPF_REG_0];
  2078. if (r0->type == PTR_TO_STACK) {
  2079. /* technically it's ok to return caller's stack pointer
  2080. * (or caller's caller's pointer) back to the caller,
2081. * since these pointers are valid. Only the current stack
2082. * pointer will become invalid as soon as the function exits,
  2083. * but let's be conservative
  2084. */
  2085. verbose(env, "cannot return stack pointer to the caller\n");
  2086. return -EINVAL;
  2087. }
  2088. state->curframe--;
  2089. caller = state->frame[state->curframe];
  2090. /* return to the caller whatever r0 had in the callee */
  2091. caller->regs[BPF_REG_0] = *r0;
  2092. *insn_idx = callee->callsite + 1;
  2093. if (env->log.level) {
  2094. verbose(env, "returning from callee:\n");
  2095. print_verifier_state(env, callee);
  2096. verbose(env, "to caller at %d:\n", *insn_idx);
  2097. print_verifier_state(env, caller);
  2098. }
  2099. /* clear everything in the callee */
  2100. free_func_state(callee);
  2101. state->frame[state->curframe + 1] = NULL;
  2102. return 0;
  2103. }
  2104. static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
  2105. {
  2106. const struct bpf_func_proto *fn = NULL;
  2107. struct bpf_reg_state *regs;
  2108. struct bpf_call_arg_meta meta;
  2109. bool changes_data;
  2110. int i, err;
  2111. /* find function prototype */
  2112. if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
  2113. verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
  2114. func_id);
  2115. return -EINVAL;
  2116. }
  2117. if (env->ops->get_func_proto)
  2118. fn = env->ops->get_func_proto(func_id, env->prog);
  2119. if (!fn) {
  2120. verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
  2121. func_id);
  2122. return -EINVAL;
  2123. }
  2124. /* eBPF programs must be GPL compatible to use GPL-ed functions */
  2125. if (!env->prog->gpl_compatible && fn->gpl_only) {
  2126. verbose(env, "cannot call GPL only function from proprietary program\n");
  2127. return -EINVAL;
  2128. }
  2129. /* With LD_ABS/IND some JITs save/restore skb from r1. */
  2130. changes_data = bpf_helper_changes_pkt_data(fn->func);
  2131. if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
  2132. verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
  2133. func_id_name(func_id), func_id);
  2134. return -EINVAL;
  2135. }
  2136. memset(&meta, 0, sizeof(meta));
  2137. meta.pkt_access = fn->pkt_access;
  2138. err = check_func_proto(fn);
  2139. if (err) {
  2140. verbose(env, "kernel subsystem misconfigured func %s#%d\n",
  2141. func_id_name(func_id), func_id);
  2142. return err;
  2143. }
  2144. /* check args */
  2145. err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
  2146. if (err)
  2147. return err;
  2148. err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
  2149. if (err)
  2150. return err;
  2151. if (func_id == BPF_FUNC_tail_call) {
  2152. if (meta.map_ptr == NULL) {
  2153. verbose(env, "verifier bug\n");
  2154. return -EINVAL;
  2155. }
  2156. env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
  2157. }
  2158. err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
  2159. if (err)
  2160. return err;
  2161. err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
  2162. if (err)
  2163. return err;
  2164. err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
  2165. if (err)
  2166. return err;
  2167. /* Mark slots with STACK_MISC in case of raw mode, stack offset
  2168. * is inferred from register state.
  2169. */
  2170. for (i = 0; i < meta.access_size; i++) {
  2171. err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
  2172. BPF_WRITE, -1, false);
  2173. if (err)
  2174. return err;
  2175. }
  2176. regs = cur_regs(env);
  2177. /* reset caller saved regs */
  2178. for (i = 0; i < CALLER_SAVED_REGS; i++) {
  2179. mark_reg_not_init(env, regs, caller_saved[i]);
  2180. check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
  2181. }
  2182. /* update return register (already marked as written above) */
  2183. if (fn->ret_type == RET_INTEGER) {
  2184. /* sets type to SCALAR_VALUE */
  2185. mark_reg_unknown(env, regs, BPF_REG_0);
  2186. } else if (fn->ret_type == RET_VOID) {
  2187. regs[BPF_REG_0].type = NOT_INIT;
  2188. } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
  2189. struct bpf_insn_aux_data *insn_aux;
  2190. regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
  2191. /* There is no offset yet applied, variable or fixed */
  2192. mark_reg_known_zero(env, regs, BPF_REG_0);
  2193. regs[BPF_REG_0].off = 0;
  2194. /* remember map_ptr, so that check_map_access()
  2195. * can check 'value_size' boundary of memory access
  2196. * to map element returned from bpf_map_lookup_elem()
  2197. */
  2198. if (meta.map_ptr == NULL) {
  2199. verbose(env,
  2200. "kernel subsystem misconfigured verifier\n");
  2201. return -EINVAL;
  2202. }
  2203. regs[BPF_REG_0].map_ptr = meta.map_ptr;
  2204. regs[BPF_REG_0].id = ++env->id_gen;
  2205. insn_aux = &env->insn_aux_data[insn_idx];
  2206. if (!insn_aux->map_ptr)
  2207. insn_aux->map_ptr = meta.map_ptr;
  2208. else if (insn_aux->map_ptr != meta.map_ptr)
  2209. insn_aux->map_ptr = BPF_MAP_PTR_POISON;
  2210. } else {
  2211. verbose(env, "unknown return type %d of func %s#%d\n",
  2212. fn->ret_type, func_id_name(func_id), func_id);
  2213. return -EINVAL;
  2214. }
  2215. err = check_map_func_compatibility(env, meta.map_ptr, func_id);
  2216. if (err)
  2217. return err;
  2218. if (changes_data)
  2219. clear_all_pkt_pointers(env);
  2220. return 0;
  2221. }
  2222. static bool signed_add_overflows(s64 a, s64 b)
  2223. {
  2224. /* Do the add in u64, where overflow is well-defined */
  2225. s64 res = (s64)((u64)a + (u64)b);
  2226. if (b < 0)
  2227. return res > a;
  2228. return res < a;
  2229. }
  2230. static bool signed_sub_overflows(s64 a, s64 b)
  2231. {
  2232. /* Do the sub in u64, where overflow is well-defined */
  2233. s64 res = (s64)((u64)a - (u64)b);
  2234. if (b < 0)
  2235. return res < a;
  2236. return res > a;
  2237. }
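/* Worked examples for the two helpers above:
 *   signed_add_overflows(S64_MAX, 1): the u64 add wraps to S64_MIN;
 *     b > 0 and res < a, so overflow is reported.
 *   signed_add_overflows(-1, 1): res == 0 and 0 < -1 is false, so no
 *     overflow is reported.
 *   signed_sub_overflows(S64_MIN, 1): the u64 sub wraps to S64_MAX;
 *     b >= 0 and res > a, so overflow is reported.
 */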
  2238. static bool check_reg_sane_offset(struct bpf_verifier_env *env,
  2239. const struct bpf_reg_state *reg,
  2240. enum bpf_reg_type type)
  2241. {
  2242. bool known = tnum_is_const(reg->var_off);
  2243. s64 val = reg->var_off.value;
  2244. s64 smin = reg->smin_value;
  2245. if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
  2246. verbose(env, "math between %s pointer and %lld is not allowed\n",
  2247. reg_type_str[type], val);
  2248. return false;
  2249. }
  2250. if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
  2251. verbose(env, "%s pointer offset %d is not allowed\n",
  2252. reg_type_str[type], reg->off);
  2253. return false;
  2254. }
  2255. if (smin == S64_MIN) {
  2256. verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
  2257. reg_type_str[type]);
  2258. return false;
  2259. }
  2260. if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
  2261. verbose(env, "value %lld makes %s pointer be out of bounds\n",
  2262. smin, reg_type_str[type]);
  2263. return false;
  2264. }
  2265. return true;
  2266. }
  2267. /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
  2268. * Caller should also handle BPF_MOV case separately.
  2269. * If we return -EACCES, caller may want to try again treating pointer as a
  2270. * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
  2271. */
  2272. static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
  2273. struct bpf_insn *insn,
  2274. const struct bpf_reg_state *ptr_reg,
  2275. const struct bpf_reg_state *off_reg)
  2276. {
  2277. struct bpf_verifier_state *vstate = env->cur_state;
  2278. struct bpf_func_state *state = vstate->frame[vstate->curframe];
  2279. struct bpf_reg_state *regs = state->regs, *dst_reg;
  2280. bool known = tnum_is_const(off_reg->var_off);
  2281. s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
  2282. smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
  2283. u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
  2284. umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
  2285. u8 opcode = BPF_OP(insn->code);
  2286. u32 dst = insn->dst_reg;
  2287. dst_reg = &regs[dst];
  2288. if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
  2289. smin_val > smax_val || umin_val > umax_val) {
  2290. /* Taint dst register if offset had invalid bounds derived from
  2291. * e.g. dead branches.
  2292. */
  2293. __mark_reg_unknown(dst_reg);
  2294. return 0;
  2295. }
  2296. if (BPF_CLASS(insn->code) != BPF_ALU64) {
  2297. /* 32-bit ALU ops on pointers produce (meaningless) scalars */
  2298. verbose(env,
  2299. "R%d 32-bit pointer arithmetic prohibited\n",
  2300. dst);
  2301. return -EACCES;
  2302. }
  2303. if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
  2304. verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
  2305. dst);
  2306. return -EACCES;
  2307. }
  2308. if (ptr_reg->type == CONST_PTR_TO_MAP) {
  2309. verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
  2310. dst);
  2311. return -EACCES;
  2312. }
  2313. if (ptr_reg->type == PTR_TO_PACKET_END) {
  2314. verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
  2315. dst);
  2316. return -EACCES;
  2317. }
  2318. /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
  2319. * The id may be overwritten later if we create a new variable offset.
  2320. */
  2321. dst_reg->type = ptr_reg->type;
  2322. dst_reg->id = ptr_reg->id;
  2323. if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
  2324. !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
  2325. return -EINVAL;
  2326. switch (opcode) {
  2327. case BPF_ADD:
  2328. /* We can take a fixed offset as long as it doesn't overflow
  2329. * the s32 'off' field
  2330. */
  2331. if (known && (ptr_reg->off + smin_val ==
  2332. (s64)(s32)(ptr_reg->off + smin_val))) {
  2333. /* pointer += K. Accumulate it into fixed offset */
  2334. dst_reg->smin_value = smin_ptr;
  2335. dst_reg->smax_value = smax_ptr;
  2336. dst_reg->umin_value = umin_ptr;
  2337. dst_reg->umax_value = umax_ptr;
  2338. dst_reg->var_off = ptr_reg->var_off;
  2339. dst_reg->off = ptr_reg->off + smin_val;
  2340. dst_reg->range = ptr_reg->range;
  2341. break;
  2342. }
  2343. /* A new variable offset is created. Note that off_reg->off
  2344. * == 0, since it's a scalar.
  2345. * dst_reg gets the pointer type and since some positive
  2346. * integer value was added to the pointer, give it a new 'id'
  2347. * if it's a PTR_TO_PACKET.
2348. * This creates a new 'base' pointer, off_reg (variable) gets
  2349. * added into the variable offset, and we copy the fixed offset
  2350. * from ptr_reg.
  2351. */
  2352. if (signed_add_overflows(smin_ptr, smin_val) ||
  2353. signed_add_overflows(smax_ptr, smax_val)) {
  2354. dst_reg->smin_value = S64_MIN;
  2355. dst_reg->smax_value = S64_MAX;
  2356. } else {
  2357. dst_reg->smin_value = smin_ptr + smin_val;
  2358. dst_reg->smax_value = smax_ptr + smax_val;
  2359. }
  2360. if (umin_ptr + umin_val < umin_ptr ||
  2361. umax_ptr + umax_val < umax_ptr) {
  2362. dst_reg->umin_value = 0;
  2363. dst_reg->umax_value = U64_MAX;
  2364. } else {
  2365. dst_reg->umin_value = umin_ptr + umin_val;
  2366. dst_reg->umax_value = umax_ptr + umax_val;
  2367. }
  2368. dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
  2369. dst_reg->off = ptr_reg->off;
  2370. if (reg_is_pkt_pointer(ptr_reg)) {
  2371. dst_reg->id = ++env->id_gen;
  2372. /* something was added to pkt_ptr, set range to zero */
  2373. dst_reg->range = 0;
  2374. }
  2375. break;
  2376. case BPF_SUB:
  2377. if (dst_reg == off_reg) {
  2378. /* scalar -= pointer. Creates an unknown scalar */
  2379. verbose(env, "R%d tried to subtract pointer from scalar\n",
  2380. dst);
  2381. return -EACCES;
  2382. }
  2383. /* We don't allow subtraction from FP, because (according to
2384. * test_verifier.c test "invalid fp arithmetic"), JITs might not
  2385. * be able to deal with it.
  2386. */
  2387. if (ptr_reg->type == PTR_TO_STACK) {
  2388. verbose(env, "R%d subtraction from stack pointer prohibited\n",
  2389. dst);
  2390. return -EACCES;
  2391. }
  2392. if (known && (ptr_reg->off - smin_val ==
  2393. (s64)(s32)(ptr_reg->off - smin_val))) {
  2394. /* pointer -= K. Subtract it from fixed offset */
  2395. dst_reg->smin_value = smin_ptr;
  2396. dst_reg->smax_value = smax_ptr;
  2397. dst_reg->umin_value = umin_ptr;
  2398. dst_reg->umax_value = umax_ptr;
  2399. dst_reg->var_off = ptr_reg->var_off;
  2400. dst_reg->id = ptr_reg->id;
  2401. dst_reg->off = ptr_reg->off - smin_val;
  2402. dst_reg->range = ptr_reg->range;
  2403. break;
  2404. }
  2405. /* A new variable offset is created. If the subtrahend is known
  2406. * nonnegative, then any reg->range we had before is still good.
  2407. */
  2408. if (signed_sub_overflows(smin_ptr, smax_val) ||
  2409. signed_sub_overflows(smax_ptr, smin_val)) {
  2410. /* Overflow possible, we know nothing */
  2411. dst_reg->smin_value = S64_MIN;
  2412. dst_reg->smax_value = S64_MAX;
  2413. } else {
  2414. dst_reg->smin_value = smin_ptr - smax_val;
  2415. dst_reg->smax_value = smax_ptr - smin_val;
  2416. }
  2417. if (umin_ptr < umax_val) {
  2418. /* Overflow possible, we know nothing */
  2419. dst_reg->umin_value = 0;
  2420. dst_reg->umax_value = U64_MAX;
  2421. } else {
  2422. /* Cannot overflow (as long as bounds are consistent) */
  2423. dst_reg->umin_value = umin_ptr - umax_val;
  2424. dst_reg->umax_value = umax_ptr - umin_val;
  2425. }
  2426. dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
  2427. dst_reg->off = ptr_reg->off;
  2428. if (reg_is_pkt_pointer(ptr_reg)) {
  2429. dst_reg->id = ++env->id_gen;
2430. /* something was subtracted from pkt_ptr; if the subtrahend might be negative, the pointer may have advanced, so drop the range */
  2431. if (smin_val < 0)
  2432. dst_reg->range = 0;
  2433. }
  2434. break;
  2435. case BPF_AND:
  2436. case BPF_OR:
  2437. case BPF_XOR:
  2438. /* bitwise ops on pointers are troublesome, prohibit. */
  2439. verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
  2440. dst, bpf_alu_string[opcode >> 4]);
  2441. return -EACCES;
  2442. default:
  2443. /* other operators (e.g. MUL,LSH) produce non-pointer results */
  2444. verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
  2445. dst, bpf_alu_string[opcode >> 4]);
  2446. return -EACCES;
  2447. }
  2448. if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
  2449. return -EINVAL;
  2450. __update_reg_bounds(dst_reg);
  2451. __reg_deduce_bounds(dst_reg);
  2452. __reg_bound_offset(dst_reg);
  2453. return 0;
  2454. }
  2455. /* WARNING: This function does calculations on 64-bit values, but the actual
  2456. * execution may occur on 32-bit values. Therefore, things like bitshifts
  2457. * need extra checks in the 32-bit case.
  2458. */
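/* Illustrative example: a 32-bit BPF_LSH with a shift amount of 40 is
* undefined even though the same shift would be fine for the 64-bit op;
* this is why the shift cases below compare umax_val against
* insn_bitness (32 or 64) rather than a fixed 64.
*/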
  2459. static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
  2460. struct bpf_insn *insn,
  2461. struct bpf_reg_state *dst_reg,
  2462. struct bpf_reg_state src_reg)
  2463. {
  2464. struct bpf_reg_state *regs = cur_regs(env);
  2465. u8 opcode = BPF_OP(insn->code);
  2466. bool src_known, dst_known;
  2467. s64 smin_val, smax_val;
  2468. u64 umin_val, umax_val;
  2469. u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
  2470. smin_val = src_reg.smin_value;
  2471. smax_val = src_reg.smax_value;
  2472. umin_val = src_reg.umin_value;
  2473. umax_val = src_reg.umax_value;
  2474. src_known = tnum_is_const(src_reg.var_off);
  2475. dst_known = tnum_is_const(dst_reg->var_off);
  2476. if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
  2477. smin_val > smax_val || umin_val > umax_val) {
  2478. /* Taint dst register if offset had invalid bounds derived from
  2479. * e.g. dead branches.
  2480. */
  2481. __mark_reg_unknown(dst_reg);
  2482. return 0;
  2483. }
  2484. if (!src_known &&
  2485. opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
  2486. __mark_reg_unknown(dst_reg);
  2487. return 0;
  2488. }
  2489. switch (opcode) {
  2490. case BPF_ADD:
  2491. if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
  2492. signed_add_overflows(dst_reg->smax_value, smax_val)) {
  2493. dst_reg->smin_value = S64_MIN;
  2494. dst_reg->smax_value = S64_MAX;
  2495. } else {
  2496. dst_reg->smin_value += smin_val;
  2497. dst_reg->smax_value += smax_val;
  2498. }
  2499. if (dst_reg->umin_value + umin_val < umin_val ||
  2500. dst_reg->umax_value + umax_val < umax_val) {
  2501. dst_reg->umin_value = 0;
  2502. dst_reg->umax_value = U64_MAX;
  2503. } else {
  2504. dst_reg->umin_value += umin_val;
  2505. dst_reg->umax_value += umax_val;
  2506. }
  2507. dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
  2508. break;
  2509. case BPF_SUB:
  2510. if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
  2511. signed_sub_overflows(dst_reg->smax_value, smin_val)) {
  2512. /* Overflow possible, we know nothing */
  2513. dst_reg->smin_value = S64_MIN;
  2514. dst_reg->smax_value = S64_MAX;
  2515. } else {
  2516. dst_reg->smin_value -= smax_val;
  2517. dst_reg->smax_value -= smin_val;
  2518. }
  2519. if (dst_reg->umin_value < umax_val) {
  2520. /* Overflow possible, we know nothing */
  2521. dst_reg->umin_value = 0;
  2522. dst_reg->umax_value = U64_MAX;
  2523. } else {
  2524. /* Cannot overflow (as long as bounds are consistent) */
  2525. dst_reg->umin_value -= umax_val;
  2526. dst_reg->umax_value -= umin_val;
  2527. }
  2528. dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
  2529. break;
  2530. case BPF_MUL:
  2531. dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
  2532. if (smin_val < 0 || dst_reg->smin_value < 0) {
  2533. /* Ain't nobody got time to multiply that sign */
  2534. __mark_reg_unbounded(dst_reg);
  2535. __update_reg_bounds(dst_reg);
  2536. break;
  2537. }
  2538. /* Both values are positive, so we can work with unsigned and
  2539. * copy the result to signed (unless it exceeds S64_MAX).
  2540. */
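/* Worked example (hypothetical values): dst in [2, 10] multiplied by a
* constant 3 gives umin = 2 * 3 = 6 and umax = 10 * 3 = 30; since 30
* does not exceed S64_MAX, the signed bounds are simply copied from the
* unsigned ones further below.
*/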
  2541. if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
  2542. /* Potential overflow, we know nothing */
  2543. __mark_reg_unbounded(dst_reg);
  2544. /* (except what we can learn from the var_off) */
  2545. __update_reg_bounds(dst_reg);
  2546. break;
  2547. }
  2548. dst_reg->umin_value *= umin_val;
  2549. dst_reg->umax_value *= umax_val;
  2550. if (dst_reg->umax_value > S64_MAX) {
  2551. /* Overflow possible, we know nothing */
  2552. dst_reg->smin_value = S64_MIN;
  2553. dst_reg->smax_value = S64_MAX;
  2554. } else {
  2555. dst_reg->smin_value = dst_reg->umin_value;
  2556. dst_reg->smax_value = dst_reg->umax_value;
  2557. }
  2558. break;
  2559. case BPF_AND:
  2560. if (src_known && dst_known) {
  2561. __mark_reg_known(dst_reg, dst_reg->var_off.value &
  2562. src_reg.var_off.value);
  2563. break;
  2564. }
  2565. /* We get our minimum from the var_off, since that's inherently
  2566. * bitwise. Our maximum is the minimum of the operands' maxima.
  2567. */
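/* Worked example (hypothetical values): dst in [0, 255] with var_off
* (value 0, mask 0xff), ANDed with a known 0x0f, yields var_off
* (value 0, mask 0x0f), so umin = var_off.value = 0 and
* umax = min(255, 0x0f) = 15: the result is known to lie in [0, 15].
*/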
  2568. dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
  2569. dst_reg->umin_value = dst_reg->var_off.value;
  2570. dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
  2571. if (dst_reg->smin_value < 0 || smin_val < 0) {
  2572. /* Lose signed bounds when ANDing negative numbers,
  2573. * ain't nobody got time for that.
  2574. */
  2575. dst_reg->smin_value = S64_MIN;
  2576. dst_reg->smax_value = S64_MAX;
  2577. } else {
  2578. /* ANDing two positives gives a positive, so safe to
  2579. * cast result into s64.
  2580. */
  2581. dst_reg->smin_value = dst_reg->umin_value;
  2582. dst_reg->smax_value = dst_reg->umax_value;
  2583. }
  2584. /* We may learn something more from the var_off */
  2585. __update_reg_bounds(dst_reg);
  2586. break;
  2587. case BPF_OR:
  2588. if (src_known && dst_known) {
  2589. __mark_reg_known(dst_reg, dst_reg->var_off.value |
  2590. src_reg.var_off.value);
  2591. break;
  2592. }
  2593. /* We get our maximum from the var_off, and our minimum is the
  2594. * maximum of the operands' minima
  2595. */
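/* Worked example (hypothetical values): dst in [0, 7] with var_off
* (value 0, mask 0x7), ORed with a known 0x8, yields var_off
* (value 0x8, mask 0x7), so umin = max(0, 8) = 8 and
* umax = 0x8 | 0x7 = 15: the result is known to lie in [8, 15].
*/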
  2596. dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
  2597. dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
  2598. dst_reg->umax_value = dst_reg->var_off.value |
  2599. dst_reg->var_off.mask;
  2600. if (dst_reg->smin_value < 0 || smin_val < 0) {
  2601. /* Lose signed bounds when ORing negative numbers,
  2602. * ain't nobody got time for that.
  2603. */
  2604. dst_reg->smin_value = S64_MIN;
  2605. dst_reg->smax_value = S64_MAX;
  2606. } else {
  2607. /* ORing two positives gives a positive, so safe to
  2608. * cast result into s64.
  2609. */
  2610. dst_reg->smin_value = dst_reg->umin_value;
  2611. dst_reg->smax_value = dst_reg->umax_value;
  2612. }
  2613. /* We may learn something more from the var_off */
  2614. __update_reg_bounds(dst_reg);
  2615. break;
  2616. case BPF_LSH:
  2617. if (umax_val >= insn_bitness) {
  2618. /* Shifts greater than 31 or 63 are undefined.
  2619. * This includes shifts by a negative number.
  2620. */
  2621. mark_reg_unknown(env, regs, insn->dst_reg);
  2622. break;
  2623. }
  2624. /* We lose all sign bit information (except what we can pick
  2625. * up from var_off)
  2626. */
  2627. dst_reg->smin_value = S64_MIN;
  2628. dst_reg->smax_value = S64_MAX;
  2629. /* If we might shift our top bit out, then we know nothing */
  2630. if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
  2631. dst_reg->umin_value = 0;
  2632. dst_reg->umax_value = U64_MAX;
  2633. } else {
  2634. dst_reg->umin_value <<= umin_val;
  2635. dst_reg->umax_value <<= umax_val;
  2636. }
  2637. if (src_known)
  2638. dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
  2639. else
  2640. dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
  2641. /* We may learn something more from the var_off */
  2642. __update_reg_bounds(dst_reg);
  2643. break;
  2644. case BPF_RSH:
  2645. if (umax_val >= insn_bitness) {
  2646. /* Shifts greater than 31 or 63 are undefined.
  2647. * This includes shifts by a negative number.
  2648. */
  2649. mark_reg_unknown(env, regs, insn->dst_reg);
  2650. break;
  2651. }
  2652. /* BPF_RSH is an unsigned shift. If the value in dst_reg might
  2653. * be negative, then either:
  2654. * 1) src_reg might be zero, so the sign bit of the result is
  2655. * unknown, so we lose our signed bounds
  2656. * 2) it's known negative, thus the unsigned bounds capture the
  2657. * signed bounds
  2658. * 3) the signed bounds cross zero, so they tell us nothing
  2659. * about the result
  2660. * If the value in dst_reg is known nonnegative, then again the
2661. * unsigned bounds capture the signed bounds.
  2662. * Thus, in all cases it suffices to blow away our signed bounds
  2663. * and rely on inferring new ones from the unsigned bounds and
  2664. * var_off of the result.
  2665. */
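/* Worked example (hypothetical values): dst in [16, 64] shifted right
* by a value in [1, 2] gives umin = 16 >> 2 = 4 (smallest value,
* largest shift) and umax = 64 >> 1 = 32 (largest value, smallest
* shift), which is exactly what the two shifts below compute.
*/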
  2666. dst_reg->smin_value = S64_MIN;
  2667. dst_reg->smax_value = S64_MAX;
  2668. if (src_known)
  2669. dst_reg->var_off = tnum_rshift(dst_reg->var_off,
  2670. umin_val);
  2671. else
  2672. dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
  2673. dst_reg->umin_value >>= umax_val;
  2674. dst_reg->umax_value >>= umin_val;
  2675. /* We may learn something more from the var_off */
  2676. __update_reg_bounds(dst_reg);
  2677. break;
  2678. default:
  2679. mark_reg_unknown(env, regs, insn->dst_reg);
  2680. break;
  2681. }
  2682. if (BPF_CLASS(insn->code) != BPF_ALU64) {
  2683. /* 32-bit ALU ops are (32,32)->32 */
  2684. coerce_reg_to_size(dst_reg, 4);
  2685. coerce_reg_to_size(&src_reg, 4);
  2686. }
  2687. __reg_deduce_bounds(dst_reg);
  2688. __reg_bound_offset(dst_reg);
  2689. return 0;
  2690. }
  2691. /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
  2692. * and var_off.
  2693. */
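/* Dispatch summary of the function below: arithmetic on a pointer and
* a scalar (in either operand order) goes to adjust_ptr_min_max_vals()
* (which only accepts ADD and SUB), scalar op scalar goes to
* adjust_scalar_min_max_vals(), and pointer op pointer is rejected,
* except that pointer - pointer is allowed and simply produces an
* unknown scalar.
*/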
  2694. static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
  2695. struct bpf_insn *insn)
  2696. {
  2697. struct bpf_verifier_state *vstate = env->cur_state;
  2698. struct bpf_func_state *state = vstate->frame[vstate->curframe];
  2699. struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
  2700. struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
  2701. u8 opcode = BPF_OP(insn->code);
  2702. dst_reg = &regs[insn->dst_reg];
  2703. src_reg = NULL;
  2704. if (dst_reg->type != SCALAR_VALUE)
  2705. ptr_reg = dst_reg;
  2706. if (BPF_SRC(insn->code) == BPF_X) {
  2707. src_reg = &regs[insn->src_reg];
  2708. if (src_reg->type != SCALAR_VALUE) {
  2709. if (dst_reg->type != SCALAR_VALUE) {
  2710. /* Combining two pointers by any ALU op yields
  2711. * an arbitrary scalar. Disallow all math except
  2712. * pointer subtraction
  2713. */
2714. if (opcode == BPF_SUB) {
  2715. mark_reg_unknown(env, regs, insn->dst_reg);
  2716. return 0;
  2717. }
  2718. verbose(env, "R%d pointer %s pointer prohibited\n",
  2719. insn->dst_reg,
  2720. bpf_alu_string[opcode >> 4]);
  2721. return -EACCES;
  2722. } else {
  2723. /* scalar += pointer
  2724. * This is legal, but we have to reverse our
  2725. * src/dest handling in computing the range
  2726. */
  2727. return adjust_ptr_min_max_vals(env, insn,
  2728. src_reg, dst_reg);
  2729. }
  2730. } else if (ptr_reg) {
  2731. /* pointer += scalar */
  2732. return adjust_ptr_min_max_vals(env, insn,
  2733. dst_reg, src_reg);
  2734. }
  2735. } else {
  2736. /* Pretend the src is a reg with a known value, since we only
  2737. * need to be able to read from this state.
  2738. */
  2739. off_reg.type = SCALAR_VALUE;
  2740. __mark_reg_known(&off_reg, insn->imm);
  2741. src_reg = &off_reg;
  2742. if (ptr_reg) /* pointer += K */
  2743. return adjust_ptr_min_max_vals(env, insn,
  2744. ptr_reg, src_reg);
  2745. }
  2746. /* Got here implies adding two SCALAR_VALUEs */
  2747. if (WARN_ON_ONCE(ptr_reg)) {
  2748. print_verifier_state(env, state);
  2749. verbose(env, "verifier internal error: unexpected ptr_reg\n");
  2750. return -EINVAL;
  2751. }
  2752. if (WARN_ON(!src_reg)) {
  2753. print_verifier_state(env, state);
  2754. verbose(env, "verifier internal error: no src_reg\n");
  2755. return -EINVAL;
  2756. }
  2757. return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
  2758. }
  2759. /* check validity of 32-bit and 64-bit arithmetic operations */
  2760. static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
  2761. {
  2762. struct bpf_reg_state *regs = cur_regs(env);
  2763. u8 opcode = BPF_OP(insn->code);
  2764. int err;
  2765. if (opcode == BPF_END || opcode == BPF_NEG) {
  2766. if (opcode == BPF_NEG) {
  2767. if (BPF_SRC(insn->code) != 0 ||
  2768. insn->src_reg != BPF_REG_0 ||
  2769. insn->off != 0 || insn->imm != 0) {
  2770. verbose(env, "BPF_NEG uses reserved fields\n");
  2771. return -EINVAL;
  2772. }
  2773. } else {
  2774. if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
  2775. (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
  2776. BPF_CLASS(insn->code) == BPF_ALU64) {
  2777. verbose(env, "BPF_END uses reserved fields\n");
  2778. return -EINVAL;
  2779. }
  2780. }
  2781. /* check src operand */
  2782. err = check_reg_arg(env, insn->dst_reg, SRC_OP);
  2783. if (err)
  2784. return err;
  2785. if (is_pointer_value(env, insn->dst_reg)) {
  2786. verbose(env, "R%d pointer arithmetic prohibited\n",
  2787. insn->dst_reg);
  2788. return -EACCES;
  2789. }
  2790. /* check dest operand */
  2791. err = check_reg_arg(env, insn->dst_reg, DST_OP);
  2792. if (err)
  2793. return err;
  2794. } else if (opcode == BPF_MOV) {
  2795. if (BPF_SRC(insn->code) == BPF_X) {
  2796. if (insn->imm != 0 || insn->off != 0) {
  2797. verbose(env, "BPF_MOV uses reserved fields\n");
  2798. return -EINVAL;
  2799. }
  2800. /* check src operand */
  2801. err = check_reg_arg(env, insn->src_reg, SRC_OP);
  2802. if (err)
  2803. return err;
  2804. } else {
  2805. if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
  2806. verbose(env, "BPF_MOV uses reserved fields\n");
  2807. return -EINVAL;
  2808. }
  2809. }
  2810. /* check dest operand */
  2811. err = check_reg_arg(env, insn->dst_reg, DST_OP);
  2812. if (err)
  2813. return err;
  2814. if (BPF_SRC(insn->code) == BPF_X) {
  2815. if (BPF_CLASS(insn->code) == BPF_ALU64) {
  2816. /* case: R1 = R2
  2817. * copy register state to dest reg
  2818. */
  2819. regs[insn->dst_reg] = regs[insn->src_reg];
  2820. regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
  2821. } else {
  2822. /* R1 = (u32) R2 */
  2823. if (is_pointer_value(env, insn->src_reg)) {
  2824. verbose(env,
  2825. "R%d partial copy of pointer\n",
  2826. insn->src_reg);
  2827. return -EACCES;
  2828. }
  2829. mark_reg_unknown(env, regs, insn->dst_reg);
  2830. coerce_reg_to_size(&regs[insn->dst_reg], 4);
  2831. }
  2832. } else {
  2833. /* case: R = imm
  2834. * remember the value we stored into this reg
  2835. */
  2836. regs[insn->dst_reg].type = SCALAR_VALUE;
  2837. if (BPF_CLASS(insn->code) == BPF_ALU64) {
  2838. __mark_reg_known(regs + insn->dst_reg,
  2839. insn->imm);
  2840. } else {
  2841. __mark_reg_known(regs + insn->dst_reg,
  2842. (u32)insn->imm);
  2843. }
  2844. }
  2845. } else if (opcode > BPF_END) {
  2846. verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
  2847. return -EINVAL;
  2848. } else { /* all other ALU ops: and, sub, xor, add, ... */
  2849. if (BPF_SRC(insn->code) == BPF_X) {
  2850. if (insn->imm != 0 || insn->off != 0) {
  2851. verbose(env, "BPF_ALU uses reserved fields\n");
  2852. return -EINVAL;
  2853. }
  2854. /* check src1 operand */
  2855. err = check_reg_arg(env, insn->src_reg, SRC_OP);
  2856. if (err)
  2857. return err;
  2858. } else {
  2859. if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
  2860. verbose(env, "BPF_ALU uses reserved fields\n");
  2861. return -EINVAL;
  2862. }
  2863. }
  2864. /* check src2 operand */
  2865. err = check_reg_arg(env, insn->dst_reg, SRC_OP);
  2866. if (err)
  2867. return err;
  2868. if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
  2869. BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
  2870. verbose(env, "div by zero\n");
  2871. return -EINVAL;
  2872. }
  2873. if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
  2874. verbose(env, "BPF_ARSH not supported for 32 bit ALU\n");
  2875. return -EINVAL;
  2876. }
  2877. if ((opcode == BPF_LSH || opcode == BPF_RSH ||
  2878. opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
  2879. int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
  2880. if (insn->imm < 0 || insn->imm >= size) {
  2881. verbose(env, "invalid shift %d\n", insn->imm);
  2882. return -EINVAL;
  2883. }
  2884. }
  2885. /* check dest operand */
  2886. err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
  2887. if (err)
  2888. return err;
  2889. return adjust_reg_min_max_vals(env, insn);
  2890. }
  2891. return 0;
  2892. }
  2893. static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
  2894. struct bpf_reg_state *dst_reg,
  2895. enum bpf_reg_type type,
  2896. bool range_right_open)
  2897. {
  2898. struct bpf_func_state *state = vstate->frame[vstate->curframe];
  2899. struct bpf_reg_state *regs = state->regs, *reg;
  2900. u16 new_range;
  2901. int i, j;
  2902. if (dst_reg->off < 0 ||
  2903. (dst_reg->off == 0 && range_right_open))
  2904. /* This doesn't give us any range */
  2905. return;
  2906. if (dst_reg->umax_value > MAX_PACKET_OFF ||
  2907. dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
  2908. /* Risk of overflow. For instance, ptr + (1<<63) may be less
  2909. * than pkt_end, but that's because it's also less than pkt.
  2910. */
  2911. return;
  2912. new_range = dst_reg->off;
  2913. if (range_right_open)
  2914. new_range--;
  2915. /* Examples for register markings:
  2916. *
  2917. * pkt_data in dst register:
  2918. *
  2919. * r2 = r3;
  2920. * r2 += 8;
  2921. * if (r2 > pkt_end) goto <handle exception>
  2922. * <access okay>
  2923. *
  2924. * r2 = r3;
  2925. * r2 += 8;
  2926. * if (r2 < pkt_end) goto <access okay>
  2927. * <handle exception>
  2928. *
  2929. * Where:
  2930. * r2 == dst_reg, pkt_end == src_reg
  2931. * r2=pkt(id=n,off=8,r=0)
  2932. * r3=pkt(id=n,off=0,r=0)
  2933. *
  2934. * pkt_data in src register:
  2935. *
  2936. * r2 = r3;
  2937. * r2 += 8;
  2938. * if (pkt_end >= r2) goto <access okay>
  2939. * <handle exception>
  2940. *
  2941. * r2 = r3;
  2942. * r2 += 8;
  2943. * if (pkt_end <= r2) goto <handle exception>
  2944. * <access okay>
  2945. *
  2946. * Where:
  2947. * pkt_end == dst_reg, r2 == src_reg
  2948. * r2=pkt(id=n,off=8,r=0)
  2949. * r3=pkt(id=n,off=0,r=0)
  2950. *
  2951. * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
  2952. * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
  2953. * and [r3, r3 + 8-1) respectively is safe to access depending on
  2954. * the check.
  2955. */
  2956. /* If our ids match, then we must have the same max_value. And we
  2957. * don't care about the other reg's fixed offset, since if it's too big
  2958. * the range won't allow anything.
  2959. * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
  2960. */
  2961. for (i = 0; i < MAX_BPF_REG; i++)
  2962. if (regs[i].type == type && regs[i].id == dst_reg->id)
  2963. /* keep the maximum range already checked */
  2964. regs[i].range = max(regs[i].range, new_range);
  2965. for (j = 0; j <= vstate->curframe; j++) {
  2966. state = vstate->frame[j];
  2967. for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
  2968. if (state->stack[i].slot_type[0] != STACK_SPILL)
  2969. continue;
  2970. reg = &state->stack[i].spilled_ptr;
  2971. if (reg->type == type && reg->id == dst_reg->id)
  2972. reg->range = max(reg->range, new_range);
  2973. }
  2974. }
  2975. }
  2976. /* Adjusts the register min/max values in the case that the dst_reg is the
  2977. * variable register that we are working on, and src_reg is a constant or we're
  2978. * simply doing a BPF_K check.
  2979. * In JEQ/JNE cases we also adjust the var_off values.
  2980. */
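/* Illustrative example (hypothetical values): for "if r1 > 7" with r1
* a scalar in [0, 100], the BPF_JGT case below gives the true branch
* umin = max(0, 7 + 1) = 8 and the false branch umax = min(100, 7) = 7;
* the deduce/bound helpers at the end then propagate the new unsigned
* bounds into the signed bounds and var_off.
*/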
  2981. static void reg_set_min_max(struct bpf_reg_state *true_reg,
  2982. struct bpf_reg_state *false_reg, u64 val,
  2983. u8 opcode)
  2984. {
  2985. /* If the dst_reg is a pointer, we can't learn anything about its
  2986. * variable offset from the compare (unless src_reg were a pointer into
2987. * the same object, but we don't bother with that).
  2988. * Since false_reg and true_reg have the same type by construction, we
  2989. * only need to check one of them for pointerness.
  2990. */
  2991. if (__is_pointer_value(false, false_reg))
  2992. return;
  2993. switch (opcode) {
  2994. case BPF_JEQ:
  2995. /* If this is false then we know nothing Jon Snow, but if it is
  2996. * true then we know for sure.
  2997. */
  2998. __mark_reg_known(true_reg, val);
  2999. break;
  3000. case BPF_JNE:
  3001. /* If this is true we know nothing Jon Snow, but if it is false
3002. * we know the value for sure.
  3003. */
  3004. __mark_reg_known(false_reg, val);
  3005. break;
  3006. case BPF_JGT:
  3007. false_reg->umax_value = min(false_reg->umax_value, val);
  3008. true_reg->umin_value = max(true_reg->umin_value, val + 1);
  3009. break;
  3010. case BPF_JSGT:
  3011. false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
  3012. true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
  3013. break;
  3014. case BPF_JLT:
  3015. false_reg->umin_value = max(false_reg->umin_value, val);
  3016. true_reg->umax_value = min(true_reg->umax_value, val - 1);
  3017. break;
  3018. case BPF_JSLT:
  3019. false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
  3020. true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
  3021. break;
  3022. case BPF_JGE:
  3023. false_reg->umax_value = min(false_reg->umax_value, val - 1);
  3024. true_reg->umin_value = max(true_reg->umin_value, val);
  3025. break;
  3026. case BPF_JSGE:
  3027. false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
  3028. true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
  3029. break;
  3030. case BPF_JLE:
  3031. false_reg->umin_value = max(false_reg->umin_value, val + 1);
  3032. true_reg->umax_value = min(true_reg->umax_value, val);
  3033. break;
  3034. case BPF_JSLE:
  3035. false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
  3036. true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
  3037. break;
  3038. default:
  3039. break;
  3040. }
  3041. __reg_deduce_bounds(false_reg);
  3042. __reg_deduce_bounds(true_reg);
  3043. /* We might have learned some bits from the bounds. */
  3044. __reg_bound_offset(false_reg);
  3045. __reg_bound_offset(true_reg);
  3046. /* Intersecting with the old var_off might have improved our bounds
  3047. * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
  3048. * then new var_off is (0; 0x7f...fc) which improves our umax.
  3049. */
  3050. __update_reg_bounds(false_reg);
  3051. __update_reg_bounds(true_reg);
  3052. }
  3053. /* Same as above, but for the case that dst_reg holds a constant and src_reg is
  3054. * the variable reg.
  3055. */
  3056. static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
  3057. struct bpf_reg_state *false_reg, u64 val,
  3058. u8 opcode)
  3059. {
  3060. if (__is_pointer_value(false, false_reg))
  3061. return;
  3062. switch (opcode) {
  3063. case BPF_JEQ:
  3064. /* If this is false then we know nothing Jon Snow, but if it is
  3065. * true then we know for sure.
  3066. */
  3067. __mark_reg_known(true_reg, val);
  3068. break;
  3069. case BPF_JNE:
  3070. /* If this is true we know nothing Jon Snow, but if it is false
3071. * we know the value for sure.
  3072. */
  3073. __mark_reg_known(false_reg, val);
  3074. break;
  3075. case BPF_JGT:
  3076. true_reg->umax_value = min(true_reg->umax_value, val - 1);
  3077. false_reg->umin_value = max(false_reg->umin_value, val);
  3078. break;
  3079. case BPF_JSGT:
  3080. true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
  3081. false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
  3082. break;
  3083. case BPF_JLT:
  3084. true_reg->umin_value = max(true_reg->umin_value, val + 1);
  3085. false_reg->umax_value = min(false_reg->umax_value, val);
  3086. break;
  3087. case BPF_JSLT:
  3088. true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
  3089. false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
  3090. break;
  3091. case BPF_JGE:
  3092. true_reg->umax_value = min(true_reg->umax_value, val);
  3093. false_reg->umin_value = max(false_reg->umin_value, val + 1);
  3094. break;
  3095. case BPF_JSGE:
  3096. true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
  3097. false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
  3098. break;
  3099. case BPF_JLE:
  3100. true_reg->umin_value = max(true_reg->umin_value, val);
  3101. false_reg->umax_value = min(false_reg->umax_value, val - 1);
  3102. break;
  3103. case BPF_JSLE:
  3104. true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
  3105. false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
  3106. break;
  3107. default:
  3108. break;
  3109. }
  3110. __reg_deduce_bounds(false_reg);
  3111. __reg_deduce_bounds(true_reg);
  3112. /* We might have learned some bits from the bounds. */
  3113. __reg_bound_offset(false_reg);
  3114. __reg_bound_offset(true_reg);
  3115. /* Intersecting with the old var_off might have improved our bounds
  3116. * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
  3117. * then new var_off is (0; 0x7f...fc) which improves our umax.
  3118. */
  3119. __update_reg_bounds(false_reg);
  3120. __update_reg_bounds(true_reg);
  3121. }
  3122. /* Regs are known to be equal, so intersect their min/max/var_off */
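/* Illustrative example (hypothetical values): if one reg is known to be
* in [0, 10] and the other in [5, 20], both end up in [5, 10]: the
* minima are combined with max(), the maxima with min(), and the
* var_offs with tnum_intersect().
*/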
  3123. static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
  3124. struct bpf_reg_state *dst_reg)
  3125. {
  3126. src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
  3127. dst_reg->umin_value);
  3128. src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
  3129. dst_reg->umax_value);
  3130. src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
  3131. dst_reg->smin_value);
  3132. src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
  3133. dst_reg->smax_value);
  3134. src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
  3135. dst_reg->var_off);
  3136. /* We might have learned new bounds from the var_off. */
  3137. __update_reg_bounds(src_reg);
  3138. __update_reg_bounds(dst_reg);
  3139. /* We might have learned something about the sign bit. */
  3140. __reg_deduce_bounds(src_reg);
  3141. __reg_deduce_bounds(dst_reg);
  3142. /* We might have learned some bits from the bounds. */
  3143. __reg_bound_offset(src_reg);
  3144. __reg_bound_offset(dst_reg);
  3145. /* Intersecting with the old var_off might have improved our bounds
  3146. * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
  3147. * then new var_off is (0; 0x7f...fc) which improves our umax.
  3148. */
  3149. __update_reg_bounds(src_reg);
  3150. __update_reg_bounds(dst_reg);
  3151. }
  3152. static void reg_combine_min_max(struct bpf_reg_state *true_src,
  3153. struct bpf_reg_state *true_dst,
  3154. struct bpf_reg_state *false_src,
  3155. struct bpf_reg_state *false_dst,
  3156. u8 opcode)
  3157. {
  3158. switch (opcode) {
  3159. case BPF_JEQ:
  3160. __reg_combine_min_max(true_src, true_dst);
  3161. break;
  3162. case BPF_JNE:
  3163. __reg_combine_min_max(false_src, false_dst);
  3164. break;
  3165. }
  3166. }
  3167. static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
  3168. bool is_null)
  3169. {
  3170. struct bpf_reg_state *reg = &regs[regno];
  3171. if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
  3172. /* Old offset (both fixed and variable parts) should
  3173. * have been known-zero, because we don't allow pointer
  3174. * arithmetic on pointers that might be NULL.
  3175. */
  3176. if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
  3177. !tnum_equals_const(reg->var_off, 0) ||
  3178. reg->off)) {
  3179. __mark_reg_known_zero(reg);
  3180. reg->off = 0;
  3181. }
  3182. if (is_null) {
  3183. reg->type = SCALAR_VALUE;
  3184. } else if (reg->map_ptr->inner_map_meta) {
  3185. reg->type = CONST_PTR_TO_MAP;
  3186. reg->map_ptr = reg->map_ptr->inner_map_meta;
  3187. } else {
  3188. reg->type = PTR_TO_MAP_VALUE;
  3189. }
  3190. /* We don't need id from this point onwards anymore, thus we
  3191. * should better reset it, so that state pruning has chances
  3192. * to take effect.
  3193. */
  3194. reg->id = 0;
  3195. }
  3196. }
  3197. /* The logic is similar to find_good_pkt_pointers(), both could eventually
  3198. * be folded together at some point.
  3199. */
  3200. static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
  3201. bool is_null)
  3202. {
  3203. struct bpf_func_state *state = vstate->frame[vstate->curframe];
  3204. struct bpf_reg_state *regs = state->regs;
  3205. u32 id = regs[regno].id;
  3206. int i, j;
  3207. for (i = 0; i < MAX_BPF_REG; i++)
  3208. mark_map_reg(regs, i, id, is_null);
  3209. for (j = 0; j <= vstate->curframe; j++) {
  3210. state = vstate->frame[j];
  3211. for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
  3212. if (state->stack[i].slot_type[0] != STACK_SPILL)
  3213. continue;
  3214. mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
  3215. }
  3216. }
  3217. }
  3218. static bool try_match_pkt_pointers(const struct bpf_insn *insn,
  3219. struct bpf_reg_state *dst_reg,
  3220. struct bpf_reg_state *src_reg,
  3221. struct bpf_verifier_state *this_branch,
  3222. struct bpf_verifier_state *other_branch)
  3223. {
  3224. if (BPF_SRC(insn->code) != BPF_X)
  3225. return false;
  3226. switch (BPF_OP(insn->code)) {
  3227. case BPF_JGT:
  3228. if ((dst_reg->type == PTR_TO_PACKET &&
  3229. src_reg->type == PTR_TO_PACKET_END) ||
  3230. (dst_reg->type == PTR_TO_PACKET_META &&
  3231. reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
  3232. /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
  3233. find_good_pkt_pointers(this_branch, dst_reg,
  3234. dst_reg->type, false);
  3235. } else if ((dst_reg->type == PTR_TO_PACKET_END &&
  3236. src_reg->type == PTR_TO_PACKET) ||
  3237. (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
  3238. src_reg->type == PTR_TO_PACKET_META)) {
  3239. /* pkt_end > pkt_data', pkt_data > pkt_meta' */
  3240. find_good_pkt_pointers(other_branch, src_reg,
  3241. src_reg->type, true);
  3242. } else {
  3243. return false;
  3244. }
  3245. break;
  3246. case BPF_JLT:
  3247. if ((dst_reg->type == PTR_TO_PACKET &&
  3248. src_reg->type == PTR_TO_PACKET_END) ||
  3249. (dst_reg->type == PTR_TO_PACKET_META &&
  3250. reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
  3251. /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
  3252. find_good_pkt_pointers(other_branch, dst_reg,
  3253. dst_reg->type, true);
  3254. } else if ((dst_reg->type == PTR_TO_PACKET_END &&
  3255. src_reg->type == PTR_TO_PACKET) ||
  3256. (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
  3257. src_reg->type == PTR_TO_PACKET_META)) {
3258. /* pkt_end < pkt_data', pkt_data < pkt_meta' */
  3259. find_good_pkt_pointers(this_branch, src_reg,
  3260. src_reg->type, false);
  3261. } else {
  3262. return false;
  3263. }
  3264. break;
  3265. case BPF_JGE:
  3266. if ((dst_reg->type == PTR_TO_PACKET &&
  3267. src_reg->type == PTR_TO_PACKET_END) ||
  3268. (dst_reg->type == PTR_TO_PACKET_META &&
  3269. reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
  3270. /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
  3271. find_good_pkt_pointers(this_branch, dst_reg,
  3272. dst_reg->type, true);
  3273. } else if ((dst_reg->type == PTR_TO_PACKET_END &&
  3274. src_reg->type == PTR_TO_PACKET) ||
  3275. (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
  3276. src_reg->type == PTR_TO_PACKET_META)) {
  3277. /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
  3278. find_good_pkt_pointers(other_branch, src_reg,
  3279. src_reg->type, false);
  3280. } else {
  3281. return false;
  3282. }
  3283. break;
  3284. case BPF_JLE:
  3285. if ((dst_reg->type == PTR_TO_PACKET &&
  3286. src_reg->type == PTR_TO_PACKET_END) ||
  3287. (dst_reg->type == PTR_TO_PACKET_META &&
  3288. reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
  3289. /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
  3290. find_good_pkt_pointers(other_branch, dst_reg,
  3291. dst_reg->type, false);
  3292. } else if ((dst_reg->type == PTR_TO_PACKET_END &&
  3293. src_reg->type == PTR_TO_PACKET) ||
  3294. (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
  3295. src_reg->type == PTR_TO_PACKET_META)) {
  3296. /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
  3297. find_good_pkt_pointers(this_branch, src_reg,
  3298. src_reg->type, true);
  3299. } else {
  3300. return false;
  3301. }
  3302. break;
  3303. default:
  3304. return false;
  3305. }
  3306. return true;
  3307. }
  3308. static int check_cond_jmp_op(struct bpf_verifier_env *env,
  3309. struct bpf_insn *insn, int *insn_idx)
  3310. {
  3311. struct bpf_verifier_state *this_branch = env->cur_state;
  3312. struct bpf_verifier_state *other_branch;
  3313. struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
  3314. struct bpf_reg_state *dst_reg, *other_branch_regs;
  3315. u8 opcode = BPF_OP(insn->code);
  3316. int err;
  3317. if (opcode > BPF_JSLE) {
  3318. verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
  3319. return -EINVAL;
  3320. }
  3321. if (BPF_SRC(insn->code) == BPF_X) {
  3322. if (insn->imm != 0) {
  3323. verbose(env, "BPF_JMP uses reserved fields\n");
  3324. return -EINVAL;
  3325. }
  3326. /* check src1 operand */
  3327. err = check_reg_arg(env, insn->src_reg, SRC_OP);
  3328. if (err)
  3329. return err;
  3330. if (is_pointer_value(env, insn->src_reg)) {
  3331. verbose(env, "R%d pointer comparison prohibited\n",
  3332. insn->src_reg);
  3333. return -EACCES;
  3334. }
  3335. } else {
  3336. if (insn->src_reg != BPF_REG_0) {
  3337. verbose(env, "BPF_JMP uses reserved fields\n");
  3338. return -EINVAL;
  3339. }
  3340. }
  3341. /* check src2 operand */
  3342. err = check_reg_arg(env, insn->dst_reg, SRC_OP);
  3343. if (err)
  3344. return err;
  3345. dst_reg = &regs[insn->dst_reg];
  3346. /* detect if R == 0 where R was initialized to zero earlier */
  3347. if (BPF_SRC(insn->code) == BPF_K &&
  3348. (opcode == BPF_JEQ || opcode == BPF_JNE) &&
  3349. dst_reg->type == SCALAR_VALUE &&
  3350. tnum_is_const(dst_reg->var_off)) {
  3351. if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
  3352. (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
  3353. /* if (imm == imm) goto pc+off;
  3354. * only follow the goto, ignore fall-through
  3355. */
  3356. *insn_idx += insn->off;
  3357. return 0;
  3358. } else {
  3359. /* if (imm != imm) goto pc+off;
  3360. * only follow fall-through branch, since
  3361. * that's where the program will go
  3362. */
  3363. return 0;
  3364. }
  3365. }
  3366. other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
  3367. if (!other_branch)
  3368. return -EFAULT;
  3369. other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
  3370. /* detect if we are comparing against a constant value so we can adjust
  3371. * our min/max values for our dst register.
  3372. * this is only legit if both are scalars (or pointers to the same
  3373. * object, I suppose, but we don't support that right now), because
  3374. * otherwise the different base pointers mean the offsets aren't
  3375. * comparable.
  3376. */
  3377. if (BPF_SRC(insn->code) == BPF_X) {
  3378. if (dst_reg->type == SCALAR_VALUE &&
  3379. regs[insn->src_reg].type == SCALAR_VALUE) {
  3380. if (tnum_is_const(regs[insn->src_reg].var_off))
  3381. reg_set_min_max(&other_branch_regs[insn->dst_reg],
  3382. dst_reg, regs[insn->src_reg].var_off.value,
  3383. opcode);
  3384. else if (tnum_is_const(dst_reg->var_off))
  3385. reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
  3386. &regs[insn->src_reg],
  3387. dst_reg->var_off.value, opcode);
  3388. else if (opcode == BPF_JEQ || opcode == BPF_JNE)
  3389. /* Comparing for equality, we can combine knowledge */
  3390. reg_combine_min_max(&other_branch_regs[insn->src_reg],
  3391. &other_branch_regs[insn->dst_reg],
  3392. &regs[insn->src_reg],
  3393. &regs[insn->dst_reg], opcode);
  3394. }
  3395. } else if (dst_reg->type == SCALAR_VALUE) {
  3396. reg_set_min_max(&other_branch_regs[insn->dst_reg],
  3397. dst_reg, insn->imm, opcode);
  3398. }
  3399. /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
  3400. if (BPF_SRC(insn->code) == BPF_K &&
  3401. insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
  3402. dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
  3403. /* Mark all identical map registers in each branch as either
3404. * safe or unknown, depending on the R == 0 or R != 0 condition.
  3405. */
  3406. mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
  3407. mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
  3408. } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
  3409. this_branch, other_branch) &&
  3410. is_pointer_value(env, insn->dst_reg)) {
  3411. verbose(env, "R%d pointer comparison prohibited\n",
  3412. insn->dst_reg);
  3413. return -EACCES;
  3414. }
  3415. if (env->log.level)
  3416. print_verifier_state(env, this_branch->frame[this_branch->curframe]);
  3417. return 0;
  3418. }
  3419. /* return the map pointer stored inside BPF_LD_IMM64 instruction */
  3420. static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
  3421. {
  3422. u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
  3423. return (struct bpf_map *) (unsigned long) imm64;
  3424. }
  3425. /* verify BPF_LD_IMM64 instruction */
  3426. static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
  3427. {
  3428. struct bpf_reg_state *regs = cur_regs(env);
  3429. int err;
  3430. if (BPF_SIZE(insn->code) != BPF_DW) {
  3431. verbose(env, "invalid BPF_LD_IMM insn\n");
  3432. return -EINVAL;
  3433. }
  3434. if (insn->off != 0) {
  3435. verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
  3436. return -EINVAL;
  3437. }
  3438. err = check_reg_arg(env, insn->dst_reg, DST_OP);
  3439. if (err)
  3440. return err;
  3441. if (insn->src_reg == 0) {
  3442. u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
  3443. regs[insn->dst_reg].type = SCALAR_VALUE;
  3444. __mark_reg_known(&regs[insn->dst_reg], imm);
  3445. return 0;
  3446. }
  3447. /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
  3448. BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
  3449. regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
  3450. regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
  3451. return 0;
  3452. }
  3453. static bool may_access_skb(enum bpf_prog_type type)
  3454. {
  3455. switch (type) {
  3456. case BPF_PROG_TYPE_SOCKET_FILTER:
  3457. case BPF_PROG_TYPE_SCHED_CLS:
  3458. case BPF_PROG_TYPE_SCHED_ACT:
  3459. return true;
  3460. default:
  3461. return false;
  3462. }
  3463. }
  3464. /* verify safety of LD_ABS|LD_IND instructions:
  3465. * - they can only appear in the programs where ctx == skb
  3466. * - since they are wrappers of function calls, they scratch R1-R5 registers,
  3467. * preserve R6-R9, and store return value into R0
  3468. *
  3469. * Implicit input:
  3470. * ctx == skb == R6 == CTX
  3471. *
  3472. * Explicit input:
  3473. * SRC == any register
  3474. * IMM == 32-bit immediate
  3475. *
  3476. * Output:
  3477. * R0 - 8/16/32-bit skb data converted to cpu endianness
  3478. */
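/* Illustrative example (the offset is just an example, not something
* required by the verifier): a classic socket filter using
* BPF_LD_ABS(BPF_H, 12) loads the 16-bit EtherType at skb->data + 12
* into R0, converted to CPU endianness, and clobbers R1-R5 as described
* above.
*/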
  3479. static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
  3480. {
  3481. struct bpf_reg_state *regs = cur_regs(env);
  3482. u8 mode = BPF_MODE(insn->code);
  3483. int i, err;
  3484. if (!may_access_skb(env->prog->type)) {
  3485. verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
  3486. return -EINVAL;
  3487. }
  3488. if (env->subprog_cnt) {
3489. /* when a program has LD_ABS insns, JITs and the interpreter assume
3490. * that r1 == ctx == skb, which is not the case for callees
3491. * that can have arbitrary arguments. It's problematic
3492. * for the main prog as well, since JITs would need to analyze
3493. * all functions in order to make proper register save/restore
3494. * decisions in the main prog. Hence disallow LD_ABS with calls.
  3495. */
  3496. verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
  3497. return -EINVAL;
  3498. }
  3499. if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
  3500. BPF_SIZE(insn->code) == BPF_DW ||
  3501. (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
  3502. verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
  3503. return -EINVAL;
  3504. }
  3505. /* check whether implicit source operand (register R6) is readable */
  3506. err = check_reg_arg(env, BPF_REG_6, SRC_OP);
  3507. if (err)
  3508. return err;
  3509. if (regs[BPF_REG_6].type != PTR_TO_CTX) {
  3510. verbose(env,
  3511. "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
  3512. return -EINVAL;
  3513. }
  3514. if (mode == BPF_IND) {
  3515. /* check explicit source operand */
  3516. err = check_reg_arg(env, insn->src_reg, SRC_OP);
  3517. if (err)
  3518. return err;
  3519. }
  3520. /* reset caller saved regs to unreadable */
  3521. for (i = 0; i < CALLER_SAVED_REGS; i++) {
  3522. mark_reg_not_init(env, regs, caller_saved[i]);
  3523. check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
  3524. }
  3525. /* mark destination R0 register as readable, since it contains
  3526. * the value fetched from the packet.
  3527. * Already marked as written above.
  3528. */
  3529. mark_reg_unknown(env, regs, BPF_REG_0);
  3530. return 0;
  3531. }
  3532. static int check_return_code(struct bpf_verifier_env *env)
  3533. {
  3534. struct bpf_reg_state *reg;
  3535. struct tnum range = tnum_range(0, 1);
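/* Illustrative example: tnum_range(0, 1) describes the set {0, 1}, so a
* cgroup program ending in "return 2" leaves R0 as the known constant 2,
* fails the tnum_in() check below, and is rejected with roughly
* "At program exit the register R0 has value 2 should have been 0 or 1".
*/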
  3536. switch (env->prog->type) {
  3537. case BPF_PROG_TYPE_CGROUP_SKB:
  3538. case BPF_PROG_TYPE_CGROUP_SOCK:
  3539. case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
  3540. case BPF_PROG_TYPE_SOCK_OPS:
  3541. case BPF_PROG_TYPE_CGROUP_DEVICE:
  3542. break;
  3543. default:
  3544. return 0;
  3545. }
  3546. reg = cur_regs(env) + BPF_REG_0;
  3547. if (reg->type != SCALAR_VALUE) {
  3548. verbose(env, "At program exit the register R0 is not a known value (%s)\n",
  3549. reg_type_str[reg->type]);
  3550. return -EINVAL;
  3551. }
  3552. if (!tnum_in(range, reg->var_off)) {
  3553. verbose(env, "At program exit the register R0 ");
  3554. if (!tnum_is_unknown(reg->var_off)) {
  3555. char tn_buf[48];
  3556. tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  3557. verbose(env, "has value %s", tn_buf);
  3558. } else {
  3559. verbose(env, "has unknown scalar value");
  3560. }
  3561. verbose(env, " should have been 0 or 1\n");
  3562. return -EINVAL;
  3563. }
  3564. return 0;
  3565. }
  3566. /* non-recursive DFS pseudo code
  3567. * 1 procedure DFS-iterative(G,v):
  3568. * 2 label v as discovered
  3569. * 3 let S be a stack
  3570. * 4 S.push(v)
  3571. * 5 while S is not empty
  3572. * 6 t <- S.pop()
  3573. * 7 if t is what we're looking for:
  3574. * 8 return t
  3575. * 9 for all edges e in G.adjacentEdges(t) do
  3576. * 10 if edge e is already labelled
  3577. * 11 continue with the next edge
  3578. * 12 w <- G.adjacentVertex(t,e)
  3579. * 13 if vertex w is not discovered and not explored
  3580. * 14 label e as tree-edge
  3581. * 15 label w as discovered
  3582. * 16 S.push(w)
  3583. * 17 continue at 5
  3584. * 18 else if vertex w is discovered
  3585. * 19 label e as back-edge
  3586. * 20 else
  3587. * 21 // vertex w is explored
  3588. * 22 label e as forward- or cross-edge
  3589. * 23 label t as explored
  3590. * 24 S.pop()
  3591. *
  3592. * convention:
  3593. * 0x10 - discovered
  3594. * 0x11 - discovered and fall-through edge labelled
  3595. * 0x12 - discovered and fall-through and branch edges labelled
  3596. * 0x20 - explored
  3597. */
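/* Illustrative walk-through: for a conditional jump at insn t, check_cfg()
* below first pushes the fall-through edge (insn_state[t] becomes
* DISCOVERED | FALLTHROUGH == 0x11) and then the branch edge (0x12); once
* both successors have been processed, insn_state[t] becomes EXPLORED
* (0x20). An edge into an insn that is still DISCOVERED is a back-edge,
* i.e. a loop, and the program is rejected.
*/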
  3598. enum {
  3599. DISCOVERED = 0x10,
  3600. EXPLORED = 0x20,
  3601. FALLTHROUGH = 1,
  3602. BRANCH = 2,
  3603. };
  3604. #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
  3605. static int *insn_stack; /* stack of insns to process */
  3606. static int cur_stack; /* current stack index */
  3607. static int *insn_state;
  3608. /* t, w, e - match pseudo-code above:
  3609. * t - index of current instruction
  3610. * w - next instruction
  3611. * e - edge
  3612. */
  3613. static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
  3614. {
  3615. if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
  3616. return 0;
  3617. if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
  3618. return 0;
  3619. if (w < 0 || w >= env->prog->len) {
  3620. verbose(env, "jump out of range from insn %d to %d\n", t, w);
  3621. return -EINVAL;
  3622. }
  3623. if (e == BRANCH)
  3624. /* mark branch target for state pruning */
  3625. env->explored_states[w] = STATE_LIST_MARK;
  3626. if (insn_state[w] == 0) {
  3627. /* tree-edge */
  3628. insn_state[t] = DISCOVERED | e;
  3629. insn_state[w] = DISCOVERED;
  3630. if (cur_stack >= env->prog->len)
  3631. return -E2BIG;
  3632. insn_stack[cur_stack++] = w;
  3633. return 1;
  3634. } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
  3635. verbose(env, "back-edge from insn %d to %d\n", t, w);
  3636. return -EINVAL;
  3637. } else if (insn_state[w] == EXPLORED) {
  3638. /* forward- or cross-edge */
  3639. insn_state[t] = DISCOVERED | e;
  3640. } else {
  3641. verbose(env, "insn state internal bug\n");
  3642. return -EFAULT;
  3643. }
  3644. return 0;
  3645. }
  3646. /* non-recursive depth-first-search to detect loops in BPF program
  3647. * loop == back-edge in directed graph
  3648. */
  3649. static int check_cfg(struct bpf_verifier_env *env)
  3650. {
  3651. struct bpf_insn *insns = env->prog->insnsi;
  3652. int insn_cnt = env->prog->len;
  3653. int ret = 0;
  3654. int i, t;
  3655. ret = check_subprogs(env);
  3656. if (ret < 0)
  3657. return ret;
  3658. insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
  3659. if (!insn_state)
  3660. return -ENOMEM;
  3661. insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
  3662. if (!insn_stack) {
  3663. kfree(insn_state);
  3664. return -ENOMEM;
  3665. }
  3666. insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
  3667. insn_stack[0] = 0; /* 0 is the first instruction */
  3668. cur_stack = 1;
  3669. peek_stack:
  3670. if (cur_stack == 0)
  3671. goto check_state;
  3672. t = insn_stack[cur_stack - 1];
  3673. if (BPF_CLASS(insns[t].code) == BPF_JMP) {
  3674. u8 opcode = BPF_OP(insns[t].code);
  3675. if (opcode == BPF_EXIT) {
  3676. goto mark_explored;
  3677. } else if (opcode == BPF_CALL) {
  3678. ret = push_insn(t, t + 1, FALLTHROUGH, env);
  3679. if (ret == 1)
  3680. goto peek_stack;
  3681. else if (ret < 0)
  3682. goto err_free;
  3683. if (t + 1 < insn_cnt)
  3684. env->explored_states[t + 1] = STATE_LIST_MARK;
  3685. if (insns[t].src_reg == BPF_PSEUDO_CALL) {
  3686. env->explored_states[t] = STATE_LIST_MARK;
  3687. ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
  3688. if (ret == 1)
  3689. goto peek_stack;
  3690. else if (ret < 0)
  3691. goto err_free;
  3692. }
  3693. } else if (opcode == BPF_JA) {
  3694. if (BPF_SRC(insns[t].code) != BPF_K) {
  3695. ret = -EINVAL;
  3696. goto err_free;
  3697. }
  3698. /* unconditional jump with single edge */
  3699. ret = push_insn(t, t + insns[t].off + 1,
  3700. FALLTHROUGH, env);
  3701. if (ret == 1)
  3702. goto peek_stack;
  3703. else if (ret < 0)
  3704. goto err_free;
  3705. /* tell verifier to check for equivalent states
  3706. * after every call and jump
  3707. */
  3708. if (t + 1 < insn_cnt)
  3709. env->explored_states[t + 1] = STATE_LIST_MARK;
  3710. } else {
  3711. /* conditional jump with two edges */
  3712. env->explored_states[t] = STATE_LIST_MARK;
  3713. ret = push_insn(t, t + 1, FALLTHROUGH, env);
  3714. if (ret == 1)
  3715. goto peek_stack;
  3716. else if (ret < 0)
  3717. goto err_free;
  3718. ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
  3719. if (ret == 1)
  3720. goto peek_stack;
  3721. else if (ret < 0)
  3722. goto err_free;
  3723. }
  3724. } else {
  3725. /* all other non-branch instructions with single
  3726. * fall-through edge
  3727. */
  3728. ret = push_insn(t, t + 1, FALLTHROUGH, env);
  3729. if (ret == 1)
  3730. goto peek_stack;
  3731. else if (ret < 0)
  3732. goto err_free;
  3733. }
  3734. mark_explored:
  3735. insn_state[t] = EXPLORED;
  3736. if (cur_stack-- <= 0) {
  3737. verbose(env, "pop stack internal bug\n");
  3738. ret = -EFAULT;
  3739. goto err_free;
  3740. }
  3741. goto peek_stack;
  3742. check_state:
  3743. for (i = 0; i < insn_cnt; i++) {
  3744. if (insn_state[i] != EXPLORED) {
  3745. verbose(env, "unreachable insn %d\n", i);
  3746. ret = -EINVAL;
  3747. goto err_free;
  3748. }
  3749. }
  3750. ret = 0; /* cfg looks good */
  3751. err_free:
  3752. kfree(insn_state);
  3753. kfree(insn_stack);
  3754. return ret;
  3755. }
  3756. /* check %cur's range satisfies %old's */
  3757. static bool range_within(struct bpf_reg_state *old,
  3758. struct bpf_reg_state *cur)
  3759. {
  3760. return old->umin_value <= cur->umin_value &&
  3761. old->umax_value >= cur->umax_value &&
  3762. old->smin_value <= cur->smin_value &&
  3763. old->smax_value >= cur->smax_value;
  3764. }
  3765. /* Maximum number of register states that can exist at once */
  3766. #define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
  3767. struct idpair {
  3768. u32 old;
  3769. u32 cur;
  3770. };
  3771. /* If in the old state two registers had the same id, then they need to have
  3772. * the same id in the new state as well. But that id could be different from
  3773. * the old state, so we need to track the mapping from old to new ids.
  3774. * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
  3775. * regs with old id 5 must also have new id 9 for the new state to be safe. But
3776. * regs with a different old id could still have new id 9; we don't care about
  3777. * that.
  3778. * So we look through our idmap to see if this old id has been seen before. If
  3779. * so, we require the new id to match; otherwise, we add the id pair to the map.
  3780. */
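/* Illustrative example (hypothetical ids): if the old state had
* r1.id == r2.id == 5 and the current state has r1.id == 9, the first
* check_ids() call records the pair (5, 9); the second call then only
* succeeds if r2.id in the current state is also 9. A current id of 9
* showing up for some unrelated old id is not a problem.
*/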
  3781. static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
  3782. {
  3783. unsigned int i;
  3784. for (i = 0; i < ID_MAP_SIZE; i++) {
  3785. if (!idmap[i].old) {
  3786. /* Reached an empty slot; haven't seen this id before */
  3787. idmap[i].old = old_id;
  3788. idmap[i].cur = cur_id;
  3789. return true;
  3790. }
  3791. if (idmap[i].old == old_id)
  3792. return idmap[i].cur == cur_id;
  3793. }
  3794. /* We ran out of idmap slots, which should be impossible */
  3795. WARN_ON_ONCE(1);
  3796. return false;
  3797. }
  3798. /* Returns true if (rold safe implies rcur safe) */
  3799. static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
  3800. struct idpair *idmap)
  3801. {
  3802. bool equal;
  3803. if (!(rold->live & REG_LIVE_READ))
  3804. /* explored state didn't use this */
  3805. return true;
  3806. equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0;
  3807. if (rold->type == PTR_TO_STACK)
  3808. /* two stack pointers are equal only if they're pointing to
  3809. * the same stack frame, since fp-8 in foo != fp-8 in bar
  3810. */
  3811. return equal && rold->frameno == rcur->frameno;
  3812. if (equal)
  3813. return true;
  3814. if (rold->type == NOT_INIT)
  3815. /* explored state can't have used this */
  3816. return true;
  3817. if (rcur->type == NOT_INIT)
  3818. return false;
  3819. switch (rold->type) {
  3820. case SCALAR_VALUE:
  3821. if (rcur->type == SCALAR_VALUE) {
  3822. /* new val must satisfy old val knowledge */
  3823. return range_within(rold, rcur) &&
  3824. tnum_in(rold->var_off, rcur->var_off);
  3825. } else {
  3826. /* We're trying to use a pointer in place of a scalar.
  3827. * Even if the scalar was unbounded, this could lead to
  3828. * pointer leaks because scalars are allowed to leak
  3829. * while pointers are not. We could make this safe in
  3830. * special cases if root is calling us, but it's
  3831. * probably not worth the hassle.
  3832. */
  3833. return false;
  3834. }
  3835. case PTR_TO_MAP_VALUE:
  3836. /* If the new min/max/var_off satisfy the old ones and
  3837. * everything else matches, we are OK.
  3838. * We don't care about the 'id' value, because nothing
  3839. * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
  3840. */
  3841. return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
  3842. range_within(rold, rcur) &&
  3843. tnum_in(rold->var_off, rcur->var_off);
  3844. case PTR_TO_MAP_VALUE_OR_NULL:
  3845. /* a PTR_TO_MAP_VALUE could be safe to use as a
  3846. * PTR_TO_MAP_VALUE_OR_NULL into the same map.
  3847. * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
  3848. * checked, doing so could have affected others with the same
  3849. * id, and we can't check for that because we lost the id when
  3850. * we converted to a PTR_TO_MAP_VALUE.
  3851. */
  3852. if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
  3853. return false;
  3854. if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
  3855. return false;
  3856. /* Check our ids match any regs they're supposed to */
  3857. return check_ids(rold->id, rcur->id, idmap);
  3858. case PTR_TO_PACKET_META:
  3859. case PTR_TO_PACKET:
  3860. if (rcur->type != rold->type)
  3861. return false;
  3862. /* We must have at least as much range as the old ptr
  3863. * did, so that any accesses which were safe before are
  3864. * still safe. This is true even if old range < old off,
  3865. * since someone could have accessed through (ptr - k), or
  3866. * even done ptr -= k in a register, to get a safe access.
  3867. */
  3868. if (rold->range > rcur->range)
  3869. return false;
  3870. /* If the offsets don't match, we can't trust our alignment;
  3871. * nor can we be sure that we won't fall out of range.
  3872. */
  3873. if (rold->off != rcur->off)
  3874. return false;
  3875. /* id relations must be preserved */
  3876. if (rold->id && !check_ids(rold->id, rcur->id, idmap))
  3877. return false;
  3878. /* new val must satisfy old val knowledge */
  3879. return range_within(rold, rcur) &&
  3880. tnum_in(rold->var_off, rcur->var_off);
  3881. case PTR_TO_CTX:
  3882. case CONST_PTR_TO_MAP:
  3883. case PTR_TO_PACKET_END:
  3884. /* Only valid matches are exact, which memcmp() above
  3885. * would have accepted
  3886. */
  3887. default:
  3888. /* Don't know what's going on, just say it's not safe */
  3889. return false;
  3890. }
  3891. /* Shouldn't get here; if we do, say it's not safe */
  3892. WARN_ON_ONCE(1);
  3893. return false;
  3894. }
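/* Editorial sketch, not part of the original file: the SCALAR_VALUE case in
 * regsafe() accepts the current register only when its bounds and its known
 * bits are contained in what the explored state already proved.  The
 * hypothetical values below illustrate the tnum half of that check using the
 * helpers from <linux/tnum.h>; the helper name is not from the original.
 */
static void __maybe_unused regsafe_tnum_example(void)
{
	struct tnum old_bits = tnum_range(0, 255);	/* low 8 bits unknown */
	struct tnum cur_bits = tnum_range(16, 31);	/* only low 4 bits unknown */

	/* cur_bits is a subset of old_bits: pruning the current path is safe */
	WARN_ON(!tnum_in(old_bits, cur_bits));
	/* the reverse does not hold: old_bits admits values cur_bits excludes */
	WARN_ON(tnum_in(cur_bits, old_bits));
}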
  3895. static bool stacksafe(struct bpf_func_state *old,
  3896. struct bpf_func_state *cur,
  3897. struct idpair *idmap)
  3898. {
  3899. int i, spi;
  3900. /* if the explored stack has more populated slots than the current
  3901. * stack, such stacks are not equivalent
  3902. */
  3903. if (old->allocated_stack > cur->allocated_stack)
  3904. return false;
  3905. /* walk slots of the explored stack and ignore any additional
  3906. * slots in the current stack, since explored(safe) state
  3907. * didn't use them
  3908. */
  3909. for (i = 0; i < old->allocated_stack; i++) {
  3910. spi = i / BPF_REG_SIZE;
  3911. if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
  3912. /* explored state didn't use this */
  3913. continue;
  3914. if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
  3915. continue;
  3916. /* if old state was safe with misc data in the stack
  3917. * it will be safe with zero-initialized stack.
  3918. * The opposite is not true
  3919. */
  3920. if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
  3921. cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
  3922. continue;
  3923. if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
  3924. cur->stack[spi].slot_type[i % BPF_REG_SIZE])
  3925. /* Ex: old explored (safe) state has STACK_SPILL in
  3926. * this stack slot, but current has STACK_MISC ->
  3927. * these verifier states are not equivalent,
  3928. * return false to continue verification of this path
  3929. */
  3930. return false;
  3931. if (i % BPF_REG_SIZE)
  3932. continue;
  3933. if (old->stack[spi].slot_type[0] != STACK_SPILL)
  3934. continue;
  3935. if (!regsafe(&old->stack[spi].spilled_ptr,
  3936. &cur->stack[spi].spilled_ptr,
  3937. idmap))
  3938. /* when explored and current stack slot are both storing
  3939. * spilled registers, check that the stored pointer types
  3940. * are the same as well.
  3941. * Ex: explored safe path could have stored
  3942. * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
  3943. * but current path has stored:
  3944. * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
  3945. * such verifier states are not equivalent.
  3946. * return false to continue verification of this path
  3947. */
  3948. return false;
  3949. }
  3950. return true;
  3951. }
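/* Editorial note (illustration only, not original code): with the rules
 * above, an explored slot byte marked STACK_MISC is still matched by a
 * current slot byte holding STACK_ZERO, because constant zero is a
 * refinement of "some scratch data".  The reverse pairing, explored
 * STACK_ZERO against current STACK_MISC, falls through to the slot_type
 * comparison and makes stacksafe() return false, so that path keeps being
 * verified instead of being pruned.
 */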
  3952. /* compare two verifier states
  3953. *
  3954. * all states stored in state_list are known to be valid, since
  3955. * verifier reached 'bpf_exit' instruction through them
  3956. *
  3957. * this function is called when the verifier explores different branches of
  3958. * execution popped from the state stack. If it sees an old state that has
  3959. * a more strict register state and a more strict stack state, then this
  3960. * execution branch doesn't need to be explored further, since the verifier
  3961. * already concluded that the more strict state leads to a valid finish.
  3962. *
  3963. * Therefore two states are equivalent if the explored register state and
  3964. * the explored stack state are more conservative than the current ones.
  3965. * Example:
  3966. * explored current
  3967. * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
  3968. * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
  3969. *
  3970. * In other words, if the current stack state (the one being explored) has
  3971. * more valid slots than the old one that already passed validation, the
  3972. * verifier can stop exploring and conclude that the current state is valid too
  3973. *
  3974. * Similarly with registers. If the explored state has a register marked as
  3975. * invalid whereas the same register in the current state holds a meaningful
  3976. * type, the current state will still reach the 'bpf_exit' instruction safely
  3977. */
  3978. static bool func_states_equal(struct bpf_func_state *old,
  3979. struct bpf_func_state *cur)
  3980. {
  3981. struct idpair *idmap;
  3982. bool ret = false;
  3983. int i;
  3984. idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
  3985. /* If we failed to allocate the idmap, just say it's not safe */
  3986. if (!idmap)
  3987. return false;
  3988. for (i = 0; i < MAX_BPF_REG; i++) {
  3989. if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
  3990. goto out_free;
  3991. }
  3992. if (!stacksafe(old, cur, idmap))
  3993. goto out_free;
  3994. ret = true;
  3995. out_free:
  3996. kfree(idmap);
  3997. return ret;
  3998. }
  3999. static bool states_equal(struct bpf_verifier_env *env,
  4000. struct bpf_verifier_state *old,
  4001. struct bpf_verifier_state *cur)
  4002. {
  4003. int i;
  4004. if (old->curframe != cur->curframe)
  4005. return false;
  4006. /* for states to be equal callsites have to be the same
  4007. * and all frame states need to be equivalent
  4008. */
  4009. for (i = 0; i <= old->curframe; i++) {
  4010. if (old->frame[i]->callsite != cur->frame[i]->callsite)
  4011. return false;
  4012. if (!func_states_equal(old->frame[i], cur->frame[i]))
  4013. return false;
  4014. }
  4015. return true;
  4016. }
  4017. /* A write screens off any subsequent reads; but write marks come from the
  4018. * straight-line code between a state and its parent. When we arrive at an
  4019. * equivalent state (jump target or such) we didn't arrive by the straight-line
  4020. * code, so read marks in the state must propagate to the parent regardless
  4021. * of the state's write marks. That's what 'parent == state->parent' comparison
  4022. * in mark_reg_read() and mark_stack_slot_read() is for.
  4023. */
  4024. static int propagate_liveness(struct bpf_verifier_env *env,
  4025. const struct bpf_verifier_state *vstate,
  4026. struct bpf_verifier_state *vparent)
  4027. {
  4028. int i, frame, err = 0;
  4029. struct bpf_func_state *state, *parent;
  4030. if (vparent->curframe != vstate->curframe) {
  4031. WARN(1, "propagate_live: parent frame %d current frame %d\n",
  4032. vparent->curframe, vstate->curframe);
  4033. return -EFAULT;
  4034. }
  4035. /* Propagate read liveness of registers... */
  4036. BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
  4037. /* We don't need to worry about FP liveness because it's read-only */
  4038. for (i = 0; i < BPF_REG_FP; i++) {
  4039. if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
  4040. continue;
  4041. if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
  4042. err = mark_reg_read(env, vstate, vparent, i);
  4043. if (err)
  4044. return err;
  4045. }
  4046. }
  4047. /* ... and stack slots */
  4048. for (frame = 0; frame <= vstate->curframe; frame++) {
  4049. state = vstate->frame[frame];
  4050. parent = vparent->frame[frame];
  4051. for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
  4052. i < parent->allocated_stack / BPF_REG_SIZE; i++) {
  4053. if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
  4054. continue;
  4055. if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
  4056. mark_stack_slot_read(env, vstate, vparent, i, frame);
  4057. }
  4058. }
  4059. return err;
  4060. }
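/* Editorial sketch, not part of the original source: suppose the already
 * explored state we are pruning against carries REG_LIVE_READ on r2 because
 * its continuation reads r2 before writing it.  When the loop above runs
 * with that state as @vstate and the current path's state as @vparent, and
 * the parent lacks the read mark, mark_reg_read() is called for r2 so the
 * mark propagates up the current path's parentage chain.  Without this, a
 * write to r2 somewhere on the current path could screen off the
 * continuation's read and let a later, less precise r2 be pruned unsafely.
 */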
  4061. static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
  4062. {
  4063. struct bpf_verifier_state_list *new_sl;
  4064. struct bpf_verifier_state_list *sl;
  4065. struct bpf_verifier_state *cur = env->cur_state;
  4066. int i, j, err;
  4067. sl = env->explored_states[insn_idx];
  4068. if (!sl)
  4069. /* this 'insn_idx' instruction wasn't marked, so we will not
  4070. * be doing state search here
  4071. */
  4072. return 0;
  4073. while (sl != STATE_LIST_MARK) {
  4074. if (states_equal(env, &sl->state, cur)) {
  4075. /* reached equivalent register/stack state,
  4076. * prune the search.
  4077. * Registers read by the continuation are read by us.
  4078. * If we have any write marks in env->cur_state, they
  4079. * will prevent corresponding reads in the continuation
  4080. * from reaching our parent (an explored_state). Our
  4081. * own state will get the read marks recorded, but
  4082. * they'll be immediately forgotten as we're pruning
  4083. * this state and will pop a new one.
  4084. */
  4085. err = propagate_liveness(env, &sl->state, cur);
  4086. if (err)
  4087. return err;
  4088. return 1;
  4089. }
  4090. sl = sl->next;
  4091. }
  4092. /* there were no equivalent states, remember current one.
  4093. * technically the current state is not proven to be safe yet,
  4094. * but it will either reach the outermost bpf_exit (which means it's safe)
  4095. * or it will be rejected. Since there are no loops, we won't be
  4096. * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
  4097. * again on the way to bpf_exit
  4098. */
  4099. new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
  4100. if (!new_sl)
  4101. return -ENOMEM;
  4102. /* add new state to the head of linked list */
  4103. err = copy_verifier_state(&new_sl->state, cur);
  4104. if (err) {
  4105. free_verifier_state(&new_sl->state, false);
  4106. kfree(new_sl);
  4107. return err;
  4108. }
  4109. new_sl->next = env->explored_states[insn_idx];
  4110. env->explored_states[insn_idx] = new_sl;
  4111. /* connect new state to parentage chain */
  4112. cur->parent = &new_sl->state;
  4113. /* clear write marks in current state: the writes we did are not writes
  4114. * our child did, so they don't screen off its reads from us.
  4115. * (There are no read marks in current state, because reads always mark
  4116. * their parent and current state never has children yet. Only
  4117. * explored_states can get read marks.)
  4118. */
  4119. for (i = 0; i < BPF_REG_FP; i++)
  4120. cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
  4121. /* all stack frames are accessible from callee, clear them all */
  4122. for (j = 0; j <= cur->curframe; j++) {
  4123. struct bpf_func_state *frame = cur->frame[j];
  4124. for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++)
  4125. frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
  4126. }
  4127. return 0;
  4128. }
  4129. static int do_check(struct bpf_verifier_env *env)
  4130. {
  4131. struct bpf_verifier_state *state;
  4132. struct bpf_insn *insns = env->prog->insnsi;
  4133. struct bpf_reg_state *regs;
  4134. int insn_cnt = env->prog->len, i;
  4135. int insn_idx, prev_insn_idx = 0;
  4136. int insn_processed = 0;
  4137. bool do_print_state = false;
  4138. state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
  4139. if (!state)
  4140. return -ENOMEM;
  4141. state->curframe = 0;
  4142. state->parent = NULL;
  4143. state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
  4144. if (!state->frame[0]) {
  4145. kfree(state);
  4146. return -ENOMEM;
  4147. }
  4148. env->cur_state = state;
  4149. init_func_state(env, state->frame[0],
  4150. BPF_MAIN_FUNC /* callsite */,
  4151. 0 /* frameno */,
  4152. 0 /* subprogno, zero == main subprog */);
  4153. insn_idx = 0;
  4154. for (;;) {
  4155. struct bpf_insn *insn;
  4156. u8 class;
  4157. int err;
  4158. if (insn_idx >= insn_cnt) {
  4159. verbose(env, "invalid insn idx %d insn_cnt %d\n",
  4160. insn_idx, insn_cnt);
  4161. return -EFAULT;
  4162. }
  4163. insn = &insns[insn_idx];
  4164. class = BPF_CLASS(insn->code);
  4165. if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
  4166. verbose(env,
  4167. "BPF program is too large. Processed %d insn\n",
  4168. insn_processed);
  4169. return -E2BIG;
  4170. }
  4171. err = is_state_visited(env, insn_idx);
  4172. if (err < 0)
  4173. return err;
  4174. if (err == 1) {
  4175. /* found equivalent state, can prune the search */
  4176. if (env->log.level) {
  4177. if (do_print_state)
  4178. verbose(env, "\nfrom %d to %d: safe\n",
  4179. prev_insn_idx, insn_idx);
  4180. else
  4181. verbose(env, "%d: safe\n", insn_idx);
  4182. }
  4183. goto process_bpf_exit;
  4184. }
  4185. if (need_resched())
  4186. cond_resched();
  4187. if (env->log.level > 1 || (env->log.level && do_print_state)) {
  4188. if (env->log.level > 1)
  4189. verbose(env, "%d:", insn_idx);
  4190. else
  4191. verbose(env, "\nfrom %d to %d:",
  4192. prev_insn_idx, insn_idx);
  4193. print_verifier_state(env, state->frame[state->curframe]);
  4194. do_print_state = false;
  4195. }
  4196. if (env->log.level) {
  4197. const struct bpf_insn_cbs cbs = {
  4198. .cb_print = verbose,
  4199. .private_data = env,
  4200. };
  4201. verbose(env, "%d: ", insn_idx);
  4202. print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
  4203. }
  4204. if (bpf_prog_is_dev_bound(env->prog->aux)) {
  4205. err = bpf_prog_offload_verify_insn(env, insn_idx,
  4206. prev_insn_idx);
  4207. if (err)
  4208. return err;
  4209. }
  4210. regs = cur_regs(env);
  4211. env->insn_aux_data[insn_idx].seen = true;
  4212. if (class == BPF_ALU || class == BPF_ALU64) {
  4213. err = check_alu_op(env, insn);
  4214. if (err)
  4215. return err;
  4216. } else if (class == BPF_LDX) {
  4217. enum bpf_reg_type *prev_src_type, src_reg_type;
  4218. /* check for reserved fields is already done */
  4219. /* check src operand */
  4220. err = check_reg_arg(env, insn->src_reg, SRC_OP);
  4221. if (err)
  4222. return err;
  4223. err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
  4224. if (err)
  4225. return err;
  4226. src_reg_type = regs[insn->src_reg].type;
  4227. /* check that memory (src_reg + off) is readable,
  4228. * the state of dst_reg will be updated by this func
  4229. */
  4230. err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
  4231. BPF_SIZE(insn->code), BPF_READ,
  4232. insn->dst_reg, false);
  4233. if (err)
  4234. return err;
  4235. prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
  4236. if (*prev_src_type == NOT_INIT) {
  4237. /* saw a valid insn
  4238. * dst_reg = *(u32 *)(src_reg + off)
  4239. * save type to validate intersecting paths
  4240. */
  4241. *prev_src_type = src_reg_type;
  4242. } else if (src_reg_type != *prev_src_type &&
  4243. (src_reg_type == PTR_TO_CTX ||
  4244. *prev_src_type == PTR_TO_CTX)) {
  4245. /* An abusive program is trying to use the same insn
  4246. * dst_reg = *(u32*) (src_reg + off)
  4247. * with different pointer types:
  4248. * src_reg == ctx in one branch and
  4249. * src_reg == stack|map in some other branch.
  4250. * Reject it.
  4251. */
  4252. verbose(env, "same insn cannot be used with different pointers\n");
  4253. return -EINVAL;
  4254. }
  4255. } else if (class == BPF_STX) {
  4256. enum bpf_reg_type *prev_dst_type, dst_reg_type;
  4257. if (BPF_MODE(insn->code) == BPF_XADD) {
  4258. err = check_xadd(env, insn_idx, insn);
  4259. if (err)
  4260. return err;
  4261. insn_idx++;
  4262. continue;
  4263. }
  4264. /* check src1 operand */
  4265. err = check_reg_arg(env, insn->src_reg, SRC_OP);
  4266. if (err)
  4267. return err;
  4268. /* check src2 operand */
  4269. err = check_reg_arg(env, insn->dst_reg, SRC_OP);
  4270. if (err)
  4271. return err;
  4272. dst_reg_type = regs[insn->dst_reg].type;
  4273. /* check that memory (dst_reg + off) is writeable */
  4274. err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
  4275. BPF_SIZE(insn->code), BPF_WRITE,
  4276. insn->src_reg, false);
  4277. if (err)
  4278. return err;
  4279. prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
  4280. if (*prev_dst_type == NOT_INIT) {
  4281. *prev_dst_type = dst_reg_type;
  4282. } else if (dst_reg_type != *prev_dst_type &&
  4283. (dst_reg_type == PTR_TO_CTX ||
  4284. *prev_dst_type == PTR_TO_CTX)) {
  4285. verbose(env, "same insn cannot be used with different pointers\n");
  4286. return -EINVAL;
  4287. }
  4288. } else if (class == BPF_ST) {
  4289. if (BPF_MODE(insn->code) != BPF_MEM ||
  4290. insn->src_reg != BPF_REG_0) {
  4291. verbose(env, "BPF_ST uses reserved fields\n");
  4292. return -EINVAL;
  4293. }
  4294. /* check src operand */
  4295. err = check_reg_arg(env, insn->dst_reg, SRC_OP);
  4296. if (err)
  4297. return err;
  4298. if (is_ctx_reg(env, insn->dst_reg)) {
  4299. verbose(env, "BPF_ST stores into R%d context is not allowed\n",
  4300. insn->dst_reg);
  4301. return -EACCES;
  4302. }
  4303. /* check that memory (dst_reg + off) is writeable */
  4304. err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
  4305. BPF_SIZE(insn->code), BPF_WRITE,
  4306. -1, false);
  4307. if (err)
  4308. return err;
  4309. } else if (class == BPF_JMP) {
  4310. u8 opcode = BPF_OP(insn->code);
  4311. if (opcode == BPF_CALL) {
  4312. if (BPF_SRC(insn->code) != BPF_K ||
  4313. insn->off != 0 ||
  4314. (insn->src_reg != BPF_REG_0 &&
  4315. insn->src_reg != BPF_PSEUDO_CALL) ||
  4316. insn->dst_reg != BPF_REG_0) {
  4317. verbose(env, "BPF_CALL uses reserved fields\n");
  4318. return -EINVAL;
  4319. }
  4320. if (insn->src_reg == BPF_PSEUDO_CALL)
  4321. err = check_func_call(env, insn, &insn_idx);
  4322. else
  4323. err = check_helper_call(env, insn->imm, insn_idx);
  4324. if (err)
  4325. return err;
  4326. } else if (opcode == BPF_JA) {
  4327. if (BPF_SRC(insn->code) != BPF_K ||
  4328. insn->imm != 0 ||
  4329. insn->src_reg != BPF_REG_0 ||
  4330. insn->dst_reg != BPF_REG_0) {
  4331. verbose(env, "BPF_JA uses reserved fields\n");
  4332. return -EINVAL;
  4333. }
  4334. insn_idx += insn->off + 1;
  4335. continue;
  4336. } else if (opcode == BPF_EXIT) {
  4337. if (BPF_SRC(insn->code) != BPF_K ||
  4338. insn->imm != 0 ||
  4339. insn->src_reg != BPF_REG_0 ||
  4340. insn->dst_reg != BPF_REG_0) {
  4341. verbose(env, "BPF_EXIT uses reserved fields\n");
  4342. return -EINVAL;
  4343. }
  4344. if (state->curframe) {
  4345. /* exit from nested function */
  4346. prev_insn_idx = insn_idx;
  4347. err = prepare_func_exit(env, &insn_idx);
  4348. if (err)
  4349. return err;
  4350. do_print_state = true;
  4351. continue;
  4352. }
  4353. /* eBPF calling convention is such that R0 is used
  4354. * to return the value from eBPF program.
  4355. * Make sure that it's readable at this time
  4356. * of bpf_exit, which means that program wrote
  4357. * something into it earlier
  4358. */
  4359. err = check_reg_arg(env, BPF_REG_0, SRC_OP);
  4360. if (err)
  4361. return err;
  4362. if (is_pointer_value(env, BPF_REG_0)) {
  4363. verbose(env, "R0 leaks addr as return value\n");
  4364. return -EACCES;
  4365. }
  4366. err = check_return_code(env);
  4367. if (err)
  4368. return err;
  4369. process_bpf_exit:
  4370. err = pop_stack(env, &prev_insn_idx, &insn_idx);
  4371. if (err < 0) {
  4372. if (err != -ENOENT)
  4373. return err;
  4374. break;
  4375. } else {
  4376. do_print_state = true;
  4377. continue;
  4378. }
  4379. } else {
  4380. err = check_cond_jmp_op(env, insn, &insn_idx);
  4381. if (err)
  4382. return err;
  4383. }
  4384. } else if (class == BPF_LD) {
  4385. u8 mode = BPF_MODE(insn->code);
  4386. if (mode == BPF_ABS || mode == BPF_IND) {
  4387. err = check_ld_abs(env, insn);
  4388. if (err)
  4389. return err;
  4390. } else if (mode == BPF_IMM) {
  4391. err = check_ld_imm(env, insn);
  4392. if (err)
  4393. return err;
  4394. insn_idx++;
  4395. env->insn_aux_data[insn_idx].seen = true;
  4396. } else {
  4397. verbose(env, "invalid BPF_LD mode\n");
  4398. return -EINVAL;
  4399. }
  4400. } else {
  4401. verbose(env, "unknown insn class %d\n", class);
  4402. return -EINVAL;
  4403. }
  4404. insn_idx++;
  4405. }
  4406. verbose(env, "processed %d insns (limit %d), stack depth ",
  4407. insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
  4408. for (i = 0; i < env->subprog_cnt + 1; i++) {
  4409. u32 depth = env->subprog_stack_depth[i];
  4410. verbose(env, "%d", depth);
  4411. if (i + 1 < env->subprog_cnt + 1)
  4412. verbose(env, "+");
  4413. }
  4414. verbose(env, "\n");
  4415. env->prog->aux->stack_depth = env->subprog_stack_depth[0];
  4416. return 0;
  4417. }
  4418. static int check_map_prealloc(struct bpf_map *map)
  4419. {
  4420. return (map->map_type != BPF_MAP_TYPE_HASH &&
  4421. map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
  4422. map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
  4423. !(map->map_flags & BPF_F_NO_PREALLOC);
  4424. }
  4425. static int check_map_prog_compatibility(struct bpf_verifier_env *env,
  4426. struct bpf_map *map,
  4427. struct bpf_prog *prog)
  4428. {
  4429. /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
  4430. * preallocated hash maps, since doing memory allocation
  4431. * in overflow_handler can crash depending on where the NMI got
  4432. * triggered.
  4433. */
  4434. if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
  4435. if (!check_map_prealloc(map)) {
  4436. verbose(env, "perf_event programs can only use preallocated hash map\n");
  4437. return -EINVAL;
  4438. }
  4439. if (map->inner_map_meta &&
  4440. !check_map_prealloc(map->inner_map_meta)) {
  4441. verbose(env, "perf_event programs can only use preallocated inner hash map\n");
  4442. return -EINVAL;
  4443. }
  4444. }
  4445. if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
  4446. !bpf_offload_dev_match(prog, map)) {
  4447. verbose(env, "offload device mismatch between prog and map\n");
  4448. return -EINVAL;
  4449. }
  4450. return 0;
  4451. }
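/* Editorial note (illustration only, not original code): with the checks
 * above, loading a BPF_PROG_TYPE_PERF_EVENT program that uses a
 * BPF_MAP_TYPE_HASH map created with BPF_F_NO_PREALLOC fails, because
 * elements of such a map are allocated on update and allocating from the
 * NMI-driven overflow handler is unsafe.  The same hash map without
 * BPF_F_NO_PREALLOC, or an array map, is accepted.
 */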
  4452. /* look for pseudo eBPF instructions that access map FDs and
  4453. * replace them with actual map pointers
  4454. */
  4455. static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
  4456. {
  4457. struct bpf_insn *insn = env->prog->insnsi;
  4458. int insn_cnt = env->prog->len;
  4459. int i, j, err;
  4460. err = bpf_prog_calc_tag(env->prog);
  4461. if (err)
  4462. return err;
  4463. for (i = 0; i < insn_cnt; i++, insn++) {
  4464. if (BPF_CLASS(insn->code) == BPF_LDX &&
  4465. (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
  4466. verbose(env, "BPF_LDX uses reserved fields\n");
  4467. return -EINVAL;
  4468. }
  4469. if (BPF_CLASS(insn->code) == BPF_STX &&
  4470. ((BPF_MODE(insn->code) != BPF_MEM &&
  4471. BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
  4472. verbose(env, "BPF_STX uses reserved fields\n");
  4473. return -EINVAL;
  4474. }
  4475. if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
  4476. struct bpf_map *map;
  4477. struct fd f;
  4478. if (i == insn_cnt - 1 || insn[1].code != 0 ||
  4479. insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
  4480. insn[1].off != 0) {
  4481. verbose(env, "invalid bpf_ld_imm64 insn\n");
  4482. return -EINVAL;
  4483. }
  4484. if (insn->src_reg == 0)
  4485. /* valid generic load 64-bit imm */
  4486. goto next_insn;
  4487. if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
  4488. verbose(env,
  4489. "unrecognized bpf_ld_imm64 insn\n");
  4490. return -EINVAL;
  4491. }
  4492. f = fdget(insn->imm);
  4493. map = __bpf_map_get(f);
  4494. if (IS_ERR(map)) {
  4495. verbose(env, "fd %d is not pointing to valid bpf_map\n",
  4496. insn->imm);
  4497. return PTR_ERR(map);
  4498. }
  4499. err = check_map_prog_compatibility(env, map, env->prog);
  4500. if (err) {
  4501. fdput(f);
  4502. return err;
  4503. }
  4504. /* store map pointer inside BPF_LD_IMM64 instruction */
  4505. insn[0].imm = (u32) (unsigned long) map;
  4506. insn[1].imm = ((u64) (unsigned long) map) >> 32;
  4507. /* check whether we recorded this map already */
  4508. for (j = 0; j < env->used_map_cnt; j++)
  4509. if (env->used_maps[j] == map) {
  4510. fdput(f);
  4511. goto next_insn;
  4512. }
  4513. if (env->used_map_cnt >= MAX_USED_MAPS) {
  4514. fdput(f);
  4515. return -E2BIG;
  4516. }
  4517. /* hold the map. If the program is rejected by verifier,
  4518. * the map will be released by release_maps() or it
  4519. * will be used by the valid program until it's unloaded
  4520. * and all maps are released in free_bpf_prog_info()
  4521. */
  4522. map = bpf_map_inc(map, false);
  4523. if (IS_ERR(map)) {
  4524. fdput(f);
  4525. return PTR_ERR(map);
  4526. }
  4527. env->used_maps[env->used_map_cnt++] = map;
  4528. fdput(f);
  4529. next_insn:
  4530. insn++;
  4531. i++;
  4532. continue;
  4533. }
  4534. /* Basic sanity check before we invest more work here. */
  4535. if (!bpf_opcode_in_insntable(insn->code)) {
  4536. verbose(env, "unknown opcode %02x\n", insn->code);
  4537. return -EINVAL;
  4538. }
  4539. }
  4540. /* now all pseudo BPF_LD_IMM64 instructions load valid
  4541. * 'struct bpf_map *' into a register instead of user map_fd.
  4542. * These pointers will be used later by verifier to validate map access.
  4543. */
  4544. return 0;
  4545. }
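/* Editorial sketch (illustration only): a program typically references a map
 * through the two-insn pseudo load emitted by the BPF_LD_MAP_FD() macro,
 * i.e. a BPF_LD | BPF_IMM | BPF_DW insn with src_reg == BPF_PSEUDO_MAP_FD
 * and imm holding the map file descriptor.  The loop above resolves that fd
 * to a struct bpf_map *, stores its low 32 bits in insn[0].imm and its high
 * 32 bits in insn[1].imm, and records the map in env->used_maps[] so the
 * reference is held for the lifetime of the program.
 */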
  4546. /* drop refcnt of maps used by the rejected program */
  4547. static void release_maps(struct bpf_verifier_env *env)
  4548. {
  4549. int i;
  4550. for (i = 0; i < env->used_map_cnt; i++)
  4551. bpf_map_put(env->used_maps[i]);
  4552. }
  4553. /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
  4554. static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
  4555. {
  4556. struct bpf_insn *insn = env->prog->insnsi;
  4557. int insn_cnt = env->prog->len;
  4558. int i;
  4559. for (i = 0; i < insn_cnt; i++, insn++)
  4560. if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
  4561. insn->src_reg = 0;
  4562. }
  4563. /* single env->prog->insnsi[off] instruction was replaced with the range
  4564. * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
  4565. * [0, off) and [off, end) to new locations, so the patched range stays zero
  4566. */
  4567. static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
  4568. u32 off, u32 cnt)
  4569. {
  4570. struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
  4571. int i;
  4572. if (cnt == 1)
  4573. return 0;
  4574. new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
  4575. if (!new_data)
  4576. return -ENOMEM;
  4577. memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
  4578. memcpy(new_data + off + cnt - 1, old_data + off,
  4579. sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
  4580. for (i = off; i < off + cnt - 1; i++)
  4581. new_data[i].seen = true;
  4582. env->insn_aux_data = new_data;
  4583. vfree(old_data);
  4584. return 0;
  4585. }
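/* Editorial walk-through with hypothetical numbers (not original code):
 * patching the single insn at off == 10 with a cnt == 3 sequence makes the
 * program two insns longer.  The copies above keep aux data [0, 10) in
 * place, move the old aux data for insn 10 and everything after it up by
 * cnt - 1 == 2 slots, and leave the two freshly inserted slots (new indices
 * 10 and 11) zeroed except for ->seen, so later passes treat the inserted
 * insns as reachable but otherwise unannotated.
 */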
  4586. static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
  4587. {
  4588. int i;
  4589. if (len == 1)
  4590. return;
  4591. for (i = 0; i < env->subprog_cnt; i++) {
  4592. if (env->subprog_starts[i] < off)
  4593. continue;
  4594. env->subprog_starts[i] += len - 1;
  4595. }
  4596. }
  4597. static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
  4598. const struct bpf_insn *patch, u32 len)
  4599. {
  4600. struct bpf_prog *new_prog;
  4601. new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
  4602. if (!new_prog)
  4603. return NULL;
  4604. if (adjust_insn_aux_data(env, new_prog->len, off, len))
  4605. return NULL;
  4606. adjust_subprog_starts(env, off, len);
  4607. return new_prog;
  4608. }
  4609. /* The verifier does more data flow analysis than llvm and will not
  4610. * explore branches that are dead at run time. Malicious programs can
  4611. * have dead code too. Therefore replace all dead at-run-time code
  4612. * with 'ja -1'.
  4613. *
  4614. * Plain nops would not be optimal: if they sat at the end of the
  4615. * program and, through another bug, we managed to jump there, we
  4616. * would execute beyond program memory. Returning an exception
  4617. * code also wouldn't work, since we can have subprogs where the dead
  4618. * code could be located.
  4619. */
  4620. static void sanitize_dead_code(struct bpf_verifier_env *env)
  4621. {
  4622. struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
  4623. struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
  4624. struct bpf_insn *insn = env->prog->insnsi;
  4625. const int insn_cnt = env->prog->len;
  4626. int i;
  4627. for (i = 0; i < insn_cnt; i++) {
  4628. if (aux_data[i].seen)
  4629. continue;
  4630. memcpy(insn + i, &trap, sizeof(trap));
  4631. }
  4632. }
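/* Editorial sketch (hypothetical program, not original code): in
 *
 *	0: r0 = 0
 *	1: if r0 != 0 goto pc+1
 *	2: exit
 *	3: exit
 *
 * insn 3 is reachable in the CFG (so check_cfg() accepts it), but do_check()
 * proves the branch at insn 1 is never taken and therefore never marks
 * insn 3 as ->seen.  The loop above overwrites it in place with
 * BPF_JMP_IMM(BPF_JA, 0, 0, -1), a jump targeting itself, so even a stray
 * jump into dead code cannot run off the end of the program or into another
 * subprog's code.
 */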
  4633. /* convert load instructions that access fields of 'struct __sk_buff'
  4634. * into sequence of instructions that access fields of 'struct sk_buff'
  4635. */
  4636. static int convert_ctx_accesses(struct bpf_verifier_env *env)
  4637. {
  4638. const struct bpf_verifier_ops *ops = env->ops;
  4639. int i, cnt, size, ctx_field_size, delta = 0;
  4640. const int insn_cnt = env->prog->len;
  4641. struct bpf_insn insn_buf[16], *insn;
  4642. struct bpf_prog *new_prog;
  4643. enum bpf_access_type type;
  4644. bool is_narrower_load;
  4645. u32 target_size;
  4646. if (ops->gen_prologue) {
  4647. cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
  4648. env->prog);
  4649. if (cnt >= ARRAY_SIZE(insn_buf)) {
  4650. verbose(env, "bpf verifier is misconfigured\n");
  4651. return -EINVAL;
  4652. } else if (cnt) {
  4653. new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
  4654. if (!new_prog)
  4655. return -ENOMEM;
  4656. env->prog = new_prog;
  4657. delta += cnt - 1;
  4658. }
  4659. }
  4660. if (!ops->convert_ctx_access)
  4661. return 0;
  4662. insn = env->prog->insnsi + delta;
  4663. for (i = 0; i < insn_cnt; i++, insn++) {
  4664. if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
  4665. insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
  4666. insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
  4667. insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
  4668. type = BPF_READ;
  4669. else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
  4670. insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
  4671. insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
  4672. insn->code == (BPF_STX | BPF_MEM | BPF_DW))
  4673. type = BPF_WRITE;
  4674. else
  4675. continue;
  4676. if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
  4677. continue;
  4678. ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
  4679. size = BPF_LDST_BYTES(insn);
  4680. /* If the read access is a narrower load of the field,
  4681. * convert it to a 4/8-byte load, to minimize the program type
  4682. * specific convert_ctx_access changes. If the conversion is
  4683. * successful, we will apply the proper mask to the result.
  4684. */
  4685. is_narrower_load = size < ctx_field_size;
  4686. if (is_narrower_load) {
  4687. u32 off = insn->off;
  4688. u8 size_code;
  4689. if (type == BPF_WRITE) {
  4690. verbose(env, "bpf verifier narrow ctx access misconfigured\n");
  4691. return -EINVAL;
  4692. }
  4693. size_code = BPF_H;
  4694. if (ctx_field_size == 4)
  4695. size_code = BPF_W;
  4696. else if (ctx_field_size == 8)
  4697. size_code = BPF_DW;
  4698. insn->off = off & ~(ctx_field_size - 1);
  4699. insn->code = BPF_LDX | BPF_MEM | size_code;
  4700. }
  4701. target_size = 0;
  4702. cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
  4703. &target_size);
  4704. if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
  4705. (ctx_field_size && !target_size)) {
  4706. verbose(env, "bpf verifier is misconfigured\n");
  4707. return -EINVAL;
  4708. }
  4709. if (is_narrower_load && size < target_size) {
  4710. if (ctx_field_size <= 4)
  4711. insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
  4712. (1 << size * 8) - 1);
  4713. else
  4714. insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
  4715. (1 << size * 8) - 1);
  4716. }
  4717. new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  4718. if (!new_prog)
  4719. return -ENOMEM;
  4720. delta += cnt - 1;
  4721. /* keep walking new program and skip insns we just inserted */
  4722. env->prog = new_prog;
  4723. insn = new_prog->insnsi + i + delta;
  4724. }
  4725. return 0;
  4726. }
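/* Editorial walk-through (hypothetical offsets, not original code): a 2-byte
 * context read such as
 *
 *	r0 = *(u16 *)(r1 + off)
 *
 * of a field whose ctx_field_size is 4 is a narrower load.  The code above
 * rewrites it to a BPF_W load of the field, lets the program type's
 * convert_ctx_access() callback translate that full-width access, and, when
 * the translated load is wider than the original, appends
 * BPF_ALU32_IMM(BPF_AND, dst_reg, 0xffff) so the program still observes only
 * the two bytes it asked for.
 */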
  4727. static int jit_subprogs(struct bpf_verifier_env *env)
  4728. {
  4729. struct bpf_prog *prog = env->prog, **func, *tmp;
  4730. int i, j, subprog_start, subprog_end = 0, len, subprog;
  4731. struct bpf_insn *insn;
  4732. void *old_bpf_func;
  4733. int err = -ENOMEM;
  4734. if (env->subprog_cnt == 0)
  4735. return 0;
  4736. for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
  4737. if (insn->code != (BPF_JMP | BPF_CALL) ||
  4738. insn->src_reg != BPF_PSEUDO_CALL)
  4739. continue;
  4740. subprog = find_subprog(env, i + insn->imm + 1);
  4741. if (subprog < 0) {
  4742. WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
  4743. i + insn->imm + 1);
  4744. return -EFAULT;
  4745. }
  4746. /* temporarily remember subprog id inside insn instead of
  4747. * aux_data, since next loop will split up all insns into funcs
  4748. */
  4749. insn->off = subprog + 1;
  4750. /* remember original imm in case JIT fails and fallback
  4751. * to interpreter will be needed
  4752. */
  4753. env->insn_aux_data[i].call_imm = insn->imm;
  4754. /* point imm to __bpf_call_base+1 from JITs point of view */
  4755. insn->imm = 1;
  4756. }
  4757. func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
  4758. if (!func)
  4759. return -ENOMEM;
  4760. for (i = 0; i <= env->subprog_cnt; i++) {
  4761. subprog_start = subprog_end;
  4762. if (env->subprog_cnt == i)
  4763. subprog_end = prog->len;
  4764. else
  4765. subprog_end = env->subprog_starts[i];
  4766. len = subprog_end - subprog_start;
  4767. func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
  4768. if (!func[i])
  4769. goto out_free;
  4770. memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
  4771. len * sizeof(struct bpf_insn));
  4772. func[i]->type = prog->type;
  4773. func[i]->len = len;
  4774. if (bpf_prog_calc_tag(func[i]))
  4775. goto out_free;
  4776. func[i]->is_func = 1;
  4777. /* Use bpf_prog_F_tag to indicate functions in stack traces.
  4778. * Long term would need debug info to populate names
  4779. */
  4780. func[i]->aux->name[0] = 'F';
  4781. func[i]->aux->stack_depth = env->subprog_stack_depth[i];
  4782. func[i]->jit_requested = 1;
  4783. func[i] = bpf_int_jit_compile(func[i]);
  4784. if (!func[i]->jited) {
  4785. err = -ENOTSUPP;
  4786. goto out_free;
  4787. }
  4788. cond_resched();
  4789. }
  4790. /* at this point all bpf functions were successfully JITed
  4791. * now populate all bpf_calls with correct addresses and
  4792. * run last pass of JIT
  4793. */
  4794. for (i = 0; i <= env->subprog_cnt; i++) {
  4795. insn = func[i]->insnsi;
  4796. for (j = 0; j < func[i]->len; j++, insn++) {
  4797. if (insn->code != (BPF_JMP | BPF_CALL) ||
  4798. insn->src_reg != BPF_PSEUDO_CALL)
  4799. continue;
  4800. subprog = insn->off;
  4801. insn->off = 0;
  4802. insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
  4803. func[subprog]->bpf_func -
  4804. __bpf_call_base;
  4805. }
  4806. }
  4807. for (i = 0; i <= env->subprog_cnt; i++) {
  4808. old_bpf_func = func[i]->bpf_func;
  4809. tmp = bpf_int_jit_compile(func[i]);
  4810. if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
  4811. verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
  4812. err = -EFAULT;
  4813. goto out_free;
  4814. }
  4815. cond_resched();
  4816. }
  4817. /* finally lock prog and jit images for all functions and
  4818. * populate kallsyms
  4819. */
  4820. for (i = 0; i <= env->subprog_cnt; i++) {
  4821. bpf_prog_lock_ro(func[i]);
  4822. bpf_prog_kallsyms_add(func[i]);
  4823. }
  4824. /* Last step: make now unused interpreter insns from main
  4825. * prog consistent for later dump requests, so they
  4826. * look the same as if they had only ever been interpreted.
  4827. */
  4828. for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
  4829. unsigned long addr;
  4830. if (insn->code != (BPF_JMP | BPF_CALL) ||
  4831. insn->src_reg != BPF_PSEUDO_CALL)
  4832. continue;
  4833. insn->off = env->insn_aux_data[i].call_imm;
  4834. subprog = find_subprog(env, i + insn->off + 1);
  4835. addr = (unsigned long)func[subprog + 1]->bpf_func;
  4836. addr &= PAGE_MASK;
  4837. insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
  4838. addr - __bpf_call_base;
  4839. }
  4840. prog->jited = 1;
  4841. prog->bpf_func = func[0]->bpf_func;
  4842. prog->aux->func = func;
  4843. prog->aux->func_cnt = env->subprog_cnt + 1;
  4844. return 0;
  4845. out_free:
  4846. for (i = 0; i <= env->subprog_cnt; i++)
  4847. if (func[i])
  4848. bpf_jit_free(func[i]);
  4849. kfree(func);
  4850. /* cleanup main prog to be interpreted */
  4851. prog->jit_requested = 0;
  4852. for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
  4853. if (insn->code != (BPF_JMP | BPF_CALL) ||
  4854. insn->src_reg != BPF_PSEUDO_CALL)
  4855. continue;
  4856. insn->off = 0;
  4857. insn->imm = env->insn_aux_data[i].call_imm;
  4858. }
  4859. return err;
  4860. }
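/* Editorial sketch (not original code): the imm/off juggling above amounts
 * to a three-step relocation of each "call pc+N" (BPF_PSEUDO_CALL) insn:
 * 1) before splitting, stash the callee's func[] index in insn->off and the
 *    original relative target N in insn_aux_data[].call_imm;
 * 2) once every function is JITed, rewrite insn->imm to the callee's JITed
 *    address relative to __bpf_call_base and run the final JIT pass;
 * 3) in the interpreter copy of the main prog kept for dumps, put the
 *    original relative target back in insn->off; on JIT failure the error
 *    path instead restores insn->imm from call_imm so the interpreter can
 *    still run the unsplit program.
 */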
  4861. static int fixup_call_args(struct bpf_verifier_env *env)
  4862. {
  4863. #ifndef CONFIG_BPF_JIT_ALWAYS_ON
  4864. struct bpf_prog *prog = env->prog;
  4865. struct bpf_insn *insn = prog->insnsi;
  4866. int i, depth;
  4867. #endif
  4868. int err;
  4869. err = 0;
  4870. if (env->prog->jit_requested) {
  4871. err = jit_subprogs(env);
  4872. if (err == 0)
  4873. return 0;
  4874. }
  4875. #ifndef CONFIG_BPF_JIT_ALWAYS_ON
  4876. for (i = 0; i < prog->len; i++, insn++) {
  4877. if (insn->code != (BPF_JMP | BPF_CALL) ||
  4878. insn->src_reg != BPF_PSEUDO_CALL)
  4879. continue;
  4880. depth = get_callee_stack_depth(env, insn, i);
  4881. if (depth < 0)
  4882. return depth;
  4883. bpf_patch_call_args(insn, depth);
  4884. }
  4885. err = 0;
  4886. #endif
  4887. return err;
  4888. }
  4889. /* fixup insn->imm field of bpf_call instructions
  4890. * and inline eligible helpers as explicit sequence of BPF instructions
  4891. *
  4892. * this function is called after eBPF program passed verification
  4893. */
  4894. static int fixup_bpf_calls(struct bpf_verifier_env *env)
  4895. {
  4896. struct bpf_prog *prog = env->prog;
  4897. struct bpf_insn *insn = prog->insnsi;
  4898. const struct bpf_func_proto *fn;
  4899. const int insn_cnt = prog->len;
  4900. struct bpf_insn insn_buf[16];
  4901. struct bpf_prog *new_prog;
  4902. struct bpf_map *map_ptr;
  4903. int i, cnt, delta = 0;
  4904. for (i = 0; i < insn_cnt; i++, insn++) {
  4905. if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
  4906. insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
  4907. insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
  4908. insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
  4909. bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
  4910. struct bpf_insn mask_and_div[] = {
  4911. BPF_MOV32_REG(insn->src_reg, insn->src_reg),
  4912. /* Rx div 0 -> 0 */
  4913. BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
  4914. BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
  4915. BPF_JMP_IMM(BPF_JA, 0, 0, 1),
  4916. *insn,
  4917. };
  4918. struct bpf_insn mask_and_mod[] = {
  4919. BPF_MOV32_REG(insn->src_reg, insn->src_reg),
  4920. /* Rx mod 0 -> Rx */
  4921. BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
  4922. *insn,
  4923. };
  4924. struct bpf_insn *patchlet;
  4925. if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
  4926. insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
  4927. patchlet = mask_and_div + (is64 ? 1 : 0);
  4928. cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
  4929. } else {
  4930. patchlet = mask_and_mod + (is64 ? 1 : 0);
  4931. cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
  4932. }
  4933. new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
  4934. if (!new_prog)
  4935. return -ENOMEM;
  4936. delta += cnt - 1;
  4937. env->prog = prog = new_prog;
  4938. insn = new_prog->insnsi + i + delta;
  4939. continue;
  4940. }
  4941. if (insn->code != (BPF_JMP | BPF_CALL))
  4942. continue;
  4943. if (insn->src_reg == BPF_PSEUDO_CALL)
  4944. continue;
  4945. if (insn->imm == BPF_FUNC_get_route_realm)
  4946. prog->dst_needed = 1;
  4947. if (insn->imm == BPF_FUNC_get_prandom_u32)
  4948. bpf_user_rnd_init_once();
  4949. if (insn->imm == BPF_FUNC_override_return)
  4950. prog->kprobe_override = 1;
  4951. if (insn->imm == BPF_FUNC_tail_call) {
  4952. /* If we tail call into other programs, we
  4953. * cannot make any assumptions since they can
  4954. * be replaced dynamically during runtime in
  4955. * the program array.
  4956. */
  4957. prog->cb_access = 1;
  4958. env->prog->aux->stack_depth = MAX_BPF_STACK;
  4959. /* mark bpf_tail_call as different opcode to avoid
  4960. * conditional branch in the interpreter for every normal
  4961. * call and to prevent accidental JITing by a JIT compiler
  4962. * that doesn't support bpf_tail_call yet
  4963. */
  4964. insn->imm = 0;
  4965. insn->code = BPF_JMP | BPF_TAIL_CALL;
  4966. /* instead of changing every JIT dealing with tail_call
  4967. * emit two extra insns:
  4968. * if (index >= max_entries) goto out;
  4969. * index &= array->index_mask;
  4970. * to avoid out-of-bounds cpu speculation
  4971. */
  4972. map_ptr = env->insn_aux_data[i + delta].map_ptr;
  4973. if (map_ptr == BPF_MAP_PTR_POISON) {
  4974. verbose(env, "tail_call abusing map_ptr\n");
  4975. return -EINVAL;
  4976. }
  4977. if (!map_ptr->unpriv_array)
  4978. continue;
  4979. insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
  4980. map_ptr->max_entries, 2);
  4981. insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
  4982. container_of(map_ptr,
  4983. struct bpf_array,
  4984. map)->index_mask);
  4985. insn_buf[2] = *insn;
  4986. cnt = 3;
  4987. new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  4988. if (!new_prog)
  4989. return -ENOMEM;
  4990. delta += cnt - 1;
  4991. env->prog = prog = new_prog;
  4992. insn = new_prog->insnsi + i + delta;
  4993. continue;
  4994. }
  4995. /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
  4996. * handlers are currently limited to 64 bit only.
  4997. */
  4998. if (prog->jit_requested && BITS_PER_LONG == 64 &&
  4999. insn->imm == BPF_FUNC_map_lookup_elem) {
  5000. map_ptr = env->insn_aux_data[i + delta].map_ptr;
  5001. if (map_ptr == BPF_MAP_PTR_POISON ||
  5002. !map_ptr->ops->map_gen_lookup)
  5003. goto patch_call_imm;
  5004. cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
  5005. if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
  5006. verbose(env, "bpf verifier is misconfigured\n");
  5007. return -EINVAL;
  5008. }
  5009. new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
  5010. cnt);
  5011. if (!new_prog)
  5012. return -ENOMEM;
  5013. delta += cnt - 1;
  5014. /* keep walking new program and skip insns we just inserted */
  5015. env->prog = prog = new_prog;
  5016. insn = new_prog->insnsi + i + delta;
  5017. continue;
  5018. }
  5019. if (insn->imm == BPF_FUNC_redirect_map) {
  5020. /* Note, we cannot use prog directly as imm as subsequent
  5021. * rewrites would still change the prog pointer. The only
  5022. * stable address we can use is aux, which also works with
  5023. * prog clones during blinding.
  5024. */
  5025. u64 addr = (unsigned long)prog->aux;
  5026. struct bpf_insn r4_ld[] = {
  5027. BPF_LD_IMM64(BPF_REG_4, addr),
  5028. *insn,
  5029. };
  5030. cnt = ARRAY_SIZE(r4_ld);
  5031. new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
  5032. if (!new_prog)
  5033. return -ENOMEM;
  5034. delta += cnt - 1;
  5035. env->prog = prog = new_prog;
  5036. insn = new_prog->insnsi + i + delta;
  5037. }
  5038. patch_call_imm:
  5039. fn = env->ops->get_func_proto(insn->imm, env->prog);
  5040. /* all functions that have a prototype and that the verifier allowed
  5041. * programs to call must be real in-kernel functions
  5042. */
  5043. if (!fn->func) {
  5044. verbose(env,
  5045. "kernel subsystem misconfigured func %s#%d\n",
  5046. func_id_name(insn->imm), insn->imm);
  5047. return -EFAULT;
  5048. }
  5049. insn->imm = fn->func - __bpf_call_base;
  5050. }
  5051. return 0;
  5052. }
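/* Editorial sketch (illustration only, not original code): for a 64-bit
 * division
 *
 *	r2 /= r3
 *
 * the mask_and_div patchlet above (with the leading 32-bit move skipped,
 * since is64 is true) expands the single insn into
 *
 *	if r3 != 0 goto pc+2
 *	r2 = 0			(BPF_ALU32_REG(BPF_XOR, r2, r2))
 *	goto pc+1
 *	r2 /= r3
 *
 * so a zero divisor yields 0 at run time instead of raising a CPU exception.
 */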
  5053. static void free_states(struct bpf_verifier_env *env)
  5054. {
  5055. struct bpf_verifier_state_list *sl, *sln;
  5056. int i;
  5057. if (!env->explored_states)
  5058. return;
  5059. for (i = 0; i < env->prog->len; i++) {
  5060. sl = env->explored_states[i];
  5061. if (sl)
  5062. while (sl != STATE_LIST_MARK) {
  5063. sln = sl->next;
  5064. free_verifier_state(&sl->state, false);
  5065. kfree(sl);
  5066. sl = sln;
  5067. }
  5068. }
  5069. kfree(env->explored_states);
  5070. }
  5071. int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
  5072. {
  5073. struct bpf_verifier_env *env;
  5074. struct bpf_verifier_log *log;
  5075. int ret = -EINVAL;
  5076. /* no program is valid */
  5077. if (ARRAY_SIZE(bpf_verifier_ops) == 0)
  5078. return -EINVAL;
  5079. /* 'struct bpf_verifier_env' can be global, but since it's not small,
  5080. * allocate/free it every time bpf_check() is called
  5081. */
  5082. env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
  5083. if (!env)
  5084. return -ENOMEM;
  5085. log = &env->log;
  5086. env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
  5087. (*prog)->len);
  5088. ret = -ENOMEM;
  5089. if (!env->insn_aux_data)
  5090. goto err_free_env;
  5091. env->prog = *prog;
  5092. env->ops = bpf_verifier_ops[env->prog->type];
  5093. /* grab the mutex to protect a few globals used by the verifier */
  5094. mutex_lock(&bpf_verifier_lock);
  5095. if (attr->log_level || attr->log_buf || attr->log_size) {
  5096. /* user requested verbose verifier output
  5097. * and supplied buffer to store the verification trace
  5098. */
  5099. log->level = attr->log_level;
  5100. log->ubuf = (char __user *) (unsigned long) attr->log_buf;
  5101. log->len_total = attr->log_size;
  5102. ret = -EINVAL;
  5103. /* log attributes have to be sane */
  5104. if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
  5105. !log->level || !log->ubuf)
  5106. goto err_unlock;
  5107. }
  5108. env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
  5109. if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
  5110. env->strict_alignment = true;
  5111. if (bpf_prog_is_dev_bound(env->prog->aux)) {
  5112. ret = bpf_prog_offload_verifier_prep(env);
  5113. if (ret)
  5114. goto err_unlock;
  5115. }
  5116. ret = replace_map_fd_with_map_ptr(env);
  5117. if (ret < 0)
  5118. goto skip_full_check;
  5119. env->explored_states = kcalloc(env->prog->len,
  5120. sizeof(struct bpf_verifier_state_list *),
  5121. GFP_USER);
  5122. ret = -ENOMEM;
  5123. if (!env->explored_states)
  5124. goto skip_full_check;
  5125. env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
  5126. ret = check_cfg(env);
  5127. if (ret < 0)
  5128. goto skip_full_check;
  5129. ret = do_check(env);
  5130. if (env->cur_state) {
  5131. free_verifier_state(env->cur_state, true);
  5132. env->cur_state = NULL;
  5133. }
  5134. skip_full_check:
  5135. while (!pop_stack(env, NULL, NULL));
  5136. free_states(env);
  5137. if (ret == 0)
  5138. sanitize_dead_code(env);
  5139. if (ret == 0)
  5140. ret = check_max_stack_depth(env);
  5141. if (ret == 0)
  5142. /* program is valid, convert *(u32*)(ctx + off) accesses */
  5143. ret = convert_ctx_accesses(env);
  5144. if (ret == 0)
  5145. ret = fixup_bpf_calls(env);
  5146. if (ret == 0)
  5147. ret = fixup_call_args(env);
  5148. if (log->level && bpf_verifier_log_full(log))
  5149. ret = -ENOSPC;
  5150. if (log->level && !log->ubuf) {
  5151. ret = -EFAULT;
  5152. goto err_release_maps;
  5153. }
  5154. if (ret == 0 && env->used_map_cnt) {
  5155. /* if program passed verifier, update used_maps in bpf_prog_info */
  5156. env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
  5157. sizeof(env->used_maps[0]),
  5158. GFP_KERNEL);
  5159. if (!env->prog->aux->used_maps) {
  5160. ret = -ENOMEM;
  5161. goto err_release_maps;
  5162. }
  5163. memcpy(env->prog->aux->used_maps, env->used_maps,
  5164. sizeof(env->used_maps[0]) * env->used_map_cnt);
  5165. env->prog->aux->used_map_cnt = env->used_map_cnt;
  5166. /* program is valid. Convert pseudo bpf_ld_imm64 into generic
  5167. * bpf_ld_imm64 instructions
  5168. */
  5169. convert_pseudo_ld_imm64(env);
  5170. }
  5171. err_release_maps:
  5172. if (!env->prog->aux->used_maps)
  5173. /* if we didn't copy map pointers into bpf_prog_info, release
  5174. * them now. Otherwise free_bpf_prog_info() will release them.
  5175. */
  5176. release_maps(env);
  5177. *prog = env->prog;
  5178. err_unlock:
  5179. mutex_unlock(&bpf_verifier_lock);
  5180. vfree(env->insn_aux_data);
  5181. err_free_env:
  5182. kfree(env);
  5183. return ret;
  5184. }