kcmsock.c 45 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091
  1. /*
  2. * Kernel Connection Multiplexor
  3. *
  4. * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2
  8. * as published by the Free Software Foundation.
  9. */
  10. #include <linux/bpf.h>
  11. #include <linux/errno.h>
  12. #include <linux/errqueue.h>
  13. #include <linux/file.h>
  14. #include <linux/in.h>
  15. #include <linux/kernel.h>
  16. #include <linux/module.h>
  17. #include <linux/net.h>
  18. #include <linux/netdevice.h>
  19. #include <linux/poll.h>
  20. #include <linux/rculist.h>
  21. #include <linux/skbuff.h>
  22. #include <linux/socket.h>
  23. #include <linux/uaccess.h>
  24. #include <linux/workqueue.h>
  25. #include <linux/syscalls.h>
  26. #include <net/kcm.h>
  27. #include <net/netns/generic.h>
  28. #include <net/sock.h>
  29. #include <uapi/linux/kcm.h>
  30. unsigned int kcm_net_id;
  31. static struct kmem_cache *kcm_psockp __read_mostly;
  32. static struct kmem_cache *kcm_muxp __read_mostly;
  33. static struct workqueue_struct *kcm_wq;
  34. static inline struct kcm_sock *kcm_sk(const struct sock *sk)
  35. {
  36. return (struct kcm_sock *)sk;
  37. }
  38. static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
  39. {
  40. return (struct kcm_tx_msg *)skb->cb;
  41. }
  42. static void report_csk_error(struct sock *csk, int err)
  43. {
  44. csk->sk_err = EPIPE;
  45. csk->sk_error_report(csk);
  46. }
  47. static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
  48. bool wakeup_kcm)
  49. {
  50. struct sock *csk = psock->sk;
  51. struct kcm_mux *mux = psock->mux;
  52. /* Unrecoverable error in transmit */
  53. spin_lock_bh(&mux->lock);
  54. if (psock->tx_stopped) {
  55. spin_unlock_bh(&mux->lock);
  56. return;
  57. }
  58. psock->tx_stopped = 1;
  59. KCM_STATS_INCR(psock->stats.tx_aborts);
  60. if (!psock->tx_kcm) {
  61. /* Take off psocks_avail list */
  62. list_del(&psock->psock_avail_list);
  63. } else if (wakeup_kcm) {
  64. /* In this case psock is being aborted while outside of
  65. * write_msgs and psock is reserved. Schedule tx_work
  66. * to handle the failure there. Need to commit tx_stopped
  67. * before queuing work.
  68. */
  69. smp_mb();
  70. queue_work(kcm_wq, &psock->tx_kcm->tx_work);
  71. }
  72. spin_unlock_bh(&mux->lock);
  73. /* Report error on lower socket */
  74. report_csk_error(csk, err);
  75. }
  76. /* RX mux lock held. */
  77. static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
  78. struct kcm_psock *psock)
  79. {
  80. STRP_STATS_ADD(mux->stats.rx_bytes,
  81. psock->strp.stats.rx_bytes -
  82. psock->saved_rx_bytes);
  83. mux->stats.rx_msgs +=
  84. psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
  85. psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
  86. psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
  87. }
  88. static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
  89. struct kcm_psock *psock)
  90. {
  91. KCM_STATS_ADD(mux->stats.tx_bytes,
  92. psock->stats.tx_bytes - psock->saved_tx_bytes);
  93. mux->stats.tx_msgs +=
  94. psock->stats.tx_msgs - psock->saved_tx_msgs;
  95. psock->saved_tx_msgs = psock->stats.tx_msgs;
  96. psock->saved_tx_bytes = psock->stats.tx_bytes;
  97. }
  98. static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
  99. /* KCM is ready to receive messages on its queue-- either the KCM is new or
  100. * has become unblocked after being blocked on full socket buffer. Queue any
  101. * pending ready messages on a psock. RX mux lock held.
  102. */
  103. static void kcm_rcv_ready(struct kcm_sock *kcm)
  104. {
  105. struct kcm_mux *mux = kcm->mux;
  106. struct kcm_psock *psock;
  107. struct sk_buff *skb;
  108. if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled))
  109. return;
  110. while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) {
  111. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  112. /* Assuming buffer limit has been reached */
  113. skb_queue_head(&mux->rx_hold_queue, skb);
  114. WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
  115. return;
  116. }
  117. }
  118. while (!list_empty(&mux->psocks_ready)) {
  119. psock = list_first_entry(&mux->psocks_ready, struct kcm_psock,
  120. psock_ready_list);
  121. if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) {
  122. /* Assuming buffer limit has been reached */
  123. WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
  124. return;
  125. }
  126. /* Consumed the ready message on the psock. Schedule rx_work to
  127. * get more messages.
  128. */
  129. list_del(&psock->psock_ready_list);
  130. psock->ready_rx_msg = NULL;
  131. /* Commit clearing of ready_rx_msg for queuing work */
  132. smp_mb();
  133. strp_unpause(&psock->strp);
  134. strp_check_rcv(&psock->strp);
  135. }
  136. /* Buffer limit is okay now, add to ready list */
  137. list_add_tail(&kcm->wait_rx_list,
  138. &kcm->mux->kcm_rx_waiters);
  139. kcm->rx_wait = true;
  140. }
  141. static void kcm_rfree(struct sk_buff *skb)
  142. {
  143. struct sock *sk = skb->sk;
  144. struct kcm_sock *kcm = kcm_sk(sk);
  145. struct kcm_mux *mux = kcm->mux;
  146. unsigned int len = skb->truesize;
  147. sk_mem_uncharge(sk, len);
  148. atomic_sub(len, &sk->sk_rmem_alloc);
  149. /* For reading rx_wait and rx_psock without holding lock */
  150. smp_mb__after_atomic();
  151. if (!kcm->rx_wait && !kcm->rx_psock &&
  152. sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
  153. spin_lock_bh(&mux->rx_lock);
  154. kcm_rcv_ready(kcm);
  155. spin_unlock_bh(&mux->rx_lock);
  156. }
  157. }
  158. static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
  159. {
  160. struct sk_buff_head *list = &sk->sk_receive_queue;
  161. if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
  162. return -ENOMEM;
  163. if (!sk_rmem_schedule(sk, skb, skb->truesize))
  164. return -ENOBUFS;
  165. skb->dev = NULL;
  166. skb_orphan(skb);
  167. skb->sk = sk;
  168. skb->destructor = kcm_rfree;
  169. atomic_add(skb->truesize, &sk->sk_rmem_alloc);
  170. sk_mem_charge(sk, skb->truesize);
  171. skb_queue_tail(list, skb);
  172. if (!sock_flag(sk, SOCK_DEAD))
  173. sk->sk_data_ready(sk);
  174. return 0;
  175. }
  176. /* Requeue received messages for a kcm socket to other kcm sockets. This is
  177. * called with a kcm socket is receive disabled.
  178. * RX mux lock held.
  179. */
  180. static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
  181. {
  182. struct sk_buff *skb;
  183. struct kcm_sock *kcm;
  184. while ((skb = __skb_dequeue(head))) {
  185. /* Reset destructor to avoid calling kcm_rcv_ready */
  186. skb->destructor = sock_rfree;
  187. skb_orphan(skb);
  188. try_again:
  189. if (list_empty(&mux->kcm_rx_waiters)) {
  190. skb_queue_tail(&mux->rx_hold_queue, skb);
  191. continue;
  192. }
  193. kcm = list_first_entry(&mux->kcm_rx_waiters,
  194. struct kcm_sock, wait_rx_list);
  195. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  196. /* Should mean socket buffer full */
  197. list_del(&kcm->wait_rx_list);
  198. kcm->rx_wait = false;
  199. /* Commit rx_wait to read in kcm_free */
  200. smp_wmb();
  201. goto try_again;
  202. }
  203. }
  204. }
  205. /* Lower sock lock held */
  206. static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
  207. struct sk_buff *head)
  208. {
  209. struct kcm_mux *mux = psock->mux;
  210. struct kcm_sock *kcm;
  211. WARN_ON(psock->ready_rx_msg);
  212. if (psock->rx_kcm)
  213. return psock->rx_kcm;
  214. spin_lock_bh(&mux->rx_lock);
  215. if (psock->rx_kcm) {
  216. spin_unlock_bh(&mux->rx_lock);
  217. return psock->rx_kcm;
  218. }
  219. kcm_update_rx_mux_stats(mux, psock);
  220. if (list_empty(&mux->kcm_rx_waiters)) {
  221. psock->ready_rx_msg = head;
  222. strp_pause(&psock->strp);
  223. list_add_tail(&psock->psock_ready_list,
  224. &mux->psocks_ready);
  225. spin_unlock_bh(&mux->rx_lock);
  226. return NULL;
  227. }
  228. kcm = list_first_entry(&mux->kcm_rx_waiters,
  229. struct kcm_sock, wait_rx_list);
  230. list_del(&kcm->wait_rx_list);
  231. kcm->rx_wait = false;
  232. psock->rx_kcm = kcm;
  233. kcm->rx_psock = psock;
  234. spin_unlock_bh(&mux->rx_lock);
  235. return kcm;
  236. }
  237. static void kcm_done(struct kcm_sock *kcm);
  238. static void kcm_done_work(struct work_struct *w)
  239. {
  240. kcm_done(container_of(w, struct kcm_sock, done_work));
  241. }
  242. /* Lower sock held */
  243. static void unreserve_rx_kcm(struct kcm_psock *psock,
  244. bool rcv_ready)
  245. {
  246. struct kcm_sock *kcm = psock->rx_kcm;
  247. struct kcm_mux *mux = psock->mux;
  248. if (!kcm)
  249. return;
  250. spin_lock_bh(&mux->rx_lock);
  251. psock->rx_kcm = NULL;
  252. kcm->rx_psock = NULL;
  253. /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
  254. * kcm_rfree
  255. */
  256. smp_mb();
  257. if (unlikely(kcm->done)) {
  258. spin_unlock_bh(&mux->rx_lock);
  259. /* Need to run kcm_done in a task since we need to qcquire
  260. * callback locks which may already be held here.
  261. */
  262. INIT_WORK(&kcm->done_work, kcm_done_work);
  263. schedule_work(&kcm->done_work);
  264. return;
  265. }
  266. if (unlikely(kcm->rx_disabled)) {
  267. requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
  268. } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) {
  269. /* Check for degenerative race with rx_wait that all
  270. * data was dequeued (accounted for in kcm_rfree).
  271. */
  272. kcm_rcv_ready(kcm);
  273. }
  274. spin_unlock_bh(&mux->rx_lock);
  275. }
  276. /* Lower sock lock held */
  277. static void psock_data_ready(struct sock *sk)
  278. {
  279. struct kcm_psock *psock;
  280. read_lock_bh(&sk->sk_callback_lock);
  281. psock = (struct kcm_psock *)sk->sk_user_data;
  282. if (likely(psock))
  283. strp_data_ready(&psock->strp);
  284. read_unlock_bh(&sk->sk_callback_lock);
  285. }
  286. /* Called with lower sock held */
  287. static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
  288. {
  289. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  290. struct kcm_sock *kcm;
  291. try_queue:
  292. kcm = reserve_rx_kcm(psock, skb);
  293. if (!kcm) {
  294. /* Unable to reserve a KCM, message is held in psock and strp
  295. * is paused.
  296. */
  297. return;
  298. }
  299. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  300. /* Should mean socket buffer full */
  301. unreserve_rx_kcm(psock, false);
  302. goto try_queue;
  303. }
  304. }
  305. static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
  306. {
  307. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  308. struct bpf_prog *prog = psock->bpf_prog;
  309. return (*prog->bpf_func)(skb, prog->insnsi);
  310. }
  311. static int kcm_read_sock_done(struct strparser *strp, int err)
  312. {
  313. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  314. unreserve_rx_kcm(psock, true);
  315. return err;
  316. }
  317. static void psock_state_change(struct sock *sk)
  318. {
  319. /* TCP only does a POLLIN for a half close. Do a POLLHUP here
  320. * since application will normally not poll with POLLIN
  321. * on the TCP sockets.
  322. */
  323. report_csk_error(sk, EPIPE);
  324. }
  325. static void psock_write_space(struct sock *sk)
  326. {
  327. struct kcm_psock *psock;
  328. struct kcm_mux *mux;
  329. struct kcm_sock *kcm;
  330. read_lock_bh(&sk->sk_callback_lock);
  331. psock = (struct kcm_psock *)sk->sk_user_data;
  332. if (unlikely(!psock))
  333. goto out;
  334. mux = psock->mux;
  335. spin_lock_bh(&mux->lock);
  336. /* Check if the socket is reserved so someone is waiting for sending. */
  337. kcm = psock->tx_kcm;
  338. if (kcm && !unlikely(kcm->tx_stopped))
  339. queue_work(kcm_wq, &kcm->tx_work);
  340. spin_unlock_bh(&mux->lock);
  341. out:
  342. read_unlock_bh(&sk->sk_callback_lock);
  343. }
  344. static void unreserve_psock(struct kcm_sock *kcm);
  345. /* kcm sock is locked. */
  346. static struct kcm_psock *reserve_psock(struct kcm_sock *kcm)
  347. {
  348. struct kcm_mux *mux = kcm->mux;
  349. struct kcm_psock *psock;
  350. psock = kcm->tx_psock;
  351. smp_rmb(); /* Must read tx_psock before tx_wait */
  352. if (psock) {
  353. WARN_ON(kcm->tx_wait);
  354. if (unlikely(psock->tx_stopped))
  355. unreserve_psock(kcm);
  356. else
  357. return kcm->tx_psock;
  358. }
  359. spin_lock_bh(&mux->lock);
  360. /* Check again under lock to see if psock was reserved for this
  361. * psock via psock_unreserve.
  362. */
  363. psock = kcm->tx_psock;
  364. if (unlikely(psock)) {
  365. WARN_ON(kcm->tx_wait);
  366. spin_unlock_bh(&mux->lock);
  367. return kcm->tx_psock;
  368. }
  369. if (!list_empty(&mux->psocks_avail)) {
  370. psock = list_first_entry(&mux->psocks_avail,
  371. struct kcm_psock,
  372. psock_avail_list);
  373. list_del(&psock->psock_avail_list);
  374. if (kcm->tx_wait) {
  375. list_del(&kcm->wait_psock_list);
  376. kcm->tx_wait = false;
  377. }
  378. kcm->tx_psock = psock;
  379. psock->tx_kcm = kcm;
  380. KCM_STATS_INCR(psock->stats.reserved);
  381. } else if (!kcm->tx_wait) {
  382. list_add_tail(&kcm->wait_psock_list,
  383. &mux->kcm_tx_waiters);
  384. kcm->tx_wait = true;
  385. }
  386. spin_unlock_bh(&mux->lock);
  387. return psock;
  388. }
  389. /* mux lock held */
  390. static void psock_now_avail(struct kcm_psock *psock)
  391. {
  392. struct kcm_mux *mux = psock->mux;
  393. struct kcm_sock *kcm;
  394. if (list_empty(&mux->kcm_tx_waiters)) {
  395. list_add_tail(&psock->psock_avail_list,
  396. &mux->psocks_avail);
  397. } else {
  398. kcm = list_first_entry(&mux->kcm_tx_waiters,
  399. struct kcm_sock,
  400. wait_psock_list);
  401. list_del(&kcm->wait_psock_list);
  402. kcm->tx_wait = false;
  403. psock->tx_kcm = kcm;
  404. /* Commit before changing tx_psock since that is read in
  405. * reserve_psock before queuing work.
  406. */
  407. smp_mb();
  408. kcm->tx_psock = psock;
  409. KCM_STATS_INCR(psock->stats.reserved);
  410. queue_work(kcm_wq, &kcm->tx_work);
  411. }
  412. }
  413. /* kcm sock is locked. */
  414. static void unreserve_psock(struct kcm_sock *kcm)
  415. {
  416. struct kcm_psock *psock;
  417. struct kcm_mux *mux = kcm->mux;
  418. spin_lock_bh(&mux->lock);
  419. psock = kcm->tx_psock;
  420. if (WARN_ON(!psock)) {
  421. spin_unlock_bh(&mux->lock);
  422. return;
  423. }
  424. smp_rmb(); /* Read tx_psock before tx_wait */
  425. kcm_update_tx_mux_stats(mux, psock);
  426. WARN_ON(kcm->tx_wait);
  427. kcm->tx_psock = NULL;
  428. psock->tx_kcm = NULL;
  429. KCM_STATS_INCR(psock->stats.unreserved);
  430. if (unlikely(psock->tx_stopped)) {
  431. if (psock->done) {
  432. /* Deferred free */
  433. list_del(&psock->psock_list);
  434. mux->psocks_cnt--;
  435. sock_put(psock->sk);
  436. fput(psock->sk->sk_socket->file);
  437. kmem_cache_free(kcm_psockp, psock);
  438. }
  439. /* Don't put back on available list */
  440. spin_unlock_bh(&mux->lock);
  441. return;
  442. }
  443. psock_now_avail(psock);
  444. spin_unlock_bh(&mux->lock);
  445. }
  446. static void kcm_report_tx_retry(struct kcm_sock *kcm)
  447. {
  448. struct kcm_mux *mux = kcm->mux;
  449. spin_lock_bh(&mux->lock);
  450. KCM_STATS_INCR(mux->stats.tx_retries);
  451. spin_unlock_bh(&mux->lock);
  452. }
  453. /* Write any messages ready on the kcm socket. Called with kcm sock lock
  454. * held. Return bytes actually sent or error.
  455. */
  456. static int kcm_write_msgs(struct kcm_sock *kcm)
  457. {
  458. struct sock *sk = &kcm->sk;
  459. struct kcm_psock *psock;
  460. struct sk_buff *skb, *head;
  461. struct kcm_tx_msg *txm;
  462. unsigned short fragidx, frag_offset;
  463. unsigned int sent, total_sent = 0;
  464. int ret = 0;
  465. kcm->tx_wait_more = false;
  466. psock = kcm->tx_psock;
  467. if (unlikely(psock && psock->tx_stopped)) {
  468. /* A reserved psock was aborted asynchronously. Unreserve
  469. * it and we'll retry the message.
  470. */
  471. unreserve_psock(kcm);
  472. kcm_report_tx_retry(kcm);
  473. if (skb_queue_empty(&sk->sk_write_queue))
  474. return 0;
  475. kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0;
  476. } else if (skb_queue_empty(&sk->sk_write_queue)) {
  477. return 0;
  478. }
  479. head = skb_peek(&sk->sk_write_queue);
  480. txm = kcm_tx_msg(head);
  481. if (txm->sent) {
  482. /* Send of first skbuff in queue already in progress */
  483. if (WARN_ON(!psock)) {
  484. ret = -EINVAL;
  485. goto out;
  486. }
  487. sent = txm->sent;
  488. frag_offset = txm->frag_offset;
  489. fragidx = txm->fragidx;
  490. skb = txm->frag_skb;
  491. goto do_frag;
  492. }
  493. try_again:
  494. psock = reserve_psock(kcm);
  495. if (!psock)
  496. goto out;
  497. do {
  498. skb = head;
  499. txm = kcm_tx_msg(head);
  500. sent = 0;
  501. do_frag_list:
  502. if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
  503. ret = -EINVAL;
  504. goto out;
  505. }
  506. for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags;
  507. fragidx++) {
  508. skb_frag_t *frag;
  509. frag_offset = 0;
  510. do_frag:
  511. frag = &skb_shinfo(skb)->frags[fragidx];
  512. if (WARN_ON(!frag->size)) {
  513. ret = -EINVAL;
  514. goto out;
  515. }
  516. ret = kernel_sendpage(psock->sk->sk_socket,
  517. frag->page.p,
  518. frag->page_offset + frag_offset,
  519. frag->size - frag_offset,
  520. MSG_DONTWAIT);
  521. if (ret <= 0) {
  522. if (ret == -EAGAIN) {
  523. /* Save state to try again when there's
  524. * write space on the socket
  525. */
  526. txm->sent = sent;
  527. txm->frag_offset = frag_offset;
  528. txm->fragidx = fragidx;
  529. txm->frag_skb = skb;
  530. ret = 0;
  531. goto out;
  532. }
  533. /* Hard failure in sending message, abort this
  534. * psock since it has lost framing
  535. * synchonization and retry sending the
  536. * message from the beginning.
  537. */
  538. kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
  539. true);
  540. unreserve_psock(kcm);
  541. txm->sent = 0;
  542. kcm_report_tx_retry(kcm);
  543. ret = 0;
  544. goto try_again;
  545. }
  546. sent += ret;
  547. frag_offset += ret;
  548. KCM_STATS_ADD(psock->stats.tx_bytes, ret);
  549. if (frag_offset < frag->size) {
  550. /* Not finished with this frag */
  551. goto do_frag;
  552. }
  553. }
  554. if (skb == head) {
  555. if (skb_has_frag_list(skb)) {
  556. skb = skb_shinfo(skb)->frag_list;
  557. goto do_frag_list;
  558. }
  559. } else if (skb->next) {
  560. skb = skb->next;
  561. goto do_frag_list;
  562. }
  563. /* Successfully sent the whole packet, account for it. */
  564. skb_dequeue(&sk->sk_write_queue);
  565. kfree_skb(head);
  566. sk->sk_wmem_queued -= sent;
  567. total_sent += sent;
  568. KCM_STATS_INCR(psock->stats.tx_msgs);
  569. } while ((head = skb_peek(&sk->sk_write_queue)));
  570. out:
  571. if (!head) {
  572. /* Done with all queued messages. */
  573. WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
  574. unreserve_psock(kcm);
  575. }
  576. /* Check if write space is available */
  577. sk->sk_write_space(sk);
  578. return total_sent ? : ret;
  579. }
  580. static void kcm_tx_work(struct work_struct *w)
  581. {
  582. struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work);
  583. struct sock *sk = &kcm->sk;
  584. int err;
  585. lock_sock(sk);
  586. /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx
  587. * aborts
  588. */
  589. err = kcm_write_msgs(kcm);
  590. if (err < 0) {
  591. /* Hard failure in write, report error on KCM socket */
  592. pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err);
  593. report_csk_error(&kcm->sk, -err);
  594. goto out;
  595. }
  596. /* Primarily for SOCK_SEQPACKET sockets */
  597. if (likely(sk->sk_socket) &&
  598. test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
  599. clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  600. sk->sk_write_space(sk);
  601. }
  602. out:
  603. release_sock(sk);
  604. }
  605. static void kcm_push(struct kcm_sock *kcm)
  606. {
  607. if (kcm->tx_wait_more)
  608. kcm_write_msgs(kcm);
  609. }
  610. static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
  611. int offset, size_t size, int flags)
  612. {
  613. struct sock *sk = sock->sk;
  614. struct kcm_sock *kcm = kcm_sk(sk);
  615. struct sk_buff *skb = NULL, *head = NULL;
  616. long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
  617. bool eor;
  618. int err = 0;
  619. int i;
  620. if (flags & MSG_SENDPAGE_NOTLAST)
  621. flags |= MSG_MORE;
  622. /* No MSG_EOR from splice, only look at MSG_MORE */
  623. eor = !(flags & MSG_MORE);
  624. lock_sock(sk);
  625. sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  626. err = -EPIPE;
  627. if (sk->sk_err)
  628. goto out_error;
  629. if (kcm->seq_skb) {
  630. /* Previously opened message */
  631. head = kcm->seq_skb;
  632. skb = kcm_tx_msg(head)->last_skb;
  633. i = skb_shinfo(skb)->nr_frags;
  634. if (skb_can_coalesce(skb, i, page, offset)) {
  635. skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
  636. skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
  637. goto coalesced;
  638. }
  639. if (i >= MAX_SKB_FRAGS) {
  640. struct sk_buff *tskb;
  641. tskb = alloc_skb(0, sk->sk_allocation);
  642. while (!tskb) {
  643. kcm_push(kcm);
  644. err = sk_stream_wait_memory(sk, &timeo);
  645. if (err)
  646. goto out_error;
  647. }
  648. if (head == skb)
  649. skb_shinfo(head)->frag_list = tskb;
  650. else
  651. skb->next = tskb;
  652. skb = tskb;
  653. skb->ip_summed = CHECKSUM_UNNECESSARY;
  654. i = 0;
  655. }
  656. } else {
  657. /* Call the sk_stream functions to manage the sndbuf mem. */
  658. if (!sk_stream_memory_free(sk)) {
  659. kcm_push(kcm);
  660. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  661. err = sk_stream_wait_memory(sk, &timeo);
  662. if (err)
  663. goto out_error;
  664. }
  665. head = alloc_skb(0, sk->sk_allocation);
  666. while (!head) {
  667. kcm_push(kcm);
  668. err = sk_stream_wait_memory(sk, &timeo);
  669. if (err)
  670. goto out_error;
  671. }
  672. skb = head;
  673. i = 0;
  674. }
  675. get_page(page);
  676. skb_fill_page_desc(skb, i, page, offset, size);
  677. skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
  678. coalesced:
  679. skb->len += size;
  680. skb->data_len += size;
  681. skb->truesize += size;
  682. sk->sk_wmem_queued += size;
  683. sk_mem_charge(sk, size);
  684. if (head != skb) {
  685. head->len += size;
  686. head->data_len += size;
  687. head->truesize += size;
  688. }
  689. if (eor) {
  690. bool not_busy = skb_queue_empty(&sk->sk_write_queue);
  691. /* Message complete, queue it on send buffer */
  692. __skb_queue_tail(&sk->sk_write_queue, head);
  693. kcm->seq_skb = NULL;
  694. KCM_STATS_INCR(kcm->stats.tx_msgs);
  695. if (flags & MSG_BATCH) {
  696. kcm->tx_wait_more = true;
  697. } else if (kcm->tx_wait_more || not_busy) {
  698. err = kcm_write_msgs(kcm);
  699. if (err < 0) {
  700. /* We got a hard error in write_msgs but have
  701. * already queued this message. Report an error
  702. * in the socket, but don't affect return value
  703. * from sendmsg
  704. */
  705. pr_warn("KCM: Hard failure on kcm_write_msgs\n");
  706. report_csk_error(&kcm->sk, -err);
  707. }
  708. }
  709. } else {
  710. /* Message not complete, save state */
  711. kcm->seq_skb = head;
  712. kcm_tx_msg(head)->last_skb = skb;
  713. }
  714. KCM_STATS_ADD(kcm->stats.tx_bytes, size);
  715. release_sock(sk);
  716. return size;
  717. out_error:
  718. kcm_push(kcm);
  719. err = sk_stream_error(sk, flags, err);
  720. /* make sure we wake any epoll edge trigger waiter */
  721. if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
  722. sk->sk_write_space(sk);
  723. release_sock(sk);
  724. return err;
  725. }
  726. static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
  727. {
  728. struct sock *sk = sock->sk;
  729. struct kcm_sock *kcm = kcm_sk(sk);
  730. struct sk_buff *skb = NULL, *head = NULL;
  731. size_t copy, copied = 0;
  732. long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
  733. int eor = (sock->type == SOCK_DGRAM) ?
  734. !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR);
  735. int err = -EPIPE;
  736. lock_sock(sk);
  737. /* Per tcp_sendmsg this should be in poll */
  738. sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  739. if (sk->sk_err)
  740. goto out_error;
  741. if (kcm->seq_skb) {
  742. /* Previously opened message */
  743. head = kcm->seq_skb;
  744. skb = kcm_tx_msg(head)->last_skb;
  745. goto start;
  746. }
  747. /* Call the sk_stream functions to manage the sndbuf mem. */
  748. if (!sk_stream_memory_free(sk)) {
  749. kcm_push(kcm);
  750. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  751. err = sk_stream_wait_memory(sk, &timeo);
  752. if (err)
  753. goto out_error;
  754. }
  755. /* New message, alloc head skb */
  756. head = alloc_skb(0, sk->sk_allocation);
  757. while (!head) {
  758. kcm_push(kcm);
  759. err = sk_stream_wait_memory(sk, &timeo);
  760. if (err)
  761. goto out_error;
  762. head = alloc_skb(0, sk->sk_allocation);
  763. }
  764. skb = head;
  765. /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
  766. * csum_and_copy_from_iter from skb_do_copy_data_nocache.
  767. */
  768. skb->ip_summed = CHECKSUM_UNNECESSARY;
  769. start:
  770. while (msg_data_left(msg)) {
  771. bool merge = true;
  772. int i = skb_shinfo(skb)->nr_frags;
  773. struct page_frag *pfrag = sk_page_frag(sk);
  774. if (!sk_page_frag_refill(sk, pfrag))
  775. goto wait_for_memory;
  776. if (!skb_can_coalesce(skb, i, pfrag->page,
  777. pfrag->offset)) {
  778. if (i == MAX_SKB_FRAGS) {
  779. struct sk_buff *tskb;
  780. tskb = alloc_skb(0, sk->sk_allocation);
  781. if (!tskb)
  782. goto wait_for_memory;
  783. if (head == skb)
  784. skb_shinfo(head)->frag_list = tskb;
  785. else
  786. skb->next = tskb;
  787. skb = tskb;
  788. skb->ip_summed = CHECKSUM_UNNECESSARY;
  789. continue;
  790. }
  791. merge = false;
  792. }
  793. copy = min_t(int, msg_data_left(msg),
  794. pfrag->size - pfrag->offset);
  795. if (!sk_wmem_schedule(sk, copy))
  796. goto wait_for_memory;
  797. err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
  798. pfrag->page,
  799. pfrag->offset,
  800. copy);
  801. if (err)
  802. goto out_error;
  803. /* Update the skb. */
  804. if (merge) {
  805. skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
  806. } else {
  807. skb_fill_page_desc(skb, i, pfrag->page,
  808. pfrag->offset, copy);
  809. get_page(pfrag->page);
  810. }
  811. pfrag->offset += copy;
  812. copied += copy;
  813. if (head != skb) {
  814. head->len += copy;
  815. head->data_len += copy;
  816. }
  817. continue;
  818. wait_for_memory:
  819. kcm_push(kcm);
  820. err = sk_stream_wait_memory(sk, &timeo);
  821. if (err)
  822. goto out_error;
  823. }
  824. if (eor) {
  825. bool not_busy = skb_queue_empty(&sk->sk_write_queue);
  826. /* Message complete, queue it on send buffer */
  827. __skb_queue_tail(&sk->sk_write_queue, head);
  828. kcm->seq_skb = NULL;
  829. KCM_STATS_INCR(kcm->stats.tx_msgs);
  830. if (msg->msg_flags & MSG_BATCH) {
  831. kcm->tx_wait_more = true;
  832. } else if (kcm->tx_wait_more || not_busy) {
  833. err = kcm_write_msgs(kcm);
  834. if (err < 0) {
  835. /* We got a hard error in write_msgs but have
  836. * already queued this message. Report an error
  837. * in the socket, but don't affect return value
  838. * from sendmsg
  839. */
  840. pr_warn("KCM: Hard failure on kcm_write_msgs\n");
  841. report_csk_error(&kcm->sk, -err);
  842. }
  843. }
  844. } else {
  845. /* Message not complete, save state */
  846. partial_message:
  847. kcm->seq_skb = head;
  848. kcm_tx_msg(head)->last_skb = skb;
  849. }
  850. KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
  851. release_sock(sk);
  852. return copied;
  853. out_error:
  854. kcm_push(kcm);
  855. if (copied && sock->type == SOCK_SEQPACKET) {
  856. /* Wrote some bytes before encountering an
  857. * error, return partial success.
  858. */
  859. goto partial_message;
  860. }
  861. if (head != kcm->seq_skb)
  862. kfree_skb(head);
  863. err = sk_stream_error(sk, msg->msg_flags, err);
  864. /* make sure we wake any epoll edge trigger waiter */
  865. if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
  866. sk->sk_write_space(sk);
  867. release_sock(sk);
  868. return err;
  869. }
  870. static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
  871. long timeo, int *err)
  872. {
  873. struct sk_buff *skb;
  874. while (!(skb = skb_peek(&sk->sk_receive_queue))) {
  875. if (sk->sk_err) {
  876. *err = sock_error(sk);
  877. return NULL;
  878. }
  879. if (sock_flag(sk, SOCK_DONE))
  880. return NULL;
  881. if ((flags & MSG_DONTWAIT) || !timeo) {
  882. *err = -EAGAIN;
  883. return NULL;
  884. }
  885. sk_wait_data(sk, &timeo, NULL);
  886. /* Handle signals */
  887. if (signal_pending(current)) {
  888. *err = sock_intr_errno(timeo);
  889. return NULL;
  890. }
  891. }
  892. return skb;
  893. }
  894. static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
  895. size_t len, int flags)
  896. {
  897. struct sock *sk = sock->sk;
  898. struct kcm_sock *kcm = kcm_sk(sk);
  899. int err = 0;
  900. long timeo;
  901. struct strp_rx_msg *rxm;
  902. int copied = 0;
  903. struct sk_buff *skb;
  904. timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  905. lock_sock(sk);
  906. skb = kcm_wait_data(sk, flags, timeo, &err);
  907. if (!skb)
  908. goto out;
  909. /* Okay, have a message on the receive queue */
  910. rxm = strp_rx_msg(skb);
  911. if (len > rxm->full_len)
  912. len = rxm->full_len;
  913. err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
  914. if (err < 0)
  915. goto out;
  916. copied = len;
  917. if (likely(!(flags & MSG_PEEK))) {
  918. KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
  919. if (copied < rxm->full_len) {
  920. if (sock->type == SOCK_DGRAM) {
  921. /* Truncated message */
  922. msg->msg_flags |= MSG_TRUNC;
  923. goto msg_finished;
  924. }
  925. rxm->offset += copied;
  926. rxm->full_len -= copied;
  927. } else {
  928. msg_finished:
  929. /* Finished with message */
  930. msg->msg_flags |= MSG_EOR;
  931. KCM_STATS_INCR(kcm->stats.rx_msgs);
  932. skb_unlink(skb, &sk->sk_receive_queue);
  933. kfree_skb(skb);
  934. }
  935. }
  936. out:
  937. release_sock(sk);
  938. return copied ? : err;
  939. }
  940. static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
  941. struct pipe_inode_info *pipe, size_t len,
  942. unsigned int flags)
  943. {
  944. struct sock *sk = sock->sk;
  945. struct kcm_sock *kcm = kcm_sk(sk);
  946. long timeo;
  947. struct strp_rx_msg *rxm;
  948. int err = 0;
  949. ssize_t copied;
  950. struct sk_buff *skb;
  951. /* Only support splice for SOCKSEQPACKET */
  952. timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  953. lock_sock(sk);
  954. skb = kcm_wait_data(sk, flags, timeo, &err);
  955. if (!skb)
  956. goto err_out;
  957. /* Okay, have a message on the receive queue */
  958. rxm = strp_rx_msg(skb);
  959. if (len > rxm->full_len)
  960. len = rxm->full_len;
  961. copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
  962. if (copied < 0) {
  963. err = copied;
  964. goto err_out;
  965. }
  966. KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
  967. rxm->offset += copied;
  968. rxm->full_len -= copied;
  969. /* We have no way to return MSG_EOR. If all the bytes have been
  970. * read we still leave the message in the receive socket buffer.
  971. * A subsequent recvmsg needs to be done to return MSG_EOR and
  972. * finish reading the message.
  973. */
  974. release_sock(sk);
  975. return copied;
  976. err_out:
  977. release_sock(sk);
  978. return err;
  979. }
  980. /* kcm sock lock held */
  981. static void kcm_recv_disable(struct kcm_sock *kcm)
  982. {
  983. struct kcm_mux *mux = kcm->mux;
  984. if (kcm->rx_disabled)
  985. return;
  986. spin_lock_bh(&mux->rx_lock);
  987. kcm->rx_disabled = 1;
  988. /* If a psock is reserved we'll do cleanup in unreserve */
  989. if (!kcm->rx_psock) {
  990. if (kcm->rx_wait) {
  991. list_del(&kcm->wait_rx_list);
  992. kcm->rx_wait = false;
  993. }
  994. requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
  995. }
  996. spin_unlock_bh(&mux->rx_lock);
  997. }
  998. /* kcm sock lock held */
  999. static void kcm_recv_enable(struct kcm_sock *kcm)
  1000. {
  1001. struct kcm_mux *mux = kcm->mux;
  1002. if (!kcm->rx_disabled)
  1003. return;
  1004. spin_lock_bh(&mux->rx_lock);
  1005. kcm->rx_disabled = 0;
  1006. kcm_rcv_ready(kcm);
  1007. spin_unlock_bh(&mux->rx_lock);
  1008. }
  1009. static int kcm_setsockopt(struct socket *sock, int level, int optname,
  1010. char __user *optval, unsigned int optlen)
  1011. {
  1012. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1013. int val, valbool;
  1014. int err = 0;
  1015. if (level != SOL_KCM)
  1016. return -ENOPROTOOPT;
  1017. if (optlen < sizeof(int))
  1018. return -EINVAL;
  1019. if (get_user(val, (int __user *)optval))
  1020. return -EINVAL;
  1021. valbool = val ? 1 : 0;
  1022. switch (optname) {
  1023. case KCM_RECV_DISABLE:
  1024. lock_sock(&kcm->sk);
  1025. if (valbool)
  1026. kcm_recv_disable(kcm);
  1027. else
  1028. kcm_recv_enable(kcm);
  1029. release_sock(&kcm->sk);
  1030. break;
  1031. default:
  1032. err = -ENOPROTOOPT;
  1033. }
  1034. return err;
  1035. }
  1036. static int kcm_getsockopt(struct socket *sock, int level, int optname,
  1037. char __user *optval, int __user *optlen)
  1038. {
  1039. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1040. int val, len;
  1041. if (level != SOL_KCM)
  1042. return -ENOPROTOOPT;
  1043. if (get_user(len, optlen))
  1044. return -EFAULT;
  1045. len = min_t(unsigned int, len, sizeof(int));
  1046. if (len < 0)
  1047. return -EINVAL;
  1048. switch (optname) {
  1049. case KCM_RECV_DISABLE:
  1050. val = kcm->rx_disabled;
  1051. break;
  1052. default:
  1053. return -ENOPROTOOPT;
  1054. }
  1055. if (put_user(len, optlen))
  1056. return -EFAULT;
  1057. if (copy_to_user(optval, &val, len))
  1058. return -EFAULT;
  1059. return 0;
  1060. }
  1061. static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
  1062. {
  1063. struct kcm_sock *tkcm;
  1064. struct list_head *head;
  1065. int index = 0;
  1066. /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
  1067. * we set sk_state, otherwise epoll_wait always returns right away with
  1068. * POLLHUP
  1069. */
  1070. kcm->sk.sk_state = TCP_ESTABLISHED;
  1071. /* Add to mux's kcm sockets list */
  1072. kcm->mux = mux;
  1073. spin_lock_bh(&mux->lock);
  1074. head = &mux->kcm_socks;
  1075. list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) {
  1076. if (tkcm->index != index)
  1077. break;
  1078. head = &tkcm->kcm_sock_list;
  1079. index++;
  1080. }
  1081. list_add(&kcm->kcm_sock_list, head);
  1082. kcm->index = index;
  1083. mux->kcm_socks_cnt++;
  1084. spin_unlock_bh(&mux->lock);
  1085. INIT_WORK(&kcm->tx_work, kcm_tx_work);
  1086. spin_lock_bh(&mux->rx_lock);
  1087. kcm_rcv_ready(kcm);
  1088. spin_unlock_bh(&mux->rx_lock);
  1089. }
  1090. static int kcm_attach(struct socket *sock, struct socket *csock,
  1091. struct bpf_prog *prog)
  1092. {
  1093. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1094. struct kcm_mux *mux = kcm->mux;
  1095. struct sock *csk;
  1096. struct kcm_psock *psock = NULL, *tpsock;
  1097. struct list_head *head;
  1098. int index = 0;
  1099. struct strp_callbacks cb;
  1100. int err;
  1101. csk = csock->sk;
  1102. if (!csk)
  1103. return -EINVAL;
  1104. psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
  1105. if (!psock)
  1106. return -ENOMEM;
  1107. psock->mux = mux;
  1108. psock->sk = csk;
  1109. psock->bpf_prog = prog;
  1110. cb.rcv_msg = kcm_rcv_strparser;
  1111. cb.abort_parser = NULL;
  1112. cb.parse_msg = kcm_parse_func_strparser;
  1113. cb.read_sock_done = kcm_read_sock_done;
  1114. err = strp_init(&psock->strp, csk, &cb);
  1115. if (err) {
  1116. kmem_cache_free(kcm_psockp, psock);
  1117. return err;
  1118. }
  1119. sock_hold(csk);
  1120. write_lock_bh(&csk->sk_callback_lock);
  1121. psock->save_data_ready = csk->sk_data_ready;
  1122. psock->save_write_space = csk->sk_write_space;
  1123. psock->save_state_change = csk->sk_state_change;
  1124. csk->sk_user_data = psock;
  1125. csk->sk_data_ready = psock_data_ready;
  1126. csk->sk_write_space = psock_write_space;
  1127. csk->sk_state_change = psock_state_change;
  1128. write_unlock_bh(&csk->sk_callback_lock);
  1129. /* Finished initialization, now add the psock to the MUX. */
  1130. spin_lock_bh(&mux->lock);
  1131. head = &mux->psocks;
  1132. list_for_each_entry(tpsock, &mux->psocks, psock_list) {
  1133. if (tpsock->index != index)
  1134. break;
  1135. head = &tpsock->psock_list;
  1136. index++;
  1137. }
  1138. list_add(&psock->psock_list, head);
  1139. psock->index = index;
  1140. KCM_STATS_INCR(mux->stats.psock_attach);
  1141. mux->psocks_cnt++;
  1142. psock_now_avail(psock);
  1143. spin_unlock_bh(&mux->lock);
  1144. /* Schedule RX work in case there are already bytes queued */
  1145. strp_check_rcv(&psock->strp);
  1146. return 0;
  1147. }
  1148. static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
  1149. {
  1150. struct socket *csock;
  1151. struct bpf_prog *prog;
  1152. int err;
  1153. csock = sockfd_lookup(info->fd, &err);
  1154. if (!csock)
  1155. return -ENOENT;
  1156. prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER);
  1157. if (IS_ERR(prog)) {
  1158. err = PTR_ERR(prog);
  1159. goto out;
  1160. }
  1161. err = kcm_attach(sock, csock, prog);
  1162. if (err) {
  1163. bpf_prog_put(prog);
  1164. goto out;
  1165. }
  1166. /* Keep reference on file also */
  1167. return 0;
  1168. out:
  1169. fput(csock->file);
  1170. return err;
  1171. }
  1172. static void kcm_unattach(struct kcm_psock *psock)
  1173. {
  1174. struct sock *csk = psock->sk;
  1175. struct kcm_mux *mux = psock->mux;
  1176. lock_sock(csk);
  1177. /* Stop getting callbacks from TCP socket. After this there should
  1178. * be no way to reserve a kcm for this psock.
  1179. */
  1180. write_lock_bh(&csk->sk_callback_lock);
  1181. csk->sk_user_data = NULL;
  1182. csk->sk_data_ready = psock->save_data_ready;
  1183. csk->sk_write_space = psock->save_write_space;
  1184. csk->sk_state_change = psock->save_state_change;
  1185. strp_stop(&psock->strp);
  1186. if (WARN_ON(psock->rx_kcm)) {
  1187. write_unlock_bh(&csk->sk_callback_lock);
  1188. return;
  1189. }
  1190. spin_lock_bh(&mux->rx_lock);
  1191. /* Stop receiver activities. After this point psock should not be
  1192. * able to get onto ready list either through callbacks or work.
  1193. */
  1194. if (psock->ready_rx_msg) {
  1195. list_del(&psock->psock_ready_list);
  1196. kfree_skb(psock->ready_rx_msg);
  1197. psock->ready_rx_msg = NULL;
  1198. KCM_STATS_INCR(mux->stats.rx_ready_drops);
  1199. }
  1200. spin_unlock_bh(&mux->rx_lock);
  1201. write_unlock_bh(&csk->sk_callback_lock);
  1202. /* Call strp_done without sock lock */
  1203. release_sock(csk);
  1204. strp_done(&psock->strp);
  1205. lock_sock(csk);
  1206. bpf_prog_put(psock->bpf_prog);
  1207. spin_lock_bh(&mux->lock);
  1208. aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
  1209. save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
  1210. KCM_STATS_INCR(mux->stats.psock_unattach);
  1211. if (psock->tx_kcm) {
  1212. /* psock was reserved. Just mark it finished and we will clean
  1213. * up in the kcm paths, we need kcm lock which can not be
  1214. * acquired here.
  1215. */
  1216. KCM_STATS_INCR(mux->stats.psock_unattach_rsvd);
  1217. spin_unlock_bh(&mux->lock);
  1218. /* We are unattaching a socket that is reserved. Abort the
  1219. * socket since we may be out of sync in sending on it. We need
  1220. * to do this without the mux lock.
  1221. */
  1222. kcm_abort_tx_psock(psock, EPIPE, false);
  1223. spin_lock_bh(&mux->lock);
  1224. if (!psock->tx_kcm) {
  1225. /* psock now unreserved in window mux was unlocked */
  1226. goto no_reserved;
  1227. }
  1228. psock->done = 1;
  1229. /* Commit done before queuing work to process it */
  1230. smp_mb();
  1231. /* Queue tx work to make sure psock->done is handled */
  1232. queue_work(kcm_wq, &psock->tx_kcm->tx_work);
  1233. spin_unlock_bh(&mux->lock);
  1234. } else {
  1235. no_reserved:
  1236. if (!psock->tx_stopped)
  1237. list_del(&psock->psock_avail_list);
  1238. list_del(&psock->psock_list);
  1239. mux->psocks_cnt--;
  1240. spin_unlock_bh(&mux->lock);
  1241. sock_put(csk);
  1242. fput(csk->sk_socket->file);
  1243. kmem_cache_free(kcm_psockp, psock);
  1244. }
  1245. release_sock(csk);
  1246. }
  1247. static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
  1248. {
  1249. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1250. struct kcm_mux *mux = kcm->mux;
  1251. struct kcm_psock *psock;
  1252. struct socket *csock;
  1253. struct sock *csk;
  1254. int err;
  1255. csock = sockfd_lookup(info->fd, &err);
  1256. if (!csock)
  1257. return -ENOENT;
  1258. csk = csock->sk;
  1259. if (!csk) {
  1260. err = -EINVAL;
  1261. goto out;
  1262. }
  1263. err = -ENOENT;
  1264. spin_lock_bh(&mux->lock);
  1265. list_for_each_entry(psock, &mux->psocks, psock_list) {
  1266. if (psock->sk != csk)
  1267. continue;
  1268. /* Found the matching psock */
  1269. if (psock->unattaching || WARN_ON(psock->done)) {
  1270. err = -EALREADY;
  1271. break;
  1272. }
  1273. psock->unattaching = 1;
  1274. spin_unlock_bh(&mux->lock);
  1275. /* Lower socket lock should already be held */
  1276. kcm_unattach(psock);
  1277. err = 0;
  1278. goto out;
  1279. }
  1280. spin_unlock_bh(&mux->lock);
  1281. out:
  1282. fput(csock->file);
  1283. return err;
  1284. }
  1285. static struct proto kcm_proto = {
  1286. .name = "KCM",
  1287. .owner = THIS_MODULE,
  1288. .obj_size = sizeof(struct kcm_sock),
  1289. };
  1290. /* Clone a kcm socket. */
  1291. static int kcm_clone(struct socket *osock, struct kcm_clone *info,
  1292. struct socket **newsockp)
  1293. {
  1294. struct socket *newsock;
  1295. struct sock *newsk;
  1296. struct file *newfile;
  1297. int err, newfd;
  1298. err = -ENFILE;
  1299. newsock = sock_alloc();
  1300. if (!newsock)
  1301. goto out;
  1302. newsock->type = osock->type;
  1303. newsock->ops = osock->ops;
  1304. __module_get(newsock->ops->owner);
  1305. newfd = get_unused_fd_flags(0);
  1306. if (unlikely(newfd < 0)) {
  1307. err = newfd;
  1308. goto out_fd_fail;
  1309. }
  1310. newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
  1311. if (unlikely(IS_ERR(newfile))) {
  1312. err = PTR_ERR(newfile);
  1313. goto out_sock_alloc_fail;
  1314. }
  1315. newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
  1316. &kcm_proto, true);
  1317. if (!newsk) {
  1318. err = -ENOMEM;
  1319. goto out_sk_alloc_fail;
  1320. }
  1321. sock_init_data(newsock, newsk);
  1322. init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
  1323. fd_install(newfd, newfile);
  1324. *newsockp = newsock;
  1325. info->fd = newfd;
  1326. return 0;
  1327. out_sk_alloc_fail:
  1328. fput(newfile);
  1329. out_sock_alloc_fail:
  1330. put_unused_fd(newfd);
  1331. out_fd_fail:
  1332. sock_release(newsock);
  1333. out:
  1334. return err;
  1335. }
  1336. static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
  1337. {
  1338. int err;
  1339. switch (cmd) {
  1340. case SIOCKCMATTACH: {
  1341. struct kcm_attach info;
  1342. if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
  1343. err = -EFAULT;
  1344. err = kcm_attach_ioctl(sock, &info);
  1345. break;
  1346. }
  1347. case SIOCKCMUNATTACH: {
  1348. struct kcm_unattach info;
  1349. if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
  1350. err = -EFAULT;
  1351. err = kcm_unattach_ioctl(sock, &info);
  1352. break;
  1353. }
  1354. case SIOCKCMCLONE: {
  1355. struct kcm_clone info;
  1356. struct socket *newsock = NULL;
  1357. if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
  1358. err = -EFAULT;
  1359. err = kcm_clone(sock, &info, &newsock);
  1360. if (!err) {
  1361. if (copy_to_user((void __user *)arg, &info,
  1362. sizeof(info))) {
  1363. err = -EFAULT;
  1364. sys_close(info.fd);
  1365. }
  1366. }
  1367. break;
  1368. }
  1369. default:
  1370. err = -ENOIOCTLCMD;
  1371. break;
  1372. }
  1373. return err;
  1374. }
  1375. static void free_mux(struct rcu_head *rcu)
  1376. {
  1377. struct kcm_mux *mux = container_of(rcu,
  1378. struct kcm_mux, rcu);
  1379. kmem_cache_free(kcm_muxp, mux);
  1380. }
  1381. static void release_mux(struct kcm_mux *mux)
  1382. {
  1383. struct kcm_net *knet = mux->knet;
  1384. struct kcm_psock *psock, *tmp_psock;
  1385. /* Release psocks */
  1386. list_for_each_entry_safe(psock, tmp_psock,
  1387. &mux->psocks, psock_list) {
  1388. if (!WARN_ON(psock->unattaching))
  1389. kcm_unattach(psock);
  1390. }
  1391. if (WARN_ON(mux->psocks_cnt))
  1392. return;
  1393. __skb_queue_purge(&mux->rx_hold_queue);
  1394. mutex_lock(&knet->mutex);
  1395. aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
  1396. aggregate_psock_stats(&mux->aggregate_psock_stats,
  1397. &knet->aggregate_psock_stats);
  1398. aggregate_strp_stats(&mux->aggregate_strp_stats,
  1399. &knet->aggregate_strp_stats);
  1400. list_del_rcu(&mux->kcm_mux_list);
  1401. knet->count--;
  1402. mutex_unlock(&knet->mutex);
  1403. call_rcu(&mux->rcu, free_mux);
  1404. }
  1405. static void kcm_done(struct kcm_sock *kcm)
  1406. {
  1407. struct kcm_mux *mux = kcm->mux;
  1408. struct sock *sk = &kcm->sk;
  1409. int socks_cnt;
  1410. spin_lock_bh(&mux->rx_lock);
  1411. if (kcm->rx_psock) {
  1412. /* Cleanup in unreserve_rx_kcm */
  1413. WARN_ON(kcm->done);
  1414. kcm->rx_disabled = 1;
  1415. kcm->done = 1;
  1416. spin_unlock_bh(&mux->rx_lock);
  1417. return;
  1418. }
  1419. if (kcm->rx_wait) {
  1420. list_del(&kcm->wait_rx_list);
  1421. kcm->rx_wait = false;
  1422. }
  1423. /* Move any pending receive messages to other kcm sockets */
  1424. requeue_rx_msgs(mux, &sk->sk_receive_queue);
  1425. spin_unlock_bh(&mux->rx_lock);
  1426. if (WARN_ON(sk_rmem_alloc_get(sk)))
  1427. return;
  1428. /* Detach from MUX */
  1429. spin_lock_bh(&mux->lock);
  1430. list_del(&kcm->kcm_sock_list);
  1431. mux->kcm_socks_cnt--;
  1432. socks_cnt = mux->kcm_socks_cnt;
  1433. spin_unlock_bh(&mux->lock);
  1434. if (!socks_cnt) {
  1435. /* We are done with the mux now. */
  1436. release_mux(mux);
  1437. }
  1438. WARN_ON(kcm->rx_wait);
  1439. sock_put(&kcm->sk);
  1440. }
  1441. /* Called by kcm_release to close a KCM socket.
  1442. * If this is the last KCM socket on the MUX, destroy the MUX.
  1443. */
  1444. static int kcm_release(struct socket *sock)
  1445. {
  1446. struct sock *sk = sock->sk;
  1447. struct kcm_sock *kcm;
  1448. struct kcm_mux *mux;
  1449. struct kcm_psock *psock;
  1450. if (!sk)
  1451. return 0;
  1452. kcm = kcm_sk(sk);
  1453. mux = kcm->mux;
  1454. sock_orphan(sk);
  1455. kfree_skb(kcm->seq_skb);
  1456. lock_sock(sk);
  1457. /* Purge queue under lock to avoid race condition with tx_work trying
  1458. * to act when queue is nonempty. If tx_work runs after this point
  1459. * it will just return.
  1460. */
  1461. __skb_queue_purge(&sk->sk_write_queue);
  1462. /* Set tx_stopped. This is checked when psock is bound to a kcm and we
  1463. * get a writespace callback. This prevents further work being queued
  1464. * from the callback (unbinding the psock occurs after canceling work.
  1465. */
  1466. kcm->tx_stopped = 1;
  1467. release_sock(sk);
  1468. spin_lock_bh(&mux->lock);
  1469. if (kcm->tx_wait) {
  1470. /* Take of tx_wait list, after this point there should be no way
  1471. * that a psock will be assigned to this kcm.
  1472. */
  1473. list_del(&kcm->wait_psock_list);
  1474. kcm->tx_wait = false;
  1475. }
  1476. spin_unlock_bh(&mux->lock);
  1477. /* Cancel work. After this point there should be no outside references
  1478. * to the kcm socket.
  1479. */
  1480. cancel_work_sync(&kcm->tx_work);
  1481. lock_sock(sk);
  1482. psock = kcm->tx_psock;
  1483. if (psock) {
  1484. /* A psock was reserved, so we need to kill it since it
  1485. * may already have some bytes queued from a message. We
  1486. * need to do this after removing kcm from tx_wait list.
  1487. */
  1488. kcm_abort_tx_psock(psock, EPIPE, false);
  1489. unreserve_psock(kcm);
  1490. }
  1491. release_sock(sk);
  1492. WARN_ON(kcm->tx_wait);
  1493. WARN_ON(kcm->tx_psock);
  1494. sock->sk = NULL;
  1495. kcm_done(kcm);
  1496. return 0;
  1497. }
  1498. static const struct proto_ops kcm_dgram_ops = {
  1499. .family = PF_KCM,
  1500. .owner = THIS_MODULE,
  1501. .release = kcm_release,
  1502. .bind = sock_no_bind,
  1503. .connect = sock_no_connect,
  1504. .socketpair = sock_no_socketpair,
  1505. .accept = sock_no_accept,
  1506. .getname = sock_no_getname,
  1507. .poll = datagram_poll,
  1508. .ioctl = kcm_ioctl,
  1509. .listen = sock_no_listen,
  1510. .shutdown = sock_no_shutdown,
  1511. .setsockopt = kcm_setsockopt,
  1512. .getsockopt = kcm_getsockopt,
  1513. .sendmsg = kcm_sendmsg,
  1514. .recvmsg = kcm_recvmsg,
  1515. .mmap = sock_no_mmap,
  1516. .sendpage = kcm_sendpage,
  1517. };
  1518. static const struct proto_ops kcm_seqpacket_ops = {
  1519. .family = PF_KCM,
  1520. .owner = THIS_MODULE,
  1521. .release = kcm_release,
  1522. .bind = sock_no_bind,
  1523. .connect = sock_no_connect,
  1524. .socketpair = sock_no_socketpair,
  1525. .accept = sock_no_accept,
  1526. .getname = sock_no_getname,
  1527. .poll = datagram_poll,
  1528. .ioctl = kcm_ioctl,
  1529. .listen = sock_no_listen,
  1530. .shutdown = sock_no_shutdown,
  1531. .setsockopt = kcm_setsockopt,
  1532. .getsockopt = kcm_getsockopt,
  1533. .sendmsg = kcm_sendmsg,
  1534. .recvmsg = kcm_recvmsg,
  1535. .mmap = sock_no_mmap,
  1536. .sendpage = kcm_sendpage,
  1537. .splice_read = kcm_splice_read,
  1538. };
  1539. /* Create proto operation for kcm sockets */
  1540. static int kcm_create(struct net *net, struct socket *sock,
  1541. int protocol, int kern)
  1542. {
  1543. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1544. struct sock *sk;
  1545. struct kcm_mux *mux;
  1546. switch (sock->type) {
  1547. case SOCK_DGRAM:
  1548. sock->ops = &kcm_dgram_ops;
  1549. break;
  1550. case SOCK_SEQPACKET:
  1551. sock->ops = &kcm_seqpacket_ops;
  1552. break;
  1553. default:
  1554. return -ESOCKTNOSUPPORT;
  1555. }
  1556. if (protocol != KCMPROTO_CONNECTED)
  1557. return -EPROTONOSUPPORT;
  1558. sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern);
  1559. if (!sk)
  1560. return -ENOMEM;
  1561. /* Allocate a kcm mux, shared between KCM sockets */
  1562. mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL);
  1563. if (!mux) {
  1564. sk_free(sk);
  1565. return -ENOMEM;
  1566. }
  1567. spin_lock_init(&mux->lock);
  1568. spin_lock_init(&mux->rx_lock);
  1569. INIT_LIST_HEAD(&mux->kcm_socks);
  1570. INIT_LIST_HEAD(&mux->kcm_rx_waiters);
  1571. INIT_LIST_HEAD(&mux->kcm_tx_waiters);
  1572. INIT_LIST_HEAD(&mux->psocks);
  1573. INIT_LIST_HEAD(&mux->psocks_ready);
  1574. INIT_LIST_HEAD(&mux->psocks_avail);
  1575. mux->knet = knet;
  1576. /* Add new MUX to list */
  1577. mutex_lock(&knet->mutex);
  1578. list_add_rcu(&mux->kcm_mux_list, &knet->mux_list);
  1579. knet->count++;
  1580. mutex_unlock(&knet->mutex);
  1581. skb_queue_head_init(&mux->rx_hold_queue);
  1582. /* Init KCM socket */
  1583. sock_init_data(sock, sk);
  1584. init_kcm_sock(kcm_sk(sk), mux);
  1585. return 0;
  1586. }
  1587. static struct net_proto_family kcm_family_ops = {
  1588. .family = PF_KCM,
  1589. .create = kcm_create,
  1590. .owner = THIS_MODULE,
  1591. };
  1592. static __net_init int kcm_init_net(struct net *net)
  1593. {
  1594. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1595. INIT_LIST_HEAD_RCU(&knet->mux_list);
  1596. mutex_init(&knet->mutex);
  1597. return 0;
  1598. }
  1599. static __net_exit void kcm_exit_net(struct net *net)
  1600. {
  1601. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1602. /* All KCM sockets should be closed at this point, which should mean
  1603. * that all multiplexors and psocks have been destroyed.
  1604. */
  1605. WARN_ON(!list_empty(&knet->mux_list));
  1606. }
  1607. static struct pernet_operations kcm_net_ops = {
  1608. .init = kcm_init_net,
  1609. .exit = kcm_exit_net,
  1610. .id = &kcm_net_id,
  1611. .size = sizeof(struct kcm_net),
  1612. };
  1613. static int __init kcm_init(void)
  1614. {
  1615. int err = -ENOMEM;
  1616. kcm_muxp = kmem_cache_create("kcm_mux_cache",
  1617. sizeof(struct kcm_mux), 0,
  1618. SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
  1619. if (!kcm_muxp)
  1620. goto fail;
  1621. kcm_psockp = kmem_cache_create("kcm_psock_cache",
  1622. sizeof(struct kcm_psock), 0,
  1623. SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
  1624. if (!kcm_psockp)
  1625. goto fail;
  1626. kcm_wq = create_singlethread_workqueue("kkcmd");
  1627. if (!kcm_wq)
  1628. goto fail;
  1629. err = proto_register(&kcm_proto, 1);
  1630. if (err)
  1631. goto fail;
  1632. err = sock_register(&kcm_family_ops);
  1633. if (err)
  1634. goto sock_register_fail;
  1635. err = register_pernet_device(&kcm_net_ops);
  1636. if (err)
  1637. goto net_ops_fail;
  1638. err = kcm_proc_init();
  1639. if (err)
  1640. goto proc_init_fail;
  1641. return 0;
  1642. proc_init_fail:
  1643. unregister_pernet_device(&kcm_net_ops);
  1644. net_ops_fail:
  1645. sock_unregister(PF_KCM);
  1646. sock_register_fail:
  1647. proto_unregister(&kcm_proto);
  1648. fail:
  1649. kmem_cache_destroy(kcm_muxp);
  1650. kmem_cache_destroy(kcm_psockp);
  1651. if (kcm_wq)
  1652. destroy_workqueue(kcm_wq);
  1653. return err;
  1654. }
  1655. static void __exit kcm_exit(void)
  1656. {
  1657. kcm_proc_exit();
  1658. unregister_pernet_device(&kcm_net_ops);
  1659. sock_unregister(PF_KCM);
  1660. proto_unregister(&kcm_proto);
  1661. destroy_workqueue(kcm_wq);
  1662. kmem_cache_destroy(kcm_muxp);
  1663. kmem_cache_destroy(kcm_psockp);
  1664. }
  1665. module_init(kcm_init);
  1666. module_exit(kcm_exit);
  1667. MODULE_LICENSE("GPL");
  1668. MODULE_ALIAS_NETPROTO(PF_KCM);