scif_api.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496
  1. /*
  2. * Intel MIC Platform Software Stack (MPSS)
  3. *
  4. * Copyright(c) 2014 Intel Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License, version 2, as
  8. * published by the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * Intel SCIF driver.
  16. *
  17. */
  18. #include <linux/scif.h>
  19. #include "scif_main.h"
  20. #include "scif_map.h"
/* Human-readable endpoint state names, indexed by enum scif_epd_state. */
static const char * const scif_ep_states[] = {
	"Unbound",
	"Bound",
	"Listening",
	"Connected",
	"Connecting",
	"Mapping",
	"Closing",
	"Close Listening",
	"Disconnected",
	"Zombie"};
/*
 * Progress of a non-blocking (async) scif_connect() on an endpoint.
 * A later blocking call (or close) that finds ASYNC_CONN_INPROGRESS
 * moves to ASYNC_CONN_FLUSH_WORK, flushes the connect work item and
 * collects its status from ep->conn_err.
 */
enum conn_async_state {
	ASYNC_CONN_IDLE = 1, /* ep setup for async connect */
	ASYNC_CONN_INPROGRESS, /* async connect in progress */
	ASYNC_CONN_FLUSH_WORK /* async work flush in progress */
};
/*
 * File operations for anonymous inode file associated with a SCIF endpoint,
 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
 * poll API in the kernel and these take in a struct file *. Since a struct
 * file is not available to kernel mode SCIF, it uses an anonymous file for
 * this purpose.
 *
 * Only .owner is set: the anonymous file needs no real file operations,
 * it merely pins this module while the endpoint's file is alive.
 */
const struct file_operations scif_anon_fops = {
	.owner = THIS_MODULE,
};
  47. scif_epd_t scif_open(void)
  48. {
  49. struct scif_endpt *ep;
  50. int err;
  51. might_sleep();
  52. ep = kzalloc(sizeof(*ep), GFP_KERNEL);
  53. if (!ep)
  54. goto err_ep_alloc;
  55. ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
  56. if (!ep->qp_info.qp)
  57. goto err_qp_alloc;
  58. err = scif_anon_inode_getfile(ep);
  59. if (err)
  60. goto err_anon_inode;
  61. spin_lock_init(&ep->lock);
  62. mutex_init(&ep->sendlock);
  63. mutex_init(&ep->recvlock);
  64. scif_rma_ep_init(ep);
  65. ep->state = SCIFEP_UNBOUND;
  66. dev_dbg(scif_info.mdev.this_device,
  67. "SCIFAPI open: ep %p success\n", ep);
  68. return ep;
  69. err_anon_inode:
  70. kfree(ep->qp_info.qp);
  71. err_qp_alloc:
  72. kfree(ep);
  73. err_ep_alloc:
  74. return NULL;
  75. }
  76. EXPORT_SYMBOL_GPL(scif_open);
/*
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @epd: The end point returned from scif_open()
 *
 * Returns @ep if this side performed the disconnect (a SCIF_DISCNCT was
 * sent to the peer), or NULL if the peer had already disconnected the
 * endpoint and there is nothing left to tear down here.
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
{
	struct scifmsg msg;
	struct scif_endpt *fep = NULL;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	int err;

	/*
	 * Wake up any threads blocked in send()/recv() before closing
	 * out the connection. Grabbing and releasing the send/recv lock
	 * will ensure that any blocked senders/receivers have exited for
	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
	 * close. Ring 3 endpoints are not affected since close will not
	 * be called while there are IOCTLs executing.
	 */
	wake_up_interruptible(&ep->sendwq);
	wake_up_interruptible(&ep->recvwq);
	mutex_lock(&ep->sendlock);
	mutex_unlock(&ep->sendlock);
	mutex_lock(&ep->recvlock);
	mutex_unlock(&ep->recvlock);

	/* Remove from the connected list */
	mutex_lock(&scif_info.connlock);
	list_for_each_safe(pos, tmpq, &scif_info.connected) {
		tmpep = list_entry(pos, struct scif_endpt, list);
		if (tmpep == ep) {
			list_del(pos);
			fep = tmpep;
			/* ep->lock stays held until the DISCNCT is sent below */
			spin_lock(&ep->lock);
			break;
		}
	}

	if (!fep) {
		/*
		 * The other side has completed the disconnect before
		 * the end point can be removed from the list. Therefore
		 * the ep lock is not locked, traverse the disconnected
		 * list to find the endpoint and release the conn lock.
		 */
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		return NULL;
	}

	init_completion(&ep->discon);
	msg.uop = SCIF_DISCNCT;
	msg.src = ep->port;
	msg.dst = ep->peer;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->remote_ep;
	err = scif_nodeqp_send(ep->remote_dev, &msg);
	spin_unlock(&ep->lock);
	mutex_unlock(&scif_info.connlock);

	if (!err)
		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
		wait_for_completion_timeout(&ep->discon,
					    SCIF_NODE_ALIVE_TIMEOUT);
	return ep;
}
/**
 * scif_close() - Close a SCIF endpoint
 * @epd: The end point returned from scif_open()
 *
 * Tears the endpoint down according to its current state: disconnected
 * endpoints are unlinked, connected endpoints are disconnected from the
 * peer, and listening endpoints drop dangling accepts and reject pending
 * connection requests. The endpoint finally lands on the zombie list for
 * deferred destruction. Always returns 0.
 */
int scif_close(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	enum scif_epd_state oldstate;
	bool flush_conn;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
		ep, scif_ep_states[ep->state]);
	might_sleep();
	spin_lock(&ep->lock);
	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
	spin_unlock(&ep->lock);

	/* Let an in-flight non-blocking connect finish before closing */
	if (flush_conn)
		flush_work(&scif_info.conn_work);

	spin_lock(&ep->lock);
	oldstate = ep->state;

	ep->state = SCIFEP_CLOSING;

	switch (oldstate) {
	case SCIFEP_ZOMBIE:
		dev_err(scif_info.mdev.this_device,
			"SCIFAPI close: zombie state unexpected\n");
		/* fall through */
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		/* Remove from the disconnected list */
		mutex_lock(&scif_info.connlock);
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		break;
	case SCIFEP_UNBOUND:
	case SCIFEP_BOUND:
	case SCIFEP_CONNECTING:
		/* Nothing registered with the peer yet; nothing to undo */
		spin_unlock(&ep->lock);
		break;
	case SCIFEP_MAPPING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CLOSING:
	{
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		scif_disconnect_ep(ep);
		break;
	}
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
	{
		struct scif_conreq *conreq;
		struct scifmsg msg;
		struct scif_endpt *aep;

		spin_unlock(&ep->lock);
		mutex_lock(&scif_info.eplock);

		/* remove from listen list */
		list_for_each_safe(pos, tmpq, &scif_info.listen) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep)
				list_del(pos);
		}
		/* Remove any dangling accepts */
		while (ep->acceptcnt) {
			aep = list_first_entry(&ep->li_accept,
					       struct scif_endpt, liacceptlist);
			list_del(&aep->liacceptlist);
			scif_put_port(aep->port.port);
			/* Drop the accepted ep from the unaccepted list too */
			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
				tmpep = list_entry(pos, struct scif_endpt,
						   miacceptlist);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			/*
			 * eplock is dropped while connlock is taken; the
			 * accept count keeps this loop's position stable.
			 */
			mutex_unlock(&scif_info.eplock);
			mutex_lock(&scif_info.connlock);
			list_for_each_safe(pos, tmpq, &scif_info.connected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			mutex_unlock(&scif_info.connlock);
			scif_teardown_ep(aep);
			mutex_lock(&scif_info.eplock);
			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
			ep->acceptcnt--;
		}

		spin_lock(&ep->lock);
		mutex_unlock(&scif_info.eplock);

		/* Remove and reject any pending connection requests. */
		while (ep->conreqcnt) {
			conreq = list_first_entry(&ep->conlist,
						  struct scif_conreq, list);
			list_del(&conreq->list);

			msg.uop = SCIF_CNCT_REJ;
			msg.dst.node = conreq->msg.src.node;
			msg.dst.port = conreq->msg.src.port;
			msg.payload[0] = conreq->msg.payload[0];
			msg.payload[1] = conreq->msg.payload[1];
			/*
			 * No Error Handling on purpose for scif_nodeqp_send().
			 * If the remote node is lost we still want free the
			 * connection requests on the self node.
			 */
			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
					 &msg);
			ep->conreqcnt--;
			kfree(conreq);
		}

		spin_unlock(&ep->lock);
		/* If a kSCIF accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	}
	scif_put_port(ep->port.port);
	scif_anon_inode_fput(ep);
	scif_teardown_ep(ep);
	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_close);
  283. /**
  284. * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
  285. * accept new connections.
  286. * @epd: The end point returned from scif_open()
  287. */
  288. int __scif_flush(scif_epd_t epd)
  289. {
  290. struct scif_endpt *ep = (struct scif_endpt *)epd;
  291. switch (ep->state) {
  292. case SCIFEP_LISTENING:
  293. {
  294. ep->state = SCIFEP_CLLISTEN;
  295. /* If an accept is waiting wake it up */
  296. wake_up_interruptible(&ep->conwq);
  297. break;
  298. }
  299. default:
  300. break;
  301. }
  302. return 0;
  303. }
/**
 * scif_bind() - Bind an endpoint to a local SCIF port
 * @epd: The end point returned from scif_open()
 * @pn: Requested port number, or 0 to let SCIF pick any free port
 *
 * Return: the bound (positive) port number on success; -EACCES if an
 * unprivileged caller requests an admin port, -EINVAL if the endpoint
 * is already bound or the requested port cannot be reserved, -EISCONN
 * if the endpoint has progressed beyond the unbound state.
 */
int scif_bind(scif_epd_t epd, u16 pn)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret = 0;
	int tmp;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: ep %p %s requested port number %d\n",
		ep, scif_ep_states[ep->state], pn);
	if (pn) {
		/*
		 * Similar to IETF RFC 1700, SCIF ports below
		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
		 * processes or by processes executed by privileged users.
		 */
		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
			ret = -EACCES;
			goto scif_bind_admin_exit;
		}
	}

	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_BOUND) {
		ret = -EINVAL;
		goto scif_bind_exit;
	} else if (ep->state != SCIFEP_UNBOUND) {
		ret = -EISCONN;
		goto scif_bind_exit;
	}

	if (pn) {
		/* A return value other than pn means the reservation failed */
		tmp = scif_rsrv_port(pn);
		if (tmp != pn) {
			ret = -EINVAL;
			goto scif_bind_exit;
		}
	} else {
		/* No specific port requested: allocate the next free one */
		ret = scif_get_new_port();
		if (ret < 0)
			goto scif_bind_exit;
		pn = ret;
	}

	ep->state = SCIFEP_BOUND;
	ep->port.node = scif_info.nodeid;
	ep->port.port = pn;
	ep->conn_async_state = ASYNC_CONN_IDLE;
	ret = pn;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: bound to port number %d\n", pn);
scif_bind_exit:
	spin_unlock(&ep->lock);
scif_bind_admin_exit:
	return ret;
}
EXPORT_SYMBOL_GPL(scif_bind);
/**
 * scif_listen() - Mark a bound endpoint as listening for connections
 * @epd: The end point returned from scif_open()
 * @backlog: Maximum number of pending connection requests to queue
 *
 * Return: 0 on success; -EINVAL when the endpoint's state does not allow
 * listening (unbound, closing, disconnected, ...), -EISCONN when it is
 * already listening or part of a connection.
 */
int scif_listen(scif_epd_t epd, int backlog)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
	case SCIFEP_CLLISTEN:
	case SCIFEP_UNBOUND:
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		return -EINVAL;
	case SCIFEP_LISTENING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		spin_unlock(&ep->lock);
		return -EISCONN;
	case SCIFEP_BOUND:
		/* Only a freshly bound endpoint may start listening */
		break;
	}

	ep->state = SCIFEP_LISTENING;
	ep->backlog = backlog;
	ep->conreqcnt = 0;
	ep->acceptcnt = 0;
	INIT_LIST_HEAD(&ep->conlist);
	init_waitqueue_head(&ep->conwq);
	INIT_LIST_HEAD(&ep->li_accept);
	spin_unlock(&ep->lock);

	/*
	 * Listen status is complete so delete the qp information not needed
	 * on a listen before placing on the list of listening ep's
	 */
	scif_teardown_ep(ep);
	ep->qp_info.qp = NULL;
	mutex_lock(&scif_info.eplock);
	list_add_tail(&ep->list, &scif_info.listen);
	mutex_unlock(&scif_info.eplock);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);
  399. /*
  400. ************************************************************************
  401. * SCIF connection flow:
  402. *
  403. * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
  404. * connections via a SCIF_CNCT_REQ message
  405. * 2) A SCIF endpoint can initiate a SCIF connection by calling
  406. * scif_connect(..) which calls scif_setup_qp_connect(..) which
  407. * allocates the local qp for the endpoint ring buffer and then sends
  408. * a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
  409. * a SCIF_CNCT_REJ message
  410. * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
  411. * wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
  412. * message otherwise
  413. * 4) A thread blocked waiting for incoming connections allocates its local
  414. * endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
  415. * and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
  416. * the node sends a SCIF_CNCT_REJ message
  417. * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
  418. * connecting endpoint is woken up as part of handling
  419. * scif_cnctgnt_resp(..) following which it maps the remote endpoints'
  420. * QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
  421. * success or a SCIF_CNCT_GNTNACK message on failure and completes
  422. * the scif_connect(..) API
  423. * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
  424. * in step 4 is woken up and completes the scif_accept(..) API
  425. * 7) The SCIF connection is now established between the two SCIF endpoints.
  426. */
/*
 * scif_conn_func - Perform the connect handshake for endpoint @ep
 *
 * Implements steps 2 and 5 of the connection flow described above:
 * reserves a DMA channel, sets up the local queue pair, sends a
 * SCIF_CNCT_REQ to the peer, waits for the grant/reject response and
 * finishes the handshake with SCIF_CNCT_GNTACK/GNTNACK.
 *
 * Returns 0 on success or a negative errno; on failure the endpoint is
 * returned to SCIFEP_BOUND.
 */
static int scif_conn_func(struct scif_endpt *ep)
{
	int err = 0;
	struct scifmsg msg;
	struct device *spdev;

	err = scif_reserve_dma_chan(ep);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}
	/* Initiate the first part of the endpoint QP setup */
	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s err %d qp_offset 0x%llx\n",
			__func__, err, ep->qp_info.qp_offset);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	/* Format connect message and send it */
	msg.src = ep->port;
	msg.dst = ep->conn_port;
	msg.uop = SCIF_CNCT_REQ;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->qp_info.qp_offset;
	err = _scif_nodeqp_send(ep->remote_dev, &msg);
	if (err)
		goto connect_error_dec;
	scif_put_peer_dev(spdev);
	/*
	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
	 * SCIF_CNCT_REJ message.
	 */
	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ALIVE_TIMEOUT);
	if (!err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d timeout\n", __func__, __LINE__);
		ep->state = SCIFEP_BOUND;
	}
	/* Peer dev is re-acquired for the response/ACK phase */
	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	if (ep->state == SCIFEP_MAPPING) {
		/* Grant received: map the peer QP and complete the handshake */
		err = scif_setup_qp_connect_response(ep->remote_dev,
						     ep->qp_info.qp,
						     ep->qp_info.gnt_pld);
		/*
		 * If the resource to map the queue are not available then
		 * we need to tell the other side to terminate the accept
		 */
		if (err) {
			dev_err(&ep->remote_dev->sdev->dev,
				"%s %d err %d\n", __func__, __LINE__, err);
			msg.uop = SCIF_CNCT_GNTNACK;
			msg.payload[0] = ep->remote_ep;
			_scif_nodeqp_send(ep->remote_dev, &msg);
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}

		msg.uop = SCIF_CNCT_GNTACK;
		msg.payload[0] = ep->remote_ep;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		if (err) {
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}
		ep->state = SCIFEP_CONNECTED;
		mutex_lock(&scif_info.connlock);
		list_add_tail(&ep->list, &scif_info.connected);
		mutex_unlock(&scif_info.connlock);
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connected\n", ep);
	} else if (ep->state == SCIFEP_BOUND) {
		/* Peer rejected the request (or we timed out above) */
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connection refused\n", ep);
		err = -ECONNREFUSED;
		goto connect_error_dec;
	}
	scif_put_peer_dev(spdev);
	return err;
connect_error_dec:
	scif_put_peer_dev(spdev);
cleanup_qp:
	scif_cleanup_ep_qp(ep);
connect_error_simple:
	return err;
}
  525. /*
  526. * scif_conn_handler:
  527. *
  528. * Workqueue handler for servicing non-blocking SCIF connect
  529. *
  530. */
  531. void scif_conn_handler(struct work_struct *work)
  532. {
  533. struct scif_endpt *ep;
  534. do {
  535. ep = NULL;
  536. spin_lock(&scif_info.nb_connect_lock);
  537. if (!list_empty(&scif_info.nb_connect_list)) {
  538. ep = list_first_entry(&scif_info.nb_connect_list,
  539. struct scif_endpt, conn_list);
  540. list_del(&ep->conn_list);
  541. }
  542. spin_unlock(&scif_info.nb_connect_lock);
  543. if (ep) {
  544. ep->conn_err = scif_conn_func(ep);
  545. wake_up_interruptible(&ep->conn_pend_wq);
  546. }
  547. } while (ep);
  548. }
  549. int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
  550. {
  551. struct scif_endpt *ep = (struct scif_endpt *)epd;
  552. int err = 0;
  553. struct scif_dev *remote_dev;
  554. struct device *spdev;
  555. dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
  556. scif_ep_states[ep->state]);
  557. if (!scif_dev || dst->node > scif_info.maxid)
  558. return -ENODEV;
  559. might_sleep();
  560. remote_dev = &scif_dev[dst->node];
  561. spdev = scif_get_peer_dev(remote_dev);
  562. if (IS_ERR(spdev)) {
  563. err = PTR_ERR(spdev);
  564. return err;
  565. }
  566. spin_lock(&ep->lock);
  567. switch (ep->state) {
  568. case SCIFEP_ZOMBIE:
  569. case SCIFEP_CLOSING:
  570. err = -EINVAL;
  571. break;
  572. case SCIFEP_DISCONNECTED:
  573. if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
  574. ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
  575. else
  576. err = -EINVAL;
  577. break;
  578. case SCIFEP_LISTENING:
  579. case SCIFEP_CLLISTEN:
  580. err = -EOPNOTSUPP;
  581. break;
  582. case SCIFEP_CONNECTING:
  583. case SCIFEP_MAPPING:
  584. if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
  585. err = -EINPROGRESS;
  586. else
  587. err = -EISCONN;
  588. break;
  589. case SCIFEP_CONNECTED:
  590. if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
  591. ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
  592. else
  593. err = -EISCONN;
  594. break;
  595. case SCIFEP_UNBOUND:
  596. err = scif_get_new_port();
  597. if (err < 0)
  598. break;
  599. ep->port.port = err;
  600. ep->port.node = scif_info.nodeid;
  601. ep->conn_async_state = ASYNC_CONN_IDLE;
  602. /* Fall through */
  603. case SCIFEP_BOUND:
  604. /*
  605. * If a non-blocking connect has been already initiated
  606. * (conn_async_state is either ASYNC_CONN_INPROGRESS or
  607. * ASYNC_CONN_FLUSH_WORK), the end point could end up in
  608. * SCIF_BOUND due an error in the connection process
  609. * (e.g., connection refused) If conn_async_state is
  610. * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
  611. * so that the error status can be collected. If the state is
  612. * already ASYNC_CONN_FLUSH_WORK - then set the error to
  613. * EINPROGRESS since some other thread is waiting to collect
  614. * error status.
  615. */
  616. if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
  617. ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
  618. } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
  619. err = -EINPROGRESS;
  620. } else {
  621. ep->conn_port = *dst;
  622. init_waitqueue_head(&ep->sendwq);
  623. init_waitqueue_head(&ep->recvwq);
  624. init_waitqueue_head(&ep->conwq);
  625. ep->conn_async_state = 0;
  626. if (unlikely(non_block))
  627. ep->conn_async_state = ASYNC_CONN_INPROGRESS;
  628. }
  629. break;
  630. }
  631. if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
  632. goto connect_simple_unlock1;
  633. ep->state = SCIFEP_CONNECTING;
  634. ep->remote_dev = &scif_dev[dst->node];
  635. ep->qp_info.qp->magic = SCIFEP_MAGIC;
  636. if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
  637. init_waitqueue_head(&ep->conn_pend_wq);
  638. spin_lock(&scif_info.nb_connect_lock);
  639. list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
  640. spin_unlock(&scif_info.nb_connect_lock);
  641. err = -EINPROGRESS;
  642. schedule_work(&scif_info.conn_work);
  643. }
  644. connect_simple_unlock1:
  645. spin_unlock(&ep->lock);
  646. scif_put_peer_dev(spdev);
  647. if (err) {
  648. return err;
  649. } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
  650. flush_work(&scif_info.conn_work);
  651. err = ep->conn_err;
  652. spin_lock(&ep->lock);
  653. ep->conn_async_state = ASYNC_CONN_IDLE;
  654. spin_unlock(&ep->lock);
  655. } else {
  656. err = scif_conn_func(ep);
  657. }
  658. return err;
  659. }
/**
 * scif_connect() - Request a connection to a remote SCIF port (blocking)
 * @epd: The end point returned from scif_open()
 * @dst: Global id of the port to connect to
 *
 * Thin wrapper around __scif_connect() with non_block = false.
 */
int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
{
	return __scif_connect(epd, dst, false);
}
EXPORT_SYMBOL_GPL(scif_connect);
/**
 * scif_accept() - Accept a connection request from the remote node
 *
 * The function accepts a connection request from the remote node. Successful
 * complete is indicate by a new end point being created and passed back
 * to the caller for future reference.
 *
 * Upon successful complete a zero will be returned and the peer information
 * will be filled in.
 *
 * If the end point is not in the listening state -EINVAL will be returned.
 *
 * If during the connection sequence resource allocation fails the -ENOMEM
 * will be returned.
 *
 * If the function is called with the ASYNC flag set and no connection requests
 * are pending it will return -EAGAIN.
 *
 * If the remote side is not sending any connection requests the caller may
 * terminate this function with a signal. If so a -EINTR will be returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
		scif_epd_t *newepd, int flags)
{
	struct scif_endpt *lep = (struct scif_endpt *)epd;
	struct scif_endpt *cep;
	struct scif_conreq *conreq;
	struct scifmsg msg;
	int err;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);

	if (flags & ~SCIF_ACCEPT_SYNC)
		return -EINVAL;

	if (!peer || !newepd)
		return -EINVAL;

	might_sleep();
	spin_lock(&lep->lock);
	if (lep->state != SCIFEP_LISTENING) {
		spin_unlock(&lep->lock);
		return -EINVAL;
	}

	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
		/* No connection request present and we do not want to wait */
		spin_unlock(&lep->lock);
		return -EAGAIN;
	}

	lep->files = current->files;
retry_connection:
	spin_unlock(&lep->lock);
	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
	err = wait_event_interruptible(lep->conwq,
				       (lep->conreqcnt ||
				       (lep->state != SCIFEP_LISTENING)));
	if (err)
		return err;

	if (lep->state != SCIFEP_LISTENING)
		return -EINTR;

	spin_lock(&lep->lock);

	if (!lep->conreqcnt)
		/* Lost the race for the request to another accepter; rewait */
		goto retry_connection;

	/* Get the first connect request off the list */
	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
	list_del(&conreq->list);
	lep->conreqcnt--;
	spin_unlock(&lep->lock);

	/* Fill in the peer information */
	peer->node = conreq->msg.src.node;
	peer->port = conreq->msg.src.port;

	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	if (!cep) {
		err = -ENOMEM;
		goto scif_accept_error_epalloc;
	}
	spin_lock_init(&cep->lock);
	mutex_init(&cep->sendlock);
	mutex_init(&cep->recvlock);
	cep->state = SCIFEP_CONNECTING;
	cep->remote_dev = &scif_dev[peer->node];
	cep->remote_ep = conreq->msg.payload[0];

	scif_rma_ep_init(cep);

	err = scif_reserve_dma_chan(cep);
	if (err) {
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto scif_accept_error_qpalloc;
	}

	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
	if (!cep->qp_info.qp) {
		err = -ENOMEM;
		goto scif_accept_error_qpalloc;
	}

	err = scif_anon_inode_getfile(cep);
	if (err)
		goto scif_accept_error_anon_inode;

	cep->qp_info.qp->magic = SCIFEP_MAGIC;
	spdev = scif_get_peer_dev(cep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto scif_accept_error_map;
	}
	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
				   cep->remote_dev);
	if (err) {
		dev_dbg(&cep->remote_dev->sdev->dev,
			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
			lep, cep, err, cep->qp_info.qp_offset);
		scif_put_peer_dev(spdev);
		goto scif_accept_error_map;
	}

	cep->port.node = lep->port.node;
	cep->port.port = lep->port.port;
	cep->peer.node = peer->node;
	cep->peer.port = peer->port;
	init_waitqueue_head(&cep->sendwq);
	init_waitqueue_head(&cep->recvwq);
	init_waitqueue_head(&cep->conwq);

	/* Grant the connection: step 4 of the connection flow above */
	msg.uop = SCIF_CNCT_GNT;
	msg.src = cep->port;
	msg.payload[0] = cep->remote_ep;
	msg.payload[1] = cep->qp_info.qp_offset;
	msg.payload[2] = (u64)cep;
	err = _scif_nodeqp_send(cep->remote_dev, &msg);
	scif_put_peer_dev(spdev);
	if (err)
		goto scif_accept_error_map;
retry:
	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ACCEPT_TIMEOUT);
	if (!err && scifdev_alive(cep))
		/* Peer node still alive: keep waiting for its (N)ACK */
		goto retry;
	err = !err ? -ENODEV : 0;
	if (err)
		goto scif_accept_error_map;
	kfree(conreq);

	spin_lock(&cep->lock);

	if (cep->state == SCIFEP_CLOSING) {
		/*
		 * Remote failed to allocate resources and NAKed the grant.
		 * There is at this point nothing referencing the new end point.
		 */
		spin_unlock(&cep->lock);
		scif_teardown_ep(cep);
		kfree(cep);

		/* If call with sync flag then go back and wait. */
		if (flags & SCIF_ACCEPT_SYNC) {
			spin_lock(&lep->lock);
			goto retry_connection;
		}
		return -EAGAIN;
	}

	scif_get_port(cep->port.port);
	*newepd = (scif_epd_t)cep;
	spin_unlock(&cep->lock);
	return 0;
scif_accept_error_map:
	scif_anon_inode_fput(cep);
scif_accept_error_anon_inode:
	scif_teardown_ep(cep);
scif_accept_error_qpalloc:
	kfree(cep);
scif_accept_error_epalloc:
	/* Reject the request so the peer does not wait forever */
	msg.uop = SCIF_CNCT_REJ;
	msg.dst.node = conreq->msg.src.node;
	msg.dst.port = conreq->msg.src.port;
	msg.payload[0] = conreq->msg.payload[0];
	msg.payload[1] = conreq->msg.payload[1];
	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
	kfree(conreq);
	return err;
}
EXPORT_SYMBOL_GPL(scif_accept);
  839. /*
  840. * scif_msg_param_check:
  841. * @epd: The end point returned from scif_open()
  842. * @len: Length to receive
  843. * @flags: blocking or non blocking
  844. *
  845. * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
  846. */
  847. static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
  848. {
  849. int ret = -EINVAL;
  850. if (len < 0)
  851. goto err_ret;
  852. if (flags && (!(flags & SCIF_RECV_BLOCK)))
  853. goto err_ret;
  854. ret = 0;
  855. err_ret:
  856. return ret;
  857. }
/*
 * _scif_send() - Core send path: copy @len bytes from the kernel buffer
 * @msg into the endpoint's outbound ring buffer and notify the peer with
 * a SCIF_CLIENT_SENT node queue-pair message after each chunk.
 *
 * Returns the number of bytes actually sent if any data was transferred,
 * 0 or a negative errno otherwise (-ECONNRESET / -ENOTCONN when the
 * endpoint is no longer connected). Caller must ensure @msg is a kernel
 * pointer; user copies happen in scif_user_send().
 */
static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_xfer_len = 0, sent_len = 0, write_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_SEND_BLOCK)
		might_sleep();

	spin_lock(&ep->lock);
	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
		write_count = scif_rb_space(&qp->outbound_q);
		if (write_count) {
			/* Best effort to send as much data as possible */
			curr_xfer_len = min(len - sent_len, write_count);
			ret = scif_rb_write(&qp->outbound_q, msg,
					    curr_xfer_len);
			if (ret < 0)
				break;
			/* Success. Update write pointer */
			scif_rb_commit(&qp->outbound_q);
			/*
			 * Send a notification to the peer about the
			 * produced data message.
			 */
			notif_msg.src = ep->port;
			notif_msg.uop = SCIF_CLIENT_SENT;
			notif_msg.payload[0] = ep->remote_ep;
			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
			if (ret)
				break;
			sent_len += curr_xfer_len;
			msg = msg + curr_xfer_len;
			continue;
		}
		/*
		 * Cap the next wakeup threshold at one byte less than the QP
		 * size: the ring can never be completely full, so waiting for
		 * more space than that would never be satisfied.
		 */
		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
		/* Not enough RB space. return for the Non Blocking case */
		if (!(flags & SCIF_SEND_BLOCK))
			break;

		/*
		 * Drop the endpoint lock across the sleep; the state is
		 * re-evaluated by the loop condition after re-acquiring it.
		 */
		spin_unlock(&ep->lock);
		/* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
		ret =
		wait_event_interruptible(ep->sendwq,
					 (SCIFEP_CONNECTED != ep->state) ||
					 (scif_rb_space(&qp->outbound_q) >=
					 curr_xfer_len));
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (sent_len)
		ret = sent_len;
	else if (!ret && SCIFEP_CONNECTED != ep->state)
		ret = SCIFEP_DISCONNECTED == ep->state ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}
/*
 * _scif_recv() - Core receive path: copy up to @len bytes from the
 * endpoint's inbound ring buffer into the kernel buffer @msg, sending a
 * SCIF_CLIENT_RCVD notification to the peer for each consumed chunk
 * while the endpoint is still connected.
 *
 * Unlike the send path, receive also drains data in SCIFEP_DISCONNECTED
 * state so that messages queued before the peer went away can still be
 * read; in that state the read pointer is NOT updated and no
 * notification is sent, since the peer side resources may be gone.
 *
 * Returns the number of bytes received if any data was transferred,
 * 0 or a negative errno otherwise (-ECONNRESET / -ENOTCONN when the
 * endpoint is not in a readable state).
 */
static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	int read_size;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_recv_len = 0, remaining_len = len, read_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_RECV_BLOCK)
		might_sleep();

	spin_lock(&ep->lock);
	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
				 SCIFEP_DISCONNECTED == ep->state)) {
		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
		if (read_count) {
			/*
			 * Best effort to recv as much data as there
			 * are bytes to read in the RB particularly
			 * important for the Non Blocking case.
			 */
			curr_recv_len = min(remaining_len, read_count);
			read_size = scif_rb_get_next(&qp->inbound_q,
						     msg, curr_recv_len);
			if (ep->state == SCIFEP_CONNECTED) {
				/*
				 * Update the read pointer only if the endpoint
				 * is still connected else the read pointer
				 * might no longer exist since the peer has
				 * freed resources!
				 */
				scif_rb_update_read_ptr(&qp->inbound_q);
				/*
				 * Send a notification to the peer about the
				 * consumed data message only if the EP is in
				 * SCIFEP_CONNECTED state.
				 */
				notif_msg.src = ep->port;
				notif_msg.uop = SCIF_CLIENT_RCVD;
				notif_msg.payload[0] = ep->remote_ep;
				ret = _scif_nodeqp_send(ep->remote_dev,
							&notif_msg);
				if (ret)
					break;
			}
			remaining_len -= curr_recv_len;
			msg = msg + curr_recv_len;
			continue;
		}
		/*
		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
		 * we will keep looping forever.
		 */
		if (ep->state == SCIFEP_DISCONNECTED)
			break;
		/*
		 * Return in the Non Blocking case if there is no data
		 * to read in this iteration.
		 */
		if (!(flags & SCIF_RECV_BLOCK))
			break;
		/*
		 * Cap the wakeup threshold at one byte less than the QP size;
		 * the ring can never hold a full QP worth of data, so a larger
		 * threshold would never be reached.
		 */
		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
		/* Drop the lock across the sleep; state is rechecked below. */
		spin_unlock(&ep->lock);
		/*
		 * Wait for a SCIF_CLIENT_SEND message in the blocking case
		 * or until other side disconnects.
		 */
		ret =
		wait_event_interruptible(ep->recvwq,
					 SCIFEP_CONNECTED != ep->state ||
					 scif_rb_count(&qp->inbound_q,
						       curr_recv_len)
					 >= curr_recv_len);
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (len - remaining_len)
		ret = len - remaining_len;
	else if (!ret && ep->state != SCIFEP_CONNECTED)
		ret = ep->state == SCIFEP_DISCONNECTED ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}
  1000. /**
  1001. * scif_user_send() - Send data to connection queue
  1002. * @epd: The end point returned from scif_open()
  1003. * @msg: Address to place data
  1004. * @len: Length to receive
  1005. * @flags: blocking or non blocking
  1006. *
  1007. * This function is called from the driver IOCTL entry point
  1008. * only and is a wrapper for _scif_send().
  1009. */
  1010. int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
  1011. {
  1012. struct scif_endpt *ep = (struct scif_endpt *)epd;
  1013. int err = 0;
  1014. int sent_len = 0;
  1015. char *tmp;
  1016. int loop_len;
  1017. int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
  1018. dev_dbg(scif_info.mdev.this_device,
  1019. "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
  1020. if (!len)
  1021. return 0;
  1022. err = scif_msg_param_check(epd, len, flags);
  1023. if (err)
  1024. goto send_err;
  1025. tmp = kmalloc(chunk_len, GFP_KERNEL);
  1026. if (!tmp) {
  1027. err = -ENOMEM;
  1028. goto send_err;
  1029. }
  1030. /*
  1031. * Grabbing the lock before breaking up the transfer in
  1032. * multiple chunks is required to ensure that messages do
  1033. * not get fragmented and reordered.
  1034. */
  1035. mutex_lock(&ep->sendlock);
  1036. while (sent_len != len) {
  1037. loop_len = len - sent_len;
  1038. loop_len = min(chunk_len, loop_len);
  1039. if (copy_from_user(tmp, msg, loop_len)) {
  1040. err = -EFAULT;
  1041. goto send_free_err;
  1042. }
  1043. err = _scif_send(epd, tmp, loop_len, flags);
  1044. if (err < 0)
  1045. goto send_free_err;
  1046. sent_len += err;
  1047. msg += err;
  1048. if (err != loop_len)
  1049. goto send_free_err;
  1050. }
  1051. send_free_err:
  1052. mutex_unlock(&ep->sendlock);
  1053. kfree(tmp);
  1054. send_err:
  1055. return err < 0 ? err : sent_len;
  1056. }
  1057. /**
  1058. * scif_user_recv() - Receive data from connection queue
  1059. * @epd: The end point returned from scif_open()
  1060. * @msg: Address to place data
  1061. * @len: Length to receive
  1062. * @flags: blocking or non blocking
  1063. *
  1064. * This function is called from the driver IOCTL entry point
  1065. * only and is a wrapper for _scif_recv().
  1066. */
  1067. int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
  1068. {
  1069. struct scif_endpt *ep = (struct scif_endpt *)epd;
  1070. int err = 0;
  1071. int recv_len = 0;
  1072. char *tmp;
  1073. int loop_len;
  1074. int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
  1075. dev_dbg(scif_info.mdev.this_device,
  1076. "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
  1077. if (!len)
  1078. return 0;
  1079. err = scif_msg_param_check(epd, len, flags);
  1080. if (err)
  1081. goto recv_err;
  1082. tmp = kmalloc(chunk_len, GFP_KERNEL);
  1083. if (!tmp) {
  1084. err = -ENOMEM;
  1085. goto recv_err;
  1086. }
  1087. /*
  1088. * Grabbing the lock before breaking up the transfer in
  1089. * multiple chunks is required to ensure that messages do
  1090. * not get fragmented and reordered.
  1091. */
  1092. mutex_lock(&ep->recvlock);
  1093. while (recv_len != len) {
  1094. loop_len = len - recv_len;
  1095. loop_len = min(chunk_len, loop_len);
  1096. err = _scif_recv(epd, tmp, loop_len, flags);
  1097. if (err < 0)
  1098. goto recv_free_err;
  1099. if (copy_to_user(msg, tmp, err)) {
  1100. err = -EFAULT;
  1101. goto recv_free_err;
  1102. }
  1103. recv_len += err;
  1104. msg += err;
  1105. if (err != loop_len)
  1106. goto recv_free_err;
  1107. }
  1108. recv_free_err:
  1109. mutex_unlock(&ep->recvlock);
  1110. kfree(tmp);
  1111. recv_err:
  1112. return err < 0 ? err : recv_len;
  1113. }
  1114. /**
  1115. * scif_send() - Send data to connection queue
  1116. * @epd: The end point returned from scif_open()
  1117. * @msg: Address to place data
  1118. * @len: Length to receive
  1119. * @flags: blocking or non blocking
  1120. *
  1121. * This function is called from the kernel mode only and is
  1122. * a wrapper for _scif_send().
  1123. */
  1124. int scif_send(scif_epd_t epd, void *msg, int len, int flags)
  1125. {
  1126. struct scif_endpt *ep = (struct scif_endpt *)epd;
  1127. int ret;
  1128. dev_dbg(scif_info.mdev.this_device,
  1129. "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
  1130. if (!len)
  1131. return 0;
  1132. ret = scif_msg_param_check(epd, len, flags);
  1133. if (ret)
  1134. return ret;
  1135. if (!ep->remote_dev)
  1136. return -ENOTCONN;
  1137. /*
  1138. * Grab the mutex lock in the blocking case only
  1139. * to ensure messages do not get fragmented/reordered.
  1140. * The non blocking mode is protected using spin locks
  1141. * in _scif_send().
  1142. */
  1143. if (flags & SCIF_SEND_BLOCK)
  1144. mutex_lock(&ep->sendlock);
  1145. ret = _scif_send(epd, msg, len, flags);
  1146. if (flags & SCIF_SEND_BLOCK)
  1147. mutex_unlock(&ep->sendlock);
  1148. return ret;
  1149. }
  1150. EXPORT_SYMBOL_GPL(scif_send);
  1151. /**
  1152. * scif_recv() - Receive data from connection queue
  1153. * @epd: The end point returned from scif_open()
  1154. * @msg: Address to place data
  1155. * @len: Length to receive
  1156. * @flags: blocking or non blocking
  1157. *
  1158. * This function is called from the kernel mode only and is
  1159. * a wrapper for _scif_recv().
  1160. */
  1161. int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
  1162. {
  1163. struct scif_endpt *ep = (struct scif_endpt *)epd;
  1164. int ret;
  1165. dev_dbg(scif_info.mdev.this_device,
  1166. "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
  1167. if (!len)
  1168. return 0;
  1169. ret = scif_msg_param_check(epd, len, flags);
  1170. if (ret)
  1171. return ret;
  1172. /*
  1173. * Grab the mutex lock in the blocking case only
  1174. * to ensure messages do not get fragmented/reordered.
  1175. * The non blocking mode is protected using spin locks
  1176. * in _scif_send().
  1177. */
  1178. if (flags & SCIF_RECV_BLOCK)
  1179. mutex_lock(&ep->recvlock);
  1180. ret = _scif_recv(epd, msg, len, flags);
  1181. if (flags & SCIF_RECV_BLOCK)
  1182. mutex_unlock(&ep->recvlock);
  1183. return ret;
  1184. }
  1185. EXPORT_SYMBOL_GPL(scif_recv);
/*
 * _scif_poll_wait() - poll_wait() helper that temporarily drops the
 * endpoint spinlock, since poll_wait() may sleep on a GFP_KERNEL
 * allocation.
 * @f:  file being polled
 * @wq: wait queue to register on
 * @p:  poll table supplied by the poll core
 * @ep: endpoint whose ->lock is held on entry and on exit
 */
static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
				   poll_table *p, struct scif_endpt *ep)
{
	/*
	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
	 * and regrab it afterwards. Because the endpoint state might have
	 * changed while the lock was given up, the state must be checked
	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
	 * does this.
	 */
	spin_unlock(&ep->lock);
	poll_wait(f, wq, p);
	spin_lock(&ep->lock);
}
/*
 * __scif_pollfd() - Compute the poll event mask for a SCIF endpoint.
 * @f:    anon file backing the endpoint
 * @wait: poll table from the poll core (may be NULL-qproc'd)
 * @ep:   endpoint being polled
 *
 * Each endpoint state is rechecked after _scif_poll_wait() because that
 * helper drops ep->lock and the state may have changed meanwhile; a
 * state change falls through to the next case rather than reporting a
 * stale result.
 */
__poll_t
__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
{
	__poll_t mask = 0;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
	spin_lock(&ep->lock);

	/* Endpoint is waiting for a non-blocking connect to complete */
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			/*
			 * EPOLLOUT signals the connect attempt has resolved,
			 * whether it succeeded, disconnected, or errored.
			 */
			if (ep->state == SCIFEP_CONNECTED ||
			    ep->state == SCIFEP_DISCONNECTED ||
			    ep->conn_err)
				mask |= EPOLLOUT;
			goto exit;
		}
	}

	/* Endpoint is listening for incoming connection requests */
	if (ep->state == SCIFEP_LISTENING) {
		_scif_poll_wait(f, &ep->conwq, wait, ep);
		if (ep->state == SCIFEP_LISTENING) {
			/* Pending connect requests are readable events */
			if (ep->conreqcnt)
				mask |= EPOLLIN;
			goto exit;
		}
	}

	/* Endpoint is connected or disconnected */
	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
		/* Only register on the queues the caller actually asked for */
		if (poll_requested_events(wait) & EPOLLIN)
			_scif_poll_wait(f, &ep->recvwq, wait, ep);
		if (poll_requested_events(wait) & EPOLLOUT)
			_scif_poll_wait(f, &ep->sendwq, wait, ep);
		if (ep->state == SCIFEP_CONNECTED ||
		    ep->state == SCIFEP_DISCONNECTED) {
			/* Data can be read without blocking */
			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
				mask |= EPOLLIN;
			/* Data can be written without blocking */
			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
				mask |= EPOLLOUT;
			/* Return EPOLLHUP if endpoint is disconnected */
			if (ep->state == SCIFEP_DISCONNECTED)
				mask |= EPOLLHUP;
			goto exit;
		}
	}

	/* Return EPOLLERR if the endpoint is in none of the above states */
	mask |= EPOLLERR;
exit:
	spin_unlock(&ep->lock);
	return mask;
}
/**
 * scif_poll() - Kernel mode SCIF poll
 * @ufds: Array of scif_pollepd structures containing the end points
 *	  and events to poll on
 * @nfds: Size of the ufds array
 * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
 *
 * The code flow in this function is based on do_poll(..) in select.c
 *
 * Returns the number of endpoints which have pending events or 0 in
 * the event of a timeout. If a signal is used for wake up, -EINTR is
 * returned.
 */
int
scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
{
	struct poll_wqueues table;
	poll_table *pt;
	int i, count = 0, timed_out = timeout_msecs == 0;
	__poll_t mask;
	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
		: msecs_to_jiffies(timeout_msecs);

	poll_initwait(&table);
	pt = &table.pt;
	while (1) {
		for (i = 0; i < nfds; i++) {
			/* EPOLLERR/EPOLLHUP are always reportable */
			pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
			mask = __scif_pollfd(ufds[i].epd->anon,
					     pt, ufds[i].epd);
			mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
			if (mask) {
				count++;
				/*
				 * An event was found, so we will not sleep:
				 * stop registering on further wait queues
				 * (same optimization as do_poll in select.c).
				 */
				pt->_qproc = NULL;
			}
			ufds[i].revents = mask;
		}
		/*
		 * Wait queues were registered (or skipped) on the first pass;
		 * subsequent passes only re-read readiness.
		 */
		pt->_qproc = NULL;
		if (!count) {
			count = table.error;
			if (signal_pending(current))
				count = -EINTR;
		}
		if (count || timed_out)
			break;
		if (!schedule_timeout_interruptible(timeout))
			timed_out = 1;
	}
	poll_freewait(&table);
	return count;
}
EXPORT_SYMBOL_GPL(scif_poll);
  1304. int scif_get_node_ids(u16 *nodes, int len, u16 *self)
  1305. {
  1306. int online = 0;
  1307. int offset = 0;
  1308. int node;
  1309. if (!scif_is_mgmt_node())
  1310. scif_get_node_info();
  1311. *self = scif_info.nodeid;
  1312. mutex_lock(&scif_info.conflock);
  1313. len = min_t(int, len, scif_info.total);
  1314. for (node = 0; node <= scif_info.maxid; node++) {
  1315. if (_scifdev_alive(&scif_dev[node])) {
  1316. online++;
  1317. if (offset < len)
  1318. nodes[offset++] = node;
  1319. }
  1320. }
  1321. dev_dbg(scif_info.mdev.this_device,
  1322. "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
  1323. scif_info.total, online, offset);
  1324. mutex_unlock(&scif_info.conflock);
  1325. return online;
  1326. }
  1327. EXPORT_SYMBOL_GPL(scif_get_node_ids);
  1328. static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
  1329. {
  1330. struct scif_client *client =
  1331. container_of(si, struct scif_client, si);
  1332. struct scif_peer_dev *spdev =
  1333. container_of(dev, struct scif_peer_dev, dev);
  1334. if (client->probe)
  1335. client->probe(spdev);
  1336. return 0;
  1337. }
  1338. static void scif_remove_client_dev(struct device *dev,
  1339. struct subsys_interface *si)
  1340. {
  1341. struct scif_client *client =
  1342. container_of(si, struct scif_client, si);
  1343. struct scif_peer_dev *spdev =
  1344. container_of(dev, struct scif_peer_dev, dev);
  1345. if (client->remove)
  1346. client->remove(spdev);
  1347. }
/*
 * scif_client_unregister() - Unregister a SCIF client previously
 * registered with scif_client_register(); detaches its subsys_interface
 * from the SCIF peer bus.
 */
void scif_client_unregister(struct scif_client *client)
{
	subsys_interface_unregister(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_unregister);
  1353. int scif_client_register(struct scif_client *client)
  1354. {
  1355. struct subsys_interface *si = &client->si;
  1356. si->name = client->name;
  1357. si->subsys = &scif_peer_bus;
  1358. si->add_dev = scif_add_client_dev;
  1359. si->remove_dev = scif_remove_client_dev;
  1360. return subsys_interface_register(&client->si);
  1361. }
  1362. EXPORT_SYMBOL_GPL(scif_client_register);