rc.c

  1. /*
  2. * Copyright(c) 2015, 2016 Intel Corporation.
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of version 2 of the GNU General Public License as
  11. * published by the Free Software Foundation.
  12. *
  13. * This program is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * General Public License for more details.
  17. *
  18. * BSD LICENSE
  19. *
  20. * Redistribution and use in source and binary forms, with or without
  21. * modification, are permitted provided that the following conditions
  22. * are met:
  23. *
  24. * - Redistributions of source code must retain the above copyright
  25. * notice, this list of conditions and the following disclaimer.
  26. * - Redistributions in binary form must reproduce the above copyright
  27. * notice, this list of conditions and the following disclaimer in
  28. * the documentation and/or other materials provided with the
  29. * distribution.
  30. * - Neither the name of Intel Corporation nor the names of its
  31. * contributors may be used to endorse or promote products derived
  32. * from this software without specific prior written permission.
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. *
  46. */
  47. #include <linux/io.h>
  48. #include <rdma/rdma_vt.h>
  49. #include <rdma/rdmavt_qp.h>
  50. #include "hfi.h"
  51. #include "qp.h"
  52. #include "verbs_txreq.h"
  53. #include "trace.h"
  54. /* cut down ridiculously long IB macro names */
  55. #define OP(x) IB_OPCODE_RC_##x
  56. /**
  57. * hfi1_add_retry_timer - add/start a retry timer
  58. * @qp: the QP
  59. *
  60. * add a retry timer on the QP
  61. */
  62. static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
  63. {
  64. struct ib_qp *ibqp = &qp->ibqp;
  65. struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
  66. qp->s_flags |= RVT_S_TIMER;
  67. /* 4.096 usec. * (1 << qp->timeout) */
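      /* e.g. a timeout value of 14 gives 4.096 usec * 2^14, roughly 67 msec */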
  68. qp->s_timer.expires = jiffies + qp->timeout_jiffies +
  69. rdi->busy_jiffies;
  70. add_timer(&qp->s_timer);
  71. }
  72. /**
  73. * hfi1_add_rnr_timer - add/start an rnr timer
  74. * @qp: the QP
  75. * @to: timeout in usecs
  76. *
  77. * add an rnr timer on the QP
  78. */
  79. void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
  80. {
  81. struct hfi1_qp_priv *priv = qp->priv;
  82. qp->s_flags |= RVT_S_WAIT_RNR;
  83. qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
  84. add_timer(&priv->s_rnr_timer);
  85. }
  86. /**
  87. * hfi1_mod_retry_timer - mod a retry timer
  88. * @qp: the QP
  89. *
  90. * Modify a potentially already running retry
  91. * timer
  92. */
  93. static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
  94. {
  95. struct ib_qp *ibqp = &qp->ibqp;
  96. struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
  97. qp->s_flags |= RVT_S_TIMER;
  98. /* 4.096 usec. * (1 << qp->timeout) */
  99. mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
  100. rdi->busy_jiffies);
  101. }
  102. /**
  103. * hfi1_stop_retry_timer - stop a retry timer
  104. * @qp: the QP
  105. *
  106. * stop a retry timer and return whether the timer
  107. * was pending.
  108. */
  109. static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
  110. {
  111. int rval = 0;
  112. /* Remove QP from retry */
  113. if (qp->s_flags & RVT_S_TIMER) {
  114. qp->s_flags &= ~RVT_S_TIMER;
  115. rval = del_timer(&qp->s_timer);
  116. }
  117. return rval;
  118. }
  119. /**
  120. * hfi1_stop_rc_timers - stop all timers
  121. * @qp: the QP
  122. *
  123. * stop any pending timers
  124. */
  125. void hfi1_stop_rc_timers(struct rvt_qp *qp)
  126. {
  127. struct hfi1_qp_priv *priv = qp->priv;
  128. /* Remove QP from all timers */
  129. if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
  130. qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
  131. del_timer(&qp->s_timer);
  132. del_timer(&priv->s_rnr_timer);
  133. }
  134. }
  135. /**
  136. * hfi1_stop_rnr_timer - stop an rnr timer
  137. * @qp: the QP
  138. *
  139. * stop an rnr timer and return whether the timer
  140. * was pending.
  141. */
  142. static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
  143. {
  144. int rval = 0;
  145. struct hfi1_qp_priv *priv = qp->priv;
  146. /* Remove QP from rnr timer */
  147. if (qp->s_flags & RVT_S_WAIT_RNR) {
  148. qp->s_flags &= ~RVT_S_WAIT_RNR;
  149. rval = del_timer(&priv->s_rnr_timer);
  150. }
  151. return rval;
  152. }
  153. /**
  154. * hfi1_del_timers_sync - wait for any timeout routines to exit
  155. * @qp: the QP
  156. */
  157. void hfi1_del_timers_sync(struct rvt_qp *qp)
  158. {
  159. struct hfi1_qp_priv *priv = qp->priv;
  160. del_timer_sync(&qp->s_timer);
  161. del_timer_sync(&priv->s_rnr_timer);
  162. }
  163. /* only opcode mask for adaptive pio */
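      /* each bit below is selected by the low 5 bits of an RC opcode, i.e. the
       * operation field with the transport-type bits masked off */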
  164. const u32 rc_only_opcode =
  165. BIT(OP(SEND_ONLY) & 0x1f) |
  166. BIT(OP(SEND_ONLY_WITH_IMMEDIATE) & 0x1f) |
  167. BIT(OP(RDMA_WRITE_ONLY) & 0x1f) |
  168. BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & 0x1f) |
  169. BIT(OP(RDMA_READ_REQUEST) & 0x1f) |
  170. BIT(OP(ACKNOWLEDGE) & 0x1f) |
  171. BIT(OP(ATOMIC_ACKNOWLEDGE) & 0x1f) |
  172. BIT(OP(COMPARE_SWAP) & 0x1f) |
  173. BIT(OP(FETCH_ADD) & 0x1f));
  174. static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
  175. u32 psn, u32 pmtu)
  176. {
  177. u32 len;
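      /* bytes of this WQE already covered by earlier PSNs: one pmtu per PSN */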
  178. len = delta_psn(psn, wqe->psn) * pmtu;
  179. ss->sge = wqe->sg_list[0];
  180. ss->sg_list = wqe->sg_list + 1;
  181. ss->num_sge = wqe->wr.num_sge;
  182. ss->total_len = wqe->length;
  183. hfi1_skip_sge(ss, len, 0);
  184. return wqe->length - len;
  185. }
  186. /**
  187. * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
  188. * @dev: the device for this QP
  189. * @qp: a pointer to the QP
  190. * @ohdr: a pointer to the IB header being constructed
  191. * @ps: the xmit packet state
  192. *
  193. * Return 1 if constructed; otherwise, return 0.
  194. * Note that we are in the responder's side of the QP context.
  195. * Note the QP s_lock must be held.
  196. */
  197. static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
  198. struct hfi1_other_headers *ohdr,
  199. struct hfi1_pkt_state *ps)
  200. {
  201. struct rvt_ack_entry *e;
  202. u32 hwords;
  203. u32 len;
  204. u32 bth0;
  205. u32 bth2;
  206. int middle = 0;
  207. u32 pmtu = qp->pmtu;
  208. struct hfi1_qp_priv *priv = qp->priv;
  209. /* Don't send an ACK if we aren't supposed to. */
  210. if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
  211. goto bail;
  212. /* header size in 32-bit words LRH+BTH = (8+12)/4. */
  213. hwords = 5;
  214. switch (qp->s_ack_state) {
  215. case OP(RDMA_READ_RESPONSE_LAST):
  216. case OP(RDMA_READ_RESPONSE_ONLY):
  217. e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  218. if (e->rdma_sge.mr) {
  219. rvt_put_mr(e->rdma_sge.mr);
  220. e->rdma_sge.mr = NULL;
  221. }
  222. /* FALLTHROUGH */
  223. case OP(ATOMIC_ACKNOWLEDGE):
  224. /*
  225. * We can increment the tail pointer now that the last
  226. * response has been sent instead of only being
  227. * constructed.
  228. */
  229. if (++qp->s_tail_ack_queue > HFI1_MAX_RDMA_ATOMIC)
  230. qp->s_tail_ack_queue = 0;
  231. /* FALLTHROUGH */
  232. case OP(SEND_ONLY):
  233. case OP(ACKNOWLEDGE):
  234. /* Check for no next entry in the queue. */
  235. if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
  236. if (qp->s_flags & RVT_S_ACK_PENDING)
  237. goto normal;
  238. goto bail;
  239. }
  240. e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  241. if (e->opcode == OP(RDMA_READ_REQUEST)) {
  242. /*
  243. * If a RDMA read response is being resent and
  244. * we haven't seen the duplicate request yet,
  245. * then stop sending the remaining responses the
  246. * responder has seen until the requester re-sends it.
  247. */
  248. len = e->rdma_sge.sge_length;
  249. if (len && !e->rdma_sge.mr) {
  250. qp->s_tail_ack_queue = qp->r_head_ack_queue;
  251. goto bail;
  252. }
  253. /* Copy SGE state in case we need to resend */
  254. ps->s_txreq->mr = e->rdma_sge.mr;
  255. if (ps->s_txreq->mr)
  256. rvt_get_mr(ps->s_txreq->mr);
  257. qp->s_ack_rdma_sge.sge = e->rdma_sge;
  258. qp->s_ack_rdma_sge.num_sge = 1;
  259. qp->s_cur_sge = &qp->s_ack_rdma_sge;
  260. if (len > pmtu) {
  261. len = pmtu;
  262. qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
  263. } else {
  264. qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
  265. e->sent = 1;
  266. }
  267. ohdr->u.aeth = hfi1_compute_aeth(qp);
  268. hwords++;
  269. qp->s_ack_rdma_psn = e->psn;
  270. bth2 = mask_psn(qp->s_ack_rdma_psn++);
  271. } else {
  272. /* COMPARE_SWAP or FETCH_ADD */
  273. qp->s_cur_sge = NULL;
  274. len = 0;
  275. qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
  276. ohdr->u.at.aeth = hfi1_compute_aeth(qp);
  277. ohdr->u.at.atomic_ack_eth[0] =
  278. cpu_to_be32(e->atomic_data >> 32);
  279. ohdr->u.at.atomic_ack_eth[1] =
  280. cpu_to_be32(e->atomic_data);
  281. hwords += sizeof(ohdr->u.at) / sizeof(u32);
  282. bth2 = mask_psn(e->psn);
  283. e->sent = 1;
  284. }
  285. bth0 = qp->s_ack_state << 24;
  286. break;
  287. case OP(RDMA_READ_RESPONSE_FIRST):
  288. qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
  289. /* FALLTHROUGH */
  290. case OP(RDMA_READ_RESPONSE_MIDDLE):
  291. qp->s_cur_sge = &qp->s_ack_rdma_sge;
  292. ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
  293. if (ps->s_txreq->mr)
  294. rvt_get_mr(ps->s_txreq->mr);
  295. len = qp->s_ack_rdma_sge.sge.sge_length;
  296. if (len > pmtu) {
  297. len = pmtu;
  298. middle = HFI1_CAP_IS_KSET(SDMA_AHG);
  299. } else {
  300. ohdr->u.aeth = hfi1_compute_aeth(qp);
  301. hwords++;
  302. qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
  303. e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  304. e->sent = 1;
  305. }
  306. bth0 = qp->s_ack_state << 24;
  307. bth2 = mask_psn(qp->s_ack_rdma_psn++);
  308. break;
  309. default:
  310. normal:
  311. /*
  312. * Send a regular ACK.
  313. * Set the s_ack_state so we wait until after sending
  314. * the ACK before setting s_ack_state to ACKNOWLEDGE
  315. * (see above).
  316. */
  317. qp->s_ack_state = OP(SEND_ONLY);
  318. qp->s_flags &= ~RVT_S_ACK_PENDING;
  319. qp->s_cur_sge = NULL;
  320. if (qp->s_nak_state)
  321. ohdr->u.aeth =
  322. cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
  323. (qp->s_nak_state <<
  324. HFI1_AETH_CREDIT_SHIFT));
  325. else
  326. ohdr->u.aeth = hfi1_compute_aeth(qp);
  327. hwords++;
  328. len = 0;
  329. bth0 = OP(ACKNOWLEDGE) << 24;
  330. bth2 = mask_psn(qp->s_ack_psn);
  331. }
  332. qp->s_rdma_ack_cnt++;
  333. qp->s_hdrwords = hwords;
  334. ps->s_txreq->sde = priv->s_sde;
  335. qp->s_cur_size = len;
  336. hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
  337. /* pbc */
  338. ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
  339. return 1;
  340. bail:
  341. qp->s_ack_state = OP(ACKNOWLEDGE);
  342. /*
  343. * Ensure s_rdma_ack_cnt changes are committed prior to resetting
  344. * RVT_S_RESP_PENDING
  345. */
  346. smp_wmb();
  347. qp->s_flags &= ~(RVT_S_RESP_PENDING
  348. | RVT_S_ACK_PENDING
  349. | RVT_S_AHG_VALID);
  350. return 0;
  351. }
  352. /**
  353. * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  354. * @qp: a pointer to the QP
  355. *
  356. * Assumes s_lock is held.
  357. *
  358. * Return 1 if constructed; otherwise, return 0.
  359. */
  360. int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
  361. {
  362. struct hfi1_qp_priv *priv = qp->priv;
  363. struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
  364. struct hfi1_other_headers *ohdr;
  365. struct rvt_sge_state *ss;
  366. struct rvt_swqe *wqe;
  367. /* header size in 32-bit words LRH+BTH = (8+12)/4. */
  368. u32 hwords = 5;
  369. u32 len;
  370. u32 bth0 = 0;
  371. u32 bth2;
  372. u32 pmtu = qp->pmtu;
  373. char newreq;
  374. int middle = 0;
  375. int delta;
  376. ps->s_txreq = get_txreq(ps->dev, qp);
  377. if (IS_ERR(ps->s_txreq))
  378. goto bail_no_tx;
  379. ohdr = &ps->s_txreq->phdr.hdr.u.oth;
  380. if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
  381. ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
  382. /* Sending responses has higher priority over sending requests. */
  383. if ((qp->s_flags & RVT_S_RESP_PENDING) &&
  384. make_rc_ack(dev, qp, ohdr, ps))
  385. return 1;
  386. if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
  387. if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
  388. goto bail;
  389. /* We are in the error state, flush the work request. */
  390. smp_read_barrier_depends(); /* see post_one_send() */
  391. if (qp->s_last == ACCESS_ONCE(qp->s_head))
  392. goto bail;
  393. /* If DMAs are in progress, we can't flush immediately. */
  394. if (iowait_sdma_pending(&priv->s_iowait)) {
  395. qp->s_flags |= RVT_S_WAIT_DMA;
  396. goto bail;
  397. }
  398. clear_ahg(qp);
  399. wqe = rvt_get_swqe_ptr(qp, qp->s_last);
  400. hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
  401. IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
  402. /* will get called again */
  403. goto done_free_tx;
  404. }
  405. if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
  406. goto bail;
  407. if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
  408. if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
  409. qp->s_flags |= RVT_S_WAIT_PSN;
  410. goto bail;
  411. }
  412. qp->s_sending_psn = qp->s_psn;
  413. qp->s_sending_hpsn = qp->s_psn - 1;
  414. }
  415. /* Send a request. */
  416. wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
  417. switch (qp->s_state) {
  418. default:
  419. if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
  420. goto bail;
  421. /*
  422. * Resend an old request or start a new one.
  423. *
  424. * We keep track of the current SWQE so that
  425. * we don't reset the "furthest progress" state
  426. * if we need to back up.
  427. */
  428. newreq = 0;
  429. if (qp->s_cur == qp->s_tail) {
  430. /* Check if send work queue is empty. */
  431. if (qp->s_tail == qp->s_head) {
  432. clear_ahg(qp);
  433. goto bail;
  434. }
  435. /*
  436. * If a fence is requested, wait for previous
  437. * RDMA read and atomic operations to finish.
  438. */
  439. if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
  440. qp->s_num_rd_atomic) {
  441. qp->s_flags |= RVT_S_WAIT_FENCE;
  442. goto bail;
  443. }
  444. /*
  445. * Local operations are processed immediately
  446. * after all prior requests have completed
  447. */
  448. if (wqe->wr.opcode == IB_WR_REG_MR ||
  449. wqe->wr.opcode == IB_WR_LOCAL_INV) {
  450. int local_ops = 0;
  451. int err = 0;
  452. if (qp->s_last != qp->s_cur)
  453. goto bail;
  454. if (++qp->s_cur == qp->s_size)
  455. qp->s_cur = 0;
  456. if (++qp->s_tail == qp->s_size)
  457. qp->s_tail = 0;
  458. if (!(wqe->wr.send_flags &
  459. RVT_SEND_COMPLETION_ONLY)) {
  460. err = rvt_invalidate_rkey(
  461. qp,
  462. wqe->wr.ex.invalidate_rkey);
  463. local_ops = 1;
  464. }
  465. hfi1_send_complete(qp, wqe,
  466. err ? IB_WC_LOC_PROT_ERR
  467. : IB_WC_SUCCESS);
  468. if (local_ops)
  469. atomic_dec(&qp->local_ops_pending);
  470. qp->s_hdrwords = 0;
  471. goto done_free_tx;
  472. }
  473. newreq = 1;
  474. qp->s_psn = wqe->psn;
  475. }
  476. /*
  477. * Note that we have to be careful not to modify the
  478. * original work request since we may need to resend
  479. * it.
  480. */
  481. len = wqe->length;
  482. ss = &qp->s_sge;
  483. bth2 = mask_psn(qp->s_psn);
  484. switch (wqe->wr.opcode) {
  485. case IB_WR_SEND:
  486. case IB_WR_SEND_WITH_IMM:
  487. case IB_WR_SEND_WITH_INV:
  488. /* If no credit, return. */
  489. if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
  490. cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
  491. qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
  492. goto bail;
  493. }
  494. if (len > pmtu) {
  495. qp->s_state = OP(SEND_FIRST);
  496. len = pmtu;
  497. break;
  498. }
  499. if (wqe->wr.opcode == IB_WR_SEND) {
  500. qp->s_state = OP(SEND_ONLY);
  501. } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
  502. qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
  503. /* Immediate data comes after the BTH */
  504. ohdr->u.imm_data = wqe->wr.ex.imm_data;
  505. hwords += 1;
  506. } else {
  507. qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE);
  508. /* Invalidate rkey comes after the BTH */
  509. ohdr->u.ieth = cpu_to_be32(
  510. wqe->wr.ex.invalidate_rkey);
  511. hwords += 1;
  512. }
  513. if (wqe->wr.send_flags & IB_SEND_SOLICITED)
  514. bth0 |= IB_BTH_SOLICITED;
  515. bth2 |= IB_BTH_REQ_ACK;
  516. if (++qp->s_cur == qp->s_size)
  517. qp->s_cur = 0;
  518. break;
  519. case IB_WR_RDMA_WRITE:
  520. if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
  521. qp->s_lsn++;
  522. /* FALLTHROUGH */
  523. case IB_WR_RDMA_WRITE_WITH_IMM:
  524. /* If no credit, return. */
  525. if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
  526. cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
  527. qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
  528. goto bail;
  529. }
  530. ohdr->u.rc.reth.vaddr =
  531. cpu_to_be64(wqe->rdma_wr.remote_addr);
  532. ohdr->u.rc.reth.rkey =
  533. cpu_to_be32(wqe->rdma_wr.rkey);
  534. ohdr->u.rc.reth.length = cpu_to_be32(len);
  535. hwords += sizeof(struct ib_reth) / sizeof(u32);
  536. if (len > pmtu) {
  537. qp->s_state = OP(RDMA_WRITE_FIRST);
  538. len = pmtu;
  539. break;
  540. }
  541. if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
  542. qp->s_state = OP(RDMA_WRITE_ONLY);
  543. } else {
  544. qp->s_state =
  545. OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
  546. /* Immediate data comes after RETH */
  547. ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
  548. hwords += 1;
  549. if (wqe->wr.send_flags & IB_SEND_SOLICITED)
  550. bth0 |= IB_BTH_SOLICITED;
  551. }
  552. bth2 |= IB_BTH_REQ_ACK;
  553. if (++qp->s_cur == qp->s_size)
  554. qp->s_cur = 0;
  555. break;
  556. case IB_WR_RDMA_READ:
  557. /*
  558. * Don't allow more operations to be started
  559. * than the QP limits allow.
  560. */
  561. if (newreq) {
  562. if (qp->s_num_rd_atomic >=
  563. qp->s_max_rd_atomic) {
  564. qp->s_flags |= RVT_S_WAIT_RDMAR;
  565. goto bail;
  566. }
  567. qp->s_num_rd_atomic++;
  568. if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
  569. qp->s_lsn++;
  570. }
  571. ohdr->u.rc.reth.vaddr =
  572. cpu_to_be64(wqe->rdma_wr.remote_addr);
  573. ohdr->u.rc.reth.rkey =
  574. cpu_to_be32(wqe->rdma_wr.rkey);
  575. ohdr->u.rc.reth.length = cpu_to_be32(len);
  576. qp->s_state = OP(RDMA_READ_REQUEST);
  577. hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
  578. ss = NULL;
  579. len = 0;
  580. bth2 |= IB_BTH_REQ_ACK;
  581. if (++qp->s_cur == qp->s_size)
  582. qp->s_cur = 0;
  583. break;
  584. case IB_WR_ATOMIC_CMP_AND_SWP:
  585. case IB_WR_ATOMIC_FETCH_AND_ADD:
  586. /*
  587. * Don't allow more operations to be started
  588. * than the QP limits allow.
  589. */
  590. if (newreq) {
  591. if (qp->s_num_rd_atomic >=
  592. qp->s_max_rd_atomic) {
  593. qp->s_flags |= RVT_S_WAIT_RDMAR;
  594. goto bail;
  595. }
  596. qp->s_num_rd_atomic++;
  597. if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
  598. qp->s_lsn++;
  599. }
  600. if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
  601. qp->s_state = OP(COMPARE_SWAP);
  602. ohdr->u.atomic_eth.swap_data = cpu_to_be64(
  603. wqe->atomic_wr.swap);
  604. ohdr->u.atomic_eth.compare_data = cpu_to_be64(
  605. wqe->atomic_wr.compare_add);
  606. } else {
  607. qp->s_state = OP(FETCH_ADD);
  608. ohdr->u.atomic_eth.swap_data = cpu_to_be64(
  609. wqe->atomic_wr.compare_add);
  610. ohdr->u.atomic_eth.compare_data = 0;
  611. }
  612. ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
  613. wqe->atomic_wr.remote_addr >> 32);
  614. ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
  615. wqe->atomic_wr.remote_addr);
  616. ohdr->u.atomic_eth.rkey = cpu_to_be32(
  617. wqe->atomic_wr.rkey);
  618. hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
  619. ss = NULL;
  620. len = 0;
  621. bth2 |= IB_BTH_REQ_ACK;
  622. if (++qp->s_cur == qp->s_size)
  623. qp->s_cur = 0;
  624. break;
  625. default:
  626. goto bail;
  627. }
  628. qp->s_sge.sge = wqe->sg_list[0];
  629. qp->s_sge.sg_list = wqe->sg_list + 1;
  630. qp->s_sge.num_sge = wqe->wr.num_sge;
  631. qp->s_sge.total_len = wqe->length;
  632. qp->s_len = wqe->length;
  633. if (newreq) {
  634. qp->s_tail++;
  635. if (qp->s_tail >= qp->s_size)
  636. qp->s_tail = 0;
  637. }
  638. if (wqe->wr.opcode == IB_WR_RDMA_READ)
  639. qp->s_psn = wqe->lpsn + 1;
  640. else
  641. qp->s_psn++;
  642. break;
  643. case OP(RDMA_READ_RESPONSE_FIRST):
  644. /*
  645. * qp->s_state is normally set to the opcode of the
  646. * last packet constructed for new requests and therefore
  647. * is never set to RDMA read response.
  648. * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
  649. * thread to indicate a SEND needs to be restarted from an
  650. * earlier PSN without interfering with the sending thread.
  651. * See restart_rc().
  652. */
  653. qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
  654. /* FALLTHROUGH */
  655. case OP(SEND_FIRST):
  656. qp->s_state = OP(SEND_MIDDLE);
  657. /* FALLTHROUGH */
  658. case OP(SEND_MIDDLE):
  659. bth2 = mask_psn(qp->s_psn++);
  660. ss = &qp->s_sge;
  661. len = qp->s_len;
  662. if (len > pmtu) {
  663. len = pmtu;
  664. middle = HFI1_CAP_IS_KSET(SDMA_AHG);
  665. break;
  666. }
  667. if (wqe->wr.opcode == IB_WR_SEND) {
  668. qp->s_state = OP(SEND_LAST);
  669. } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
  670. qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
  671. /* Immediate data comes after the BTH */
  672. ohdr->u.imm_data = wqe->wr.ex.imm_data;
  673. hwords += 1;
  674. } else {
  675. qp->s_state = OP(SEND_LAST_WITH_INVALIDATE);
  676. /* invalidate data comes after the BTH */
  677. ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey);
  678. hwords += 1;
  679. }
  680. if (wqe->wr.send_flags & IB_SEND_SOLICITED)
  681. bth0 |= IB_BTH_SOLICITED;
  682. bth2 |= IB_BTH_REQ_ACK;
  683. qp->s_cur++;
  684. if (qp->s_cur >= qp->s_size)
  685. qp->s_cur = 0;
  686. break;
  687. case OP(RDMA_READ_RESPONSE_LAST):
  688. /*
  689. * qp->s_state is normally set to the opcode of the
  690. * last packet constructed for new requests and therefore
  691. * is never set to RDMA read response.
  692. * RDMA_READ_RESPONSE_LAST is used by the ACK processing
  693. * thread to indicate a RDMA write needs to be restarted from
  694. * an earlier PSN without interfering with the sending thread.
  695. * See restart_rc().
  696. */
  697. qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
  698. /* FALLTHROUGH */
  699. case OP(RDMA_WRITE_FIRST):
  700. qp->s_state = OP(RDMA_WRITE_MIDDLE);
  701. /* FALLTHROUGH */
  702. case OP(RDMA_WRITE_MIDDLE):
  703. bth2 = mask_psn(qp->s_psn++);
  704. ss = &qp->s_sge;
  705. len = qp->s_len;
  706. if (len > pmtu) {
  707. len = pmtu;
  708. middle = HFI1_CAP_IS_KSET(SDMA_AHG);
  709. break;
  710. }
  711. if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
  712. qp->s_state = OP(RDMA_WRITE_LAST);
  713. } else {
  714. qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
  715. /* Immediate data comes after the BTH */
  716. ohdr->u.imm_data = wqe->wr.ex.imm_data;
  717. hwords += 1;
  718. if (wqe->wr.send_flags & IB_SEND_SOLICITED)
  719. bth0 |= IB_BTH_SOLICITED;
  720. }
  721. bth2 |= IB_BTH_REQ_ACK;
  722. qp->s_cur++;
  723. if (qp->s_cur >= qp->s_size)
  724. qp->s_cur = 0;
  725. break;
  726. case OP(RDMA_READ_RESPONSE_MIDDLE):
  727. /*
  728. * qp->s_state is normally set to the opcode of the
  729. * last packet constructed for new requests and therefore
  730. * is never set to RDMA read response.
  731. * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
  732. * thread to indicate a RDMA read needs to be restarted from
  733. * an earlier PSN without interfering with the sending thread.
  734. * See restart_rc().
  735. */
  736. len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
  737. ohdr->u.rc.reth.vaddr =
  738. cpu_to_be64(wqe->rdma_wr.remote_addr + len);
  739. ohdr->u.rc.reth.rkey =
  740. cpu_to_be32(wqe->rdma_wr.rkey);
  741. ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
  742. qp->s_state = OP(RDMA_READ_REQUEST);
  743. hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
  744. bth2 = mask_psn(qp->s_psn) | IB_BTH_REQ_ACK;
  745. qp->s_psn = wqe->lpsn + 1;
  746. ss = NULL;
  747. len = 0;
  748. qp->s_cur++;
  749. if (qp->s_cur == qp->s_size)
  750. qp->s_cur = 0;
  751. break;
  752. }
  753. qp->s_sending_hpsn = bth2;
  754. delta = delta_psn(bth2, wqe->psn);
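      /* for multi-packet requests, ask for an ACK every HFI1_PSN_CREDIT
       * packets rather than only on the last packet */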
  755. if (delta && delta % HFI1_PSN_CREDIT == 0)
  756. bth2 |= IB_BTH_REQ_ACK;
  757. if (qp->s_flags & RVT_S_SEND_ONE) {
  758. qp->s_flags &= ~RVT_S_SEND_ONE;
  759. qp->s_flags |= RVT_S_WAIT_ACK;
  760. bth2 |= IB_BTH_REQ_ACK;
  761. }
  762. qp->s_len -= len;
  763. qp->s_hdrwords = hwords;
  764. ps->s_txreq->sde = priv->s_sde;
  765. qp->s_cur_sge = ss;
  766. qp->s_cur_size = len;
  767. hfi1_make_ruc_header(
  768. qp,
  769. ohdr,
  770. bth0 | (qp->s_state << 24),
  771. bth2,
  772. middle,
  773. ps);
  774. /* pbc */
  775. ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
  776. return 1;
  777. done_free_tx:
  778. hfi1_put_txreq(ps->s_txreq);
  779. ps->s_txreq = NULL;
  780. return 1;
  781. bail:
  782. hfi1_put_txreq(ps->s_txreq);
  783. bail_no_tx:
  784. ps->s_txreq = NULL;
  785. qp->s_flags &= ~RVT_S_BUSY;
  786. qp->s_hdrwords = 0;
  787. return 0;
  788. }
  789. /**
  790. * hfi1_send_rc_ack - Construct an ACK packet and send it
  791. * @qp: a pointer to the QP
  792. *
  793. * This is called from hfi1_rc_rcv() and handle_receive_interrupt().
  794. * Note that RDMA reads and atomics are handled in the
  795. * send side QP state and tasklet.
  796. */
  797. void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
  798. int is_fecn)
  799. {
  800. struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
  801. struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
  802. u64 pbc, pbc_flags = 0;
  803. u16 lrh0;
  804. u16 sc5;
  805. u32 bth0;
  806. u32 hwords;
  807. u32 vl, plen;
  808. struct send_context *sc;
  809. struct pio_buf *pbuf;
  810. struct hfi1_ib_header hdr;
  811. struct hfi1_other_headers *ohdr;
  812. unsigned long flags;
  813. /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
  814. if (qp->s_flags & RVT_S_RESP_PENDING)
  815. goto queue_ack;
  816. /* Ensure s_rdma_ack_cnt changes are committed */
  817. smp_read_barrier_depends();
  818. if (qp->s_rdma_ack_cnt)
  819. goto queue_ack;
  820. /* Construct the header */
  821. /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
  822. hwords = 6;
  823. if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
  824. hwords += hfi1_make_grh(ibp, &hdr.u.l.grh,
  825. &qp->remote_ah_attr.grh, hwords, 0);
  826. ohdr = &hdr.u.l.oth;
  827. lrh0 = HFI1_LRH_GRH;
  828. } else {
  829. ohdr = &hdr.u.oth;
  830. lrh0 = HFI1_LRH_BTH;
  831. }
  832. /* read pkey_index w/o lock (it's atomic) */
  833. bth0 = hfi1_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
  834. if (qp->s_mig_state == IB_MIG_MIGRATED)
  835. bth0 |= IB_BTH_MIG_REQ;
  836. if (qp->r_nak_state)
  837. ohdr->u.aeth = cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
  838. (qp->r_nak_state <<
  839. HFI1_AETH_CREDIT_SHIFT));
  840. else
  841. ohdr->u.aeth = hfi1_compute_aeth(qp);
  842. sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
  843. /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
  844. pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
  845. lrh0 |= (sc5 & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
  846. hdr.lrh[0] = cpu_to_be16(lrh0);
  847. hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
  848. hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
  849. hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
  850. ohdr->bth[0] = cpu_to_be32(bth0);
  851. ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
  852. ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT);
  853. ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
  854. /* Don't try to send ACKs if the link isn't ACTIVE */
  855. if (driver_lstate(ppd) != IB_PORT_ACTIVE)
  856. return;
  857. sc = rcd->sc;
  858. plen = 2 /* PBC */ + hwords;
  859. vl = sc_to_vlt(ppd->dd, sc5);
  860. pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
  861. pbuf = sc_buffer_alloc(sc, plen, NULL, NULL);
  862. if (!pbuf) {
  863. /*
  864. * We have no room to send at the moment. Pass
  865. * responsibility for sending the ACK to the send tasklet
  866. * so that when enough buffer space becomes available,
  867. * the ACK is sent ahead of other outgoing packets.
  868. */
  869. goto queue_ack;
  870. }
  871. trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
  872. /* write the pbc and data */
  873. ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
  874. return;
  875. queue_ack:
  876. this_cpu_inc(*ibp->rvp.rc_qacks);
  877. spin_lock_irqsave(&qp->s_lock, flags);
  878. qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
  879. qp->s_nak_state = qp->r_nak_state;
  880. qp->s_ack_psn = qp->r_ack_psn;
  881. if (is_fecn)
  882. qp->s_flags |= RVT_S_ECN;
  883. /* Schedule the send tasklet. */
  884. hfi1_schedule_send(qp);
  885. spin_unlock_irqrestore(&qp->s_lock, flags);
  886. }
  887. /**
  888. * reset_psn - reset the QP state to send starting from PSN
  889. * @qp: the QP
  890. * @psn: the packet sequence number to restart at
  891. *
  892. * This is called from hfi1_rc_rcv() to process an incoming RC ACK
  893. * for the given QP.
  894. * Called at interrupt level with the QP s_lock held.
  895. */
  896. static void reset_psn(struct rvt_qp *qp, u32 psn)
  897. {
  898. u32 n = qp->s_acked;
  899. struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
  900. u32 opcode;
  901. qp->s_cur = n;
  902. /*
  903. * If we are starting the request from the beginning,
  904. * let the normal send code handle initialization.
  905. */
  906. if (cmp_psn(psn, wqe->psn) <= 0) {
  907. qp->s_state = OP(SEND_LAST);
  908. goto done;
  909. }
  910. /* Find the work request opcode corresponding to the given PSN. */
  911. opcode = wqe->wr.opcode;
  912. for (;;) {
  913. int diff;
  914. if (++n == qp->s_size)
  915. n = 0;
  916. if (n == qp->s_tail)
  917. break;
  918. wqe = rvt_get_swqe_ptr(qp, n);
  919. diff = cmp_psn(psn, wqe->psn);
  920. if (diff < 0)
  921. break;
  922. qp->s_cur = n;
  923. /*
  924. * If we are starting the request from the beginning,
  925. * let the normal send code handle initialization.
  926. */
  927. if (diff == 0) {
  928. qp->s_state = OP(SEND_LAST);
  929. goto done;
  930. }
  931. opcode = wqe->wr.opcode;
  932. }
  933. /*
  934. * Set the state to restart in the middle of a request.
  935. * Don't change the s_sge, s_cur_sge, or s_cur_size.
  936. * See hfi1_make_rc_req().
  937. */
  938. switch (opcode) {
  939. case IB_WR_SEND:
  940. case IB_WR_SEND_WITH_IMM:
  941. qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
  942. break;
  943. case IB_WR_RDMA_WRITE:
  944. case IB_WR_RDMA_WRITE_WITH_IMM:
  945. qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
  946. break;
  947. case IB_WR_RDMA_READ:
  948. qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
  949. break;
  950. default:
  951. /*
  952. * This case shouldn't happen since there is only
  953. * one PSN per req.
  954. */
  955. qp->s_state = OP(SEND_LAST);
  956. }
  957. done:
  958. qp->s_psn = psn;
  959. /*
  960. * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
  961. * asynchronously before the send tasklet can get scheduled.
  962. * Doing it in hfi1_make_rc_req() is too late.
  963. */
  964. if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
  965. (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
  966. qp->s_flags |= RVT_S_WAIT_PSN;
  967. qp->s_flags &= ~RVT_S_AHG_VALID;
  968. }
  969. /*
  970. * Back up requester to resend the last un-ACKed request.
  971. * The QP r_lock and s_lock should be held and interrupts disabled.
  972. */
  973. static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
  974. {
  975. struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  976. struct hfi1_ibport *ibp;
  977. if (qp->s_retry == 0) {
  978. if (qp->s_mig_state == IB_MIG_ARMED) {
  979. hfi1_migrate_qp(qp);
  980. qp->s_retry = qp->s_retry_cnt;
  981. } else if (qp->s_last == qp->s_acked) {
  982. hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
  983. rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
  984. return;
  985. } else { /* need to handle delayed completion */
  986. return;
  987. }
  988. } else {
  989. qp->s_retry--;
  990. }
  991. ibp = to_iport(qp->ibqp.device, qp->port_num);
  992. if (wqe->wr.opcode == IB_WR_RDMA_READ)
  993. ibp->rvp.n_rc_resends++;
  994. else
  995. ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
  996. qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
  997. RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
  998. RVT_S_WAIT_ACK);
  999. if (wait)
  1000. qp->s_flags |= RVT_S_SEND_ONE;
  1001. reset_psn(qp, psn);
  1002. }
  1003. /*
  1004. * This is called from s_timer for missing responses.
  1005. */
  1006. void hfi1_rc_timeout(unsigned long arg)
  1007. {
  1008. struct rvt_qp *qp = (struct rvt_qp *)arg;
  1009. struct hfi1_ibport *ibp;
  1010. unsigned long flags;
  1011. spin_lock_irqsave(&qp->r_lock, flags);
  1012. spin_lock(&qp->s_lock);
  1013. if (qp->s_flags & RVT_S_TIMER) {
  1014. ibp = to_iport(qp->ibqp.device, qp->port_num);
  1015. ibp->rvp.n_rc_timeouts++;
  1016. qp->s_flags &= ~RVT_S_TIMER;
  1017. del_timer(&qp->s_timer);
  1018. trace_hfi1_timeout(qp, qp->s_last_psn + 1);
  1019. restart_rc(qp, qp->s_last_psn + 1, 1);
  1020. hfi1_schedule_send(qp);
  1021. }
  1022. spin_unlock(&qp->s_lock);
  1023. spin_unlock_irqrestore(&qp->r_lock, flags);
  1024. }
  1025. /*
  1026. * This is called from s_timer for RNR timeouts.
  1027. */
  1028. void hfi1_rc_rnr_retry(unsigned long arg)
  1029. {
  1030. struct rvt_qp *qp = (struct rvt_qp *)arg;
  1031. unsigned long flags;
  1032. spin_lock_irqsave(&qp->s_lock, flags);
  1033. hfi1_stop_rnr_timer(qp);
  1034. hfi1_schedule_send(qp);
  1035. spin_unlock_irqrestore(&qp->s_lock, flags);
  1036. }
  1037. /*
  1038. * Set qp->s_sending_psn to the next PSN after the given one.
  1039. * This would be psn+1 except when RDMA reads are present.
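   * (an RDMA read occupies one PSN per expected response packet, so the
   * next PSN to send is the read's lpsn + 1)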
  1040. */
  1041. static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
  1042. {
  1043. struct rvt_swqe *wqe;
  1044. u32 n = qp->s_last;
  1045. /* Find the work request corresponding to the given PSN. */
  1046. for (;;) {
  1047. wqe = rvt_get_swqe_ptr(qp, n);
  1048. if (cmp_psn(psn, wqe->lpsn) <= 0) {
  1049. if (wqe->wr.opcode == IB_WR_RDMA_READ)
  1050. qp->s_sending_psn = wqe->lpsn + 1;
  1051. else
  1052. qp->s_sending_psn = psn + 1;
  1053. break;
  1054. }
  1055. if (++n == qp->s_size)
  1056. n = 0;
  1057. if (n == qp->s_tail)
  1058. break;
  1059. }
  1060. }
  1061. /*
  1062. * This should be called with the QP s_lock held and interrupts disabled.
  1063. */
  1064. void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
  1065. {
  1066. struct hfi1_other_headers *ohdr;
  1067. struct rvt_swqe *wqe;
  1068. struct ib_wc wc;
  1069. unsigned i;
  1070. u32 opcode;
  1071. u32 psn;
  1072. if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
  1073. return;
  1074. /* Find out where the BTH is */
  1075. if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
  1076. ohdr = &hdr->u.oth;
  1077. else
  1078. ohdr = &hdr->u.l.oth;
  1079. opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
  1080. if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
  1081. opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
  1082. WARN_ON(!qp->s_rdma_ack_cnt);
  1083. qp->s_rdma_ack_cnt--;
  1084. return;
  1085. }
  1086. psn = be32_to_cpu(ohdr->bth[2]);
  1087. reset_sending_psn(qp, psn);
  1088. /*
  1089. * Start timer after a packet requesting an ACK has been sent and
  1090. * there are still requests that haven't been acked.
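   * (psn still holds the raw bth[2] word here, so the REQ_ACK bit can be
   * tested directly)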
  1091. */
  1092. if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
  1093. !(qp->s_flags &
  1094. (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
  1095. (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
  1096. hfi1_add_retry_timer(qp);
  1097. while (qp->s_last != qp->s_acked) {
  1098. u32 s_last;
  1099. wqe = rvt_get_swqe_ptr(qp, qp->s_last);
  1100. if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
  1101. cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
  1102. break;
  1103. s_last = qp->s_last;
  1104. if (++s_last >= qp->s_size)
  1105. s_last = 0;
  1106. qp->s_last = s_last;
  1107. /* see post_send() */
  1108. barrier();
  1109. for (i = 0; i < wqe->wr.num_sge; i++) {
  1110. struct rvt_sge *sge = &wqe->sg_list[i];
  1111. rvt_put_mr(sge->mr);
  1112. }
  1113. /* Post a send completion queue entry if requested. */
  1114. if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
  1115. (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
  1116. memset(&wc, 0, sizeof(wc));
  1117. wc.wr_id = wqe->wr.wr_id;
  1118. wc.status = IB_WC_SUCCESS;
  1119. wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
  1120. wc.byte_len = wqe->length;
  1121. wc.qp = &qp->ibqp;
  1122. rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
  1123. }
  1124. }
  1125. /*
  1126. * If we were waiting for sends to complete before re-sending,
  1127. * and they are now complete, restart sending.
  1128. */
  1129. trace_hfi1_sendcomplete(qp, psn);
  1130. if (qp->s_flags & RVT_S_WAIT_PSN &&
  1131. cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
  1132. qp->s_flags &= ~RVT_S_WAIT_PSN;
  1133. qp->s_sending_psn = qp->s_psn;
  1134. qp->s_sending_hpsn = qp->s_psn - 1;
  1135. hfi1_schedule_send(qp);
  1136. }
  1137. }
  1138. static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
  1139. {
  1140. qp->s_last_psn = psn;
  1141. }
  1142. /*
  1143. * Generate a SWQE completion.
  1144. * This is similar to hfi1_send_complete but has to check to be sure
  1145. * that the SGEs are not being referenced if the SWQE is being resent.
  1146. */
  1147. static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
  1148. struct rvt_swqe *wqe,
  1149. struct hfi1_ibport *ibp)
  1150. {
  1151. struct ib_wc wc;
  1152. unsigned i;
  1153. /*
  1154. * Don't decrement refcount and don't generate a
  1155. * completion if the SWQE is being resent until the send
  1156. * is finished.
  1157. */
  1158. if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
  1159. cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
  1160. u32 s_last;
  1161. for (i = 0; i < wqe->wr.num_sge; i++) {
  1162. struct rvt_sge *sge = &wqe->sg_list[i];
  1163. rvt_put_mr(sge->mr);
  1164. }
  1165. s_last = qp->s_last;
  1166. if (++s_last >= qp->s_size)
  1167. s_last = 0;
  1168. qp->s_last = s_last;
  1169. /* see post_send() */
  1170. barrier();
  1171. /* Post a send completion queue entry if requested. */
  1172. if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
  1173. (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
  1174. memset(&wc, 0, sizeof(wc));
  1175. wc.wr_id = wqe->wr.wr_id;
  1176. wc.status = IB_WC_SUCCESS;
  1177. wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
  1178. wc.byte_len = wqe->length;
  1179. wc.qp = &qp->ibqp;
  1180. rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
  1181. }
  1182. } else {
  1183. struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
  1184. this_cpu_inc(*ibp->rvp.rc_delayed_comp);
  1185. /*
  1186. * If send progress is not running, attempt to progress
  1187. * the SDMA queue.
  1188. */
  1189. if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
  1190. struct sdma_engine *engine;
  1191. u8 sc5;
  1192. /* For now use sc to find engine */
  1193. sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
  1194. engine = qp_to_sdma_engine(qp, sc5);
  1195. sdma_engine_progress_schedule(engine);
  1196. }
  1197. }
  1198. qp->s_retry = qp->s_retry_cnt;
  1199. update_last_psn(qp, wqe->lpsn);
  1200. /*
  1201. * If we are completing a request which is in the process of
  1202. * being resent, we can stop re-sending it since we know the
  1203. * responder has already seen it.
  1204. */
  1205. if (qp->s_acked == qp->s_cur) {
  1206. if (++qp->s_cur >= qp->s_size)
  1207. qp->s_cur = 0;
  1208. qp->s_acked = qp->s_cur;
  1209. wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
  1210. if (qp->s_acked != qp->s_tail) {
  1211. qp->s_state = OP(SEND_LAST);
  1212. qp->s_psn = wqe->psn;
  1213. }
  1214. } else {
  1215. if (++qp->s_acked >= qp->s_size)
  1216. qp->s_acked = 0;
  1217. if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
  1218. qp->s_draining = 0;
  1219. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1220. }
  1221. return wqe;
  1222. }
  1223. /**
  1224. * do_rc_ack - process an incoming RC ACK
  1225. * @qp: the QP the ACK came in on
  1226. * @psn: the packet sequence number of the ACK
  1227. * @opcode: the opcode of the request that resulted in the ACK
  1228. *
  1229. * This is called from rc_rcv_resp() to process an incoming RC ACK
  1230. * for the given QP.
  1231. * May be called at interrupt level, with the QP s_lock held.
  1232. * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  1233. */
  1234. static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
  1235. u64 val, struct hfi1_ctxtdata *rcd)
  1236. {
  1237. struct hfi1_ibport *ibp;
  1238. enum ib_wc_status status;
  1239. struct rvt_swqe *wqe;
  1240. int ret = 0;
  1241. u32 ack_psn;
  1242. int diff;
  1243. unsigned long to;
  1244. /*
  1245. * Note that NAKs implicitly ACK outstanding SEND and RDMA write
  1246. * requests and implicitly NAK RDMA read and atomic requests issued
  1247. * before the NAK'ed request. The MSN won't include the NAK'ed
  1248. * request but will include any ACK'ed requests.
  1249. */
  1250. ack_psn = psn;
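      /* aeth >> 29 is the 3-bit ACK type: 0 = ACK, 1 = RNR NAK, 3 = NAK;
       * see the switch further down */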
  1251. if (aeth >> 29)
  1252. ack_psn--;
  1253. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1254. ibp = to_iport(qp->ibqp.device, qp->port_num);
  1255. /*
  1256. * The MSN might be for a later WQE than the PSN indicates so
  1257. * only complete WQEs that the PSN finishes.
  1258. */
  1259. while ((diff = delta_psn(ack_psn, wqe->lpsn)) >= 0) {
  1260. /*
  1261. * RDMA_READ_RESPONSE_ONLY is a special case since
  1262. * we want to generate completion events for everything
  1263. * before the RDMA read, copy the data, then generate
  1264. * the completion for the read.
  1265. */
  1266. if (wqe->wr.opcode == IB_WR_RDMA_READ &&
  1267. opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
  1268. diff == 0) {
  1269. ret = 1;
  1270. goto bail_stop;
  1271. }
  1272. /*
  1273. * If this request is a RDMA read or atomic, and the ACK is
  1274. * for a later operation, this ACK NAKs the RDMA read or
  1275. * atomic. In other words, only a RDMA_READ_LAST or ONLY
  1276. * can ACK a RDMA read and likewise for atomic ops. Note
  1277. * that the NAK case can only happen if relaxed ordering is
  1278. * used and requests are sent after an RDMA read or atomic
  1279. * is sent but before the response is received.
  1280. */
  1281. if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
  1282. (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
  1283. ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  1284. wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
  1285. (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
  1286. /* Retry this request. */
  1287. if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
  1288. qp->r_flags |= RVT_R_RDMAR_SEQ;
  1289. restart_rc(qp, qp->s_last_psn + 1, 0);
  1290. if (list_empty(&qp->rspwait)) {
  1291. qp->r_flags |= RVT_R_RSP_SEND;
  1292. atomic_inc(&qp->refcount);
  1293. list_add_tail(&qp->rspwait,
  1294. &rcd->qp_wait_list);
  1295. }
  1296. }
  1297. /*
  1298. * No need to process the ACK/NAK since we are
  1299. * restarting an earlier request.
  1300. */
  1301. goto bail_stop;
  1302. }
  1303. if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  1304. wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
  1305. u64 *vaddr = wqe->sg_list[0].vaddr;
  1306. *vaddr = val;
  1307. }
  1308. if (qp->s_num_rd_atomic &&
  1309. (wqe->wr.opcode == IB_WR_RDMA_READ ||
  1310. wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  1311. wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
  1312. qp->s_num_rd_atomic--;
  1313. /* Restart sending task if fence is complete */
  1314. if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
  1315. !qp->s_num_rd_atomic) {
  1316. qp->s_flags &= ~(RVT_S_WAIT_FENCE |
  1317. RVT_S_WAIT_ACK);
  1318. hfi1_schedule_send(qp);
  1319. } else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
  1320. qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
  1321. RVT_S_WAIT_ACK);
  1322. hfi1_schedule_send(qp);
  1323. }
  1324. }
  1325. wqe = do_rc_completion(qp, wqe, ibp);
  1326. if (qp->s_acked == qp->s_tail)
  1327. break;
  1328. }
  1329. switch (aeth >> 29) {
  1330. case 0: /* ACK */
  1331. this_cpu_inc(*ibp->rvp.rc_acks);
  1332. if (qp->s_acked != qp->s_tail) {
  1333. /*
  1334. * We are expecting more ACKs so
  1335. * mod the retry timer.
  1336. */
  1337. hfi1_mod_retry_timer(qp);
  1338. /*
  1339. * We can stop re-sending the earlier packets and
  1340. * continue with the next packet the receiver wants.
  1341. */
  1342. if (cmp_psn(qp->s_psn, psn) <= 0)
  1343. reset_psn(qp, psn + 1);
  1344. } else {
  1345. /* No more acks - kill all timers */
  1346. hfi1_stop_rc_timers(qp);
  1347. if (cmp_psn(qp->s_psn, psn) <= 0) {
  1348. qp->s_state = OP(SEND_LAST);
  1349. qp->s_psn = psn + 1;
  1350. }
  1351. }
  1352. if (qp->s_flags & RVT_S_WAIT_ACK) {
  1353. qp->s_flags &= ~RVT_S_WAIT_ACK;
  1354. hfi1_schedule_send(qp);
  1355. }
  1356. hfi1_get_credit(qp, aeth);
  1357. qp->s_rnr_retry = qp->s_rnr_retry_cnt;
  1358. qp->s_retry = qp->s_retry_cnt;
  1359. update_last_psn(qp, psn);
  1360. return 1;
  1361. case 1: /* RNR NAK */
  1362. ibp->rvp.n_rnr_naks++;
  1363. if (qp->s_acked == qp->s_tail)
  1364. goto bail_stop;
  1365. if (qp->s_flags & RVT_S_WAIT_RNR)
  1366. goto bail_stop;
  1367. if (qp->s_rnr_retry == 0) {
  1368. status = IB_WC_RNR_RETRY_EXC_ERR;
  1369. goto class_b;
  1370. }
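      /* an s_rnr_retry_cnt of 7 means retry indefinitely, so the counter is
       * not decremented in that case */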
  1371. if (qp->s_rnr_retry_cnt < 7)
  1372. qp->s_rnr_retry--;
  1373. /* The last valid PSN is the previous PSN. */
  1374. update_last_psn(qp, psn - 1);
  1375. ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
  1376. reset_psn(qp, psn);
  1377. qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
  1378. hfi1_stop_rc_timers(qp);
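/*
* The AETH credit field of an RNR NAK encodes the RNR timer
* value; look up the corresponding delay and rearm the RNR timer.
*/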
  1379. to =
  1380. ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) &
  1381. HFI1_AETH_CREDIT_MASK];
  1382. hfi1_add_rnr_timer(qp, to);
  1383. return 0;
  1384. case 3: /* NAK */
  1385. if (qp->s_acked == qp->s_tail)
  1386. goto bail_stop;
  1387. /* The last valid PSN is the previous PSN. */
  1388. update_last_psn(qp, psn - 1);
  1389. switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) &
  1390. HFI1_AETH_CREDIT_MASK) {
  1391. case 0: /* PSN sequence error */
  1392. ibp->rvp.n_seq_naks++;
  1393. /*
  1394. * Back up to the responder's expected PSN.
  1395. * Note that we might get a NAK in the middle of an
  1396. * RDMA READ response which terminates the RDMA
  1397. * READ.
  1398. */
  1399. restart_rc(qp, psn, 0);
  1400. hfi1_schedule_send(qp);
  1401. break;
  1402. case 1: /* Invalid Request */
  1403. status = IB_WC_REM_INV_REQ_ERR;
  1404. ibp->rvp.n_other_naks++;
  1405. goto class_b;
  1406. case 2: /* Remote Access Error */
  1407. status = IB_WC_REM_ACCESS_ERR;
  1408. ibp->rvp.n_other_naks++;
  1409. goto class_b;
  1410. case 3: /* Remote Operation Error */
  1411. status = IB_WC_REM_OP_ERR;
  1412. ibp->rvp.n_other_naks++;
  1413. class_b:
  1414. if (qp->s_last == qp->s_acked) {
  1415. hfi1_send_complete(qp, wqe, status);
  1416. rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
  1417. }
  1418. break;
  1419. default:
  1420. /* Ignore other reserved NAK error codes */
  1421. goto reserved;
  1422. }
  1423. qp->s_retry = qp->s_retry_cnt;
  1424. qp->s_rnr_retry = qp->s_rnr_retry_cnt;
  1425. goto bail_stop;
  1426. default: /* 2: reserved */
  1427. reserved:
  1428. /* Ignore reserved NAK codes. */
  1429. goto bail_stop;
  1430. }
  1431. /* cannot be reached */
  1432. bail_stop:
  1433. hfi1_stop_rc_timers(qp);
  1434. return ret;
  1435. }
  1436. /*
  1437. * We have seen an out of sequence RDMA read middle or last packet.
  1438. * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
  1439. */
  1440. static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
  1441. struct hfi1_ctxtdata *rcd)
  1442. {
  1443. struct rvt_swqe *wqe;
  1444. /* Remove QP from retry timer */
  1445. hfi1_stop_rc_timers(qp);
  1446. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1447. while (cmp_psn(psn, wqe->lpsn) > 0) {
  1448. if (wqe->wr.opcode == IB_WR_RDMA_READ ||
  1449. wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  1450. wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
  1451. break;
  1452. wqe = do_rc_completion(qp, wqe, ibp);
  1453. }
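/*
* Flag the sequence error so rc_rcv_resp() ignores responses until
* the restarted request's expected PSN arrives, then queue the QP
* so the retransmission gets scheduled.
*/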
  1454. ibp->rvp.n_rdma_seq++;
  1455. qp->r_flags |= RVT_R_RDMAR_SEQ;
  1456. restart_rc(qp, qp->s_last_psn + 1, 0);
  1457. if (list_empty(&qp->rspwait)) {
  1458. qp->r_flags |= RVT_R_RSP_SEND;
  1459. atomic_inc(&qp->refcount);
  1460. list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
  1461. }
  1462. }
  1463. /**
  1464. * rc_rcv_resp - process an incoming RC response packet
  1465. * @ibp: the port this packet came in on
  1466. * @ohdr: the other headers for this packet
  1467. * @data: the packet data
  1468. * @tlen: the packet length
  1469. * @qp: the QP for this packet
  1470. * @opcode: the opcode for this packet
  1471. * @psn: the packet sequence number for this packet
  1472. * @hdrsize: the header length
* @pmtu: the path MTU
* @rcd: the context pointer
*
  1475. * This is called from hfi1_rc_rcv() to process an incoming RC response
  1476. * packet for the given QP.
  1477. * Called at interrupt level.
  1478. */
  1479. static void rc_rcv_resp(struct hfi1_ibport *ibp,
  1480. struct hfi1_other_headers *ohdr,
  1481. void *data, u32 tlen, struct rvt_qp *qp,
  1482. u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
  1483. struct hfi1_ctxtdata *rcd)
  1484. {
  1485. struct rvt_swqe *wqe;
  1486. enum ib_wc_status status;
  1487. unsigned long flags;
  1488. int diff;
  1489. u32 pad;
  1490. u32 aeth;
  1491. u64 val;
  1492. spin_lock_irqsave(&qp->s_lock, flags);
  1493. trace_hfi1_ack(qp, psn);
  1494. /* Ignore invalid responses. */
  1495. smp_read_barrier_depends(); /* see post_one_send */
  1496. if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
  1497. goto ack_done;
  1498. /* Ignore duplicate responses. */
  1499. diff = cmp_psn(psn, qp->s_last_psn);
  1500. if (unlikely(diff <= 0)) {
  1501. /* Update credits for "ghost" ACKs */
  1502. if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
  1503. aeth = be32_to_cpu(ohdr->u.aeth);
  1504. if ((aeth >> 29) == 0)
  1505. hfi1_get_credit(qp, aeth);
  1506. }
  1507. goto ack_done;
  1508. }
  1509. /*
  1510. * Skip everything other than the PSN we expect, if we are waiting
  1511. * for a reply to a restarted RDMA read or atomic op.
  1512. */
  1513. if (qp->r_flags & RVT_R_RDMAR_SEQ) {
  1514. if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
  1515. goto ack_done;
  1516. qp->r_flags &= ~RVT_R_RDMAR_SEQ;
  1517. }
  1518. if (unlikely(qp->s_acked == qp->s_tail))
  1519. goto ack_done;
  1520. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1521. status = IB_WC_SUCCESS;
  1522. switch (opcode) {
  1523. case OP(ACKNOWLEDGE):
  1524. case OP(ATOMIC_ACKNOWLEDGE):
  1525. case OP(RDMA_READ_RESPONSE_FIRST):
  1526. aeth = be32_to_cpu(ohdr->u.aeth);
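/*
* An atomic ACK carries the original 64-bit value split across two
* big-endian words in the ATOMIC ACK ETH; reassemble it so
* do_rc_ack() can return it to the requester's buffer.
*/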
  1527. if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
  1528. __be32 *p = ohdr->u.at.atomic_ack_eth;
  1529. val = ((u64)be32_to_cpu(p[0]) << 32) |
  1530. be32_to_cpu(p[1]);
  1531. } else {
  1532. val = 0;
  1533. }
  1534. if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
  1535. opcode != OP(RDMA_READ_RESPONSE_FIRST))
  1536. goto ack_done;
  1537. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1538. if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
  1539. goto ack_op_err;
  1540. /*
  1541. * If this is a response to a resent RDMA read, we
  1542. * have to be careful to copy the data to the right
  1543. * location.
  1544. */
  1545. qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
  1546. wqe, psn, pmtu);
  1547. goto read_middle;
  1548. case OP(RDMA_READ_RESPONSE_MIDDLE):
  1549. /* no AETH, no ACK */
  1550. if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
  1551. goto ack_seq_err;
  1552. if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
  1553. goto ack_op_err;
  1554. read_middle:
  1555. if (unlikely(tlen != (hdrsize + pmtu + 4)))
  1556. goto ack_len_err;
  1557. if (unlikely(pmtu >= qp->s_rdma_read_len))
  1558. goto ack_len_err;
  1559. /*
  1560. * We got a response so update the timeout.
  1561. * 4.096 usec. * (1 << qp->timeout)
  1562. */
  1563. qp->s_flags |= RVT_S_TIMER;
  1564. mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
  1565. if (qp->s_flags & RVT_S_WAIT_ACK) {
  1566. qp->s_flags &= ~RVT_S_WAIT_ACK;
  1567. hfi1_schedule_send(qp);
  1568. }
  1569. if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
  1570. qp->s_retry = qp->s_retry_cnt;
  1571. /*
  1572. * Update the RDMA receive state but do the copy w/o
  1573. * holding the locks and blocking interrupts.
  1574. */
  1575. qp->s_rdma_read_len -= pmtu;
  1576. update_last_psn(qp, psn);
  1577. spin_unlock_irqrestore(&qp->s_lock, flags);
  1578. hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
  1579. goto bail;
  1580. case OP(RDMA_READ_RESPONSE_ONLY):
  1581. aeth = be32_to_cpu(ohdr->u.aeth);
  1582. if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
  1583. goto ack_done;
  1584. /* Get the number of bytes the message was padded by. */
  1585. pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
  1586. /*
  1587. * Check that the data size is >= 0 && <= pmtu.
  1588. * Remember to account for ICRC (4).
  1589. */
  1590. if (unlikely(tlen < (hdrsize + pad + 4)))
  1591. goto ack_len_err;
  1592. /*
  1593. * If this is a response to a resent RDMA read, we
  1594. * have to be careful to copy the data to the right
  1595. * location.
  1596. */
  1597. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1598. qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
  1599. wqe, psn, pmtu);
  1600. goto read_last;
  1601. case OP(RDMA_READ_RESPONSE_LAST):
  1602. /* ACKs READ req. */
  1603. if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
  1604. goto ack_seq_err;
  1605. if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
  1606. goto ack_op_err;
  1607. /* Get the number of bytes the message was padded by. */
  1608. pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
  1609. /*
  1610. * Check that the data size is >= 1 && <= pmtu.
  1611. * Remember to account for ICRC (4).
  1612. */
  1613. if (unlikely(tlen <= (hdrsize + pad + 4)))
  1614. goto ack_len_err;
  1615. read_last:
  1616. tlen -= hdrsize + pad + 4;
  1617. if (unlikely(tlen != qp->s_rdma_read_len))
  1618. goto ack_len_err;
  1619. aeth = be32_to_cpu(ohdr->u.aeth);
  1620. hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
  1621. WARN_ON(qp->s_rdma_read_sge.num_sge);
  1622. (void)do_rc_ack(qp, aeth, psn,
  1623. OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
  1624. goto ack_done;
  1625. }
  1626. ack_op_err:
  1627. status = IB_WC_LOC_QP_OP_ERR;
  1628. goto ack_err;
  1629. ack_seq_err:
  1630. rdma_seq_err(qp, ibp, psn, rcd);
  1631. goto ack_done;
  1632. ack_len_err:
  1633. status = IB_WC_LOC_LEN_ERR;
  1634. ack_err:
  1635. if (qp->s_last == qp->s_acked) {
  1636. hfi1_send_complete(qp, wqe, status);
  1637. rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
  1638. }
  1639. ack_done:
  1640. spin_unlock_irqrestore(&qp->s_lock, flags);
  1641. bail:
  1642. return;
  1643. }
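/*
* Queue the QP on the receive context's wait list so a deferred
* (N)AK is sent once the receive queue has been processed; hold a
* reference while the QP is on the list.
*/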
  1644. static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
  1645. struct rvt_qp *qp)
  1646. {
  1647. if (list_empty(&qp->rspwait)) {
  1648. qp->r_flags |= RVT_R_RSP_NAK;
  1649. atomic_inc(&qp->refcount);
  1650. list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
  1651. }
  1652. }
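/*
* Drop a previously deferred response: clear the deferred ACK count
* and, if the QP is still queued on the wait list, remove it and
* release the reference taken when it was queued.
*/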
  1653. static inline void rc_cancel_ack(struct rvt_qp *qp)
  1654. {
  1655. struct hfi1_qp_priv *priv = qp->priv;
  1656. priv->r_adefered = 0;
  1657. if (list_empty(&qp->rspwait))
  1658. return;
  1659. list_del_init(&qp->rspwait);
  1660. qp->r_flags &= ~RVT_R_RSP_NAK;
  1661. if (atomic_dec_and_test(&qp->refcount))
  1662. wake_up(&qp->wait);
  1663. }
  1664. /**
  1665. * rc_rcv_error - process an incoming duplicate or error RC packet
  1666. * @ohdr: the other headers for this packet
  1667. * @data: the packet data
  1668. * @qp: the QP for this packet
  1669. * @opcode: the opcode for this packet
  1670. * @psn: the packet sequence number for this packet
* @diff: the difference between the PSN and the expected PSN
* @rcd: the context pointer
*
  1673. * This is called from hfi1_rc_rcv() to process an unexpected
  1674. * incoming RC packet for the given QP.
  1675. * Called at interrupt level.
  1676. * Return 1 if no more processing is needed; otherwise return 0 to
  1677. * schedule a response to be sent.
  1678. */
  1679. static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
  1680. struct rvt_qp *qp, u32 opcode, u32 psn,
  1681. int diff, struct hfi1_ctxtdata *rcd)
  1682. {
  1683. struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
  1684. struct rvt_ack_entry *e;
  1685. unsigned long flags;
  1686. u8 i, prev;
  1687. int old_req;
  1688. trace_hfi1_rcv_error(qp, psn);
  1689. if (diff > 0) {
  1690. /*
  1691. * Packet sequence error.
  1692. * A NAK will ACK earlier sends and RDMA writes.
  1693. * Don't queue the NAK if we already sent one.
  1694. */
  1695. if (!qp->r_nak_state) {
  1696. ibp->rvp.n_rc_seqnak++;
  1697. qp->r_nak_state = IB_NAK_PSN_ERROR;
  1698. /* Use the expected PSN. */
  1699. qp->r_ack_psn = qp->r_psn;
  1700. /*
  1701. * Wait to send the sequence NAK until all packets
  1702. * in the receive queue have been processed.
  1703. * Otherwise, we end up propagating congestion.
  1704. */
  1705. rc_defered_ack(rcd, qp);
  1706. }
  1707. goto done;
  1708. }
  1709. /*
  1710. * Handle a duplicate request. Don't re-execute SEND, RDMA
  1711. * write or atomic op. Don't NAK errors, just silently drop
  1712. * the duplicate request. Note that r_sge, r_len, and
  1713. * r_rcv_len may be in use so don't modify them.
  1714. *
  1715. * We are supposed to ACK the earliest duplicate PSN but we
  1716. * can coalesce an outstanding duplicate ACK. We have to
  1717. * send the earliest so that RDMA reads can be restarted at
  1718. * the requester's expected PSN.
  1719. *
  1720. * First, find where this duplicate PSN falls within the
  1721. * ACKs previously sent.
  1722. * old_req is true if there is an older response that is scheduled
  1723. * to be sent before sending this one.
  1724. */
  1725. e = NULL;
  1726. old_req = 1;
  1727. ibp->rvp.n_rc_dupreq++;
  1728. spin_lock_irqsave(&qp->s_lock, flags);
  1729. for (i = qp->r_head_ack_queue; ; i = prev) {
  1730. if (i == qp->s_tail_ack_queue)
  1731. old_req = 0;
  1732. if (i)
  1733. prev = i - 1;
  1734. else
  1735. prev = HFI1_MAX_RDMA_ATOMIC;
  1736. if (prev == qp->r_head_ack_queue) {
  1737. e = NULL;
  1738. break;
  1739. }
  1740. e = &qp->s_ack_queue[prev];
  1741. if (!e->opcode) {
  1742. e = NULL;
  1743. break;
  1744. }
  1745. if (cmp_psn(psn, e->psn) >= 0) {
  1746. if (prev == qp->s_tail_ack_queue &&
  1747. cmp_psn(psn, e->lpsn) <= 0)
  1748. old_req = 0;
  1749. break;
  1750. }
  1751. }
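/*
* At this point "e" is the ack queue entry covering the duplicate
* PSN (or NULL if none was found) and "old_req" says whether an
* earlier response is already scheduled ahead of it.
*/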
  1752. switch (opcode) {
  1753. case OP(RDMA_READ_REQUEST): {
  1754. struct ib_reth *reth;
  1755. u32 offset;
  1756. u32 len;
  1757. /*
  1758. * If we didn't find the RDMA read request in the ack queue,
  1759. * we can ignore this request.
  1760. */
  1761. if (!e || e->opcode != OP(RDMA_READ_REQUEST))
  1762. goto unlock_done;
  1763. /* RETH comes after BTH */
  1764. reth = &ohdr->u.rc.reth;
  1765. /*
  1766. * Address range must be a subset of the original
  1767. * request and start on pmtu boundaries.
  1768. * We reuse the old ack_queue slot since the requester
  1769. * should not back up and request an earlier PSN for the
  1770. * same request.
  1771. */
  1772. offset = delta_psn(psn, e->psn) * qp->pmtu;
  1773. len = be32_to_cpu(reth->length);
  1774. if (unlikely(offset + len != e->rdma_sge.sge_length))
  1775. goto unlock_done;
  1776. if (e->rdma_sge.mr) {
  1777. rvt_put_mr(e->rdma_sge.mr);
  1778. e->rdma_sge.mr = NULL;
  1779. }
  1780. if (len != 0) {
  1781. u32 rkey = be32_to_cpu(reth->rkey);
  1782. u64 vaddr = be64_to_cpu(reth->vaddr);
  1783. int ok;
  1784. ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
  1785. IB_ACCESS_REMOTE_READ);
  1786. if (unlikely(!ok))
  1787. goto unlock_done;
  1788. } else {
  1789. e->rdma_sge.vaddr = NULL;
  1790. e->rdma_sge.length = 0;
  1791. e->rdma_sge.sge_length = 0;
  1792. }
  1793. e->psn = psn;
  1794. if (old_req)
  1795. goto unlock_done;
  1796. qp->s_tail_ack_queue = prev;
  1797. break;
  1798. }
  1799. case OP(COMPARE_SWAP):
  1800. case OP(FETCH_ADD): {
  1801. /*
  1802. * If we didn't find the atomic request in the ack queue
  1803. * or the send tasklet is already backed up to send an
  1804. * earlier entry, we can ignore this request.
  1805. */
  1806. if (!e || e->opcode != (u8)opcode || old_req)
  1807. goto unlock_done;
  1808. qp->s_tail_ack_queue = prev;
  1809. break;
  1810. }
  1811. default:
  1812. /*
  1813. * Ignore this operation if it doesn't request an ACK
  1814. * or an earlier RDMA read or atomic is going to be resent.
  1815. */
  1816. if (!(psn & IB_BTH_REQ_ACK) || old_req)
  1817. goto unlock_done;
  1818. /*
  1819. * Resend the most recent ACK if this request is
  1820. * after all the previous RDMA reads and atomics.
  1821. */
  1822. if (i == qp->r_head_ack_queue) {
  1823. spin_unlock_irqrestore(&qp->s_lock, flags);
  1824. qp->r_nak_state = 0;
  1825. qp->r_ack_psn = qp->r_psn - 1;
  1826. goto send_ack;
  1827. }
  1828. /*
  1829. * Resend the RDMA read or atomic op which
  1830. * ACKs this duplicate request.
  1831. */
  1832. qp->s_tail_ack_queue = i;
  1833. break;
  1834. }
  1835. qp->s_ack_state = OP(ACKNOWLEDGE);
  1836. qp->s_flags |= RVT_S_RESP_PENDING;
  1837. qp->r_nak_state = 0;
  1838. hfi1_schedule_send(qp);
  1839. unlock_done:
  1840. spin_unlock_irqrestore(&qp->s_lock, flags);
  1841. done:
  1842. return 1;
  1843. send_ack:
  1844. return 0;
  1845. }
  1846. void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
  1847. {
  1848. unsigned long flags;
  1849. int lastwqe;
  1850. spin_lock_irqsave(&qp->s_lock, flags);
  1851. lastwqe = rvt_error_qp(qp, err);
  1852. spin_unlock_irqrestore(&qp->s_lock, flags);
  1853. if (lastwqe) {
  1854. struct ib_event ev;
  1855. ev.device = qp->ibqp.device;
  1856. ev.element.qp = &qp->ibqp;
  1857. ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
  1858. qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
  1859. }
  1860. }
  1861. static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
  1862. {
  1863. unsigned next;
  1864. next = n + 1;
  1865. if (next > HFI1_MAX_RDMA_ATOMIC)
  1866. next = 0;
  1867. qp->s_tail_ack_queue = next;
  1868. qp->s_ack_state = OP(ACKNOWLEDGE);
  1869. }
  1870. static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
  1871. u32 lqpn, u32 rqpn, u8 svc_type)
  1872. {
  1873. struct opa_hfi1_cong_log_event_internal *cc_event;
  1874. unsigned long flags;
  1875. if (sl >= OPA_MAX_SLS)
  1876. return;
  1877. spin_lock_irqsave(&ppd->cc_log_lock, flags);
  1878. ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
  1879. ppd->threshold_event_counter++;
  1880. cc_event = &ppd->cc_events[ppd->cc_log_idx++];
  1881. if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
  1882. ppd->cc_log_idx = 0;
  1883. cc_event->lqpn = lqpn & RVT_QPN_MASK;
  1884. cc_event->rqpn = rqpn & RVT_QPN_MASK;
  1885. cc_event->sl = sl;
  1886. cc_event->svc_type = svc_type;
  1887. cc_event->rlid = rlid;
  1888. /* keep timestamp in units of 1.024 usec */
  1889. cc_event->timestamp = ktime_to_ns(ktime_get()) / 1024;
  1890. spin_unlock_irqrestore(&ppd->cc_log_lock, flags);
  1891. }
  1892. void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
  1893. u32 rqpn, u8 svc_type)
  1894. {
  1895. struct cca_timer *cca_timer;
  1896. u16 ccti, ccti_incr, ccti_timer, ccti_limit;
  1897. u8 trigger_threshold;
  1898. struct cc_state *cc_state;
  1899. unsigned long flags;
  1900. if (sl >= OPA_MAX_SLS)
  1901. return;
  1902. cc_state = get_cc_state(ppd);
  1903. if (!cc_state)
  1904. return;
  1905. /*
  1906. * 1) increase CCTI (for this SL)
  1907. * 2) select IPG (i.e., call set_link_ipg())
  1908. * 3) start timer
  1909. */
  1910. ccti_limit = cc_state->cct.ccti_limit;
  1911. ccti_incr = cc_state->cong_setting.entries[sl].ccti_increase;
  1912. ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer;
  1913. trigger_threshold =
  1914. cc_state->cong_setting.entries[sl].trigger_threshold;
  1915. spin_lock_irqsave(&ppd->cca_timer_lock, flags);
  1916. cca_timer = &ppd->cca_timer[sl];
  1917. if (cca_timer->ccti < ccti_limit) {
  1918. if (cca_timer->ccti + ccti_incr <= ccti_limit)
  1919. cca_timer->ccti += ccti_incr;
  1920. else
  1921. cca_timer->ccti = ccti_limit;
  1922. set_link_ipg(ppd);
  1923. }
  1924. ccti = cca_timer->ccti;
  1925. if (!hrtimer_active(&cca_timer->hrtimer)) {
  1926. /* ccti_timer is in units of 1.024 usec */
  1927. unsigned long nsec = 1024 * ccti_timer;
  1928. hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec),
  1929. HRTIMER_MODE_REL);
  1930. }
  1931. spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
  1932. if ((trigger_threshold != 0) && (ccti >= trigger_threshold))
  1933. log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
  1934. }
  1935. /**
  1936. * hfi1_rc_rcv - process an incoming RC packet
* @packet: the packet structure carrying the receive context, headers,
*          payload data and length, and the QP for this packet
  1943. *
  1944. * This is called from qp_rcv() to process an incoming RC packet
  1945. * for the given QP.
  1946. * May be called at interrupt level.
  1947. */
  1948. void hfi1_rc_rcv(struct hfi1_packet *packet)
  1949. {
  1950. struct hfi1_ctxtdata *rcd = packet->rcd;
  1951. struct hfi1_ib_header *hdr = packet->hdr;
  1952. u32 rcv_flags = packet->rcv_flags;
  1953. void *data = packet->ebuf;
  1954. u32 tlen = packet->tlen;
  1955. struct rvt_qp *qp = packet->qp;
  1956. struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
  1957. struct hfi1_other_headers *ohdr = packet->ohdr;
  1958. u32 bth0, opcode;
  1959. u32 hdrsize = packet->hlen;
  1960. u32 psn;
  1961. u32 pad;
  1962. struct ib_wc wc;
  1963. u32 pmtu = qp->pmtu;
  1964. int diff;
  1965. struct ib_reth *reth;
  1966. unsigned long flags;
  1967. int ret, is_fecn = 0;
  1968. int copy_last = 0;
  1969. u32 rkey;
  1970. bth0 = be32_to_cpu(ohdr->bth[0]);
  1971. if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
  1972. return;
  1973. is_fecn = process_ecn(qp, packet, false);
  1974. psn = be32_to_cpu(ohdr->bth[2]);
  1975. opcode = (bth0 >> 24) & 0xff;
  1976. /*
  1977. * Process responses (ACKs) before anything else. Note that the
  1978. * packet sequence number will be for something in the send work
  1979. * queue rather than the expected receive packet sequence number.
  1980. * In other words, this QP is the requester.
  1981. */
  1982. if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
  1983. opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
  1984. rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
  1985. hdrsize, pmtu, rcd);
  1986. if (is_fecn)
  1987. goto send_ack;
  1988. return;
  1989. }
  1990. /* Compute 24 bits worth of difference. */
  1991. diff = delta_psn(psn, qp->r_psn);
  1992. if (unlikely(diff)) {
  1993. if (rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
  1994. return;
  1995. goto send_ack;
  1996. }
  1997. /* Check for opcode sequence errors. */
  1998. switch (qp->r_state) {
  1999. case OP(SEND_FIRST):
  2000. case OP(SEND_MIDDLE):
  2001. if (opcode == OP(SEND_MIDDLE) ||
  2002. opcode == OP(SEND_LAST) ||
  2003. opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
  2004. opcode == OP(SEND_LAST_WITH_INVALIDATE))
  2005. break;
  2006. goto nack_inv;
  2007. case OP(RDMA_WRITE_FIRST):
  2008. case OP(RDMA_WRITE_MIDDLE):
  2009. if (opcode == OP(RDMA_WRITE_MIDDLE) ||
  2010. opcode == OP(RDMA_WRITE_LAST) ||
  2011. opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
  2012. break;
  2013. goto nack_inv;
  2014. default:
  2015. if (opcode == OP(SEND_MIDDLE) ||
  2016. opcode == OP(SEND_LAST) ||
  2017. opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
  2018. opcode == OP(SEND_LAST_WITH_INVALIDATE) ||
  2019. opcode == OP(RDMA_WRITE_MIDDLE) ||
  2020. opcode == OP(RDMA_WRITE_LAST) ||
  2021. opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
  2022. goto nack_inv;
  2023. /*
  2024. * Note that it is up to the requester to not send a new
  2025. * RDMA read or atomic operation before receiving an ACK
  2026. * for the previous operation.
  2027. */
  2028. break;
  2029. }
  2030. if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
  2031. qp_comm_est(qp);
  2032. /* OK, process the packet. */
  2033. switch (opcode) {
  2034. case OP(SEND_FIRST):
  2035. ret = hfi1_rvt_get_rwqe(qp, 0);
  2036. if (ret < 0)
  2037. goto nack_op_err;
  2038. if (!ret)
  2039. goto rnr_nak;
  2040. qp->r_rcv_len = 0;
  2041. /* FALLTHROUGH */
  2042. case OP(SEND_MIDDLE):
  2043. case OP(RDMA_WRITE_MIDDLE):
  2044. send_middle:
  2045. /* Check for invalid length PMTU or posted rwqe len. */
  2046. if (unlikely(tlen != (hdrsize + pmtu + 4)))
  2047. goto nack_inv;
  2048. qp->r_rcv_len += pmtu;
  2049. if (unlikely(qp->r_rcv_len > qp->r_len))
  2050. goto nack_inv;
  2051. hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
  2052. break;
  2053. case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
  2054. /* consume RWQE */
  2055. ret = hfi1_rvt_get_rwqe(qp, 1);
  2056. if (ret < 0)
  2057. goto nack_op_err;
  2058. if (!ret)
  2059. goto rnr_nak;
  2060. goto send_last_imm;
  2061. case OP(SEND_ONLY):
  2062. case OP(SEND_ONLY_WITH_IMMEDIATE):
  2063. case OP(SEND_ONLY_WITH_INVALIDATE):
  2064. ret = hfi1_rvt_get_rwqe(qp, 0);
  2065. if (ret < 0)
  2066. goto nack_op_err;
  2067. if (!ret)
  2068. goto rnr_nak;
  2069. qp->r_rcv_len = 0;
  2070. if (opcode == OP(SEND_ONLY))
  2071. goto no_immediate_data;
  2072. if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
  2073. goto send_last_inv;
  2074. /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
  2075. case OP(SEND_LAST_WITH_IMMEDIATE):
  2076. send_last_imm:
  2077. wc.ex.imm_data = ohdr->u.imm_data;
  2078. wc.wc_flags = IB_WC_WITH_IMM;
  2079. goto send_last;
  2080. case OP(SEND_LAST_WITH_INVALIDATE):
  2081. send_last_inv:
  2082. rkey = be32_to_cpu(ohdr->u.ieth);
  2083. if (rvt_invalidate_rkey(qp, rkey))
  2084. goto no_immediate_data;
  2085. wc.ex.invalidate_rkey = rkey;
  2086. wc.wc_flags = IB_WC_WITH_INVALIDATE;
  2087. goto send_last;
  2088. case OP(RDMA_WRITE_LAST):
  2089. copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
  2090. /* fall through */
  2091. case OP(SEND_LAST):
  2092. no_immediate_data:
  2093. wc.wc_flags = 0;
  2094. wc.ex.imm_data = 0;
  2095. send_last:
  2096. /* Get the number of bytes the message was padded by. */
  2097. pad = (bth0 >> 20) & 3;
  2098. /* Check for invalid length. */
  2099. /* LAST len should be >= 1 */
  2100. if (unlikely(tlen < (hdrsize + pad + 4)))
  2101. goto nack_inv;
  2102. /* Don't count the CRC. */
  2103. tlen -= (hdrsize + pad + 4);
  2104. wc.byte_len = tlen + qp->r_rcv_len;
  2105. if (unlikely(wc.byte_len > qp->r_len))
  2106. goto nack_inv;
  2107. hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
  2108. rvt_put_ss(&qp->r_sge);
  2109. qp->r_msn++;
  2110. if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
  2111. break;
  2112. wc.wr_id = qp->r_wr_id;
  2113. wc.status = IB_WC_SUCCESS;
  2114. if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
  2115. opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
  2116. wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
  2117. else
  2118. wc.opcode = IB_WC_RECV;
  2119. wc.qp = &qp->ibqp;
  2120. wc.src_qp = qp->remote_qpn;
  2121. wc.slid = qp->remote_ah_attr.dlid;
  2122. /*
  2123. * It seems that IB mandates the presence of an SL in a
  2124. * work completion only for the UD transport (see section
  2125. * 11.4.2 of IBTA Vol. 1).
  2126. *
  2127. * However, the way the SL is chosen below is consistent
* with the way that IB/qib works and tries to avoid
* introducing incompatibilities.
  2130. *
  2131. * See also OPA Vol. 1, section 9.7.6, and table 9-17.
  2132. */
  2133. wc.sl = qp->remote_ah_attr.sl;
  2134. /* zero fields that are N/A */
  2135. wc.vendor_err = 0;
  2136. wc.pkey_index = 0;
  2137. wc.dlid_path_bits = 0;
  2138. wc.port_num = 0;
  2139. /* Signal completion event if the solicited bit is set. */
  2140. rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
  2141. (bth0 & IB_BTH_SOLICITED) != 0);
  2142. break;
  2143. case OP(RDMA_WRITE_ONLY):
  2144. copy_last = 1;
  2145. /* fall through */
  2146. case OP(RDMA_WRITE_FIRST):
  2147. case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
  2148. if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
  2149. goto nack_inv;
  2150. /* consume RWQE */
  2151. reth = &ohdr->u.rc.reth;
  2152. qp->r_len = be32_to_cpu(reth->length);
  2153. qp->r_rcv_len = 0;
  2154. qp->r_sge.sg_list = NULL;
  2155. if (qp->r_len != 0) {
  2156. u32 rkey = be32_to_cpu(reth->rkey);
  2157. u64 vaddr = be64_to_cpu(reth->vaddr);
  2158. int ok;
  2159. /* Check rkey & NAK */
  2160. ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
  2161. rkey, IB_ACCESS_REMOTE_WRITE);
  2162. if (unlikely(!ok))
  2163. goto nack_acc;
  2164. qp->r_sge.num_sge = 1;
  2165. } else {
  2166. qp->r_sge.num_sge = 0;
  2167. qp->r_sge.sge.mr = NULL;
  2168. qp->r_sge.sge.vaddr = NULL;
  2169. qp->r_sge.sge.length = 0;
  2170. qp->r_sge.sge.sge_length = 0;
  2171. }
  2172. if (opcode == OP(RDMA_WRITE_FIRST))
  2173. goto send_middle;
  2174. else if (opcode == OP(RDMA_WRITE_ONLY))
  2175. goto no_immediate_data;
  2176. ret = hfi1_rvt_get_rwqe(qp, 1);
  2177. if (ret < 0)
  2178. goto nack_op_err;
  2179. if (!ret)
  2180. goto rnr_nak;
  2181. wc.ex.imm_data = ohdr->u.rc.imm_data;
  2182. wc.wc_flags = IB_WC_WITH_IMM;
  2183. goto send_last;
  2184. case OP(RDMA_READ_REQUEST): {
  2185. struct rvt_ack_entry *e;
  2186. u32 len;
  2187. u8 next;
  2188. if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
  2189. goto nack_inv;
  2190. next = qp->r_head_ack_queue + 1;
  2191. /* s_ack_queue is size HFI1_MAX_RDMA_ATOMIC+1 so use > not >= */
  2192. if (next > HFI1_MAX_RDMA_ATOMIC)
  2193. next = 0;
  2194. spin_lock_irqsave(&qp->s_lock, flags);
  2195. if (unlikely(next == qp->s_tail_ack_queue)) {
  2196. if (!qp->s_ack_queue[next].sent)
  2197. goto nack_inv_unlck;
  2198. update_ack_queue(qp, next);
  2199. }
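/*
* Reuse the entry at the head of the ack queue; if it still holds
* an RDMA read SGE from a previous request, drop that MR reference
* first.
*/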
  2200. e = &qp->s_ack_queue[qp->r_head_ack_queue];
  2201. if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
  2202. rvt_put_mr(e->rdma_sge.mr);
  2203. e->rdma_sge.mr = NULL;
  2204. }
  2205. reth = &ohdr->u.rc.reth;
  2206. len = be32_to_cpu(reth->length);
  2207. if (len) {
  2208. u32 rkey = be32_to_cpu(reth->rkey);
  2209. u64 vaddr = be64_to_cpu(reth->vaddr);
  2210. int ok;
  2211. /* Check rkey & NAK */
  2212. ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
  2213. rkey, IB_ACCESS_REMOTE_READ);
  2214. if (unlikely(!ok))
  2215. goto nack_acc_unlck;
  2216. /*
  2217. * Update the next expected PSN. We add 1 later
  2218. * below, so only add the remainder here.
  2219. */
  2220. if (len > pmtu)
  2221. qp->r_psn += (len - 1) / pmtu;
  2222. } else {
  2223. e->rdma_sge.mr = NULL;
  2224. e->rdma_sge.vaddr = NULL;
  2225. e->rdma_sge.length = 0;
  2226. e->rdma_sge.sge_length = 0;
  2227. }
  2228. e->opcode = opcode;
  2229. e->sent = 0;
  2230. e->psn = psn;
  2231. e->lpsn = qp->r_psn;
  2232. /*
  2233. * We need to increment the MSN here instead of when we
  2234. * finish sending the result since a duplicate request would
  2235. * increment it more than once.
  2236. */
  2237. qp->r_msn++;
  2238. qp->r_psn++;
  2239. qp->r_state = opcode;
  2240. qp->r_nak_state = 0;
  2241. qp->r_head_ack_queue = next;
  2242. /* Schedule the send tasklet. */
  2243. qp->s_flags |= RVT_S_RESP_PENDING;
  2244. hfi1_schedule_send(qp);
  2245. spin_unlock_irqrestore(&qp->s_lock, flags);
  2246. if (is_fecn)
  2247. goto send_ack;
  2248. return;
  2249. }
  2250. case OP(COMPARE_SWAP):
  2251. case OP(FETCH_ADD): {
  2252. struct ib_atomic_eth *ateth;
  2253. struct rvt_ack_entry *e;
  2254. u64 vaddr;
  2255. atomic64_t *maddr;
  2256. u64 sdata;
  2257. u32 rkey;
  2258. u8 next;
  2259. if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
  2260. goto nack_inv;
  2261. next = qp->r_head_ack_queue + 1;
  2262. if (next > HFI1_MAX_RDMA_ATOMIC)
  2263. next = 0;
  2264. spin_lock_irqsave(&qp->s_lock, flags);
  2265. if (unlikely(next == qp->s_tail_ack_queue)) {
  2266. if (!qp->s_ack_queue[next].sent)
  2267. goto nack_inv_unlck;
  2268. update_ack_queue(qp, next);
  2269. }
  2270. e = &qp->s_ack_queue[qp->r_head_ack_queue];
  2271. if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
  2272. rvt_put_mr(e->rdma_sge.mr);
  2273. e->rdma_sge.mr = NULL;
  2274. }
  2275. ateth = &ohdr->u.atomic_eth;
  2276. vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
  2277. be32_to_cpu(ateth->vaddr[1]);
  2278. if (unlikely(vaddr & (sizeof(u64) - 1)))
  2279. goto nack_inv_unlck;
  2280. rkey = be32_to_cpu(ateth->rkey);
  2281. /* Check rkey & NAK */
  2282. if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
  2283. vaddr, rkey,
  2284. IB_ACCESS_REMOTE_ATOMIC)))
  2285. goto nack_acc_unlck;
  2286. /* Perform atomic OP and save result. */
  2287. maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
  2288. sdata = be64_to_cpu(ateth->swap_data);
  2289. e->atomic_data = (opcode == OP(FETCH_ADD)) ?
  2290. (u64)atomic64_add_return(sdata, maddr) - sdata :
  2291. (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
  2292. be64_to_cpu(ateth->compare_data),
  2293. sdata);
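/*
* The value saved in the ack entry is what the ATOMIC ACK returns:
* for FETCH_ADD the pre-add value (hence subtracting sdata from the
* post-add result), for COMPARE_SWAP the prior contents reported by
* cmpxchg().
*/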
  2294. rvt_put_mr(qp->r_sge.sge.mr);
  2295. qp->r_sge.num_sge = 0;
  2296. e->opcode = opcode;
  2297. e->sent = 0;
  2298. e->psn = psn;
  2299. e->lpsn = psn;
  2300. qp->r_msn++;
  2301. qp->r_psn++;
  2302. qp->r_state = opcode;
  2303. qp->r_nak_state = 0;
  2304. qp->r_head_ack_queue = next;
  2305. /* Schedule the send tasklet. */
  2306. qp->s_flags |= RVT_S_RESP_PENDING;
  2307. hfi1_schedule_send(qp);
  2308. spin_unlock_irqrestore(&qp->s_lock, flags);
  2309. if (is_fecn)
  2310. goto send_ack;
  2311. return;
  2312. }
  2313. default:
  2314. /* NAK unknown opcodes. */
  2315. goto nack_inv;
  2316. }
  2317. qp->r_psn++;
  2318. qp->r_state = opcode;
  2319. qp->r_ack_psn = psn;
  2320. qp->r_nak_state = 0;
  2321. /* Send an ACK if requested or required. */
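/*
* ACKs are normally deferred and coalesced via rc_defered_ack();
* cancel the deferral and send one immediately when no more packets
* follow in this receive burst, when HFI1_PSN_CREDIT or more ACKs
* are already pending, or when a received FECN needs to be
* reflected right away.
*/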
  2322. if (psn & IB_BTH_REQ_ACK) {
  2323. struct hfi1_qp_priv *priv = qp->priv;
  2324. if (packet->numpkt == 0) {
  2325. rc_cancel_ack(qp);
  2326. goto send_ack;
  2327. }
  2328. if (priv->r_adefered >= HFI1_PSN_CREDIT) {
  2329. rc_cancel_ack(qp);
  2330. goto send_ack;
  2331. }
  2332. if (unlikely(is_fecn)) {
  2333. rc_cancel_ack(qp);
  2334. goto send_ack;
  2335. }
  2336. priv->r_adefered++;
  2337. rc_defered_ack(rcd, qp);
  2338. }
  2339. return;
  2340. rnr_nak:
  2341. qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
  2342. qp->r_ack_psn = qp->r_psn;
  2343. /* Queue RNR NAK for later */
  2344. rc_defered_ack(rcd, qp);
  2345. return;
  2346. nack_op_err:
  2347. hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
  2348. qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
  2349. qp->r_ack_psn = qp->r_psn;
  2350. /* Queue NAK for later */
  2351. rc_defered_ack(rcd, qp);
  2352. return;
  2353. nack_inv_unlck:
  2354. spin_unlock_irqrestore(&qp->s_lock, flags);
  2355. nack_inv:
  2356. hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
  2357. qp->r_nak_state = IB_NAK_INVALID_REQUEST;
  2358. qp->r_ack_psn = qp->r_psn;
  2359. /* Queue NAK for later */
  2360. rc_defered_ack(rcd, qp);
  2361. return;
  2362. nack_acc_unlck:
  2363. spin_unlock_irqrestore(&qp->s_lock, flags);
  2364. nack_acc:
  2365. hfi1_rc_error(qp, IB_WC_LOC_PROT_ERR);
  2366. qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
  2367. qp->r_ack_psn = qp->r_psn;
  2368. send_ack:
  2369. hfi1_send_rc_ack(rcd, qp, is_fecn);
  2370. }
  2371. void hfi1_rc_hdrerr(
  2372. struct hfi1_ctxtdata *rcd,
  2373. struct hfi1_ib_header *hdr,
  2374. u32 rcv_flags,
  2375. struct rvt_qp *qp)
  2376. {
  2377. int has_grh = rcv_flags & HFI1_HAS_GRH;
  2378. struct hfi1_other_headers *ohdr;
  2379. struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
  2380. int diff;
  2381. u32 opcode;
  2382. u32 psn, bth0;
  2383. /* Check for GRH */
  2384. ohdr = &hdr->u.oth;
  2385. if (has_grh)
  2386. ohdr = &hdr->u.l.oth;
  2387. bth0 = be32_to_cpu(ohdr->bth[0]);
  2388. if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
  2389. return;
  2390. psn = be32_to_cpu(ohdr->bth[2]);
  2391. opcode = (bth0 >> 24) & 0xff;
  2392. /* Only deal with RDMA Writes for now */
  2393. if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
  2394. diff = delta_psn(psn, qp->r_psn);
  2395. if (!qp->r_nak_state && diff >= 0) {
  2396. ibp->rvp.n_rc_seqnak++;
  2397. qp->r_nak_state = IB_NAK_PSN_ERROR;
  2398. /* Use the expected PSN. */
  2399. qp->r_ack_psn = qp->r_psn;
/*
* Wait to send the sequence NAK until all packets
* in the receive queue have been processed.
* Otherwise, we end up propagating congestion.
*/
  2408. rc_defered_ack(rcd, qp);
  2409. } /* Out of sequence NAK */
  2410. } /* QP Request NAKs */
  2411. }