verbs.c

/*
 * Copyright(c) 2015 - 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <rdma/opa_addr.h>

#include "hfi.h"
#include "common.h"
#include "device.h"
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "debugfs.h"
#include "vnic.h"

static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int hfi1_max_pds = 0xFFFF;
module_param_named(max_pds, hfi1_max_pds, uint, S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int hfi1_max_cqes = 0x2FFFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int hfi1_max_cqs = 0x1FFFF;
module_param_named(max_cqs, hfi1_max_cqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int hfi1_max_qps = 32768;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int hfi1_max_sges = 0x60;
module_param_named(max_sges, hfi1_max_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int hfi1_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, hfi1_max_mcast_grps, uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int hfi1_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, hfi1_max_mcast_qp_attached,
		   uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int hfi1_max_srqs = 1024;
module_param_named(max_srqs, hfi1_max_srqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int hfi1_max_srq_sges = 128;
module_param_named(max_srq_sges, hfi1_max_srq_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int hfi1_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");

#define COPY_CACHELESS 1
#define COPY_ADAPTIVE  2
static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
		 "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS");

static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status);

static int pio_wait(struct rvt_qp *qp,
		    struct send_context *sc,
		    struct hfi1_pkt_state *ps,
		    u32 flag);

/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24

static uint wss_threshold;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");

/* memory working set size */
struct hfi1_wss {
	unsigned long *entries;
	atomic_t total_count;
	atomic_t clean_counter;
	atomic_t clean_entry;

	int threshold;
	int num_entries;
	long pages_mask;
};

static struct hfi1_wss wss;

int hfi1_wss_init(void)
{
	long llc_size;
	long llc_bits;
	long table_size;
	long table_bits;

	/* check for a valid percent range - default to 80 if none or invalid */
	if (wss_threshold < 1 || wss_threshold > 100)
		wss_threshold = 80;
	/* reject a wildly large period */
	if (wss_clean_period > 1000000)
		wss_clean_period = 256;
	/* reject a zero period */
	if (wss_clean_period == 0)
		wss_clean_period = 1;

	/*
	 * Calculate the table size - the next power of 2 larger than the
	 * LLC size.  LLC size is in KiB.
	 */
	llc_size = wss_llc_size() * 1024;
	table_size = roundup_pow_of_two(llc_size);

	/* one bit per page in rounded up table */
	llc_bits = llc_size / PAGE_SIZE;
	table_bits = table_size / PAGE_SIZE;
	wss.pages_mask = table_bits - 1;
	wss.num_entries = table_bits / BITS_PER_LONG;

	wss.threshold = (llc_bits * wss_threshold) / 100;
	if (wss.threshold == 0)
		wss.threshold = 1;

	atomic_set(&wss.clean_counter, wss_clean_period);

	wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
			      GFP_KERNEL);
	if (!wss.entries) {
		hfi1_wss_exit();
		return -ENOMEM;
	}

	return 0;
}
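
/*
 * Editorial example (assumed numbers, illustration only): with a 30 MiB
 * LLC and 4 KiB pages, hfi1_wss_init() computes roughly
 *
 *   llc_size    = 30720 KiB * 1024        = 31457280 bytes
 *   table_size  = roundup_pow_of_two(...) = 33554432 bytes (32 MiB)
 *   llc_bits    = 31457280 / 4096         = 7680 pages in the LLC
 *   table_bits  = 33554432 / 4096         = 8192 bits in the table
 *   pages_mask  = 8191
 *   num_entries = 8192 / 64               = 128 unsigned longs
 *   threshold   = 7680 * 80 / 100         = 6144 pages (80% of the LLC)
 */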

void hfi1_wss_exit(void)
{
	/* coded to handle partially initialized and repeat callers */
	kfree(wss.entries);
	wss.entries = NULL;
}

/*
 * Advance the clean counter.  When the clean period has expired,
 * clean an entry.
 *
 * This is implemented in atomics to avoid locking.  Because multiple
 * variables are involved, it can be racy which can lead to slightly
 * inaccurate information.  Since this is only a heuristic, this is
 * OK.  Any inaccuracies will clean themselves out as the counter
 * advances.  That said, it is unlikely the entry clean operation will
 * race - the next possible racer will not start until the next clean
 * period.
 *
 * The clean counter is implemented as a decrement to zero.  When zero
 * is reached an entry is cleaned.
 */
static void wss_advance_clean_counter(void)
{
	int entry;
	int weight;
	unsigned long bits;

	/* become the cleaner if we decrement the counter to zero */
	if (atomic_dec_and_test(&wss.clean_counter)) {
		/*
		 * Set, not add, the clean period.  This avoids an issue
		 * where the counter could decrement below the clean period.
		 * Doing a set can result in lost decrements, slowing the
		 * clean advance.  Since this is a heuristic, this possible
		 * slowdown is OK.
		 *
		 * An alternative is to loop, advancing the counter by a
		 * clean period until the result is > 0. However, this could
		 * lead to several threads keeping another in the clean loop.
		 * This could be mitigated by limiting the number of times
		 * we stay in the loop.
		 */
		atomic_set(&wss.clean_counter, wss_clean_period);

		/*
		 * Uniquely grab the entry to clean and move to next.
		 * The current entry is always the lower bits of
		 * wss.clean_entry.  The table size, wss.num_entries,
		 * is always a power-of-2.
		 */
		entry = (atomic_inc_return(&wss.clean_entry) - 1)
			& (wss.num_entries - 1);

		/* clear the entry and count the bits */
		bits = xchg(&wss.entries[entry], 0);
		weight = hweight64((u64)bits);
		/* only adjust the contended total count if needed */
		if (weight)
			atomic_sub(weight, &wss.total_count);
	}
}

/*
 * Insert the given address into the working set array.
 */
static void wss_insert(void *address)
{
	u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
	u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
	u32 nr = page & (BITS_PER_LONG - 1);

	if (!test_and_set_bit(nr, &wss.entries[entry]))
		atomic_inc(&wss.total_count);

	wss_advance_clean_counter();
}
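
/*
 * Editorial example (illustrative only, using the pages_mask = 8191 and
 * num_entries = 128 values from the note above): an address whose page
 * frame number is 0x12345 maps to
 *
 *   page  = 0x12345 & 8191 = 837  (0x345)
 *   entry = 837 / 64       = 13   (which unsigned long)
 *   nr    = 837 & 63       = 5    (which bit within it)
 *
 * so repeated copies touching the same pages set the same bits and only
 * bump total_count once, until the cleaner clears that word again.
 */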

/*
 * Is the working set larger than the threshold?
 */
static inline bool wss_exceeds_threshold(void)
{
	return atomic_read(&wss.total_count) >= wss.threshold;
}

/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
	[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
	[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
	[IB_WR_REG_MR] = IB_WC_REG_MR
};

/*
 * Length of header by opcode, 0 --> not supported
 */
const u8 hdr_len_by_opcode[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_RC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4,
	[IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8,
	[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
	[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = 12 + 8 + 8,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 12
};
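
/*
 * Editorial note on the table above: the byte counts appear to decompose
 * into the standard IBA header sizes - 8 (LRH) + 12 (BTH) for every entry,
 * plus 4 for an immediate/AETH/IETH, 16 for a RETH, 8 for a DETH or
 * AtomicAckETH, 28 for an AtomicETH, and 20 for RETH + immediate - so,
 * e.g., RDMA_WRITE_ONLY_WITH_IMMEDIATE = 12 + 8 + 20 = 40 bytes of
 * headers ahead of the payload.
 */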

static const opcode_handler opcode_handler_tbl[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = &hfi1_ud_rcv,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_ud_rcv,
	/* CNP */
	[IB_OPCODE_CNP] = &hfi1_cnp_rcv
};

#define OPMASK 0x1f

static const u32 pio_opmask[BIT(3)] = {
	/* RC */
	[IB_OPCODE_RC >> 5] =
		BIT(RC_OP(SEND_ONLY) & OPMASK) |
		BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
		BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
		BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
		BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
		BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
		BIT(RC_OP(FETCH_ADD) & OPMASK),
	/* UC */
	[IB_OPCODE_UC >> 5] =
		BIT(UC_OP(SEND_ONLY) & OPMASK) |
		BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
		BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
};
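
/*
 * Editorial note: pio_opmask is consulted in get_send_routine() below as
 *
 *   BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]
 *
 * i.e. the top three opcode bits select the transport group (RC, UC, ...)
 * and the low five bits index a per-transport bitmask of the "single
 * packet" opcodes that are considered worth sending by PIO rather than
 * SDMA.
 */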

/*
 * System image GUID.
 */
__be64 ib_hfi1_sys_image_guid;

/**
 * hfi1_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 * @release: boolean to release MR
 * @copy_last: do a separate copy of the last 8 bytes
 */
void hfi1_copy_sge(
	struct rvt_sge_state *ss,
	void *data, u32 length,
	bool release,
	bool copy_last)
{
	struct rvt_sge *sge = &ss->sge;
	int i;
	bool in_last = false;
	bool cacheless_copy = false;

	if (sge_copy_mode == COPY_CACHELESS) {
		cacheless_copy = length >= PAGE_SIZE;
	} else if (sge_copy_mode == COPY_ADAPTIVE) {
		if (length >= PAGE_SIZE) {
			/*
			 * NOTE: this *assumes*:
			 * o The first vaddr is the dest.
			 * o If multiple pages, then vaddr is sequential.
			 */
			wss_insert(sge->vaddr);
			if (length >= (2 * PAGE_SIZE))
				wss_insert(sge->vaddr + PAGE_SIZE);

			cacheless_copy = wss_exceeds_threshold();
		} else {
			wss_advance_clean_counter();
		}
	}
	if (copy_last) {
		if (length > 8) {
			length -= 8;
		} else {
			copy_last = false;
			in_last = true;
		}
	}

again:
	while (length) {
		u32 len = rvt_get_sge_length(sge, length);

		WARN_ON_ONCE(len == 0);
		if (unlikely(in_last)) {
			/* enforce byte transfer ordering */
			for (i = 0; i < len; i++)
				((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
		} else if (cacheless_copy) {
			cacheless_memcpy(sge->vaddr, data, len);
		} else {
			memcpy(sge->vaddr, data, len);
		}
		rvt_update_sge(ss, len, release);
		data += len;
		length -= len;
	}

	if (copy_last) {
		copy_last = false;
		in_last = true;
		length = 8;
		goto again;
	}
}
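
/*
 * Editorial note on hfi1_copy_sge(): in adaptive mode the first (and, for
 * copies of two or more pages, second) destination page of every large
 * copy is fed to wss_insert(); once the sampled working set exceeds the
 * threshold, subsequent large copies switch to cacheless_memcpy() to avoid
 * churning the LLC.  The copy_last handling splits off the final 8 bytes
 * and writes them last, one byte at a time, which (per the "enforce byte
 * transfer ordering" comment) appears intended for consumers that poll on
 * the tail of the message buffer.
 */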

/*
 * Make sure the QP is ready and able to accept the given opcode.
 */
static inline opcode_handler qp_ok(struct hfi1_packet *packet)
{
	if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
		return NULL;
	if (((packet->opcode & RVT_OPCODE_QP_MASK) ==
	     packet->qp->allowed_ops) ||
	    (packet->opcode == IB_OPCODE_CNP))
		return opcode_handler_tbl[packet->opcode];

	return NULL;
}

static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
#ifdef CONFIG_FAULT_INJECTION
	if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
		/*
		 * In order to drop non-IB traffic we
		 * set PbcInsertHrc to NONE (0x2).
		 * The packet will still be delivered
		 * to the receiving node but a
		 * KHdrHCRCErr (KDETH packet with a bad
		 * HCRC) will be triggered and the
		 * packet will not be delivered to the
		 * correct context.
		 */
		pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
	else
		/*
		 * In order to drop regular verbs
		 * traffic we set the PbcTestEbp
		 * flag. The packet will still be
		 * delivered to the receiving node but
		 * a 'late ebp error' will be
		 * triggered and will be dropped.
		 */
		pbc |= PBC_TEST_EBP;
#endif
	return pbc;
}

static int hfi1_do_pkey_check(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_16b_header *hdr = packet->hdr;
	u16 pkey;

	/* Pkey check needed only for bypass packets */
	if (packet->etype != RHF_RCV_TYPE_BYPASS)
		return 0;

	/* Perform pkey check */
	pkey = hfi1_16B_get_pkey(hdr);
	return ingress_pkey_check(ppd, pkey, packet->sc,
				  packet->qp->s_pkey_index,
				  packet->slid, true);
}

static inline void hfi1_handle_packet(struct hfi1_packet *packet,
				      bool is_mcast)
{
	u32 qp_num;
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_ibport *ibp = rcd_to_iport(rcd);
	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
	opcode_handler packet_handler;
	unsigned long flags;

	inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]);

	if (unlikely(is_mcast)) {
		struct rvt_mcast *mcast;
		struct rvt_mcast_qp *p;

		if (!packet->grh)
			goto drop;
		mcast = rvt_mcast_find(&ibp->rvp,
				       &packet->grh->dgid,
				       opa_get_lid(packet->dlid, 9B));
		if (!mcast)
			goto drop;
		list_for_each_entry_rcu(p, &mcast->qp_list, list) {
			packet->qp = p->qp;
			if (hfi1_do_pkey_check(packet))
				goto drop;
			spin_lock_irqsave(&packet->qp->r_lock, flags);
			packet_handler = qp_ok(packet);
			if (likely(packet_handler))
				packet_handler(packet);
			else
				ibp->rvp.n_pkt_drops++;
			spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		}
		/*
		 * Notify rvt_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		/* Get the destination QP number. */
		qp_num = ib_bth_get_qpn(packet->ohdr);
		rcu_read_lock();
		packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
		if (!packet->qp)
			goto unlock_drop;

		if (hfi1_do_pkey_check(packet))
			goto unlock_drop;

		if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode,
						   true)))
			goto unlock_drop;

		spin_lock_irqsave(&packet->qp->r_lock, flags);
		packet_handler = qp_ok(packet);
		if (likely(packet_handler))
			packet_handler(packet);
		else
			ibp->rvp.n_pkt_drops++;
		spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		rcu_read_unlock();
	}
	return;
unlock_drop:
	rcu_read_unlock();
drop:
	ibp->rvp.n_pkt_drops++;
}

/**
 * hfi1_ib_rcv - process an incoming packet
 * @packet: data packet information
 *
 * This is called to process an incoming packet at interrupt level.
 */
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;

	trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
	hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid));
}

void hfi1_16B_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;

	trace_input_ibhdr(rcd->dd, packet, false);
	hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid));
}

/*
 * This is called from a timer to check for QPs
 * which need kernel memory in order to send a packet.
 */
static void mem_timer(unsigned long data)
{
	struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data;
	struct list_head *list = &dev->memwait;
	struct rvt_qp *qp = NULL;
	struct iowait *wait;
	unsigned long flags;
	struct hfi1_qp_priv *priv;

	write_seqlock_irqsave(&dev->iowait_lock, flags);
	if (!list_empty(list)) {
		wait = list_first_entry(list, struct iowait, list);
		qp = iowait_to_qp(wait);
		priv = qp->priv;
		list_del_init(&priv->s_iowait.list);
		priv->s_iowait.lock = NULL;
		/* refcount held until actual wake up */
		if (!list_empty(list))
			mod_timer(&dev->mem_timer, jiffies + 1);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);

	if (qp)
		hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
}

/*
 * This is called with progress side lock held.
 */
/* New API */
static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status)
{
	struct verbs_txreq *tx =
		container_of(cookie, struct verbs_txreq, txreq);
	struct rvt_qp *qp = tx->qp;

	spin_lock(&qp->s_lock);
	if (tx->wqe) {
		hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		struct hfi1_opa_header *hdr;

		hdr = &tx->phdr.hdr;
		hfi1_rc_send_complete(qp, hdr);
	}
	spin_unlock(&qp->s_lock);

	hfi1_put_txreq(tx);
}

static int wait_kmem(struct hfi1_ibdev *dev,
		     struct rvt_qp *qp,
		     struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		write_seqlock(&dev->iowait_lock);
		list_add_tail(&ps->s_txreq->txreq.list,
			      &priv->s_iowait.tx_head);
		if (list_empty(&priv->s_iowait.list)) {
			if (list_empty(&dev->memwait))
				mod_timer(&dev->mem_timer, jiffies + 1);
			qp->s_flags |= RVT_S_WAIT_KMEM;
			list_add_tail(&priv->s_iowait.list, &dev->memwait);
			priv->s_iowait.lock = &dev->iowait_lock;
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
			rvt_get_qp(qp);
		}
		write_sequnlock(&dev->iowait_lock);
		qp->s_flags &= ~RVT_S_BUSY;
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);

	return ret;
}

/*
 * This routine calls txadds for each sg entry.
 *
 * Add failures will revert the sge cursor
 */
static noinline int build_verbs_ulp_payload(
	struct sdma_engine *sde,
	u32 length,
	struct verbs_txreq *tx)
{
	struct rvt_sge_state *ss = tx->ss;
	struct rvt_sge *sg_list = ss->sg_list;
	struct rvt_sge sge = ss->sge;
	u8 num_sge = ss->num_sge;
	u32 len;
	int ret = 0;

	while (length) {
		len = ss->sge.length;
		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		WARN_ON_ONCE(len == 0);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			ss->sge.vaddr,
			len);
		if (ret)
			goto bail_txadd;
		rvt_update_sge(ss, len, false);
		length -= len;
	}
	return ret;
bail_txadd:
	/* unwind cursor */
	ss->sge = sge;
	ss->num_sge = num_sge;
	ss->sg_list = sg_list;
	return ret;
}

/*
 * Build the number of DMA descriptors needed to send length bytes of data.
 *
 * NOTE: DMA mapping is held in the tx until completed in the ring or
 *       the tx desc is freed without having been submitted to the ring
 *
 * This routine ensures all the helper routine calls succeed.
 */
/* New API */
static int build_verbs_tx_desc(
	struct sdma_engine *sde,
	u32 length,
	struct verbs_txreq *tx,
	struct hfi1_ahg_info *ahg_info,
	u64 pbc)
{
	int ret = 0;
	struct hfi1_sdma_header *phdr = &tx->phdr;
	u16 hdrbytes = tx->hdr_dwords << 2;
	u32 *hdr;
	u8 extra_bytes = 0;
	static char trail_buf[12]; /* CRC = 4, LT = 1, Pad = 0 to 7 bytes */

	if (tx->phdr.hdr.hdr_type) {
		/*
		 * hdrbytes accounts for PBC. Need to subtract 8 bytes
		 * before calculating padding.
		 */
		extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) +
			      (SIZE_OF_CRC << 2) + SIZE_OF_LT;
		hdr = (u32 *)&phdr->hdr.opah;
	} else {
		hdr = (u32 *)&phdr->hdr.ibh;
	}
	if (!ahg_info->ahgcount) {
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahg_info->tx_flags,
			hdrbytes + length +
			extra_bytes,
			ahg_info->ahgidx,
			0,
			NULL,
			0,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
		phdr->pbc = cpu_to_le64(pbc);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			phdr,
			hdrbytes);
		if (ret)
			goto bail_txadd;
	} else {
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahg_info->tx_flags,
			length,
			ahg_info->ahgidx,
			ahg_info->ahgcount,
			ahg_info->ahgdesc,
			hdrbytes,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
	}
	/* add the ulp payload - if any. tx->ss can be NULL for acks */
	if (tx->ss) {
		ret = build_verbs_ulp_payload(sde, length, tx);
		if (ret)
			goto bail_txadd;
	}

	/* add icrc, lt byte, and padding to flit */
	if (extra_bytes != 0)
		ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
					trail_buf, extra_bytes);

bail_txadd:
	return ret;
}

int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ahg_info *ahg_info = priv->s_ahg;
	u32 hdrwords = qp->s_hdrwords;
	u32 len = ps->s_txreq->s_cur_size;
	u32 plen;
	struct hfi1_ibdev *dev = ps->dev;
	struct hfi1_pportdata *ppd = ps->ppd;
	struct verbs_txreq *tx;
	u8 sc5 = priv->s_sc;
	int ret;
	u32 dwords;
	bool bypass = false;

	if (ps->s_txreq->phdr.hdr.hdr_type) {
		u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len);

		dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) +
			  SIZE_OF_LT) >> 2;
		bypass = true;
	} else {
		dwords = (len + 3) >> 2;
	}
	plen = hdrwords + dwords + 2;

	tx = ps->s_txreq;
	if (!sdma_txreq_built(&tx->txreq)) {
		if (likely(pbc == 0)) {
			u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);

			/* No vl15 here */
			/* set PBC_DC_INFO bit (aka SC[4]) in pbc */
			if (ps->s_txreq->phdr.hdr.hdr_type)
				pbc |= PBC_PACKET_BYPASS |
				       PBC_INSERT_BYPASS_ICRC;
			else
				pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);

			if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode,
							   false)))
				pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
			pbc = create_pbc(ppd,
					 pbc,
					 qp->srate_mbps,
					 vl,
					 plen);
		}
		tx->wqe = qp->s_wqe;
		ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
		if (unlikely(ret))
			goto bail_build;
	}
	ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
			      ps->pkts_sent);
	if (unlikely(ret < 0)) {
		if (ret == -ECOMM)
			goto bail_ecomm;
		return ret;
	}
	trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
				&ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
	return ret;

bail_ecomm:
	/* The current one got "sent" */
	return 0;
bail_build:
	ret = wait_kmem(dev, qp, ps);
	if (!ret) {
		/* free txreq - bad state */
		hfi1_put_txreq(ps->s_txreq);
		ps->s_txreq = NULL;
	}
	return ret;
}
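
/*
 * Editorial example of the length math in hfi1_verbs_send_dma() above
 * (illustrative numbers): a 9B packet with hdrwords = 9 and a 100 byte
 * payload gives dwords = (100 + 3) >> 2 = 25 and plen = 9 + 25 + 2 = 36
 * dwords; the constant 2 dwords apparently cover the 8-byte PBC emitted
 * ahead of the headers (an assumption, not stated in this file).
 */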

/*
 * If we are now in the error state, return zero to flush the
 * send work request.
 */
static int pio_wait(struct rvt_qp *qp,
		    struct send_context *sc,
		    struct hfi1_pkt_state *ps,
		    u32 flag)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_devdata *dd = sc->dd;
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	unsigned long flags;
	int ret = 0;

	/*
	 * Note that as soon as want_buffer() is called and
	 * possibly before it returns, sc_piobufavail()
	 * could be called. Therefore, put QP on the I/O wait list before
	 * enabling the PIO avail interrupt.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		write_seqlock(&dev->iowait_lock);
		list_add_tail(&ps->s_txreq->txreq.list,
			      &priv->s_iowait.tx_head);
		if (list_empty(&priv->s_iowait.list)) {
			struct hfi1_ibdev *dev = &dd->verbs_dev;
			int was_empty;

			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
			dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
			qp->s_flags |= flag;
			was_empty = list_empty(&sc->piowait);
			iowait_queue(ps->pkts_sent, &priv->s_iowait,
				     &sc->piowait);
			priv->s_iowait.lock = &dev->iowait_lock;
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
			rvt_get_qp(qp);
			/* counting: only call wantpiobuf_intr if first user */
			if (was_empty)
				hfi1_sc_wantpiobuf_intr(sc, 1);
		}
		write_sequnlock(&dev->iowait_lock);
		qp->s_flags &= ~RVT_S_BUSY;
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

static void verbs_pio_complete(void *arg, int code)
{
	struct rvt_qp *qp = (struct rvt_qp *)arg;
	struct hfi1_qp_priv *priv = qp->priv;

	if (iowait_pio_dec(&priv->s_iowait))
		iowait_drain_wakeup(&priv->s_iowait);
}

int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	u32 hdrwords = qp->s_hdrwords;
	struct rvt_sge_state *ss = ps->s_txreq->ss;
	u32 len = ps->s_txreq->s_cur_size;
	u32 dwords;
	u32 plen;
	struct hfi1_pportdata *ppd = ps->ppd;
	u32 *hdr;
	u8 sc5;
	unsigned long flags = 0;
	struct send_context *sc;
	struct pio_buf *pbuf;
	int wc_status = IB_WC_SUCCESS;
	int ret = 0;
	pio_release_cb cb = NULL;
	u32 lrh0_16b;
	bool bypass = false;
	u8 extra_bytes = 0;

	if (ps->s_txreq->phdr.hdr.hdr_type) {
		u8 pad_size = hfi1_get_16b_padding((hdrwords << 2), len);

		extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT;
		dwords = (len + extra_bytes) >> 2;
		hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah;
		lrh0_16b = ps->s_txreq->phdr.hdr.opah.lrh[0];
		bypass = true;
	} else {
		dwords = (len + 3) >> 2;
		hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh;
	}
	plen = hdrwords + dwords + 2;

	/* only RC/UC use complete */
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
		cb = verbs_pio_complete;
		break;
	default:
		break;
	}

	/* vl15 special case taken care of in ud.c */
	sc5 = priv->s_sc;
	sc = ps->s_txreq->psc;

	if (likely(pbc == 0)) {
		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);

		/* set PBC_DC_INFO bit (aka SC[4]) in pbc */
		if (ps->s_txreq->phdr.hdr.hdr_type)
			pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
		else
			pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);

		if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, false)))
			pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
		pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
	}
	if (cb)
		iowait_pio_inc(&priv->s_iowait);
	pbuf = sc_buffer_alloc(sc, plen, cb, qp);
	if (unlikely(!pbuf)) {
		if (cb)
			verbs_pio_complete(qp, 0);
		if (ppd->host_link_state != HLS_UP_ACTIVE) {
			/*
			 * If we have filled the PIO buffers to capacity and
			 * are not in an active state, this request is not
			 * going to go out, so complete it with an error;
			 * otherwise a ULP or the core may be stuck waiting.
			 */
			hfi1_cdbg(
				PIO,
				"alloc failed. state not active, completing");
			wc_status = IB_WC_GENERAL_ERR;
			goto pio_bail;
		} else {
			/*
			 * This is a normal occurrence. The PIO buffers are
			 * full, but we are still happily sending, so let's
			 * continue to queue the request.
			 */
			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
			ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
			if (!ret)
				/* txreq not queued - free */
				goto bail;
			/* tx consumed in wait */
			return ret;
		}
	}

	if (dwords == 0) {
		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
	} else {
		seg_pio_copy_start(pbuf, pbc,
				   hdr, hdrwords * 4);
		if (ss) {
			while (len) {
				void *addr = ss->sge.vaddr;
				u32 slen = ss->sge.length;

				if (slen > len)
					slen = len;
				rvt_update_sge(ss, slen, false);
				seg_pio_copy_mid(pbuf, addr, slen);
				len -= slen;
			}
		}
		/*
		 * Bypass packet will need to copy additional
		 * bytes to accommodate for CRC and LT bytes
		 */
		if (extra_bytes) {
			u8 *empty_buf;

			empty_buf = kcalloc(extra_bytes, sizeof(u8),
					    GFP_KERNEL);
			seg_pio_copy_mid(pbuf, empty_buf, extra_bytes);
			kfree(empty_buf);
		}
		seg_pio_copy_end(pbuf);
	}

	trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
			       &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));

pio_bail:
	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_send_complete(qp, qp->s_wqe, wc_status);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}

	ret = 0;

bail:
	hfi1_put_txreq(ps->s_txreq);
	return ret;
}
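
/*
 * Editorial note on the PIO path above: a header-only packet (dwords == 0)
 * is written with a single pio_copy(), while packets with payload use the
 * seg_pio_copy_start/mid/end sequence.  16B bypass packets additionally
 * stream a zeroed kcalloc() buffer of extra_bytes so the CRC/LT/pad tail
 * occupies space in the send buffer; note the allocation result is not
 * checked before use here.
 */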

/*
 * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent
 * being an entry from the partition key table), return 0
 * otherwise. Use the matching criteria for egress partition keys
 * specified in the OPAv1 spec., section 9.11.7.
 */
static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
{
	u16 mkey = pkey & PKEY_LOW_15_MASK;
	u16 mentry = ent & PKEY_LOW_15_MASK;

	if (mkey == mentry) {
		/*
		 * If pkey[15] is set (full partition member),
		 * is bit 15 in the corresponding table element
		 * clear (limited member)?
		 */
		if (pkey & PKEY_MEMBER_MASK)
			return !!(ent & PKEY_MEMBER_MASK);
		return 1;
	}
	return 0;
}
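
/*
 * Editorial example (illustrative values): against a table entry of 0x8001
 * (full member of partition 1), packet pkeys 0x8001 and 0x0001 both match;
 * against a table entry of 0x0001 (limited member), 0x0001 matches but
 * 0x8001 does not - a full-membership pkey can only be used if the port
 * itself holds full membership in that partition.
 */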

/**
 * egress_pkey_check - check P_KEY of a packet
 * @ppd: Physical IB port data
 * @slid: SLID for packet
 * @pkey: PKEY for header
 * @sc5: SC for packet
 * @s_pkey_index: It will be used for look up optimization for kernel contexts
 * only. If it is a negative value, a user context is calling this function.
 *
 * It checks if hdr's pkey is valid.
 *
 * Return: 0 on success, otherwise, 1
 */
int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey,
		      u8 sc5, int8_t s_pkey_index)
{
	struct hfi1_devdata *dd;
	int i;
	int is_user_ctxt_mechanism = (s_pkey_index < 0);

	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
		return 0;

	/* If SC15, pkey[0:14] must be 0x7fff */
	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
		goto bad;

	/* Is the pkey = 0x0, or 0x8000? */
	if ((pkey & PKEY_LOW_15_MASK) == 0)
		goto bad;

	/*
	 * For the kernel contexts only, if a qp is passed into the function,
	 * the most likely matching pkey has index qp->s_pkey_index
	 */
	if (!is_user_ctxt_mechanism &&
	    egress_pkey_matches_entry(pkey, ppd->pkeys[s_pkey_index])) {
		return 0;
	}

	for (i = 0; i < MAX_PKEY_VALUES; i++) {
		if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
			return 0;
	}
bad:
	/*
	 * For the user-context mechanism, the P_KEY check would only happen
	 * once per SDMA request, not once per packet. Therefore, there's no
	 * need to increment the counter for the user-context mechanism.
	 */
	if (!is_user_ctxt_mechanism) {
		incr_cntr64(&ppd->port_xmit_constraint_errors);
		dd = ppd->dd;
		if (!(dd->err_info_xmit_constraint.status &
		      OPA_EI_STATUS_SMASK)) {
			dd->err_info_xmit_constraint.status |=
				OPA_EI_STATUS_SMASK;
			dd->err_info_xmit_constraint.slid = slid;
			dd->err_info_xmit_constraint.pkey = pkey;
		}
	}
	return 1;
}

/**
 * get_send_routine - choose an egress routine
 *
 * Choose an egress routine based on QP type
 * and size
 */
static inline send_routine get_send_routine(struct rvt_qp *qp,
					    struct hfi1_pkt_state *ps)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_qp_priv *priv = qp->priv;
	struct verbs_txreq *tx = ps->s_txreq;

	if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
		return dd->process_pio_send;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return dd->process_pio_send;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		break;
	case IB_QPT_UC:
	case IB_QPT_RC: {
		if (piothreshold &&
		    tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
		    (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) &&
		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
		    !sdma_txreq_built(&tx->txreq))
			return dd->process_pio_send;
		break;
	}
	default:
		break;
	}
	return dd->process_dma_send;
}
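
/*
 * Editorial summary of the choice above: SMI always goes by PIO, GSI/UD
 * default to SDMA, and RC/UC use PIO only when the payload fits under both
 * piothreshold (default 256 bytes) and the path MTU, the opcode is one of
 * the "single packet" opcodes in pio_opmask, no SDMA work is still pending
 * for the QP, and the txreq has not already been built for SDMA.
 * Everything else falls through to dd->process_dma_send.
 */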

/**
 * hfi1_verbs_send - send a packet
 * @qp: the QP to send on
 * @ps: the state of the packet to send
 *
 * Return zero if packet is sent or queued OK.
 * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
 */
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_other_headers *ohdr;
	send_routine sr;
	int ret;
	u16 pkey;
	u32 slid;

	/* locate the pkey within the headers */
	if (ps->s_txreq->phdr.hdr.hdr_type) {
		struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah;
		u8 l4 = hfi1_16B_get_l4(hdr);

		if (l4 == OPA_16B_L4_IB_GLOBAL)
			ohdr = &hdr->u.l.oth;
		else
			ohdr = &hdr->u.oth;
		slid = hfi1_16B_get_slid(hdr);
		pkey = hfi1_16B_get_pkey(hdr);
	} else {
		struct ib_header *hdr = &ps->s_txreq->phdr.hdr.ibh;
		u8 lnh = ib_get_lnh(hdr);

		if (lnh == HFI1_LRH_GRH)
			ohdr = &hdr->u.l.oth;
		else
			ohdr = &hdr->u.oth;
		slid = ib_get_slid(hdr);
		pkey = ib_bth_get_pkey(ohdr);
	}

	ps->opcode = ib_bth_get_opcode(ohdr);
	sr = get_send_routine(qp, ps);
	ret = egress_pkey_check(dd->pport, slid, pkey,
				priv->s_sc, qp->s_pkey_index);
	if (unlikely(ret)) {
		/*
		 * The value we are returning here does not get propagated to
		 * the verbs caller. Thus we need to complete the request with
		 * error otherwise the caller could be sitting waiting on the
		 * completion event. Only do this for PIO. SDMA has its own
		 * mechanism for handling the errors. So for SDMA we can just
		 * return.
		 */
		if (sr == dd->process_pio_send) {
			unsigned long flags;

			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
				  __func__);
			spin_lock_irqsave(&qp->s_lock, flags);
			hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
			spin_unlock_irqrestore(&qp->s_lock, flags);
		}
		return -EINVAL;
	}
	if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait))
		return pio_wait(qp,
				ps->s_txreq->psc,
				ps,
				RVT_S_WAIT_PIO_DRAIN);
	return sr(qp, ps, 0);
}

/**
 * hfi1_fill_device_attr - Fill in rvt dev info device attributes.
 * @dd: the device data structure
 */
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
	u32 ver = dd->dc8051_ver;

	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));

	rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
		((u64)(dc8051_ver_min(ver)) << 16) |
		(u64)dc8051_ver_patch(ver);

	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
			IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
			IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
			IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
			IB_DEVICE_MEM_MGT_EXTENSIONS |
			IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
	rdi->dparms.props.page_size_cap = PAGE_SIZE;
	rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
	rdi->dparms.props.vendor_part_id = dd->pcidev->device;
	rdi->dparms.props.hw_ver = dd->minrev;
	rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
	rdi->dparms.props.max_mr_size = U64_MAX;
	rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
	rdi->dparms.props.max_qp = hfi1_max_qps;
	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
	rdi->dparms.props.max_sge = hfi1_max_sges;
	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
	rdi->dparms.props.max_cq = hfi1_max_cqs;
	rdi->dparms.props.max_ah = hfi1_max_ahs;
	rdi->dparms.props.max_cqe = hfi1_max_cqes;
	rdi->dparms.props.max_mr = rdi->lkey_table.max;
	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
	rdi->dparms.props.max_map_per_fmr = 32767;
	rdi->dparms.props.max_pd = hfi1_max_pds;
	rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
	rdi->dparms.props.max_qp_init_rd_atom = 255;
	rdi->dparms.props.max_srq = hfi1_max_srqs;
	rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs;
	rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges;
	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
	rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd);
	rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps;
	rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
	rdi->dparms.props.max_total_mcast_qp_attach =
		rdi->dparms.props.max_mcast_qp_attach *
		rdi->dparms.props.max_mcast_grp;
}

static inline u16 opa_speed_to_ib(u16 in)
{
	u16 out = 0;

	if (in & OPA_LINK_SPEED_25G)
		out |= IB_SPEED_EDR;
	if (in & OPA_LINK_SPEED_12_5G)
		out |= IB_SPEED_FDR;

	return out;
}

/*
 * Convert a single OPA link width (no multiple flags) to an IB value.
 * A zero OPA link width means link down, which means the IB width value
 * is a don't care.
 */
static inline u16 opa_width_to_ib(u16 in)
{
	switch (in) {
	case OPA_LINK_WIDTH_1X:
	/* map 2x and 3x to 1x as they don't exist in IB */
	case OPA_LINK_WIDTH_2X:
	case OPA_LINK_WIDTH_3X:
		return IB_WIDTH_1X;
	default: /* link down or unknown, return our largest width */
	case OPA_LINK_WIDTH_4X:
		return IB_WIDTH_4X;
	}
}
static int query_port(struct rvt_dev_info *rdi, u8 port_num,
		      struct ib_port_attr *props)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
	u32 lid = ppd->lid;

	/* props being zeroed by the caller, avoid zeroing it here */
	props->lid = lid ? lid : 0;
	props->lmc = ppd->lmc;
	/* OPA logical states match IB logical states */
	props->state = driver_lstate(ppd);
	props->phys_state = driver_pstate(ppd);
	props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
	props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
	/* see rate_show() in ib core/sysfs.c */
	props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
	props->max_vl_num = ppd->vls_supported;

	/* Once we are a "first class" citizen and have added the OPA MTUs to
	 * the core we can advertise the larger MTU enum to the ULPs, for now
	 * advertise only 4K.
	 *
	 * Those applications which are either OPA aware or pass the MTU enum
	 * from the Path Records to us will get the new 8k MTU.  Those that
	 * attempt to process the MTU enum may fail in various ways.
	 */
	props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ?
				      4096 : hfi1_max_mtu), IB_MTU_4096);
	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
		mtu_to_enum(ppd->ibmtu, IB_MTU_2048);

	/*
	 * sm_lid of 0xFFFF needs special handling so that it can be
	 * differentiated from a permissive LID of 0xFFFF.  We set the
	 * grh_required flag here so the SA can program the DGID in the
	 * address handle appropriately.
	 */
	if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))
		props->grh_required = true;

	return 0;
}

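/*
 * modify_device - handle IB_DEVICE_MODIFY_SYS_IMAGE_GUID and
 * IB_DEVICE_MODIFY_NODE_DESC; any other modify mask bit is rejected
 * with -EOPNOTSUPP.  The change is propagated to each port's ibport
 * data.
 */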
static int modify_device(struct ib_device *device,
			 int device_modify_mask,
			 struct ib_device_modify *device_modify)
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	unsigned i;
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(device->node_desc, device_modify->node_desc,
		       IB_DEVICE_NODE_DESC_MAX);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_node_desc_chg(ibp);
		}
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
		ib_hfi1_sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_sys_guid_chg(ibp);
		}
	}

	ret = 0;
bail:
	return ret;
}

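/*
 * shut_down_port - rdmavt callback used to administratively disable a
 * port: record an "unknown" link-down reason and drive the link to the
 * default down state.
 */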
static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
	int ret;

	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
			     OPA_LINKDOWN_REASON_UNKNOWN);
	ret = set_link_state(ppd, HLS_DN_DOWNDEF);
	return ret;
}

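/*
 * hfi1_get_guid_be - return the port GUID at @guid_index in network
 * byte order, or -EINVAL if the index is beyond the per-port GUID
 * table.
 */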
static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
			    int guid_index, __be64 *guid)
{
	struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);

	if (guid_index >= HFI1_GUIDS_PER_PORT)
		return -EINVAL;

	*guid = get_sguid(ibp, guid_index);
	return 0;
}

/*
 * convert ah port,sl to sc
 */
u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah)
{
	struct hfi1_ibport *ibp = to_iport(ibdev, rdma_ah_get_port_num(ah));

	return ibp->sl_to_sc[rdma_ah_get_sl(ah)];
}

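/*
 * hfi1_check_ah - validate a new address handle: a multicast DLID
 * requires a GRH, and the SL -> SC -> VL mapping must resolve to a
 * usable VL (or 0xf).
 */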
static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;

	if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) &&
	    !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
		return -EINVAL;

	/* test the mapping for validity */
	ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
	ppd = ppd_from_ibp(ibp);
	sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
	dd = dd_from_ppd(ppd);
	if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
		return -EINVAL;
	return 0;
}

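/*
 * hfi1_notify_new_ah - rdmavt callback invoked while an address handle
 * is being created: normalize the attributes for OPA and cache the VL
 * and log_pmtu derived from the SL -> SC -> VL mapping.
 */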
static void hfi1_notify_new_ah(struct ib_device *ibdev,
			       struct rdma_ah_attr *ah_attr,
			       struct rvt_ah *ah)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;
	struct rdma_ah_attr *attr = &ah->attr;

	/*
	 * Do not trust reading anything from rvt_ah at this point as it is not
	 * done being setup. We can however modify things which we need to set.
	 */
	ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
	ppd = ppd_from_ibp(ibp);
	sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)];
	hfi1_update_ah_attr(ibdev, attr);
	hfi1_make_opa_lid(attr);
	dd = dd_from_ppd(ppd);
	ah->vl = sc_to_vlt(dd, sc5);
	if (ah->vl < num_vls || ah->vl == 15)
		ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
}

/**
 * hfi1_get_npkeys - return the size of the PKEY table for context 0
 * @dd: the hfi1_ib device
 */
unsigned hfi1_get_npkeys(struct hfi1_devdata *dd)
{
	return ARRAY_SIZE(dd->pport[0].pkeys);
}

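/*
 * init_ibport - set up per-port IB state: identity SL<->SC maps, the
 * trap lists and timer, the default GID prefix, the capability masks
 * and the default PMA counter selections.
 */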
static void init_ibport(struct hfi1_pportdata *ppd)
{
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	size_t sz = ARRAY_SIZE(ibp->sl_to_sc);
	int i;

	for (i = 0; i < sz; i++) {
		ibp->sl_to_sc[i] = i;
		ibp->sc_to_sl[i] = i;
	}

	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++)
		INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list);
	setup_timer(&ibp->rvp.trap_timer, hfi1_handle_trap_timer,
		    (unsigned long)ibp);

	spin_lock_init(&ibp->rvp.lock);
	/* Set the prefix to the default value (see ch. 4.1.1) */
	ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
	ibp->rvp.sm_lid = 0;
	/*
	 * Below should only set bits defined in OPA PortInfo.CapabilityMask
	 * and PortInfo.CapabilityMask3
	 */
	ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
		IB_PORT_CAP_MASK_NOTICE_SUP;
	ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported;
	ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;

	RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
	RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}

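/*
 * hfi1_get_dev_fw_str - report the DC8051 firmware version as a
 * "major.minor.patch" string for the IB core firmware attribute.
 */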
static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str)
{
	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
	struct hfi1_ibdev *dev = dev_from_rdi(rdi);
	u32 ver = dd_from_dev(dev)->dc8051_ver;

	snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u", dc8051_ver_maj(ver),
		 dc8051_ver_min(ver), dc8051_ver_patch(ver));
}

static const char * const driver_cntr_names[] = {
	/* must be element 0 */
	"DRIVER_KernIntr",
	"DRIVER_ErrorIntr",
	"DRIVER_Tx_Errs",
	"DRIVER_Rcv_Errs",
	"DRIVER_HW_Errs",
	"DRIVER_NoPIOBufs",
	"DRIVER_CtxtsOpen",
	"DRIVER_RcvLen_Errs",
	"DRIVER_EgrBufFull",
	"DRIVER_EgrHdrFull"
};

static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names buffers */
static const char **dev_cntr_names;
static const char **port_cntr_names;
static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
static int num_dev_cntrs;
static int num_port_cntrs;
static int cntr_names_initialized;

/*
 * Convert a list of names separated by '\n' into an array of NULL terminated
 * strings. Optionally some entries can be reserved in the array to hold extra
 * external strings.
 */
static int init_cntr_names(const char *names_in,
			   const size_t names_len,
			   int num_extra_names,
			   int *num_cntrs,
			   const char ***cntr_names)
{
	char *names_out, *p, **q;
	int i, n;

	n = 0;
	for (i = 0; i < names_len; i++)
		if (names_in[i] == '\n')
			n++;

	names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
			    GFP_KERNEL);
	if (!names_out) {
		*num_cntrs = 0;
		*cntr_names = NULL;
		return -ENOMEM;
	}

	p = names_out + (n + num_extra_names) * sizeof(char *);
	memcpy(p, names_in, names_len);

	q = (char **)names_out;
	for (i = 0; i < n; i++) {
		q[i] = p;
		p = strchr(p, '\n');
		*p++ = '\0';
	}

	*num_cntrs = n;
	*cntr_names = (const char **)names_out;
	return 0;
}

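/*
 * alloc_hw_stats - build the counter name tables on first use (device
 * counters followed by the driver software counters, plus the per-port
 * counters) and allocate an rdma_hw_stats structure for either the
 * device (port_num == 0) or a port.
 */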
static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
					    u8 port_num)
{
	int i, err;

	mutex_lock(&cntr_names_lock);
	if (!cntr_names_initialized) {
		struct hfi1_devdata *dd = dd_from_ibdev(ibdev);

		err = init_cntr_names(dd->cntrnames,
				      dd->cntrnameslen,
				      num_driver_cntrs,
				      &num_dev_cntrs,
				      &dev_cntr_names);
		if (err) {
			mutex_unlock(&cntr_names_lock);
			return NULL;
		}

		for (i = 0; i < num_driver_cntrs; i++)
			dev_cntr_names[num_dev_cntrs + i] =
				driver_cntr_names[i];

		err = init_cntr_names(dd->portcntrnames,
				      dd->portcntrnameslen,
				      0,
				      &num_port_cntrs,
				      &port_cntr_names);
		if (err) {
			kfree(dev_cntr_names);
			dev_cntr_names = NULL;
			mutex_unlock(&cntr_names_lock);
			return NULL;
		}
		cntr_names_initialized = 1;
	}
	mutex_unlock(&cntr_names_lock);

	if (!port_num)
		return rdma_alloc_hw_stats_struct(
				dev_cntr_names,
				num_dev_cntrs + num_driver_cntrs,
				RDMA_HW_STATS_DEFAULT_LIFESPAN);
	else
		return rdma_alloc_hw_stats_struct(
				port_cntr_names,
				num_port_cntrs,
				RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

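/*
 * hfi1_sps_ints - sum the interrupt counters over all hfi1 devices.
 */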
static u64 hfi1_sps_ints(void)
{
	unsigned long flags;
	struct hfi1_devdata *dd;
	u64 sps_ints = 0;

	spin_lock_irqsave(&hfi1_devs_lock, flags);
	list_for_each_entry(dd, &hfi1_dev_list, list) {
		sps_ints += get_all_cpu_total(dd->int_counter);
	}
	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
	return sps_ints;
}

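/*
 * get_hw_stats - copy either the device-level counters (hardware
 * counters followed by the driver software counters) or the per-port
 * counters into @stats and return the number of values copied.
 */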
static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			u8 port, int index)
{
	u64 *values;
	int count;

	if (!port) {
		u64 *stats = (u64 *)&hfi1_stats;
		int i;

		hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values);
		values[num_dev_cntrs] = hfi1_sps_ints();
		for (i = 1; i < num_driver_cntrs; i++)
			values[num_dev_cntrs + i] = stats[i];
		count = num_dev_cntrs + num_driver_cntrs;
	} else {
		struct hfi1_ibport *ibp = to_iport(ibdev, port);

		hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values);
		count = num_port_cntrs;
	}
	memcpy(stats->value, values, count * sizeof(u64));
	return count;
}

/**
 * hfi1_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 * Return 0 if successful, errno if unsuccessful.
 */
int hfi1_register_ib_device(struct hfi1_devdata *dd)
{
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	struct ib_device *ibdev = &dev->rdi.ibdev;
	struct hfi1_pportdata *ppd = dd->pport;
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	unsigned i;
	int ret;
	size_t lcpysz = IB_DEVICE_NAME_MAX;

	for (i = 0; i < dd->num_pports; i++)
		init_ibport(ppd + i);

	/* Only need to initialize non-zero fields. */

	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);

	seqlock_init(&dev->iowait_lock);
	seqlock_init(&dev->txwait_lock);
	INIT_LIST_HEAD(&dev->txwait);
	INIT_LIST_HEAD(&dev->memwait);

	ret = verbs_txreq_init(dev);
	if (ret)
		goto err_verbs_txreq;

	/* Use first-port GUID as node guid */
	ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX);

	/*
	 * The system image GUID is supposed to be the same for all
	 * HFIs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!ib_hfi1_sys_image_guid)
		ib_hfi1_sys_image_guid = ibdev->node_guid;
	lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
	strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
	ibdev->owner = THIS_MODULE;
	ibdev->phys_port_cnt = dd->num_pports;
	ibdev->dev.parent = &dd->pcidev->dev;
	ibdev->modify_device = modify_device;
	ibdev->alloc_hw_stats = alloc_hw_stats;
	ibdev->get_hw_stats = get_hw_stats;
	ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn;

	/* keep process mad in the driver */
	ibdev->process_mad = hfi1_process_mad;
	ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;

	strncpy(ibdev->node_desc, init_utsname()->nodename,
		sizeof(ibdev->node_desc));

	/*
	 * Fill in rvt info object.
	 */
	dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
	dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
	dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
	dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
	dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
	dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be;
	dd->verbs_dev.rdi.driver_f.query_port_state = query_port;
	dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port;
	dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg;
	/*
	 * Fill in rvt info device attributes.
	 */
	hfi1_fill_device_attr(dd);

	/* queue pair */
	dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size;
	dd->verbs_dev.rdi.dparms.qpn_start = 0;
	dd->verbs_dev.rdi.dparms.qpn_inc = 1;
	dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
	dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
	dd->verbs_dev.rdi.dparms.qpn_res_end =
		dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
	dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
	dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
	dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
	dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK;
	dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA |
						RDMA_CORE_CAP_OPA_AH;
	dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;

	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
	dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
	dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
	dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
	dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt;
	dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
	dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
	dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
	dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
	dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters;
	dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue;
	dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp;
	dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
	dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp;
	dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
	dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
	dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
	dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
	dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
	/* completion queue */
	snprintf(dd->verbs_dev.rdi.dparms.cq_name,
		 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
		 "hfi1_cq%d", dd->unit);
	dd->verbs_dev.rdi.dparms.node = dd->node;

	/* misc settings */
	dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */
	dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
	dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
	dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);

	/* post send table */
	dd->verbs_dev.rdi.post_parms = hfi1_post_parms;

	ppd = dd->pport;
	for (i = 0; i < dd->num_pports; i++, ppd++)
		rvt_init_port(&dd->verbs_dev.rdi,
			      &ppd->ibport_data.rvp,
			      i,
			      ppd->pkeys);

	ret = rvt_register_device(&dd->verbs_dev.rdi);
	if (ret)
		goto err_verbs_txreq;

	ret = hfi1_verbs_register_sysfs(dd);
	if (ret)
		goto err_class;

	return ret;

err_class:
	rvt_unregister_device(&dd->verbs_dev.rdi);
err_verbs_txreq:
	verbs_txreq_exit(dev);
	dd_dev_err(dd, "cannot register verbs: %d!\n", -ret);
	return ret;
}

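/*
 * hfi1_unregister_ib_device - undo hfi1_register_ib_device: remove the
 * sysfs files, unregister from rdmavt, tear down the txreq cache and
 * free the counter-name buffers.
 */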
void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
{
	struct hfi1_ibdev *dev = &dd->verbs_dev;

	hfi1_verbs_unregister_sysfs(dd);

	rvt_unregister_device(&dd->verbs_dev.rdi);

	if (!list_empty(&dev->txwait))
		dd_dev_err(dd, "txwait list not empty!\n");
	if (!list_empty(&dev->memwait))
		dd_dev_err(dd, "memwait list not empty!\n");

	del_timer_sync(&dev->mem_timer);
	verbs_txreq_exit(dev);

	mutex_lock(&cntr_names_lock);
	kfree(dev_cntr_names);
	kfree(port_cntr_names);
	dev_cntr_names = NULL;
	port_cntr_names = NULL;
	cntr_names_initialized = 0;
	mutex_unlock(&cntr_names_lock);
}

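/*
 * hfi1_cnp_rcv - handle a received CNP: recover the remote LID/QPN and
 * service type for the affected flow and hand the congestion
 * notification to process_becn(); packets on unexpected QP types are
 * dropped.
 */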
void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct ib_header *hdr = packet->hdr;
	struct rvt_qp *qp = packet->qp;
	u32 lqpn, rqpn = 0;
	u16 rlid = 0;
	u8 sl, sc5, svc_type;

	switch (packet->qp->ibqp.qp_type) {
	case IB_QPT_UC:
		rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
		rqpn = qp->remote_qpn;
		svc_type = IB_CC_SVCTYPE_UC;
		break;
	case IB_QPT_RC:
		rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
		rqpn = qp->remote_qpn;
		svc_type = IB_CC_SVCTYPE_RC;
		break;
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	case IB_QPT_UD:
		svc_type = IB_CC_SVCTYPE_UD;
		break;
	default:
		ibp->rvp.n_pkt_drops++;
		return;
	}

	sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
	sl = ibp->sc_to_sl[sc5];
	lqpn = qp->ibqp.qp_num;

	process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
}