mr.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871
  1. /*
  2. * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  3. *
  4. * This software is available to you under a choice of one of two
  5. * licenses. You may choose to be licensed under the terms of the GNU
  6. * General Public License (GPL) Version 2, available from the file
  7. * COPYING in the main directory of this source tree, or the
  8. * OpenIB.org BSD license below:
  9. *
  10. * Redistribution and use in source and binary forms, with or
  11. * without modification, are permitted provided that the following
  12. * conditions are met:
  13. *
  14. * - Redistributions of source code must retain the above
  15. * copyright notice, this list of conditions and the following
  16. * disclaimer.
  17. *
  18. * - Redistributions in binary form must reproduce the above
  19. * copyright notice, this list of conditions and the following
  20. * disclaimer in the documentation and/or other materials
  21. * provided with the distribution.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30. * SOFTWARE.
  31. */
#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"
/* Cap on outstanding async mkey-creation commands per cache entry. */
enum {
	MAX_PENDING_REG_MR = 8,
};

/* Alignment required for buffers used by UMR translation updates. */
#define MLX5_UMR_ALIGN 2048

/* Forward declarations for helpers defined later in this file. */
static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
/*
 * Destroy the HW mkey backing @mr.  With ODP enabled, additionally wait
 * for all in-flight page-fault handlers that may still dereference the
 * mkey, so the caller can free @mr safely afterwards.
 */
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}
  58. static int order2idx(struct mlx5_ib_dev *dev, int order)
  59. {
  60. struct mlx5_mr_cache *cache = &dev->cache;
  61. if (order < cache->ent[0].order)
  62. return 0;
  63. else
  64. return order - cache->ent[0].order;
  65. }
  66. static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
  67. {
  68. return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
  69. length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
  70. }
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
 * Publish a fully-initialized MR to the ODP invalidation machinery.
 * The two write barriers order MR initialization, the private-pointer
 * store, and the caller's subsequent insertion into the interval tree.
 */
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
	if (mr->umem->odp_data) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR, before reg_umr finished, to ensure that the MR
		 * initialization have finished before starting to
		 * handle invalidations.
		 */
		smp_wmb();
		mr->umem->odp_data->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines, before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line. Without the barrier,
		 * there can be a fault handling and an invalidation
		 * before umem->odp_data->private == mr is visible to
		 * the invalidation handler.
		 */
		smp_wmb();
	}
}
#endif
/*
 * Completion callback for an async mkey creation issued by add_keys().
 * On failure the MR is freed and further cache fills are throttled for
 * one second; on success the final mkey value is assembled, the MR is
 * added to its cache entry and the mkey is published in the device-wide
 * radix tree.
 */
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
	int err;

	/* This command is no longer outstanding against the entry. */
	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		/* Back off: delay further fills until the timer clears it. */
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	mr->mmkey.type = MLX5_MKEY_MR;
	/* Combine the HW mkey index with a rotating per-device variant byte. */
	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;

	cache->last_add = jiffies;

	/* Make the new mkey visible to cache consumers. */
	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	write_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
				&mr->mmkey);
	if (err)
		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
	write_unlock_irqrestore(&table->lock, flags);

	/* Wake a blocked mlx5_mr_cache_alloc() waiter, if any. */
	if (!completion_done(&ent->compl))
		complete(&ent->compl);
}
/*
 * Asynchronously create @num mkeys for cache entry @c.  Each mkey is
 * created "free" and UMR-enabled so it can later be pointed at user
 * memory via a UMR WQE; completions are handled by reg_mr_callback().
 *
 * Returns 0 on success, -EAGAIN when the entry already has
 * MAX_PENDING_REG_MR commands in flight, or another negative errno.
 */
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		/* Bound the number of outstanding async commands per entry. */
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->allocated_from_cache = 1;
		mr->dev = dev;

		MLX5_SET(mkc, mkc, free, 1);
		MLX5_SET(mkc, mkc, umr_en, 1);
		MLX5_SET(mkc, mkc, access_mode, ent->access_mode);

		MLX5_SET(mkc, mkc, qpn, 0xffffff);
		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
		MLX5_SET(mkc, mkc, log_page_size, ent->page);

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
					       in, inlen,
					       mr->out, sizeof(mr->out),
					       reg_mr_callback, mr);
		if (err) {
			/* Command was never issued; undo the accounting. */
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}
/*
 * Remove up to @num MRs from the head of cache entry @c and destroy
 * their mkeys.  Stops early when the entry runs empty.
 */
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		/* destroy_mkey() may sleep, so it runs outside the lock. */
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}
  216. static ssize_t size_write(struct file *filp, const char __user *buf,
  217. size_t count, loff_t *pos)
  218. {
  219. struct mlx5_cache_ent *ent = filp->private_data;
  220. struct mlx5_ib_dev *dev = ent->dev;
  221. char lbuf[20];
  222. u32 var;
  223. int err;
  224. int c;
  225. if (copy_from_user(lbuf, buf, sizeof(lbuf)))
  226. return -EFAULT;
  227. c = order2idx(dev, ent->order);
  228. lbuf[sizeof(lbuf) - 1] = 0;
  229. if (sscanf(lbuf, "%u", &var) != 1)
  230. return -EINVAL;
  231. if (var < ent->limit)
  232. return -EINVAL;
  233. if (var > ent->size) {
  234. do {
  235. err = add_keys(dev, c, var - ent->size);
  236. if (err && err != -EAGAIN)
  237. return err;
  238. usleep_range(3000, 5000);
  239. } while (err);
  240. } else if (var < ent->size) {
  241. remove_keys(dev, c, ent->size - var);
  242. }
  243. return count;
  244. }
  245. static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
  246. loff_t *pos)
  247. {
  248. struct mlx5_cache_ent *ent = filp->private_data;
  249. char lbuf[20];
  250. int err;
  251. if (*pos)
  252. return 0;
  253. err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
  254. if (err < 0)
  255. return err;
  256. if (copy_to_user(buf, lbuf, err))
  257. return -EFAULT;
  258. *pos += err;
  259. return err;
  260. }
/* debugfs operations for the per-entry "size" attribute. */
static const struct file_operations size_fops = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.write = size_write,
	.read = size_read,
};
  267. static ssize_t limit_write(struct file *filp, const char __user *buf,
  268. size_t count, loff_t *pos)
  269. {
  270. struct mlx5_cache_ent *ent = filp->private_data;
  271. struct mlx5_ib_dev *dev = ent->dev;
  272. char lbuf[20];
  273. u32 var;
  274. int err;
  275. int c;
  276. if (copy_from_user(lbuf, buf, sizeof(lbuf)))
  277. return -EFAULT;
  278. c = order2idx(dev, ent->order);
  279. lbuf[sizeof(lbuf) - 1] = 0;
  280. if (sscanf(lbuf, "%u", &var) != 1)
  281. return -EINVAL;
  282. if (var > ent->size)
  283. return -EINVAL;
  284. ent->limit = var;
  285. if (ent->cur < ent->limit) {
  286. err = add_keys(dev, c, 2 * ent->limit - ent->cur);
  287. if (err)
  288. return err;
  289. }
  290. return count;
  291. }
  292. static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
  293. loff_t *pos)
  294. {
  295. struct mlx5_cache_ent *ent = filp->private_data;
  296. char lbuf[20];
  297. int err;
  298. if (*pos)
  299. return 0;
  300. err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
  301. if (err < 0)
  302. return err;
  303. if (copy_to_user(buf, lbuf, err))
  304. return -EFAULT;
  305. *pos += err;
  306. return err;
  307. }
/* debugfs operations for the per-entry "limit" attribute. */
static const struct file_operations limit_fops = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.write = limit_write,
	.read = limit_read,
};
  314. static int someone_adding(struct mlx5_mr_cache *cache)
  315. {
  316. int i;
  317. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  318. if (cache->ent[i].cur < cache->ent[i].limit)
  319. return 1;
  320. }
  321. return 0;
  322. }
/*
 * Core cache maintenance for one entry: refill towards 2 * limit one
 * mkey at a time, or garbage-collect back down when the entry holds
 * more than 2 * limit MRs.
 */
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				/* Entry saturated with pending commands; retry soon. */
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as garbage collection
		 * task. Such task is intended to be run when no other active
		 * processes are running.
		 *
		 * The need_resched() will return TRUE if there are user tasks
		 * to be activated in near future.
		 *
		 * In such case, we don't execute remove_keys() and postpone
		 * the garbage collection work to try to run in next cycle,
		 * in order to free CPU resources to other tasks.
		 */
		if (!need_resched() && !someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}
  372. static void delayed_cache_work_func(struct work_struct *work)
  373. {
  374. struct mlx5_cache_ent *ent;
  375. ent = container_of(work, struct mlx5_cache_ent, dwork.work);
  376. __cache_work_func(ent);
  377. }
  378. static void cache_work_func(struct work_struct *work)
  379. {
  380. struct mlx5_cache_ent *ent;
  381. ent = container_of(work, struct mlx5_cache_ent, work);
  382. __cache_work_func(ent);
  383. }
/*
 * Take an MR from cache entry @entry, sleeping until one becomes
 * available.  Returns NULL for an out-of-range entry, ERR_PTR on a
 * hard add_keys() failure, otherwise the MR.
 */
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	struct mlx5_ib_mr *mr;
	int err;

	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
		return NULL;
	}

	ent = &cache->ent[entry];
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);

			/*
			 * Request one mkey and wait for reg_mr_callback()
			 * to signal that the entry was replenished.
			 */
			err = add_keys(dev, entry, 1);
			if (err && err != -EAGAIN)
				return ERR_PTR(err);

			wait_for_completion(&ent->compl);
		} else {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			/* Refill in the background if we dipped below the mark. */
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			return mr;
		}
	}
}
/*
 * Non-blocking cache lookup: scan from the exact-fit entry for @order
 * up to the largest UMR-cacheable entry, taking the first available MR
 * and kicking refill work on every entry passed.  Returns NULL (and
 * bumps the miss counter of the exact-fit entry) when nothing is free.
 */
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int last_umr_cache_entry;
	int c;
	int i;

	c = order2idx(dev, order);
	last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
	if (c < 0 || c > last_umr_cache_entry) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	/* A larger-order entry can always serve a smaller request. */
	for (i = c; i <= last_umr_cache_entry; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		/* Empty entry: schedule a refill before trying the next one. */
		queue_work(cache->wq, &ent->work);
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}
/*
 * Return @mr to its cache entry.  The mkey is first invalidated via a
 * UMR (unreg_umr()); if that fails the MR is left out of the cache and
 * not freed here.  Schedules shrink work when the entry grows beyond
 * twice its limit.
 */
void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}

	if (unreg_umr(dev, mr))
		return;

	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}
/*
 * Drain cache entry @c completely, destroying every cached mkey.
 * Used during cache teardown; cancels the entry's delayed work first.
 */
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		/* destroy_mkey() may sleep; run it outside the lock. */
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}
  498. static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
  499. {
  500. if (!mlx5_debugfs_root)
  501. return;
  502. debugfs_remove_recursive(dev->cache.root);
  503. dev->cache.root = NULL;
  504. }
/*
 * Create the debugfs hierarchy mr_cache/<order>/{size,limit,cur,miss}.
 * Returns 0 when debugfs is unavailable, -ENOMEM on any creation
 * failure (after removing everything partially created).
 */
static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		/* One directory per order, named after the order itself. */
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			goto err;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			goto err;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			goto err;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			goto err;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			goto err;
	}

	return 0;
err:
	mlx5_mr_cache_debugfs_cleanup(dev);

	return -ENOMEM;
}
  543. static void delay_time_func(struct timer_list *t)
  544. {
  545. struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
  546. dev->fill_delay = 0;
  547. }
/*
 * Initialise the MR cache: one ordered workqueue plus one entry per
 * order starting at 2.  Standard entries cache MTT translation; entries
 * above MR_CACHE_LAST_STD_ENTRY are configured by the ODP code.  Entry
 * limits come from the device profile on PF devices.
 */
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int err;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	timer_setup(&dev->delay_timer, delay_time_func, 0);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;
		ent->limit = 0;

		init_completion(&ent->compl);
		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		/* Kick an initial fill pass for this entry. */
		queue_work(cache->wq, &ent->work);

		if (i > MR_CACHE_LAST_STD_ENTRY) {
			mlx5_odp_init_mr_cache_entry(ent);
			continue;
		}

		if (ent->order > mr_cache_max_order(dev))
			continue;

		ent->page = PAGE_SHIFT;
		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
			   MLX5_IB_UMR_OCTOWORD;
		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
		    mlx5_core_is_pf(dev->mdev))
			ent->limit = dev->mdev->profile->mr_cache[i].limit;
		else
			ent->limit = 0;
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	/*
	 * We don't want to fail driver if debugfs failed to initialize,
	 * so we are not forwarding error to the user.
	 */

	return 0;
}
  597. static void wait_for_async_commands(struct mlx5_ib_dev *dev)
  598. {
  599. struct mlx5_mr_cache *cache = &dev->cache;
  600. struct mlx5_cache_ent *ent;
  601. int total = 0;
  602. int i;
  603. int j;
  604. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  605. ent = &cache->ent[i];
  606. for (j = 0 ; j < 1000; j++) {
  607. if (!ent->pending)
  608. break;
  609. msleep(50);
  610. }
  611. }
  612. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  613. ent = &cache->ent[i];
  614. total += ent->pending;
  615. }
  616. if (total)
  617. mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
  618. else
  619. mlx5_ib_warn(dev, "done with all pending requests\n");
  620. }
/*
 * Tear down the MR cache: stop maintenance work, remove debugfs files,
 * destroy every cached mkey, wait for outstanding async commands and
 * stop the fill-delay timer.  Always returns 0.
 */
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	/* Prevent __cache_work_func() from queueing more work. */
	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	wait_for_async_commands(dev);
	del_timer_sync(&dev->delay_timer);

	return 0;
}
/*
 * Create an MR covering the whole address space (length64) using
 * physical addressing — no translation table.  Used for kernel DMA
 * access with the requested access flags @acc.
 */
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	/* PA access mode: the mkey maps physical addresses directly. */
	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	/* length64 + start 0: the region spans the entire address space. */
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, 0);

	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}
  677. static int get_octo_len(u64 addr, u64 len, int page_shift)
  678. {
  679. u64 page_size = 1ULL << page_shift;
  680. u64 offset;
  681. int npages;
  682. offset = addr & (page_size - 1);
  683. npages = ALIGN(len + offset, page_size) >> page_shift;
  684. return (npages + 1) / 2;
  685. }
  686. static int mr_cache_max_order(struct mlx5_ib_dev *dev)
  687. {
  688. if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
  689. return MR_CACHE_LAST_STD_ENTRY + 2;
  690. return MLX5_MAX_UMR_SHIFT;
  691. }
  692. static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
  693. int access_flags, struct ib_umem **umem,
  694. int *npages, int *page_shift, int *ncont,
  695. int *order)
  696. {
  697. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  698. int err;
  699. *umem = ib_umem_get(pd->uobject->context, start, length,
  700. access_flags, 0);
  701. err = PTR_ERR_OR_ZERO(*umem);
  702. if (err < 0) {
  703. mlx5_ib_err(dev, "umem get failed (%d)\n", err);
  704. return err;
  705. }
  706. mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
  707. page_shift, ncont, order);
  708. if (!*npages) {
  709. mlx5_ib_warn(dev, "avoid zero region\n");
  710. ib_umem_release(*umem);
  711. return -EINVAL;
  712. }
  713. mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
  714. *npages, *ncont, *order, *page_shift);
  715. return 0;
  716. }
  717. static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
  718. {
  719. struct mlx5_ib_umr_context *context =
  720. container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
  721. context->status = wc->status;
  722. complete(&context->done);
  723. }
  724. static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
  725. {
  726. context->cqe.done = mlx5_ib_umr_done;
  727. context->status = -1;
  728. init_completion(&context->done);
  729. }
/*
 * Post a UMR work request on the dedicated UMR QP and wait for its
 * completion.  The semaphore bounds the number of concurrent UMR WQEs.
 * Returns 0 on success, the ib_post_send() error, or -EFAULT when the
 * WR completed with a non-success WC status.
 */
static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
				  struct mlx5_umr_wr *umrwr)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr *bad;
	int err;
	struct mlx5_ib_umr_context umr_context;

	mlx5_ib_init_umr_context(&umr_context);
	umrwr->wr.wr_cqe = &umr_context.cqe;

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
	} else {
		/* Completion is signalled by mlx5_ib_umr_done(). */
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     umr_context.status);
			err = -EFAULT;
		}
	}
	up(&umrc->sem);
	return err;
}
  754. static struct mlx5_ib_mr *alloc_mr_from_cache(
  755. struct ib_pd *pd, struct ib_umem *umem,
  756. u64 virt_addr, u64 len, int npages,
  757. int page_shift, int order, int access_flags)
  758. {
  759. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  760. struct mlx5_ib_mr *mr;
  761. int err = 0;
  762. int i;
  763. for (i = 0; i < 1; i++) {
  764. mr = alloc_cached_mr(dev, order);
  765. if (mr)
  766. break;
  767. err = add_keys(dev, order2idx(dev, order), 1);
  768. if (err && err != -EAGAIN) {
  769. mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
  770. break;
  771. }
  772. }
  773. if (!mr)
  774. return ERR_PTR(-EAGAIN);
  775. mr->ibmr.pd = pd;
  776. mr->umem = umem;
  777. mr->access_flags = access_flags;
  778. mr->desc_size = sizeof(struct mlx5_mtt);
  779. mr->mmkey.iova = virt_addr;
  780. mr->mmkey.size = len;
  781. mr->mmkey.pd = to_mpd(pd)->pdn;
  782. return mr;
  783. }
/*
 * Fill one XLT chunk for @mr starting at descriptor index @idx.
 *
 * For indirect registrations the KLM entries come from the ODP code;
 * otherwise MTT entries are taken from the umem's page list (unless
 * ZAP is requested, in which case the pre-zeroed buffer is left as-is).
 * Returns the number of descriptors actually produced.
 */
static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
			       void *xlt, int page_shift, size_t size,
			       int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct ib_umem *umem = mr->umem;

	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
		mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
		return npages;
	}

	/* Do not run past the end of the umem's page list. */
	npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);

	if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
		__mlx5_ib_populate_pas(dev, umem, page_shift,
				       idx, npages, xlt,
				       MLX5_IB_MTT_PRESENT);
		/* Clear padding after the pages
		 * brought from the umem.
		 */
		memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
		       size - npages * sizeof(struct mlx5_mtt));
	}

	return npages;
}
/* Largest XLT bounce-buffer size handed to a single UMR update, kept a
 * multiple-friendly amount below 1 << (MLX5_MAX_UMR_SHIFT + 4) so the
 * MLX5_UMR_MTT_ALIGNMENT copy granularity is respected. */
#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
			    MLX5_UMR_MTT_ALIGNMENT)
/* Smaller fallback allocation size when the full chunk cannot be
 * allocated (see mlx5_ib_update_xlt()). */
#define MLX5_SPARE_UMR_CHUNK 0x10000
/*
 * Push (part of) an MR's translation table (XLT) to the HCA via UMR
 * work requests.
 *
 * @mr:         MR whose XLT is updated
 * @idx:        first descriptor index to update
 * @npages:     number of descriptors to write
 * @page_shift: log2 of the page size described by each entry
 * @flags:      MLX5_IB_UPD_XLT_* controls (ZAP, ENABLE, INDIRECT, ...)
 *
 * A bounce buffer is allocated, filled by populate_xlt(), DMA-mapped
 * and handed to the UMR QP chunk by chunk.  Returns 0 or a negative
 * errno.
 */
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
		       int page_shift, int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dev.parent;
	struct mlx5_ib_ucontext *uctx = NULL;
	int size;
	void *xlt;
	dma_addr_t dma;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	/* Indirect updates carry KLMs, direct ones carry MTTs. */
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			? sizeof(struct mlx5_klm)
			: sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	const int page_mask = page_align - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	gfp_t gfp;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}

	/* ATOMIC callers (e.g. page-fault context) must not sleep. */
	gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
	gfp |= __GFP_ZERO | __GFP_NOWARN;

	pages_to_map = ALIGN(npages, page_align);
	size = desc_size * pages_to_map;
	size = min_t(int, size, MLX5_MAX_UMR_CHUNK);

	xlt = (void *)__get_free_pages(gfp, get_order(size));
	if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
		/* Big allocation failed: retry with the smaller spare size. */
		mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
			    size, get_order(size), MLX5_SPARE_UMR_CHUNK);
		size = MLX5_SPARE_UMR_CHUNK;
		xlt = (void *)__get_free_pages(gfp, get_order(size));
	}

	if (!xlt) {
		/* Last resort: the per-ucontext emergency page, serialized
		 * by its mutex.  uctx != NULL also selects the unlock (vs.
		 * free_pages) path at free_xlt below. */
		uctx = to_mucontext(mr->ibmr.pd->uobject->context);
		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
		size = PAGE_SIZE;
		xlt = (void *)uctx->upd_xlt_page;
		mutex_lock(&uctx->upd_xlt_page_mutex);
		memset(xlt, 0, size);
	}
	pages_iter = size / desc_size;
	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		err = -ENOMEM;
		goto free_xlt;
	}

	sg.addr = dma;
	sg.lkey = dev->umrc.pd->local_dma_lkey;

	memset(&wr, 0, sizeof(wr));
	wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr.wr.sg_list = &sg;
	wr.wr.num_sge = 1;
	wr.wr.opcode = MLX5_IB_WR_UMR;
	wr.pd = mr->ibmr.pd;
	wr.mkey = mr->mmkey.key;
	wr.length = mr->mmkey.size;
	wr.virt_addr = mr->mmkey.iova;
	wr.access_flags = mr->access_flags;
	wr.page_shift = page_shift;

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		/* Hand the buffer to the CPU, refill it, then hand it back
		 * to the device before posting the WQE. */
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
		npages = populate_xlt(mr, idx, npages, xlt,
				      page_shift, size, flags);
		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		sg.length = ALIGN(npages * desc_size,
				  MLX5_UMR_MTT_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map) {
			/* Last chunk: piggyback the enable / PD-access /
			 * translation updates on the final WQE. */
			if (flags & MLX5_IB_UPD_XLT_ENABLE)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_ENABLE_MR |
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
			if (flags & MLX5_IB_UPD_XLT_PD ||
			    flags & MLX5_IB_UPD_XLT_ACCESS)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
			if (flags & MLX5_IB_UPD_XLT_ADDR)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
		}

		wr.offset = idx * desc_size;
		wr.xlt_size = sg.length;

		err = mlx5_ib_post_send_wait(dev, &wr);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_xlt:
	if (uctx)
		mutex_unlock(&uctx->upd_xlt_page_mutex);
	else
		free_pages((unsigned long)xlt, get_order(size));

	return err;
}
  916. /*
  917. * If ibmr is NULL it will be allocated by reg_create.
  918. * Else, the given ibmr will be used.
  919. */
/*
 * Create an mkey for @umem with a firmware CREATE_MKEY command (the
 * non-UMR "slow path").
 *
 * @ibmr:     existing ib_mr to reuse, or NULL to allocate a new one
 * @populate: when true, the page list is submitted with the command
 *            and the mkey is created enabled; when false the mkey is
 *            created "free" and must be enabled later via UMR
 *
 * Returns the MR or an ERR_PTR.  On failure a caller-supplied @ibmr
 * is left intact; an internally allocated one is freed.
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     u64 virt_addr, u64 length,
				     struct ib_umem *umem, int npages,
				     int page_shift, int access_flags,
				     bool populate)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	int inlen;
	u32 *in;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->ibmr.pd = pd;
	mr->access_flags = access_flags;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	if (populate)
		/* Room for the page list, padded to an even entry count. */
		inlen += sizeof(*pas) * roundup(npages, 2);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	/* ODP MRs get no page list here; their pages arrive via faults. */
	if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
		mlx5_ib_populate_pas(dev, umem, page_shift, pas,
				     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The pg_access bit allows setting the access flags
	 * in the page list submitted with the command. */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	/* Non-populated mkeys start "free" and are enabled later by UMR. */
	MLX5_SET(mkc, mkc, free, !populate);
	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, umr_en, 1);

	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(virt_addr, length, page_shift));
	MLX5_SET(mkc, mkc, log_page_size, page_shift);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	if (populate) {
		MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
			 get_octo_len(virt_addr, length, page_shift));
	}

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->desc_size = sizeof(struct mlx5_mtt);
	mr->dev = dev;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	/* Only free the MR struct if we allocated it here. */
	if (!ibmr)
		kfree(mr);

	return ERR_PTR(err);
}
  993. static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
  994. int npages, u64 length, int access_flags)
  995. {
  996. mr->npages = npages;
  997. atomic_add(npages, &dev->mdev->priv.reg_pages);
  998. mr->ibmr.lkey = mr->mmkey.key;
  999. mr->ibmr.rkey = mr->mmkey.key;
  1000. mr->ibmr.length = length;
  1001. mr->access_flags = access_flags;
  1002. }
/*
 * ib_reg_user_mr verb for mlx5: pin the user address range, take a
 * cached mkey (fast path) or create one via firmware (slow path), and
 * program its translation table via UMR when possible.
 */
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;
	bool use_umr = true;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* start == 0 && length == U64_MAX requests an implicit ODP MR
	 * covering the whole address space — no umem is pinned. */
	if (!start && length == U64_MAX) {
		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
			return ERR_PTR(-EINVAL);

		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
		return &mr->ibmr;
	}
#endif

	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
			  &page_shift, &ncont, &order);

	if (err < 0)
		return ERR_PTR(err);

	if (order <= mr_cache_max_order(dev)) {
		mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
					 page_shift, order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			/* Cache bucket empty: fall through to reg_create. */
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
	} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
		if (access_flags & IB_ACCESS_ON_DEMAND) {
			err = -EINVAL;
			pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
			goto error;
		}
		use_umr = false;
	}

	if (!mr) {
		mutex_lock(&dev->slow_path_mutex);
		/* !use_umr => populate the page list in the command itself. */
		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
				page_shift, access_flags, !use_umr);
		mutex_unlock(&dev->slow_path_mutex);
	}

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	mr->umem = umem;
	set_mr_fileds(dev, mr, npages, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	if (use_umr) {
		/* The mkey is still "free": program its XLT and enable it. */
		int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;

		if (access_flags & IB_ACCESS_ON_DEMAND)
			update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;

		err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
					 update_xlt_flags);
		if (err) {
			dereg_mr(dev, mr);
			return ERR_PTR(err);
		}
	}

	mr->live = 1;
	return &mr->ibmr;
error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}
  1079. static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  1080. {
  1081. struct mlx5_core_dev *mdev = dev->mdev;
  1082. struct mlx5_umr_wr umrwr = {};
  1083. if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
  1084. return 0;
  1085. umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
  1086. MLX5_IB_SEND_UMR_FAIL_IF_FREE;
  1087. umrwr.wr.opcode = MLX5_IB_WR_UMR;
  1088. umrwr.mkey = mr->mmkey.key;
  1089. return mlx5_ib_post_send_wait(dev, &umrwr);
  1090. }
  1091. static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
  1092. int access_flags, int flags)
  1093. {
  1094. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  1095. struct mlx5_umr_wr umrwr = {};
  1096. int err;
  1097. umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
  1098. umrwr.wr.opcode = MLX5_IB_WR_UMR;
  1099. umrwr.mkey = mr->mmkey.key;
  1100. if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
  1101. umrwr.pd = pd;
  1102. umrwr.access_flags = access_flags;
  1103. umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
  1104. }
  1105. err = mlx5_ib_post_send_wait(dev, &umrwr);
  1106. return err;
  1107. }
/*
 * ib_rereg_user_mr verb: change an MR's PD, access flags and/or
 * translation.  Unless only the PD changes, the umem is re-pinned; the
 * mkey is then either updated in place via UMR or torn down and
 * re-created through the firmware slow path.
 */
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
			  u64 length, u64 virt_addr, int new_access_flags,
			  struct ib_pd *new_pd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
	int access_flags = flags & IB_MR_REREG_ACCESS ?
			    new_access_flags :
			    mr->access_flags;
	u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
	u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
	int page_shift = 0;
	int upd_flags = 0;
	int npages = 0;
	int ncont = 0;
	int order = 0;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);

	/* Page accounting is re-added by set_mr_fileds() on success. */
	atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);

	if (flags != IB_MR_REREG_PD) {
		/*
		 * Replace umem. This needs to be done whether or not UMR is
		 * used.
		 */
		flags |= IB_MR_REREG_TRANS;
		ib_umem_release(mr->umem);
		err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
				  &npages, &page_shift, &ncont, &order);
		if (err < 0) {
			clean_mr(dev, mr);
			return err;
		}
	}

	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
		/*
		 * UMR can't be used - MKey needs to be replaced.
		 */
		if (mr->allocated_from_cache) {
			err = unreg_umr(dev, mr);
			if (err)
				mlx5_ib_warn(dev, "Failed to unregister MR\n");
		} else {
			err = destroy_mkey(dev, mr);
			if (err)
				mlx5_ib_warn(dev, "Failed to destroy MKey\n");
		}
		if (err)
			return err;

		/* Reuse the existing ib_mr struct for the new mkey. */
		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
				page_shift, access_flags, true);

		if (IS_ERR(mr))
			return PTR_ERR(mr);

		mr->allocated_from_cache = 0;
		mr->live = 1;
	} else {
		/*
		 * Send a UMR WQE
		 */
		mr->ibmr.pd = pd;
		mr->access_flags = access_flags;
		mr->mmkey.iova = addr;
		mr->mmkey.size = len;
		mr->mmkey.pd = to_mpd(pd)->pdn;

		if (flags & IB_MR_REREG_TRANS) {
			upd_flags = MLX5_IB_UPD_XLT_ADDR;
			if (flags & IB_MR_REREG_PD)
				upd_flags |= MLX5_IB_UPD_XLT_PD;
			if (flags & IB_MR_REREG_ACCESS)
				upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
			err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
						 upd_flags);
		} else {
			err = rereg_umr(pd, mr, access_flags, flags);
		}

		if (err) {
			mlx5_ib_warn(dev, "Failed to rereg UMR\n");
			ib_umem_release(mr->umem);
			clean_mr(dev, mr);
			return err;
		}
	}

	set_mr_fileds(dev, mr, npages, len, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif
	return 0;
}
  1197. static int
  1198. mlx5_alloc_priv_descs(struct ib_device *device,
  1199. struct mlx5_ib_mr *mr,
  1200. int ndescs,
  1201. int desc_size)
  1202. {
  1203. int size = ndescs * desc_size;
  1204. int add_size;
  1205. int ret;
  1206. add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
  1207. mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
  1208. if (!mr->descs_alloc)
  1209. return -ENOMEM;
  1210. mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
  1211. mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
  1212. size, DMA_TO_DEVICE);
  1213. if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
  1214. ret = -ENOMEM;
  1215. goto err;
  1216. }
  1217. return 0;
  1218. err:
  1219. kfree(mr->descs_alloc);
  1220. return ret;
  1221. }
  1222. static void
  1223. mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
  1224. {
  1225. if (mr->descs) {
  1226. struct ib_device *device = mr->ibmr.device;
  1227. int size = mr->max_descs * mr->desc_size;
  1228. dma_unmap_single(device->dev.parent, mr->desc_map,
  1229. size, DMA_TO_DEVICE);
  1230. kfree(mr->descs_alloc);
  1231. mr->descs = NULL;
  1232. }
  1233. }
/*
 * Tear down an MR's HW resources: signature PSVs, private descriptors
 * and the mkey itself.
 *
 * NOTE: when the MR was not taken from the MR cache, the mlx5_ib_mr
 * struct itself is kfreed here — callers must not touch @mr afterwards
 * in that case.  Cached MRs are returned to the cache instead.
 */
static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int allocated_from_cache = mr->allocated_from_cache;
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
		mr->sig = NULL;
	}

	mlx5_free_priv_descs(mr);

	if (!allocated_from_cache) {
		u32 key = mr->mmkey.key;	/* saved: mr is freed below */

		err = destroy_mkey(dev, mr);
		kfree(mr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     key, err);
			return err;
		}
	} else {
		mlx5_mr_cache_free(dev, mr);
	}

	return 0;
}
/*
 * Release all resources of @mr.  For ODP-backed MRs, page-fault
 * handling is quiesced and the page mappings destroyed before the
 * mkey is torn down via clean_mr(); the umem release and page
 * accounting then happen on exactly one of the two paths.
 */
static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	/* Snapshot before clean_mr(), which may kfree(mr). */
	int npages = mr->npages;
	struct ib_umem *umem = mr->umem;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem && umem->odp_data) {
		/* Prevent new page faults from succeeding */
		mr->live = 0;
		/* Wait for all running page-fault handlers to finish. */
		synchronize_srcu(&dev->mr_srcu);
		/* Destroy all page mappings */
		if (umem->odp_data->page_list)
			mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
						 ib_umem_end(umem));
		else
			mlx5_ib_free_implicit_mr(mr);
		/*
		 * We kill the umem before the MR for ODP,
		 * so that there will not be any invalidations in
		 * flight, looking at the *mr struct.
		 */
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);

		/* Avoid double-freeing the umem. */
		umem = NULL;
	}
#endif

	clean_mr(dev, mr);

	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	return 0;
}
  1299. int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
  1300. {
  1301. struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
  1302. struct mlx5_ib_mr *mr = to_mmr(ibmr);
  1303. return dereg_mr(dev, mr);
  1304. }
/*
 * ib_alloc_mr verb: allocate a fast-registration MR of the given type
 * (MEM_REG -> MTT descriptors, SG_GAPS -> KLMs, SIGNATURE -> KLMs plus
 * a memory/wire PSV pair).  The mkey is created "free" and enabled
 * later by a registration work request.
 */
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
			       enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	int ndescs = ALIGN(max_num_sg, 4);	/* descriptors in groups of 4 */
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);

	if (mr_type == IB_MR_TYPE_MEM_REG) {
		mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(struct mlx5_mtt));
		if (err)
			goto err_free_in;

		mr->desc_size = sizeof(struct mlx5_mtt);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;

		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(struct mlx5_klm));
		if (err)
			goto err_free_in;
		mr->desc_size = sizeof(struct mlx5_klm);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
		u32 psv_index[2];

		MLX5_SET(mkc, mkc, bsf_en, 1);
		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	} else {
		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
		err = -EINVAL;
		goto err_free_in;
	}

	MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
	MLX5_SET(mkc, mkc, umr_en, 1);

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_destroy_psv;

	mr->mmkey.type = MLX5_MKEY_MR;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	kfree(in);

	return &mr->ibmr;

	/* Error unwinding: each label also falls through to the ones
	 * below it, releasing progressively earlier allocations. */
err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
	mlx5_free_priv_descs(mr);
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}
/*
 * ib_alloc_mw verb: create a type-1 or type-2 memory window backed by
 * a KLM-mode mkey.  The request/response structs are exchanged through
 * udata; any input beyond the known request struct must be zeroed.
 */
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
			       struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mw *mw = NULL;
	u32 *in = NULL;
	void *mkc;
	int ndescs;
	int err;
	struct mlx5_ib_alloc_mw req = {};
	struct {
		__u32	comp_mask;
		__u32	response_length;
	} resp = {};

	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
	if (err)
		return ERR_PTR(err);

	if (req.comp_mask || req.reserved1 || req.reserved2)
		return ERR_PTR(-EOPNOTSUPP);

	/* Reject requests from userspace ABIs newer than we understand
	 * unless the extra input is all-zero. */
	if (udata->inlen > sizeof(req) &&
	    !ib_is_udata_cleared(udata, sizeof(req),
				 udata->inlen - sizeof(req)))
		return ERR_PTR(-EOPNOTSUPP);

	/* At least one KLM slot, rounded up to groups of 4. */
	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);

	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!mw || !in) {
		err = -ENOMEM;
		goto free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
	/* Type-2 windows support remote invalidation. */
	MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
	if (err)
		goto free;

	mw->mmkey.type = MLX5_MKEY_MW;
	mw->ibmw.rkey = mw->mmkey.key;
	mw->ndescs = ndescs;

	resp.response_length = min(offsetof(typeof(resp), response_length) +
				   sizeof(resp.response_length), udata->outlen);
	if (resp.response_length) {
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err) {
			mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
			goto free;
		}
	}

	kfree(in);
	return &mw->ibmw;

free:
	kfree(mw);
	kfree(in);
	return ERR_PTR(err);
}
  1465. int mlx5_ib_dealloc_mw(struct ib_mw *mw)
  1466. {
  1467. struct mlx5_ib_mw *mmw = to_mmw(mw);
  1468. int err;
  1469. err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
  1470. &mmw->mmkey);
  1471. if (!err)
  1472. kfree(mmw);
  1473. return err;
  1474. }
/*
 * ib_check_mr_status verb: report (and clear) the signature-error
 * state of a signature-enabled MR.  Only IB_MR_CHECK_SIG_STATUS is
 * supported in @check_mask.
 */
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		/* Copy the stored error item only when its key matches this
		 * MR; otherwise synthesize a bad-guard error for it. */
		if (ibmr->lkey == mmr->sig->err_item.key)
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}
/*
 * Translate a scatterlist into KLM descriptors for an SG_GAPS MR.
 * Returns the number of scatterlist entries consumed, which may be
 * fewer than @sg_nents when the descriptor array fills up.
 *
 * NOTE(review): mr->ndescs is set to sg_nents before the loop, so it
 * can exceed mr->max_descs when the list is longer than the descriptor
 * array — confirm callers treat a short return value as a failure.
 */
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
		   struct scatterlist *sgl,
		   unsigned short sg_nents,
		   unsigned int *sg_offset_p)
{
	struct scatterlist *sg = sgl;
	struct mlx5_klm *klms = mr->descs;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	u32 lkey = mr->ibmr.pd->local_dma_lkey;
	int i;

	/* The caller-supplied offset applies only to the first entry. */
	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
	mr->ibmr.length = 0;
	mr->ndescs = sg_nents;

	for_each_sg(sgl, sg, sg_nents, i) {
		if (unlikely(i >= mr->max_descs))
			break;
		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
		klms[i].key = cpu_to_be32(lkey);
		mr->ibmr.length += sg_dma_len(sg) - sg_offset;

		sg_offset = 0;
	}

	if (sg_offset_p)
		*sg_offset_p = sg_offset;

	return i;
}
  1536. static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
  1537. {
  1538. struct mlx5_ib_mr *mr = to_mmr(ibmr);
  1539. __be64 *descs;
  1540. if (unlikely(mr->ndescs == mr->max_descs))
  1541. return -ENOMEM;
  1542. descs = mr->descs;
  1543. descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
  1544. return 0;
  1545. }
/*
 * ib_map_mr_sg verb: write a scatterlist into the MR's private
 * descriptor array (KLMs for SG_GAPS MRs, packed pages otherwise)
 * ahead of a fast-registration work request.  Returns the number of
 * entries mapped.
 */
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	/* Give the CPU ownership of the descriptor buffer while it is
	 * rewritten, then hand it back to the device. */
	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
	else
		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
				   mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}