mr.c

  1. /*
  2. * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  3. *
  4. * This software is available to you under a choice of one of two
  5. * licenses. You may choose to be licensed under the terms of the GNU
  6. * General Public License (GPL) Version 2, available from the file
  7. * COPYING in the main directory of this source tree, or the
  8. * OpenIB.org BSD license below:
  9. *
  10. * Redistribution and use in source and binary forms, with or
  11. * without modification, are permitted provided that the following
  12. * conditions are met:
  13. *
  14. * - Redistributions of source code must retain the above
  15. * copyright notice, this list of conditions and the following
  16. * disclaimer.
  17. *
  18. * - Redistributions in binary form must reproduce the above
  19. * copyright notice, this list of conditions and the following
  20. * disclaimer in the documentation and/or other materials
  21. * provided with the distribution.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30. * SOFTWARE.
  31. */
  32. #include <linux/kref.h>
  33. #include <linux/random.h>
  34. #include <linux/debugfs.h>
  35. #include <linux/export.h>
  36. #include <linux/delay.h>
  37. #include <rdma/ib_umem.h>
  38. #include <rdma/ib_umem_odp.h>
  39. #include <rdma/ib_verbs.h>
  40. #include "mlx5_ib.h"
  41. enum {
  42. MAX_PENDING_REG_MR = 8,
  43. };
  44. #define MLX5_UMR_ALIGN 2048
  45. static int clean_mr(struct mlx5_ib_mr *mr);
  46. static int mr_cache_max_order(struct mlx5_ib_dev *dev);
  47. static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
  48. static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  49. {
  50. int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
  51. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  52. /* Wait until all page fault handlers using the mr complete. */
  53. synchronize_srcu(&dev->mr_srcu);
  54. #endif
  55. return err;
  56. }
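/*
 * Map an MR order to an index in the MR cache. Entries are created with
 * ent->order = i + 2 (see mlx5_mr_cache_init() below), so ent[0].order is 2
 * and order N maps to index N - 2; orders below 2 clamp to index 0.
 * For example, order 5 lands in cache entry 3.
 */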
  57. static int order2idx(struct mlx5_ib_dev *dev, int order)
  58. {
  59. struct mlx5_mr_cache *cache = &dev->cache;
  60. if (order < cache->ent[0].order)
  61. return 0;
  62. else
  63. return order - cache->ent[0].order;
  64. }
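/*
 * An existing cache mkey spans (1 << mr->order) adapter pages. A UMR MTT
 * update can reuse it only if the new range, i.e. length plus the offset of
 * start within an adapter page, still fits in that span; otherwise the mkey
 * has to be replaced (see mlx5_ib_rereg_user_mr()).
 */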
  65. static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
  66. {
  67. return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
  68. length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
  69. }
  70. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  71. static void update_odp_mr(struct mlx5_ib_mr *mr)
  72. {
  73. if (mr->umem->odp_data) {
  74. /*
  75. * This barrier prevents the compiler from moving the
  76. * setting of umem->odp_data->private to point to our
  77. * MR before reg_umr has finished, to ensure that the MR
  78. * initialization has finished before we start to
  79. * handle invalidations.
  80. */
  81. smp_wmb();
  82. mr->umem->odp_data->private = mr;
  83. /*
  84. * Make sure we will see the new
  85. * umem->odp_data->private value in the invalidation
  86. * routines before we can get page faults on the
  87. * MR. Page faults can happen once we put the MR in
  88. * the tree, below this line. Without the barrier,
  89. * a page fault could be handled and an invalidation
  90. * could run before umem->odp_data->private == mr is
  91. * visible to the invalidation handler.
  92. */
  93. smp_wmb();
  94. }
  95. }
  96. #endif
  97. static void reg_mr_callback(int status, void *context)
  98. {
  99. struct mlx5_ib_mr *mr = context;
  100. struct mlx5_ib_dev *dev = mr->dev;
  101. struct mlx5_mr_cache *cache = &dev->cache;
  102. int c = order2idx(dev, mr->order);
  103. struct mlx5_cache_ent *ent = &cache->ent[c];
  104. u8 key;
  105. unsigned long flags;
  106. struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
  107. int err;
  108. spin_lock_irqsave(&ent->lock, flags);
  109. ent->pending--;
  110. spin_unlock_irqrestore(&ent->lock, flags);
  111. if (status) {
  112. mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
  113. kfree(mr);
  114. dev->fill_delay = 1;
  115. mod_timer(&dev->delay_timer, jiffies + HZ);
  116. return;
  117. }
  118. mr->mmkey.type = MLX5_MKEY_MR;
  119. spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
  120. key = dev->mdev->priv.mkey_key++;
  121. spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
  122. mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
  123. cache->last_add = jiffies;
  124. spin_lock_irqsave(&ent->lock, flags);
  125. list_add_tail(&mr->list, &ent->head);
  126. ent->cur++;
  127. ent->size++;
  128. spin_unlock_irqrestore(&ent->lock, flags);
  129. write_lock_irqsave(&table->lock, flags);
  130. err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
  131. &mr->mmkey);
  132. if (err)
  133. pr_err("Error inserting to mkey tree. 0x%x\n", -err);
  134. write_unlock_irqrestore(&table->lock, flags);
  135. if (!completion_done(&ent->compl))
  136. complete(&ent->compl);
  137. }
  138. static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
  139. {
  140. struct mlx5_mr_cache *cache = &dev->cache;
  141. struct mlx5_cache_ent *ent = &cache->ent[c];
  142. int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
  143. struct mlx5_ib_mr *mr;
  144. void *mkc;
  145. u32 *in;
  146. int err = 0;
  147. int i;
  148. in = kzalloc(inlen, GFP_KERNEL);
  149. if (!in)
  150. return -ENOMEM;
  151. mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
  152. for (i = 0; i < num; i++) {
  153. if (ent->pending >= MAX_PENDING_REG_MR) {
  154. err = -EAGAIN;
  155. break;
  156. }
  157. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  158. if (!mr) {
  159. err = -ENOMEM;
  160. break;
  161. }
  162. mr->order = ent->order;
  163. mr->allocated_from_cache = 1;
  164. mr->dev = dev;
  165. MLX5_SET(mkc, mkc, free, 1);
  166. MLX5_SET(mkc, mkc, umr_en, 1);
  167. MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
  168. MLX5_SET(mkc, mkc, qpn, 0xffffff);
  169. MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
  170. MLX5_SET(mkc, mkc, log_page_size, ent->page);
  171. spin_lock_irq(&ent->lock);
  172. ent->pending++;
  173. spin_unlock_irq(&ent->lock);
  174. err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
  175. in, inlen,
  176. mr->out, sizeof(mr->out),
  177. reg_mr_callback, mr);
  178. if (err) {
  179. spin_lock_irq(&ent->lock);
  180. ent->pending--;
  181. spin_unlock_irq(&ent->lock);
  182. mlx5_ib_warn(dev, "create mkey failed %d\n", err);
  183. kfree(mr);
  184. break;
  185. }
  186. }
  187. kfree(in);
  188. return err;
  189. }
  190. static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
  191. {
  192. struct mlx5_mr_cache *cache = &dev->cache;
  193. struct mlx5_cache_ent *ent = &cache->ent[c];
  194. struct mlx5_ib_mr *mr;
  195. int err;
  196. int i;
  197. for (i = 0; i < num; i++) {
  198. spin_lock_irq(&ent->lock);
  199. if (list_empty(&ent->head)) {
  200. spin_unlock_irq(&ent->lock);
  201. return;
  202. }
  203. mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
  204. list_del(&mr->list);
  205. ent->cur--;
  206. ent->size--;
  207. spin_unlock_irq(&ent->lock);
  208. err = destroy_mkey(dev, mr);
  209. if (err)
  210. mlx5_ib_warn(dev, "failed destroy mkey\n");
  211. else
  212. kfree(mr);
  213. }
  214. }
  215. static ssize_t size_write(struct file *filp, const char __user *buf,
  216. size_t count, loff_t *pos)
  217. {
  218. struct mlx5_cache_ent *ent = filp->private_data;
  219. struct mlx5_ib_dev *dev = ent->dev;
  220. char lbuf[20];
  221. u32 var;
  222. int err;
  223. int c;
  224. if (copy_from_user(lbuf, buf, sizeof(lbuf)))
  225. return -EFAULT;
  226. c = order2idx(dev, ent->order);
  227. lbuf[sizeof(lbuf) - 1] = 0;
  228. if (sscanf(lbuf, "%u", &var) != 1)
  229. return -EINVAL;
  230. if (var < ent->limit)
  231. return -EINVAL;
  232. if (var > ent->size) {
  233. do {
  234. err = add_keys(dev, c, var - ent->size);
  235. if (err && err != -EAGAIN)
  236. return err;
  237. usleep_range(3000, 5000);
  238. } while (err);
  239. } else if (var < ent->size) {
  240. remove_keys(dev, c, ent->size - var);
  241. }
  242. return count;
  243. }
  244. static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
  245. loff_t *pos)
  246. {
  247. struct mlx5_cache_ent *ent = filp->private_data;
  248. char lbuf[20];
  249. int err;
  250. if (*pos)
  251. return 0;
  252. err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
  253. if (err < 0)
  254. return err;
  255. if (copy_to_user(buf, lbuf, err))
  256. return -EFAULT;
  257. *pos += err;
  258. return err;
  259. }
  260. static const struct file_operations size_fops = {
  261. .owner = THIS_MODULE,
  262. .open = simple_open,
  263. .write = size_write,
  264. .read = size_read,
  265. };
  266. static ssize_t limit_write(struct file *filp, const char __user *buf,
  267. size_t count, loff_t *pos)
  268. {
  269. struct mlx5_cache_ent *ent = filp->private_data;
  270. struct mlx5_ib_dev *dev = ent->dev;
  271. char lbuf[20];
  272. u32 var;
  273. int err;
  274. int c;
  275. if (copy_from_user(lbuf, buf, sizeof(lbuf)))
  276. return -EFAULT;
  277. c = order2idx(dev, ent->order);
  278. lbuf[sizeof(lbuf) - 1] = 0;
  279. if (sscanf(lbuf, "%u", &var) != 1)
  280. return -EINVAL;
  281. if (var > ent->size)
  282. return -EINVAL;
  283. ent->limit = var;
  284. if (ent->cur < ent->limit) {
  285. err = add_keys(dev, c, 2 * ent->limit - ent->cur);
  286. if (err)
  287. return err;
  288. }
  289. return count;
  290. }
  291. static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
  292. loff_t *pos)
  293. {
  294. struct mlx5_cache_ent *ent = filp->private_data;
  295. char lbuf[20];
  296. int err;
  297. if (*pos)
  298. return 0;
  299. err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
  300. if (err < 0)
  301. return err;
  302. if (copy_to_user(buf, lbuf, err))
  303. return -EFAULT;
  304. *pos += err;
  305. return err;
  306. }
  307. static const struct file_operations limit_fops = {
  308. .owner = THIS_MODULE,
  309. .open = simple_open,
  310. .write = limit_write,
  311. .read = limit_read,
  312. };
  313. static int someone_adding(struct mlx5_mr_cache *cache)
  314. {
  315. int i;
  316. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  317. if (cache->ent[i].cur < cache->ent[i].limit)
  318. return 1;
  319. }
  320. return 0;
  321. }
  322. static void __cache_work_func(struct mlx5_cache_ent *ent)
  323. {
  324. struct mlx5_ib_dev *dev = ent->dev;
  325. struct mlx5_mr_cache *cache = &dev->cache;
  326. int i = order2idx(dev, ent->order);
  327. int err;
  328. if (cache->stopped)
  329. return;
  330. ent = &dev->cache.ent[i];
  331. if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
  332. err = add_keys(dev, i, 1);
  333. if (ent->cur < 2 * ent->limit) {
  334. if (err == -EAGAIN) {
  335. mlx5_ib_dbg(dev, "returned eagain, order %d\n",
  336. i + 2);
  337. queue_delayed_work(cache->wq, &ent->dwork,
  338. msecs_to_jiffies(3));
  339. } else if (err) {
  340. mlx5_ib_warn(dev, "command failed order %d, err %d\n",
  341. i + 2, err);
  342. queue_delayed_work(cache->wq, &ent->dwork,
  343. msecs_to_jiffies(1000));
  344. } else {
  345. queue_work(cache->wq, &ent->work);
  346. }
  347. }
  348. } else if (ent->cur > 2 * ent->limit) {
  349. /*
  350. * The remove_keys() logic is performed as a garbage collection
  351. * task. Such a task is intended to run only when no other
  352. * active processes are running.
  353. *
  354. * need_resched() returns true if there are user tasks to be
  355. * activated in the near future.
  356. *
  357. * In that case, we don't execute remove_keys() and instead
  358. * postpone the garbage collection work to the next cycle,
  359. * in order to free CPU resources for other tasks.
  360. */
  361. if (!need_resched() && !someone_adding(cache) &&
  362. time_after(jiffies, cache->last_add + 300 * HZ)) {
  363. remove_keys(dev, i, 1);
  364. if (ent->cur > ent->limit)
  365. queue_work(cache->wq, &ent->work);
  366. } else {
  367. queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
  368. }
  369. }
  370. }
  371. static void delayed_cache_work_func(struct work_struct *work)
  372. {
  373. struct mlx5_cache_ent *ent;
  374. ent = container_of(work, struct mlx5_cache_ent, dwork.work);
  375. __cache_work_func(ent);
  376. }
  377. static void cache_work_func(struct work_struct *work)
  378. {
  379. struct mlx5_cache_ent *ent;
  380. ent = container_of(work, struct mlx5_cache_ent, work);
  381. __cache_work_func(ent);
  382. }
  383. struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
  384. {
  385. struct mlx5_mr_cache *cache = &dev->cache;
  386. struct mlx5_cache_ent *ent;
  387. struct mlx5_ib_mr *mr;
  388. int err;
  389. if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
  390. mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
  391. return NULL;
  392. }
  393. ent = &cache->ent[entry];
  394. while (1) {
  395. spin_lock_irq(&ent->lock);
  396. if (list_empty(&ent->head)) {
  397. spin_unlock_irq(&ent->lock);
  398. err = add_keys(dev, entry, 1);
  399. if (err && err != -EAGAIN)
  400. return ERR_PTR(err);
  401. wait_for_completion(&ent->compl);
  402. } else {
  403. mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
  404. list);
  405. list_del(&mr->list);
  406. ent->cur--;
  407. spin_unlock_irq(&ent->lock);
  408. if (ent->cur < ent->limit)
  409. queue_work(cache->wq, &ent->work);
  410. return mr;
  411. }
  412. }
  413. }
  414. static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
  415. {
  416. struct mlx5_mr_cache *cache = &dev->cache;
  417. struct mlx5_ib_mr *mr = NULL;
  418. struct mlx5_cache_ent *ent;
  419. int last_umr_cache_entry;
  420. int c;
  421. int i;
  422. c = order2idx(dev, order);
  423. last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
  424. if (c < 0 || c > last_umr_cache_entry) {
  425. mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
  426. return NULL;
  427. }
  428. for (i = c; i <= last_umr_cache_entry; i++) {
  429. ent = &cache->ent[i];
  430. mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
  431. spin_lock_irq(&ent->lock);
  432. if (!list_empty(&ent->head)) {
  433. mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
  434. list);
  435. list_del(&mr->list);
  436. ent->cur--;
  437. spin_unlock_irq(&ent->lock);
  438. if (ent->cur < ent->limit)
  439. queue_work(cache->wq, &ent->work);
  440. break;
  441. }
  442. spin_unlock_irq(&ent->lock);
  443. queue_work(cache->wq, &ent->work);
  444. }
  445. if (!mr)
  446. cache->ent[c].miss++;
  447. return mr;
  448. }
  449. void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  450. {
  451. struct mlx5_mr_cache *cache = &dev->cache;
  452. struct mlx5_cache_ent *ent;
  453. int shrink = 0;
  454. int c;
  455. c = order2idx(dev, mr->order);
  456. if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
  457. mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
  458. return;
  459. }
  460. if (unreg_umr(dev, mr))
  461. return;
  462. ent = &cache->ent[c];
  463. spin_lock_irq(&ent->lock);
  464. list_add_tail(&mr->list, &ent->head);
  465. ent->cur++;
  466. if (ent->cur > 2 * ent->limit)
  467. shrink = 1;
  468. spin_unlock_irq(&ent->lock);
  469. if (shrink)
  470. queue_work(cache->wq, &ent->work);
  471. }
  472. static void clean_keys(struct mlx5_ib_dev *dev, int c)
  473. {
  474. struct mlx5_mr_cache *cache = &dev->cache;
  475. struct mlx5_cache_ent *ent = &cache->ent[c];
  476. struct mlx5_ib_mr *mr;
  477. int err;
  478. cancel_delayed_work(&ent->dwork);
  479. while (1) {
  480. spin_lock_irq(&ent->lock);
  481. if (list_empty(&ent->head)) {
  482. spin_unlock_irq(&ent->lock);
  483. return;
  484. }
  485. mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
  486. list_del(&mr->list);
  487. ent->cur--;
  488. ent->size--;
  489. spin_unlock_irq(&ent->lock);
  490. err = destroy_mkey(dev, mr);
  491. if (err)
  492. mlx5_ib_warn(dev, "failed destroy mkey\n");
  493. else
  494. kfree(mr);
  495. }
  496. }
  497. static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
  498. {
  499. if (!mlx5_debugfs_root)
  500. return;
  501. debugfs_remove_recursive(dev->cache.root);
  502. dev->cache.root = NULL;
  503. }
  504. static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
  505. {
  506. struct mlx5_mr_cache *cache = &dev->cache;
  507. struct mlx5_cache_ent *ent;
  508. int i;
  509. if (!mlx5_debugfs_root)
  510. return 0;
  511. cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
  512. if (!cache->root)
  513. return -ENOMEM;
  514. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  515. ent = &cache->ent[i];
  516. sprintf(ent->name, "%d", ent->order);
  517. ent->dir = debugfs_create_dir(ent->name, cache->root);
  518. if (!ent->dir)
  519. goto err;
  520. ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
  521. &size_fops);
  522. if (!ent->fsize)
  523. goto err;
  524. ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
  525. &limit_fops);
  526. if (!ent->flimit)
  527. goto err;
  528. ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
  529. &ent->cur);
  530. if (!ent->fcur)
  531. goto err;
  532. ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
  533. &ent->miss);
  534. if (!ent->fmiss)
  535. goto err;
  536. }
  537. return 0;
  538. err:
  539. mlx5_mr_cache_debugfs_cleanup(dev);
  540. return -ENOMEM;
  541. }
  542. static void delay_time_func(unsigned long ctx)
  543. {
  544. struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
  545. dev->fill_delay = 0;
  546. }
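/*
 * The MR cache holds MAX_MR_CACHE_ENTRIES buckets; entry i serves MRs of
 * order i + 2. Entries beyond MR_CACHE_LAST_STD_ENTRY are set up for ODP via
 * mlx5_odp_init_mr_cache_entry(), and standard entries whose order exceeds
 * mr_cache_max_order() are not pre-populated (their limit stays 0). Each
 * entry is refilled asynchronously by its work item up to twice its limit.
 */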
  547. int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
  548. {
  549. struct mlx5_mr_cache *cache = &dev->cache;
  550. struct mlx5_cache_ent *ent;
  551. int err;
  552. int i;
  553. mutex_init(&dev->slow_path_mutex);
  554. cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
  555. if (!cache->wq) {
  556. mlx5_ib_warn(dev, "failed to create work queue\n");
  557. return -ENOMEM;
  558. }
  559. setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
  560. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  561. ent = &cache->ent[i];
  562. INIT_LIST_HEAD(&ent->head);
  563. spin_lock_init(&ent->lock);
  564. ent->order = i + 2;
  565. ent->dev = dev;
  566. ent->limit = 0;
  567. init_completion(&ent->compl);
  568. INIT_WORK(&ent->work, cache_work_func);
  569. INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
  570. queue_work(cache->wq, &ent->work);
  571. if (i > MR_CACHE_LAST_STD_ENTRY) {
  572. mlx5_odp_init_mr_cache_entry(ent);
  573. continue;
  574. }
  575. if (ent->order > mr_cache_max_order(dev))
  576. continue;
  577. ent->page = PAGE_SHIFT;
  578. ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
  579. MLX5_IB_UMR_OCTOWORD;
  580. ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
  581. if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
  582. mlx5_core_is_pf(dev->mdev))
  583. ent->limit = dev->mdev->profile->mr_cache[i].limit;
  584. else
  585. ent->limit = 0;
  586. }
  587. err = mlx5_mr_cache_debugfs_init(dev);
  588. if (err)
  589. mlx5_ib_warn(dev, "cache debugfs failure\n");
  590. /*
  591. * We don't want to fail the driver if debugfs failed to
  592. * initialize, so we do not forward the error to the user.
  593. */
  594. return 0;
  595. }
  596. static void wait_for_async_commands(struct mlx5_ib_dev *dev)
  597. {
  598. struct mlx5_mr_cache *cache = &dev->cache;
  599. struct mlx5_cache_ent *ent;
  600. int total = 0;
  601. int i;
  602. int j;
  603. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  604. ent = &cache->ent[i];
  605. for (j = 0 ; j < 1000; j++) {
  606. if (!ent->pending)
  607. break;
  608. msleep(50);
  609. }
  610. }
  611. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  612. ent = &cache->ent[i];
  613. total += ent->pending;
  614. }
  615. if (total)
  616. mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
  617. else
  618. mlx5_ib_warn(dev, "done with all pending requests\n");
  619. }
  620. int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
  621. {
  622. int i;
  623. dev->cache.stopped = 1;
  624. flush_workqueue(dev->cache.wq);
  625. mlx5_mr_cache_debugfs_cleanup(dev);
  626. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
  627. clean_keys(dev, i);
  628. destroy_workqueue(dev->cache.wq);
  629. wait_for_async_commands(dev);
  630. del_timer_sync(&dev->delay_timer);
  631. return 0;
  632. }
  633. struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
  634. {
  635. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  636. int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
  637. struct mlx5_core_dev *mdev = dev->mdev;
  638. struct mlx5_ib_mr *mr;
  639. void *mkc;
  640. u32 *in;
  641. int err;
  642. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  643. if (!mr)
  644. return ERR_PTR(-ENOMEM);
  645. in = kzalloc(inlen, GFP_KERNEL);
  646. if (!in) {
  647. err = -ENOMEM;
  648. goto err_free;
  649. }
  650. mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
  651. MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
  652. MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
  653. MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
  654. MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
  655. MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
  656. MLX5_SET(mkc, mkc, lr, 1);
  657. MLX5_SET(mkc, mkc, length64, 1);
  658. MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
  659. MLX5_SET(mkc, mkc, qpn, 0xffffff);
  660. MLX5_SET64(mkc, mkc, start_addr, 0);
  661. err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
  662. if (err)
  663. goto err_in;
  664. kfree(in);
  665. mr->mmkey.type = MLX5_MKEY_MR;
  666. mr->ibmr.lkey = mr->mmkey.key;
  667. mr->ibmr.rkey = mr->mmkey.key;
  668. mr->umem = NULL;
  669. return &mr->ibmr;
  670. err_in:
  671. kfree(in);
  672. err_free:
  673. kfree(mr);
  674. return ERR_PTR(err);
  675. }
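/*
 * Number of translation octowords needed to cover [addr, addr + len):
 * npages is rounded up to account for the leading offset within a page,
 * and each 16-byte octoword (MLX5_IB_UMR_OCTOWORD) holds two MTT entries,
 * hence (npages + 1) / 2. For example, a length of exactly 5 pages starting
 * at a non-zero page offset spans 6 pages and therefore needs 3 octowords.
 */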
  676. static int get_octo_len(u64 addr, u64 len, int page_size)
  677. {
  678. u64 offset;
  679. int npages;
  680. offset = addr & (page_size - 1);
  681. npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
  682. return (npages + 1) / 2;
  683. }
  684. static int mr_cache_max_order(struct mlx5_ib_dev *dev)
  685. {
  686. if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
  687. return MR_CACHE_LAST_STD_ENTRY + 2;
  688. return MLX5_MAX_UMR_SHIFT;
  689. }
  690. static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
  691. int access_flags, struct ib_umem **umem,
  692. int *npages, int *page_shift, int *ncont,
  693. int *order)
  694. {
  695. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  696. int err;
  697. *umem = ib_umem_get(pd->uobject->context, start, length,
  698. access_flags, 0);
  699. err = PTR_ERR_OR_ZERO(*umem);
  700. if (err < 0) {
  701. mlx5_ib_err(dev, "umem get failed (%d)\n", err);
  702. return err;
  703. }
  704. mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
  705. page_shift, ncont, order);
  706. if (!*npages) {
  707. mlx5_ib_warn(dev, "avoid zero region\n");
  708. ib_umem_release(*umem);
  709. return -EINVAL;
  710. }
  711. mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
  712. *npages, *ncont, *order, *page_shift);
  713. return 0;
  714. }
  715. static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
  716. {
  717. struct mlx5_ib_umr_context *context =
  718. container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
  719. context->status = wc->status;
  720. complete(&context->done);
  721. }
  722. static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
  723. {
  724. context->cqe.done = mlx5_ib_umr_done;
  725. context->status = -1;
  726. init_completion(&context->done);
  727. }
  728. static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
  729. struct mlx5_umr_wr *umrwr)
  730. {
  731. struct umr_common *umrc = &dev->umrc;
  732. struct ib_send_wr *bad;
  733. int err;
  734. struct mlx5_ib_umr_context umr_context;
  735. mlx5_ib_init_umr_context(&umr_context);
  736. umrwr->wr.wr_cqe = &umr_context.cqe;
  737. down(&umrc->sem);
  738. err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
  739. if (err) {
  740. mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
  741. } else {
  742. wait_for_completion(&umr_context.done);
  743. if (umr_context.status != IB_WC_SUCCESS) {
  744. mlx5_ib_warn(dev, "reg umr failed (%u)\n",
  745. umr_context.status);
  746. err = -EFAULT;
  747. }
  748. }
  749. up(&umrc->sem);
  750. return err;
  751. }
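/*
 * Fast-path registration: take an mkey of the right order from the cache
 * (kicking off an asynchronous refill on a miss) and program its translation
 * table with a UMR. If the cache cannot supply an mkey this returns -EAGAIN
 * and the caller falls back to the slow path (reg_create() under
 * slow_path_mutex).
 */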
  752. static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
  753. u64 virt_addr, u64 len, int npages,
  754. int page_shift, int order, int access_flags)
  755. {
  756. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  757. struct mlx5_ib_mr *mr;
  758. int err = 0;
  759. int i;
  760. for (i = 0; i < 1; i++) {
  761. mr = alloc_cached_mr(dev, order);
  762. if (mr)
  763. break;
  764. err = add_keys(dev, order2idx(dev, order), 1);
  765. if (err && err != -EAGAIN) {
  766. mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
  767. break;
  768. }
  769. }
  770. if (!mr)
  771. return ERR_PTR(-EAGAIN);
  772. mr->ibmr.pd = pd;
  773. mr->umem = umem;
  774. mr->access_flags = access_flags;
  775. mr->desc_size = sizeof(struct mlx5_mtt);
  776. mr->mmkey.iova = virt_addr;
  777. mr->mmkey.size = len;
  778. mr->mmkey.pd = to_mpd(pd)->pdn;
  779. err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
  780. MLX5_IB_UPD_XLT_ENABLE);
  781. if (err) {
  782. mlx5_mr_cache_free(dev, mr);
  783. return ERR_PTR(err);
  784. }
  785. mr->live = 1;
  786. return mr;
  787. }
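/*
 * Fill one chunk of the translation buffer. For indirect (KLM) MRs the ODP
 * code builds the entries; otherwise, unless MLX5_IB_UPD_XLT_ZAP asked for a
 * zeroed chunk, the MTT entries are taken from the umem page list and the
 * unused tail of the chunk is cleared so stale data is never pushed to the
 * device.
 */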
  788. static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
  789. void *xlt, int page_shift, size_t size,
  790. int flags)
  791. {
  792. struct mlx5_ib_dev *dev = mr->dev;
  793. struct ib_umem *umem = mr->umem;
  794. if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
  795. mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
  796. return npages;
  797. }
  798. npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
  799. if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
  800. __mlx5_ib_populate_pas(dev, umem, page_shift,
  801. idx, npages, xlt,
  802. MLX5_IB_MTT_PRESENT);
  803. /* Clear padding after the pages
  804. * brought from the umem.
  805. */
  806. memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
  807. size - npages * sizeof(struct mlx5_mtt));
  808. }
  809. return npages;
  810. }
  811. #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
  812. MLX5_UMR_MTT_ALIGNMENT)
  813. #define MLX5_SPARE_UMR_CHUNK 0x10000
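/*
 * The translation buffer for a UMR is tried in three sizes: a chunk of up to
 * MLX5_MAX_UMR_CHUNK, then the smaller MLX5_SPARE_UMR_CHUNK, and finally the
 * per-ucontext emergency page (upd_xlt_page, serialized by
 * upd_xlt_page_mutex). Larger regions are simply posted as several UMR work
 * requests, one per chunk.
 */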
  814. int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
  815. int page_shift, int flags)
  816. {
  817. struct mlx5_ib_dev *dev = mr->dev;
  818. struct device *ddev = dev->ib_dev.dev.parent;
  819. struct mlx5_ib_ucontext *uctx = NULL;
  820. int size;
  821. void *xlt;
  822. dma_addr_t dma;
  823. struct mlx5_umr_wr wr;
  824. struct ib_sge sg;
  825. int err = 0;
  826. int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
  827. ? sizeof(struct mlx5_klm)
  828. : sizeof(struct mlx5_mtt);
  829. const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
  830. const int page_mask = page_align - 1;
  831. size_t pages_mapped = 0;
  832. size_t pages_to_map = 0;
  833. size_t pages_iter = 0;
  834. gfp_t gfp;
  835. /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
  836. * so we need to align the offset and length accordingly
  837. */
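/*
 * For example, assuming 8-byte MTT descriptors and a 64-byte
 * MLX5_UMR_MTT_ALIGNMENT, page_align is 8 and page_mask is 7: an update
 * starting at idx 6 for 10 pages is widened to start at idx 0 and npages
 * grows to 16; pages_to_map is then rounded up to a multiple of 8 as well.
 */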
  838. if (idx & page_mask) {
  839. npages += idx & page_mask;
  840. idx &= ~page_mask;
  841. }
  842. gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
  843. gfp |= __GFP_ZERO | __GFP_NOWARN;
  844. pages_to_map = ALIGN(npages, page_align);
  845. size = desc_size * pages_to_map;
  846. size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
  847. xlt = (void *)__get_free_pages(gfp, get_order(size));
  848. if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
  849. mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. Falling back to spare UMR allocation of %d bytes\n",
  850. size, get_order(size), MLX5_SPARE_UMR_CHUNK);
  851. size = MLX5_SPARE_UMR_CHUNK;
  852. xlt = (void *)__get_free_pages(gfp, get_order(size));
  853. }
  854. if (!xlt) {
  855. uctx = to_mucontext(mr->ibmr.pd->uobject->context);
  856. mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
  857. size = PAGE_SIZE;
  858. xlt = (void *)uctx->upd_xlt_page;
  859. mutex_lock(&uctx->upd_xlt_page_mutex);
  860. memset(xlt, 0, size);
  861. }
  862. pages_iter = size / desc_size;
  863. dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
  864. if (dma_mapping_error(ddev, dma)) {
  865. mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
  866. err = -ENOMEM;
  867. goto free_xlt;
  868. }
  869. sg.addr = dma;
  870. sg.lkey = dev->umrc.pd->local_dma_lkey;
  871. memset(&wr, 0, sizeof(wr));
  872. wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
  873. if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
  874. wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
  875. wr.wr.sg_list = &sg;
  876. wr.wr.num_sge = 1;
  877. wr.wr.opcode = MLX5_IB_WR_UMR;
  878. wr.pd = mr->ibmr.pd;
  879. wr.mkey = mr->mmkey.key;
  880. wr.length = mr->mmkey.size;
  881. wr.virt_addr = mr->mmkey.iova;
  882. wr.access_flags = mr->access_flags;
  883. wr.page_shift = page_shift;
  884. for (pages_mapped = 0;
  885. pages_mapped < pages_to_map && !err;
  886. pages_mapped += pages_iter, idx += pages_iter) {
  887. npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
  888. dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
  889. npages = populate_xlt(mr, idx, npages, xlt,
  890. page_shift, size, flags);
  891. dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
  892. sg.length = ALIGN(npages * desc_size,
  893. MLX5_UMR_MTT_ALIGNMENT);
  894. if (pages_mapped + pages_iter >= pages_to_map) {
  895. if (flags & MLX5_IB_UPD_XLT_ENABLE)
  896. wr.wr.send_flags |=
  897. MLX5_IB_SEND_UMR_ENABLE_MR |
  898. MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
  899. MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
  900. if (flags & MLX5_IB_UPD_XLT_PD ||
  901. flags & MLX5_IB_UPD_XLT_ACCESS)
  902. wr.wr.send_flags |=
  903. MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
  904. if (flags & MLX5_IB_UPD_XLT_ADDR)
  905. wr.wr.send_flags |=
  906. MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
  907. }
  908. wr.offset = idx * desc_size;
  909. wr.xlt_size = sg.length;
  910. err = mlx5_ib_post_send_wait(dev, &wr);
  911. }
  912. dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
  913. free_xlt:
  914. if (uctx)
  915. mutex_unlock(&uctx->upd_xlt_page_mutex);
  916. else
  917. free_pages((unsigned long)xlt, get_order(size));
  918. return err;
  919. }
  920. /*
  921. * If ibmr is NULL it will be allocated by reg_create.
  922. * Else, the given ibmr will be used.
  923. */
  924. static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
  925. u64 virt_addr, u64 length,
  926. struct ib_umem *umem, int npages,
  927. int page_shift, int access_flags)
  928. {
  929. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  930. struct mlx5_ib_mr *mr;
  931. __be64 *pas;
  932. void *mkc;
  933. int inlen;
  934. u32 *in;
  935. int err;
  936. bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
  937. mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
  938. if (!mr)
  939. return ERR_PTR(-ENOMEM);
  940. inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
  941. sizeof(*pas) * ((npages + 1) / 2) * 2;
  942. in = kvzalloc(inlen, GFP_KERNEL);
  943. if (!in) {
  944. err = -ENOMEM;
  945. goto err_1;
  946. }
  947. pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
  948. if (!(access_flags & IB_ACCESS_ON_DEMAND))
  949. mlx5_ib_populate_pas(dev, umem, page_shift, pas,
  950. pg_cap ? MLX5_IB_MTT_PRESENT : 0);
  951. /* The pg_access bit allows setting the access flags
  952. * in the page list submitted with the command. */
  953. MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
  954. mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
  955. MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
  956. MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
  957. MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
  958. MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
  959. MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
  960. MLX5_SET(mkc, mkc, lr, 1);
  961. MLX5_SET(mkc, mkc, umr_en, 1);
  962. MLX5_SET64(mkc, mkc, start_addr, virt_addr);
  963. MLX5_SET64(mkc, mkc, len, length);
  964. MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
  965. MLX5_SET(mkc, mkc, bsf_octword_size, 0);
  966. MLX5_SET(mkc, mkc, translations_octword_size,
  967. get_octo_len(virt_addr, length, 1 << page_shift));
  968. MLX5_SET(mkc, mkc, log_page_size, page_shift);
  969. MLX5_SET(mkc, mkc, qpn, 0xffffff);
  970. MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
  971. get_octo_len(virt_addr, length, 1 << page_shift));
  972. err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
  973. if (err) {
  974. mlx5_ib_warn(dev, "create mkey failed\n");
  975. goto err_2;
  976. }
  977. mr->mmkey.type = MLX5_MKEY_MR;
  978. mr->desc_size = sizeof(struct mlx5_mtt);
  979. mr->umem = umem;
  980. mr->dev = dev;
  981. mr->live = 1;
  982. kvfree(in);
  983. mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
  984. return mr;
  985. err_2:
  986. kvfree(in);
  987. err_1:
  988. if (!ibmr)
  989. kfree(mr);
  990. return ERR_PTR(err);
  991. }
  992. static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
  993. int npages, u64 length, int access_flags)
  994. {
  995. mr->npages = npages;
  996. atomic_add(npages, &dev->mdev->priv.reg_pages);
  997. mr->ibmr.lkey = mr->mmkey.key;
  998. mr->ibmr.rkey = mr->mmkey.key;
  999. mr->ibmr.length = length;
  1000. mr->access_flags = access_flags;
  1001. }
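/*
 * User MR registration takes one of three paths: an implicit ODP MR when the
 * whole address space is registered (start == 0, length == U64_MAX, with ODP
 * enabled), the UMR fast path through the MR cache when the order fits
 * mr_cache_max_order(), or the slow path through reg_create() (a synchronous
 * mlx5_core_create_mkey() call) under slow_path_mutex.
 */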
  1002. struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
  1003. u64 virt_addr, int access_flags,
  1004. struct ib_udata *udata)
  1005. {
  1006. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  1007. struct mlx5_ib_mr *mr = NULL;
  1008. struct ib_umem *umem;
  1009. int page_shift;
  1010. int npages;
  1011. int ncont;
  1012. int order;
  1013. int err;
  1014. mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
  1015. start, virt_addr, length, access_flags);
  1016. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  1017. if (!start && length == U64_MAX) {
  1018. if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
  1019. !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
  1020. return ERR_PTR(-EINVAL);
  1021. mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
  1022. return &mr->ibmr;
  1023. }
  1024. #endif
  1025. err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
  1026. &page_shift, &ncont, &order);
  1027. if (err < 0)
  1028. return ERR_PTR(err);
  1029. if (order <= mr_cache_max_order(dev)) {
  1030. mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
  1031. order, access_flags);
  1032. if (PTR_ERR(mr) == -EAGAIN) {
  1033. mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
  1034. mr = NULL;
  1035. }
  1036. } else if (access_flags & IB_ACCESS_ON_DEMAND &&
  1037. !MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
  1038. err = -EINVAL;
  1039. pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
  1040. goto error;
  1041. }
  1042. if (!mr) {
  1043. mutex_lock(&dev->slow_path_mutex);
  1044. mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
  1045. page_shift, access_flags);
  1046. mutex_unlock(&dev->slow_path_mutex);
  1047. }
  1048. if (IS_ERR(mr)) {
  1049. err = PTR_ERR(mr);
  1050. goto error;
  1051. }
  1052. mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
  1053. mr->umem = umem;
  1054. set_mr_fields(dev, mr, npages, length, access_flags);
  1055. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  1056. update_odp_mr(mr);
  1057. #endif
  1058. return &mr->ibmr;
  1059. error:
  1060. ib_umem_release(umem);
  1061. return ERR_PTR(err);
  1062. }
  1063. static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  1064. {
  1065. struct mlx5_core_dev *mdev = dev->mdev;
  1066. struct mlx5_umr_wr umrwr = {};
  1067. if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
  1068. return 0;
  1069. umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
  1070. MLX5_IB_SEND_UMR_FAIL_IF_FREE;
  1071. umrwr.wr.opcode = MLX5_IB_WR_UMR;
  1072. umrwr.mkey = mr->mmkey.key;
  1073. return mlx5_ib_post_send_wait(dev, &umrwr);
  1074. }
  1075. static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
  1076. int access_flags, int flags)
  1077. {
  1078. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  1079. struct mlx5_umr_wr umrwr = {};
  1080. int err;
  1081. umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
  1082. umrwr.wr.opcode = MLX5_IB_WR_UMR;
  1083. umrwr.mkey = mr->mmkey.key;
  1084. if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
  1085. umrwr.pd = pd;
  1086. umrwr.access_flags = access_flags;
  1087. umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
  1088. }
  1089. err = mlx5_ib_post_send_wait(dev, &umrwr);
  1090. return err;
  1091. }
  1092. int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
  1093. u64 length, u64 virt_addr, int new_access_flags,
  1094. struct ib_pd *new_pd, struct ib_udata *udata)
  1095. {
  1096. struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
  1097. struct mlx5_ib_mr *mr = to_mmr(ib_mr);
  1098. struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
  1099. int access_flags = flags & IB_MR_REREG_ACCESS ?
  1100. new_access_flags :
  1101. mr->access_flags;
  1102. u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
  1103. u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
  1104. int page_shift = 0;
  1105. int upd_flags = 0;
  1106. int npages = 0;
  1107. int ncont = 0;
  1108. int order = 0;
  1109. int err;
  1110. mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
  1111. start, virt_addr, length, access_flags);
  1112. atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
  1113. if (flags != IB_MR_REREG_PD) {
  1114. /*
  1115. * Replace umem. This needs to be done whether or not UMR is
  1116. * used.
  1117. */
  1118. flags |= IB_MR_REREG_TRANS;
  1119. ib_umem_release(mr->umem);
  1120. err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
  1121. &npages, &page_shift, &ncont, &order);
  1122. if (err < 0) {
  1123. clean_mr(mr);
  1124. return err;
  1125. }
  1126. }
  1127. if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
  1128. /*
  1129. * UMR can't be used - MKey needs to be replaced.
  1130. */
  1131. if (mr->allocated_from_cache) {
  1132. err = unreg_umr(dev, mr);
  1133. if (err)
  1134. mlx5_ib_warn(dev, "Failed to unregister MR\n");
  1135. } else {
  1136. err = destroy_mkey(dev, mr);
  1137. if (err)
  1138. mlx5_ib_warn(dev, "Failed to destroy MKey\n");
  1139. }
  1140. if (err)
  1141. return err;
  1142. mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
  1143. page_shift, access_flags);
  1144. if (IS_ERR(mr))
  1145. return PTR_ERR(mr);
  1146. mr->allocated_from_cache = 0;
  1147. } else {
  1148. /*
  1149. * Send a UMR WQE
  1150. */
  1151. mr->ibmr.pd = pd;
  1152. mr->access_flags = access_flags;
  1153. mr->mmkey.iova = addr;
  1154. mr->mmkey.size = len;
  1155. mr->mmkey.pd = to_mpd(pd)->pdn;
  1156. if (flags & IB_MR_REREG_TRANS) {
  1157. upd_flags = MLX5_IB_UPD_XLT_ADDR;
  1158. if (flags & IB_MR_REREG_PD)
  1159. upd_flags |= MLX5_IB_UPD_XLT_PD;
  1160. if (flags & IB_MR_REREG_ACCESS)
  1161. upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
  1162. err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
  1163. upd_flags);
  1164. } else {
  1165. err = rereg_umr(pd, mr, access_flags, flags);
  1166. }
  1167. if (err) {
  1168. mlx5_ib_warn(dev, "Failed to rereg UMR\n");
  1169. ib_umem_release(mr->umem);
  1170. clean_mr(mr);
  1171. return err;
  1172. }
  1173. }
  1174. set_mr_fields(dev, mr, npages, len, access_flags);
  1175. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  1176. update_odp_mr(mr);
  1177. #endif
  1178. return 0;
  1179. }
  1180. static int
  1181. mlx5_alloc_priv_descs(struct ib_device *device,
  1182. struct mlx5_ib_mr *mr,
  1183. int ndescs,
  1184. int desc_size)
  1185. {
  1186. int size = ndescs * desc_size;
  1187. int add_size;
  1188. int ret;
  1189. add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
  1190. mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
  1191. if (!mr->descs_alloc)
  1192. return -ENOMEM;
  1193. mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
  1194. mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
  1195. size, DMA_TO_DEVICE);
  1196. if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
  1197. ret = -ENOMEM;
  1198. goto err;
  1199. }
  1200. return 0;
  1201. err:
  1202. kfree(mr->descs_alloc);
  1203. return ret;
  1204. }
  1205. static void
  1206. mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
  1207. {
  1208. if (mr->descs) {
  1209. struct ib_device *device = mr->ibmr.device;
  1210. int size = mr->max_descs * mr->desc_size;
  1211. dma_unmap_single(device->dev.parent, mr->desc_map,
  1212. size, DMA_TO_DEVICE);
  1213. kfree(mr->descs_alloc);
  1214. mr->descs = NULL;
  1215. }
  1216. }
  1217. static int clean_mr(struct mlx5_ib_mr *mr)
  1218. {
  1219. struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
  1220. int allocated_from_cache = mr->allocated_from_cache;
  1221. int err;
  1222. if (mr->sig) {
  1223. if (mlx5_core_destroy_psv(dev->mdev,
  1224. mr->sig->psv_memory.psv_idx))
  1225. mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
  1226. mr->sig->psv_memory.psv_idx);
  1227. if (mlx5_core_destroy_psv(dev->mdev,
  1228. mr->sig->psv_wire.psv_idx))
  1229. mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
  1230. mr->sig->psv_wire.psv_idx);
  1231. kfree(mr->sig);
  1232. mr->sig = NULL;
  1233. }
  1234. mlx5_free_priv_descs(mr);
  1235. if (!allocated_from_cache) {
  1236. err = destroy_mkey(dev, mr);
  1237. if (err) {
  1238. mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
  1239. mr->mmkey.key, err);
  1240. return err;
  1241. }
  1242. } else {
  1243. mlx5_mr_cache_free(dev, mr);
  1244. }
  1245. if (!allocated_from_cache)
  1246. kfree(mr);
  1247. return 0;
  1248. }
  1249. int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
  1250. {
  1251. struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
  1252. struct mlx5_ib_mr *mr = to_mmr(ibmr);
  1253. int npages = mr->npages;
  1254. struct ib_umem *umem = mr->umem;
  1255. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  1256. if (umem && umem->odp_data) {
  1257. /* Prevent new page faults from succeeding */
  1258. mr->live = 0;
  1259. /* Wait for all running page-fault handlers to finish. */
  1260. synchronize_srcu(&dev->mr_srcu);
  1261. /* Destroy all page mappings */
  1262. if (umem->odp_data->page_list)
  1263. mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
  1264. ib_umem_end(umem));
  1265. else
  1266. mlx5_ib_free_implicit_mr(mr);
  1267. /*
  1268. * For ODP we release the umem before the MR, so that
  1269. * no invalidations that look at the *mr struct are
  1270. * still in flight.
  1271. */
  1272. ib_umem_release(umem);
  1273. atomic_sub(npages, &dev->mdev->priv.reg_pages);
  1274. /* Avoid double-freeing the umem. */
  1275. umem = NULL;
  1276. }
  1277. #endif
  1278. clean_mr(mr);
  1279. if (umem) {
  1280. ib_umem_release(umem);
  1281. atomic_sub(npages, &dev->mdev->priv.reg_pages);
  1282. }
  1283. return 0;
  1284. }
  1285. struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
  1286. enum ib_mr_type mr_type,
  1287. u32 max_num_sg)
  1288. {
  1289. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  1290. int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
  1291. int ndescs = ALIGN(max_num_sg, 4);
  1292. struct mlx5_ib_mr *mr;
  1293. void *mkc;
  1294. u32 *in;
  1295. int err;
  1296. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  1297. if (!mr)
  1298. return ERR_PTR(-ENOMEM);
  1299. in = kzalloc(inlen, GFP_KERNEL);
  1300. if (!in) {
  1301. err = -ENOMEM;
  1302. goto err_free;
  1303. }
  1304. mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
  1305. MLX5_SET(mkc, mkc, free, 1);
  1306. MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
  1307. MLX5_SET(mkc, mkc, qpn, 0xffffff);
  1308. MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
  1309. if (mr_type == IB_MR_TYPE_MEM_REG) {
  1310. mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
  1311. MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
  1312. err = mlx5_alloc_priv_descs(pd->device, mr,
  1313. ndescs, sizeof(struct mlx5_mtt));
  1314. if (err)
  1315. goto err_free_in;
  1316. mr->desc_size = sizeof(struct mlx5_mtt);
  1317. mr->max_descs = ndescs;
  1318. } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
  1319. mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
  1320. err = mlx5_alloc_priv_descs(pd->device, mr,
  1321. ndescs, sizeof(struct mlx5_klm));
  1322. if (err)
  1323. goto err_free_in;
  1324. mr->desc_size = sizeof(struct mlx5_klm);
  1325. mr->max_descs = ndescs;
  1326. } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
  1327. u32 psv_index[2];
  1328. MLX5_SET(mkc, mkc, bsf_en, 1);
  1329. MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
  1330. mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
  1331. if (!mr->sig) {
  1332. err = -ENOMEM;
  1333. goto err_free_in;
  1334. }
  1335. /* create mem & wire PSVs */
  1336. err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
  1337. 2, psv_index);
  1338. if (err)
  1339. goto err_free_sig;
  1340. mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
  1341. mr->sig->psv_memory.psv_idx = psv_index[0];
  1342. mr->sig->psv_wire.psv_idx = psv_index[1];
  1343. mr->sig->sig_status_checked = true;
  1344. mr->sig->sig_err_exists = false;
  1345. /* Next UMR, Arm SIGERR */
  1346. ++mr->sig->sigerr_count;
  1347. } else {
  1348. mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
  1349. err = -EINVAL;
  1350. goto err_free_in;
  1351. }
  1352. MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
  1353. MLX5_SET(mkc, mkc, umr_en, 1);
  1354. err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
  1355. if (err)
  1356. goto err_destroy_psv;
  1357. mr->mmkey.type = MLX5_MKEY_MR;
  1358. mr->ibmr.lkey = mr->mmkey.key;
  1359. mr->ibmr.rkey = mr->mmkey.key;
  1360. mr->umem = NULL;
  1361. kfree(in);
  1362. return &mr->ibmr;
  1363. err_destroy_psv:
  1364. if (mr->sig) {
  1365. if (mlx5_core_destroy_psv(dev->mdev,
  1366. mr->sig->psv_memory.psv_idx))
  1367. mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
  1368. mr->sig->psv_memory.psv_idx);
  1369. if (mlx5_core_destroy_psv(dev->mdev,
  1370. mr->sig->psv_wire.psv_idx))
  1371. mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
  1372. mr->sig->psv_wire.psv_idx);
  1373. }
  1374. mlx5_free_priv_descs(mr);
  1375. err_free_sig:
  1376. kfree(mr->sig);
  1377. err_free_in:
  1378. kfree(in);
  1379. err_free:
  1380. kfree(mr);
  1381. return ERR_PTR(err);
  1382. }
  1383. struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
  1384. struct ib_udata *udata)
  1385. {
  1386. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  1387. int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
  1388. struct mlx5_ib_mw *mw = NULL;
  1389. u32 *in = NULL;
  1390. void *mkc;
  1391. int ndescs;
  1392. int err;
  1393. struct mlx5_ib_alloc_mw req = {};
  1394. struct {
  1395. __u32 comp_mask;
  1396. __u32 response_length;
  1397. } resp = {};
  1398. err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
  1399. if (err)
  1400. return ERR_PTR(err);
  1401. if (req.comp_mask || req.reserved1 || req.reserved2)
  1402. return ERR_PTR(-EOPNOTSUPP);
  1403. if (udata->inlen > sizeof(req) &&
  1404. !ib_is_udata_cleared(udata, sizeof(req),
  1405. udata->inlen - sizeof(req)))
  1406. return ERR_PTR(-EOPNOTSUPP);
  1407. ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
  1408. mw = kzalloc(sizeof(*mw), GFP_KERNEL);
  1409. in = kzalloc(inlen, GFP_KERNEL);
  1410. if (!mw || !in) {
  1411. err = -ENOMEM;
  1412. goto free;
  1413. }
  1414. mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
  1415. MLX5_SET(mkc, mkc, free, 1);
  1416. MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
  1417. MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
  1418. MLX5_SET(mkc, mkc, umr_en, 1);
  1419. MLX5_SET(mkc, mkc, lr, 1);
  1420. MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
  1421. MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
  1422. MLX5_SET(mkc, mkc, qpn, 0xffffff);
  1423. err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
  1424. if (err)
  1425. goto free;
  1426. mw->mmkey.type = MLX5_MKEY_MW;
  1427. mw->ibmw.rkey = mw->mmkey.key;
  1428. mw->ndescs = ndescs;
  1429. resp.response_length = min(offsetof(typeof(resp), response_length) +
  1430. sizeof(resp.response_length), udata->outlen);
  1431. if (resp.response_length) {
  1432. err = ib_copy_to_udata(udata, &resp, resp.response_length);
  1433. if (err) {
  1434. mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
  1435. goto free;
  1436. }
  1437. }
  1438. kfree(in);
  1439. return &mw->ibmw;
  1440. free:
  1441. kfree(mw);
  1442. kfree(in);
  1443. return ERR_PTR(err);
  1444. }
  1445. int mlx5_ib_dealloc_mw(struct ib_mw *mw)
  1446. {
  1447. struct mlx5_ib_mw *mmw = to_mmw(mw);
  1448. int err;
  1449. err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
  1450. &mmw->mmkey);
  1451. if (!err)
  1452. kfree(mmw);
  1453. return err;
  1454. }
  1455. int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
  1456. struct ib_mr_status *mr_status)
  1457. {
  1458. struct mlx5_ib_mr *mmr = to_mmr(ibmr);
  1459. int ret = 0;
  1460. if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
  1461. pr_err("Invalid status check mask\n");
  1462. ret = -EINVAL;
  1463. goto done;
  1464. }
  1465. mr_status->fail_status = 0;
  1466. if (check_mask & IB_MR_CHECK_SIG_STATUS) {
  1467. if (!mmr->sig) {
  1468. ret = -EINVAL;
  1469. pr_err("signature status check requested on a non-signature enabled MR\n");
  1470. goto done;
  1471. }
  1472. mmr->sig->sig_status_checked = true;
  1473. if (!mmr->sig->sig_err_exists)
  1474. goto done;
  1475. if (ibmr->lkey == mmr->sig->err_item.key)
  1476. memcpy(&mr_status->sig_err, &mmr->sig->err_item,
  1477. sizeof(mr_status->sig_err));
  1478. else {
  1479. mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
  1480. mr_status->sig_err.sig_err_offset = 0;
  1481. mr_status->sig_err.key = mmr->sig->err_item.key;
  1482. }
  1483. mmr->sig->sig_err_exists = false;
  1484. mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
  1485. }
  1486. done:
  1487. return ret;
  1488. }
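/*
 * Map a scatterlist into KLM descriptors. Unlike MTT entries, each KLM
 * carries its own base address, byte count and lkey, so the mapped elements
 * do not have to be page aligned or contiguous; this is what backs
 * IB_MR_TYPE_SG_GAPS (and signature) MRs, see mlx5_ib_alloc_mr() above.
 */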
  1489. static int
  1490. mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
  1491. struct scatterlist *sgl,
  1492. unsigned short sg_nents,
  1493. unsigned int *sg_offset_p)
  1494. {
  1495. struct scatterlist *sg = sgl;
  1496. struct mlx5_klm *klms = mr->descs;
  1497. unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
  1498. u32 lkey = mr->ibmr.pd->local_dma_lkey;
  1499. int i;
  1500. mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
  1501. mr->ibmr.length = 0;
  1502. mr->ndescs = sg_nents;
  1503. for_each_sg(sgl, sg, sg_nents, i) {
  1504. if (unlikely(i >= mr->max_descs))
  1505. break;
  1506. klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
  1507. klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
  1508. klms[i].key = cpu_to_be32(lkey);
  1509. mr->ibmr.length += sg_dma_len(sg) - sg_offset;
  1510. sg_offset = 0;
  1511. }
  1512. if (sg_offset_p)
  1513. *sg_offset_p = sg_offset;
  1514. return i;
  1515. }
  1516. static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
  1517. {
  1518. struct mlx5_ib_mr *mr = to_mmr(ibmr);
  1519. __be64 *descs;
  1520. if (unlikely(mr->ndescs == mr->max_descs))
  1521. return -ENOMEM;
  1522. descs = mr->descs;
  1523. descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
  1524. return 0;
  1525. }
  1526. int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
  1527. unsigned int *sg_offset)
  1528. {
  1529. struct mlx5_ib_mr *mr = to_mmr(ibmr);
  1530. int n;
  1531. mr->ndescs = 0;
  1532. ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
  1533. mr->desc_size * mr->max_descs,
  1534. DMA_TO_DEVICE);
  1535. if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
  1536. n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
  1537. else
  1538. n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
  1539. mlx5_set_page);
  1540. ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
  1541. mr->desc_size * mr->max_descs,
  1542. DMA_TO_DEVICE);
  1543. return n;
  1544. }