mr.c 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408
  1. /*
  2. * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  3. *
  4. * This software is available to you under a choice of one of two
  5. * licenses. You may choose to be licensed under the terms of the GNU
  6. * General Public License (GPL) Version 2, available from the file
  7. * COPYING in the main directory of this source tree, or the
  8. * OpenIB.org BSD license below:
  9. *
  10. * Redistribution and use in source and binary forms, with or
  11. * without modification, are permitted provided that the following
  12. * conditions are met:
  13. *
  14. * - Redistributions of source code must retain the above
  15. * copyright notice, this list of conditions and the following
  16. * disclaimer.
  17. *
  18. * - Redistributions in binary form must reproduce the above
  19. * copyright notice, this list of conditions and the following
  20. * disclaimer in the documentation and/or other materials
  21. * provided with the distribution.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30. * SOFTWARE.
  31. */
  32. #include <linux/kref.h>
  33. #include <linux/random.h>
  34. #include <linux/debugfs.h>
  35. #include <linux/export.h>
  36. #include <linux/delay.h>
  37. #include <rdma/ib_umem.h>
  38. #include <rdma/ib_umem_odp.h>
  39. #include <rdma/ib_verbs.h>
  40. #include "mlx5_ib.h"
  /* Upper bound on asynchronous mkey creations in flight for one cache entry. */
  enum { MAX_PENDING_REG_MR = 8 };

  /* Alignment, in bytes, applied to the translation buffers posted via UMR. */
  #define MLX5_UMR_ALIGN 2048

  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  /*
   * Statically allocated fallback buffer used by mlx5_ib_update_mtt() when it
   * cannot allocate a page; the mutex below serializes its users.
   */
  static __be64 mlx5_ib_update_mtt_emergency_buffer[MLX5_UMR_MTT_MIN_CHUNK_SIZE / sizeof(__be64)]
      __aligned(MLX5_UMR_ALIGN);
  static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
  #endif

  /* Defined outside the portion of the file shown here. */
  static int clean_mr(struct mlx5_ib_mr *mr);
  52. static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  53. {
  54. int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
  55. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  56. /* Wait until all page fault handlers using the mr complete. */
  57. synchronize_srcu(&dev->mr_srcu);
  58. #endif
  59. return err;
  60. }
  61. static int order2idx(struct mlx5_ib_dev *dev, int order)
  62. {
  63. struct mlx5_mr_cache *cache = &dev->cache;
  64. if (order < cache->ent[0].order)
  65. return 0;
  66. else
  67. return order - cache->ent[0].order;
  68. }
  69. static void reg_mr_callback(int status, void *context)
  70. {
  71. struct mlx5_ib_mr *mr = context;
  72. struct mlx5_ib_dev *dev = mr->dev;
  73. struct mlx5_mr_cache *cache = &dev->cache;
  74. int c = order2idx(dev, mr->order);
  75. struct mlx5_cache_ent *ent = &cache->ent[c];
  76. u8 key;
  77. unsigned long flags;
  78. struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
  79. int err;
  80. spin_lock_irqsave(&ent->lock, flags);
  81. ent->pending--;
  82. spin_unlock_irqrestore(&ent->lock, flags);
  83. if (status) {
  84. mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
  85. kfree(mr);
  86. dev->fill_delay = 1;
  87. mod_timer(&dev->delay_timer, jiffies + HZ);
  88. return;
  89. }
  90. if (mr->out.hdr.status) {
  91. mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
  92. mr->out.hdr.status,
  93. be32_to_cpu(mr->out.hdr.syndrome));
  94. kfree(mr);
  95. dev->fill_delay = 1;
  96. mod_timer(&dev->delay_timer, jiffies + HZ);
  97. return;
  98. }
  99. spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
  100. key = dev->mdev->priv.mkey_key++;
  101. spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
  102. mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
  103. cache->last_add = jiffies;
  104. spin_lock_irqsave(&ent->lock, flags);
  105. list_add_tail(&mr->list, &ent->head);
  106. ent->cur++;
  107. ent->size++;
  108. spin_unlock_irqrestore(&ent->lock, flags);
  109. write_lock_irqsave(&table->lock, flags);
  110. err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
  111. &mr->mmr);
  112. if (err)
  113. pr_err("Error inserting to mr tree. 0x%x\n", -err);
  114. write_unlock_irqrestore(&table->lock, flags);
  115. }
  116. static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
  117. {
  118. struct mlx5_mr_cache *cache = &dev->cache;
  119. struct mlx5_cache_ent *ent = &cache->ent[c];
  120. struct mlx5_create_mkey_mbox_in *in;
  121. struct mlx5_ib_mr *mr;
  122. int npages = 1 << ent->order;
  123. int err = 0;
  124. int i;
  125. in = kzalloc(sizeof(*in), GFP_KERNEL);
  126. if (!in)
  127. return -ENOMEM;
  128. for (i = 0; i < num; i++) {
  129. if (ent->pending >= MAX_PENDING_REG_MR) {
  130. err = -EAGAIN;
  131. break;
  132. }
  133. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  134. if (!mr) {
  135. err = -ENOMEM;
  136. break;
  137. }
  138. mr->order = ent->order;
  139. mr->umred = 1;
  140. mr->dev = dev;
  141. in->seg.status = MLX5_MKEY_STATUS_FREE;
  142. in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
  143. in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
  144. in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
  145. in->seg.log2_page_size = 12;
  146. spin_lock_irq(&ent->lock);
  147. ent->pending++;
  148. spin_unlock_irq(&ent->lock);
  149. err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
  150. sizeof(*in), reg_mr_callback,
  151. mr, &mr->out);
  152. if (err) {
  153. spin_lock_irq(&ent->lock);
  154. ent->pending--;
  155. spin_unlock_irq(&ent->lock);
  156. mlx5_ib_warn(dev, "create mkey failed %d\n", err);
  157. kfree(mr);
  158. break;
  159. }
  160. }
  161. kfree(in);
  162. return err;
  163. }
  164. static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
  165. {
  166. struct mlx5_mr_cache *cache = &dev->cache;
  167. struct mlx5_cache_ent *ent = &cache->ent[c];
  168. struct mlx5_ib_mr *mr;
  169. int err;
  170. int i;
  171. for (i = 0; i < num; i++) {
  172. spin_lock_irq(&ent->lock);
  173. if (list_empty(&ent->head)) {
  174. spin_unlock_irq(&ent->lock);
  175. return;
  176. }
  177. mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
  178. list_del(&mr->list);
  179. ent->cur--;
  180. ent->size--;
  181. spin_unlock_irq(&ent->lock);
  182. err = destroy_mkey(dev, mr);
  183. if (err)
  184. mlx5_ib_warn(dev, "failed destroy mkey\n");
  185. else
  186. kfree(mr);
  187. }
  188. }
  189. static ssize_t size_write(struct file *filp, const char __user *buf,
  190. size_t count, loff_t *pos)
  191. {
  192. struct mlx5_cache_ent *ent = filp->private_data;
  193. struct mlx5_ib_dev *dev = ent->dev;
  194. char lbuf[20];
  195. u32 var;
  196. int err;
  197. int c;
  198. if (copy_from_user(lbuf, buf, sizeof(lbuf)))
  199. return -EFAULT;
  200. c = order2idx(dev, ent->order);
  201. lbuf[sizeof(lbuf) - 1] = 0;
  202. if (sscanf(lbuf, "%u", &var) != 1)
  203. return -EINVAL;
  204. if (var < ent->limit)
  205. return -EINVAL;
  206. if (var > ent->size) {
  207. do {
  208. err = add_keys(dev, c, var - ent->size);
  209. if (err && err != -EAGAIN)
  210. return err;
  211. usleep_range(3000, 5000);
  212. } while (err);
  213. } else if (var < ent->size) {
  214. remove_keys(dev, c, ent->size - var);
  215. }
  216. return count;
  217. }
  218. static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
  219. loff_t *pos)
  220. {
  221. struct mlx5_cache_ent *ent = filp->private_data;
  222. char lbuf[20];
  223. int err;
  224. if (*pos)
  225. return 0;
  226. err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
  227. if (err < 0)
  228. return err;
  229. if (copy_to_user(buf, lbuf, err))
  230. return -EFAULT;
  231. *pos += err;
  232. return err;
  233. }
  234. static const struct file_operations size_fops = {
  235. .owner = THIS_MODULE,
  236. .open = simple_open,
  237. .write = size_write,
  238. .read = size_read,
  239. };
  240. static ssize_t limit_write(struct file *filp, const char __user *buf,
  241. size_t count, loff_t *pos)
  242. {
  243. struct mlx5_cache_ent *ent = filp->private_data;
  244. struct mlx5_ib_dev *dev = ent->dev;
  245. char lbuf[20];
  246. u32 var;
  247. int err;
  248. int c;
  249. if (copy_from_user(lbuf, buf, sizeof(lbuf)))
  250. return -EFAULT;
  251. c = order2idx(dev, ent->order);
  252. lbuf[sizeof(lbuf) - 1] = 0;
  253. if (sscanf(lbuf, "%u", &var) != 1)
  254. return -EINVAL;
  255. if (var > ent->size)
  256. return -EINVAL;
  257. ent->limit = var;
  258. if (ent->cur < ent->limit) {
  259. err = add_keys(dev, c, 2 * ent->limit - ent->cur);
  260. if (err)
  261. return err;
  262. }
  263. return count;
  264. }
  265. static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
  266. loff_t *pos)
  267. {
  268. struct mlx5_cache_ent *ent = filp->private_data;
  269. char lbuf[20];
  270. int err;
  271. if (*pos)
  272. return 0;
  273. err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
  274. if (err < 0)
  275. return err;
  276. if (copy_to_user(buf, lbuf, err))
  277. return -EFAULT;
  278. *pos += err;
  279. return err;
  280. }
  281. static const struct file_operations limit_fops = {
  282. .owner = THIS_MODULE,
  283. .open = simple_open,
  284. .write = limit_write,
  285. .read = limit_read,
  286. };
  287. static int someone_adding(struct mlx5_mr_cache *cache)
  288. {
  289. int i;
  290. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  291. if (cache->ent[i].cur < cache->ent[i].limit)
  292. return 1;
  293. }
  294. return 0;
  295. }
  296. static void __cache_work_func(struct mlx5_cache_ent *ent)
  297. {
  298. struct mlx5_ib_dev *dev = ent->dev;
  299. struct mlx5_mr_cache *cache = &dev->cache;
  300. int i = order2idx(dev, ent->order);
  301. int err;
  302. if (cache->stopped)
  303. return;
  304. ent = &dev->cache.ent[i];
  305. if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
  306. err = add_keys(dev, i, 1);
  307. if (ent->cur < 2 * ent->limit) {
  308. if (err == -EAGAIN) {
  309. mlx5_ib_dbg(dev, "returned eagain, order %d\n",
  310. i + 2);
  311. queue_delayed_work(cache->wq, &ent->dwork,
  312. msecs_to_jiffies(3));
  313. } else if (err) {
  314. mlx5_ib_warn(dev, "command failed order %d, err %d\n",
  315. i + 2, err);
  316. queue_delayed_work(cache->wq, &ent->dwork,
  317. msecs_to_jiffies(1000));
  318. } else {
  319. queue_work(cache->wq, &ent->work);
  320. }
  321. }
  322. } else if (ent->cur > 2 * ent->limit) {
  323. if (!someone_adding(cache) &&
  324. time_after(jiffies, cache->last_add + 300 * HZ)) {
  325. remove_keys(dev, i, 1);
  326. if (ent->cur > ent->limit)
  327. queue_work(cache->wq, &ent->work);
  328. } else {
  329. queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
  330. }
  331. }
  332. }
  333. static void delayed_cache_work_func(struct work_struct *work)
  334. {
  335. struct mlx5_cache_ent *ent;
  336. ent = container_of(work, struct mlx5_cache_ent, dwork.work);
  337. __cache_work_func(ent);
  338. }
  339. static void cache_work_func(struct work_struct *work)
  340. {
  341. struct mlx5_cache_ent *ent;
  342. ent = container_of(work, struct mlx5_cache_ent, work);
  343. __cache_work_func(ent);
  344. }
  345. static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
  346. {
  347. struct mlx5_mr_cache *cache = &dev->cache;
  348. struct mlx5_ib_mr *mr = NULL;
  349. struct mlx5_cache_ent *ent;
  350. int c;
  351. int i;
  352. c = order2idx(dev, order);
  353. if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
  354. mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
  355. return NULL;
  356. }
  357. for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
  358. ent = &cache->ent[i];
  359. mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
  360. spin_lock_irq(&ent->lock);
  361. if (!list_empty(&ent->head)) {
  362. mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
  363. list);
  364. list_del(&mr->list);
  365. ent->cur--;
  366. spin_unlock_irq(&ent->lock);
  367. if (ent->cur < ent->limit)
  368. queue_work(cache->wq, &ent->work);
  369. break;
  370. }
  371. spin_unlock_irq(&ent->lock);
  372. queue_work(cache->wq, &ent->work);
  373. }
  374. if (!mr)
  375. cache->ent[c].miss++;
  376. return mr;
  377. }
  378. static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  379. {
  380. struct mlx5_mr_cache *cache = &dev->cache;
  381. struct mlx5_cache_ent *ent;
  382. int shrink = 0;
  383. int c;
  384. c = order2idx(dev, mr->order);
  385. if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
  386. mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
  387. return;
  388. }
  389. ent = &cache->ent[c];
  390. spin_lock_irq(&ent->lock);
  391. list_add_tail(&mr->list, &ent->head);
  392. ent->cur++;
  393. if (ent->cur > 2 * ent->limit)
  394. shrink = 1;
  395. spin_unlock_irq(&ent->lock);
  396. if (shrink)
  397. queue_work(cache->wq, &ent->work);
  398. }
  399. static void clean_keys(struct mlx5_ib_dev *dev, int c)
  400. {
  401. struct mlx5_mr_cache *cache = &dev->cache;
  402. struct mlx5_cache_ent *ent = &cache->ent[c];
  403. struct mlx5_ib_mr *mr;
  404. int err;
  405. cancel_delayed_work(&ent->dwork);
  406. while (1) {
  407. spin_lock_irq(&ent->lock);
  408. if (list_empty(&ent->head)) {
  409. spin_unlock_irq(&ent->lock);
  410. return;
  411. }
  412. mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
  413. list_del(&mr->list);
  414. ent->cur--;
  415. ent->size--;
  416. spin_unlock_irq(&ent->lock);
  417. err = destroy_mkey(dev, mr);
  418. if (err)
  419. mlx5_ib_warn(dev, "failed destroy mkey\n");
  420. else
  421. kfree(mr);
  422. }
  423. }
  424. static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
  425. {
  426. struct mlx5_mr_cache *cache = &dev->cache;
  427. struct mlx5_cache_ent *ent;
  428. int i;
  429. if (!mlx5_debugfs_root)
  430. return 0;
  431. cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
  432. if (!cache->root)
  433. return -ENOMEM;
  434. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  435. ent = &cache->ent[i];
  436. sprintf(ent->name, "%d", ent->order);
  437. ent->dir = debugfs_create_dir(ent->name, cache->root);
  438. if (!ent->dir)
  439. return -ENOMEM;
  440. ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
  441. &size_fops);
  442. if (!ent->fsize)
  443. return -ENOMEM;
  444. ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
  445. &limit_fops);
  446. if (!ent->flimit)
  447. return -ENOMEM;
  448. ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
  449. &ent->cur);
  450. if (!ent->fcur)
  451. return -ENOMEM;
  452. ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
  453. &ent->miss);
  454. if (!ent->fmiss)
  455. return -ENOMEM;
  456. }
  457. return 0;
  458. }
  459. static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
  460. {
  461. if (!mlx5_debugfs_root)
  462. return;
  463. debugfs_remove_recursive(dev->cache.root);
  464. }
  465. static void delay_time_func(unsigned long ctx)
  466. {
  467. struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
  468. dev->fill_delay = 0;
  469. }
  470. int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
  471. {
  472. struct mlx5_mr_cache *cache = &dev->cache;
  473. struct mlx5_cache_ent *ent;
  474. int limit;
  475. int err;
  476. int i;
  477. cache->wq = create_singlethread_workqueue("mkey_cache");
  478. if (!cache->wq) {
  479. mlx5_ib_warn(dev, "failed to create work queue\n");
  480. return -ENOMEM;
  481. }
  482. setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
  483. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  484. INIT_LIST_HEAD(&cache->ent[i].head);
  485. spin_lock_init(&cache->ent[i].lock);
  486. ent = &cache->ent[i];
  487. INIT_LIST_HEAD(&ent->head);
  488. spin_lock_init(&ent->lock);
  489. ent->order = i + 2;
  490. ent->dev = dev;
  491. if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
  492. limit = dev->mdev->profile->mr_cache[i].limit;
  493. else
  494. limit = 0;
  495. INIT_WORK(&ent->work, cache_work_func);
  496. INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
  497. ent->limit = limit;
  498. queue_work(cache->wq, &ent->work);
  499. }
  500. err = mlx5_mr_cache_debugfs_init(dev);
  501. if (err)
  502. mlx5_ib_warn(dev, "cache debugfs failure\n");
  503. return 0;
  504. }
  505. int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
  506. {
  507. int i;
  508. dev->cache.stopped = 1;
  509. flush_workqueue(dev->cache.wq);
  510. mlx5_mr_cache_debugfs_cleanup(dev);
  511. for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
  512. clean_keys(dev, i);
  513. destroy_workqueue(dev->cache.wq);
  514. del_timer_sync(&dev->delay_timer);
  515. return 0;
  516. }
  517. struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
  518. {
  519. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  520. struct mlx5_core_dev *mdev = dev->mdev;
  521. struct mlx5_create_mkey_mbox_in *in;
  522. struct mlx5_mkey_seg *seg;
  523. struct mlx5_ib_mr *mr;
  524. int err;
  525. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  526. if (!mr)
  527. return ERR_PTR(-ENOMEM);
  528. in = kzalloc(sizeof(*in), GFP_KERNEL);
  529. if (!in) {
  530. err = -ENOMEM;
  531. goto err_free;
  532. }
  533. seg = &in->seg;
  534. seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
  535. seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
  536. seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
  537. seg->start_addr = 0;
  538. err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
  539. NULL);
  540. if (err)
  541. goto err_in;
  542. kfree(in);
  543. mr->ibmr.lkey = mr->mmr.key;
  544. mr->ibmr.rkey = mr->mmr.key;
  545. mr->umem = NULL;
  546. return &mr->ibmr;
  547. err_in:
  548. kfree(in);
  549. err_free:
  550. kfree(mr);
  551. return ERR_PTR(err);
  552. }
  553. static int get_octo_len(u64 addr, u64 len, int page_size)
  554. {
  555. u64 offset;
  556. int npages;
  557. offset = addr & (page_size - 1);
  558. npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
  559. return (npages + 1) / 2;
  560. }
  561. static int use_umr(int order)
  562. {
  563. return order <= MLX5_MAX_UMR_SHIFT;
  564. }
  565. static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
  566. struct ib_sge *sg, u64 dma, int n, u32 key,
  567. int page_shift, u64 virt_addr, u64 len,
  568. int access_flags)
  569. {
  570. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  571. struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
  572. sg->addr = dma;
  573. sg->length = ALIGN(sizeof(u64) * n, 64);
  574. sg->lkey = dev->umrc.pd->local_dma_lkey;
  575. wr->next = NULL;
  576. wr->send_flags = 0;
  577. wr->sg_list = sg;
  578. if (n)
  579. wr->num_sge = 1;
  580. else
  581. wr->num_sge = 0;
  582. wr->opcode = MLX5_IB_WR_UMR;
  583. umrwr->npages = n;
  584. umrwr->page_shift = page_shift;
  585. umrwr->mkey = key;
  586. umrwr->target.virt_addr = virt_addr;
  587. umrwr->length = len;
  588. umrwr->access_flags = access_flags;
  589. umrwr->pd = pd;
  590. }
  591. static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
  592. struct ib_send_wr *wr, u32 key)
  593. {
  594. struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
  595. wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
  596. wr->opcode = MLX5_IB_WR_UMR;
  597. umrwr->mkey = key;
  598. }
  599. void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
  600. {
  601. struct mlx5_ib_umr_context *context;
  602. struct ib_wc wc;
  603. int err;
  604. while (1) {
  605. err = ib_poll_cq(cq, 1, &wc);
  606. if (err < 0) {
  607. pr_warn("poll cq error %d\n", err);
  608. return;
  609. }
  610. if (err == 0)
  611. break;
  612. context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
  613. context->status = wc.status;
  614. complete(&context->done);
  615. }
  616. ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
  617. }
  618. static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
  619. u64 virt_addr, u64 len, int npages,
  620. int page_shift, int order, int access_flags)
  621. {
  622. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  623. struct device *ddev = dev->ib_dev.dma_device;
  624. struct umr_common *umrc = &dev->umrc;
  625. struct mlx5_ib_umr_context umr_context;
  626. struct ib_send_wr wr, *bad;
  627. struct mlx5_ib_mr *mr;
  628. struct ib_sge sg;
  629. int size;
  630. __be64 *mr_pas;
  631. __be64 *pas;
  632. dma_addr_t dma;
  633. int err = 0;
  634. int i;
  635. for (i = 0; i < 1; i++) {
  636. mr = alloc_cached_mr(dev, order);
  637. if (mr)
  638. break;
  639. err = add_keys(dev, order2idx(dev, order), 1);
  640. if (err && err != -EAGAIN) {
  641. mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
  642. break;
  643. }
  644. }
  645. if (!mr)
  646. return ERR_PTR(-EAGAIN);
  647. /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
  648. * To avoid copying garbage after the pas array, we allocate
  649. * a little more. */
  650. size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
  651. mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
  652. if (!mr_pas) {
  653. err = -ENOMEM;
  654. goto free_mr;
  655. }
  656. pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
  657. mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
  658. /* Clear padding after the actual pages. */
  659. memset(pas + npages, 0, size - npages * sizeof(u64));
  660. dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
  661. if (dma_mapping_error(ddev, dma)) {
  662. err = -ENOMEM;
  663. goto free_pas;
  664. }
  665. memset(&wr, 0, sizeof(wr));
  666. wr.wr_id = (u64)(unsigned long)&umr_context;
  667. prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
  668. virt_addr, len, access_flags);
  669. mlx5_ib_init_umr_context(&umr_context);
  670. down(&umrc->sem);
  671. err = ib_post_send(umrc->qp, &wr, &bad);
  672. if (err) {
  673. mlx5_ib_warn(dev, "post send failed, err %d\n", err);
  674. goto unmap_dma;
  675. } else {
  676. wait_for_completion(&umr_context.done);
  677. if (umr_context.status != IB_WC_SUCCESS) {
  678. mlx5_ib_warn(dev, "reg umr failed\n");
  679. err = -EFAULT;
  680. }
  681. }
  682. mr->mmr.iova = virt_addr;
  683. mr->mmr.size = len;
  684. mr->mmr.pd = to_mpd(pd)->pdn;
  685. mr->live = 1;
  686. unmap_dma:
  687. up(&umrc->sem);
  688. dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
  689. free_pas:
  690. kfree(mr_pas);
  691. free_mr:
  692. if (err) {
  693. free_cached_mr(dev, mr);
  694. return ERR_PTR(err);
  695. }
  696. return mr;
  697. }
  698. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  /*
   * Update a range of translation (MTT) entries of @mr through UMR work
   * requests.
   *
   * @mr:               MR whose translation entries are updated.
   * @start_page_index: index of the first page entry to update.
   * @npages:           number of page entries to update.
   * @zap:              when non-zero the staging buffer is not populated from
   *                    the umem, so the zero-filled buffer is posted as is.
   *
   * The range is widened to MLX5_UMR_MTT_ALIGNMENT granularity and processed
   * in chunks of at most one page of entries, or MLX5_UMR_MTT_MIN_CHUNK_SIZE
   * bytes when the static emergency buffer has to be used. Each chunk is
   * posted on the UMR QP and waited for before the next one is built.
   *
   * Returns 0 on success, -EINVAL if the aligned range exceeds
   * MLX5_MAX_UMR_PAGES, -ENOMEM if the buffer cannot be DMA-mapped, the
   * ib_post_send() error, or -EFAULT if a UMR completes with a bad status.
   */
  699. int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
  700. int zap)
  701. {
  702. struct mlx5_ib_dev *dev = mr->dev;
  703. struct device *ddev = dev->ib_dev.dma_device;
  704. struct umr_common *umrc = &dev->umrc;
  705. struct mlx5_ib_umr_context umr_context;
  706. struct ib_umem *umem = mr->umem;
  707. int size;
  708. __be64 *pas;
  709. dma_addr_t dma;
  710. struct ib_send_wr wr, *bad;
  711. struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
  712. struct ib_sge sg;
  713. int err = 0;
  714. const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
  715. const int page_index_mask = page_index_alignment - 1;
  716. size_t pages_mapped = 0;
  717. size_t pages_to_map = 0;
  718. size_t pages_iter = 0;
  719. int use_emergency_buf = 0;
  720. /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
  721. * so we need to align the offset and length accordingly */
  722. if (start_page_index & page_index_mask) {
  723. npages += start_page_index & page_index_mask;
  724. start_page_index &= ~page_index_mask;
  725. }
  726. pages_to_map = ALIGN(npages, page_index_alignment);
  727. if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
  728. return -EINVAL;
  /* The staging buffer is a single page, so cap the chunk size at PAGE_SIZE. */
  729. size = sizeof(u64) * pages_to_map;
  730. size = min_t(int, PAGE_SIZE, size);
  731. /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
  732. * code, when we are called from an invalidation. The pas buffer must
  733. * be 2k-aligned for Connect-IB. */
  734. pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
  735. if (!pas) {
  736. mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
  /* Fall back to the static buffer; the mutex is held until free_pas. */
  737. pas = mlx5_ib_update_mtt_emergency_buffer;
  738. size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
  739. use_emergency_buf = 1;
  740. mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
  741. memset(pas, 0, size);
  742. }
  /* Number of page entries that fit into one chunk. */
  743. pages_iter = size / sizeof(u64);
  744. dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
  745. if (dma_mapping_error(ddev, dma)) {
  746. mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
  747. err = -ENOMEM;
  748. goto free_pas;
  749. }
  /* One UMR per chunk; the loop stops at the first error. */
  750. for (pages_mapped = 0;
  751. pages_mapped < pages_to_map && !err;
  752. pages_mapped += pages_iter, start_page_index += pages_iter) {
  753. dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
  /*
   * NOTE(review): the subtraction below wraps if start_page_index is
   * larger than ib_umem_num_pages(umem) - confirm callers rule that out.
   */
  754. npages = min_t(size_t,
  755. pages_iter,
  756. ib_umem_num_pages(umem) - start_page_index);
  757. if (!zap) {
  758. __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
  759. start_page_index, npages, pas,
  760. MLX5_IB_MTT_PRESENT);
  761. /* Clear padding after the pages brought from the
  762. * umem. */
  763. memset(pas + npages, 0, size - npages * sizeof(u64));
  764. }
  765. dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
  /* Build the work request by hand; wr_id carries the waiter context. */
  766. memset(&wr, 0, sizeof(wr));
  767. wr.wr_id = (u64)(unsigned long)&umr_context;
  768. sg.addr = dma;
  769. sg.length = ALIGN(npages * sizeof(u64),
  770. MLX5_UMR_MTT_ALIGNMENT);
  771. sg.lkey = dev->umrc.pd->local_dma_lkey;
  772. wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
  773. MLX5_IB_SEND_UMR_UPDATE_MTT;
  774. wr.sg_list = &sg;
  775. wr.num_sge = 1;
  776. wr.opcode = MLX5_IB_WR_UMR;
  777. umrwr->npages = sg.length / sizeof(u64);
  778. umrwr->page_shift = PAGE_SHIFT;
  779. umrwr->mkey = mr->mmr.key;
  780. umrwr->target.offset = start_page_index;
  /* The semaphore is held from posting until the completion is seen. */
  781. mlx5_ib_init_umr_context(&umr_context);
  782. down(&umrc->sem);
  783. err = ib_post_send(umrc->qp, &wr, &bad);
  784. if (err) {
  785. mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
  786. } else {
  787. wait_for_completion(&umr_context.done);
  788. if (umr_context.status != IB_WC_SUCCESS) {
  789. mlx5_ib_err(dev, "UMR completion failed, code %d\n",
  790. umr_context.status);
  791. err = -EFAULT;
  792. }
  793. }
  794. up(&umrc->sem);
  795. }
  796. dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
  797. free_pas:
  /* Release whichever buffer was used: the page, or the emergency mutex. */
  798. if (!use_emergency_buf)
  799. free_page((unsigned long)pas);
  800. else
  801. mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
  802. return err;
  803. }
  804. #endif
  805. static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
  806. u64 length, struct ib_umem *umem,
  807. int npages, int page_shift,
  808. int access_flags)
  809. {
  810. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  811. struct mlx5_create_mkey_mbox_in *in;
  812. struct mlx5_ib_mr *mr;
  813. int inlen;
  814. int err;
  815. bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
  816. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  817. if (!mr)
  818. return ERR_PTR(-ENOMEM);
  819. inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
  820. in = mlx5_vzalloc(inlen);
  821. if (!in) {
  822. err = -ENOMEM;
  823. goto err_1;
  824. }
  825. mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
  826. pg_cap ? MLX5_IB_MTT_PRESENT : 0);
  827. /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
  828. * in the page list submitted with the command. */
  829. in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
  830. in->seg.flags = convert_access(access_flags) |
  831. MLX5_ACCESS_MODE_MTT;
  832. in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
  833. in->seg.start_addr = cpu_to_be64(virt_addr);
  834. in->seg.len = cpu_to_be64(length);
  835. in->seg.bsfs_octo_size = 0;
  836. in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
  837. in->seg.log2_page_size = page_shift;
  838. in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
  839. in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
  840. 1 << page_shift));
  841. err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
  842. NULL, NULL);
  843. if (err) {
  844. mlx5_ib_warn(dev, "create mkey failed\n");
  845. goto err_2;
  846. }
  847. mr->umem = umem;
  848. mr->dev = dev;
  849. mr->live = 1;
  850. kvfree(in);
  851. mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
  852. return mr;
  853. err_2:
  854. kvfree(in);
  855. err_1:
  856. kfree(mr);
  857. return ERR_PTR(err);
  858. }
  859. struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
  860. u64 virt_addr, int access_flags,
  861. struct ib_udata *udata)
  862. {
  863. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  864. struct mlx5_ib_mr *mr = NULL;
  865. struct ib_umem *umem;
  866. int page_shift;
  867. int npages;
  868. int ncont;
  869. int order;
  870. int err;
  871. mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
  872. start, virt_addr, length, access_flags);
  873. umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
  874. 0);
  875. if (IS_ERR(umem)) {
  876. mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
  877. return (void *)umem;
  878. }
  879. mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
  880. if (!npages) {
  881. mlx5_ib_warn(dev, "avoid zero region\n");
  882. err = -EINVAL;
  883. goto error;
  884. }
  885. mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
  886. npages, ncont, order, page_shift);
  887. if (use_umr(order)) {
  888. mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
  889. order, access_flags);
  890. if (PTR_ERR(mr) == -EAGAIN) {
  891. mlx5_ib_dbg(dev, "cache empty for order %d", order);
  892. mr = NULL;
  893. }
  894. } else if (access_flags & IB_ACCESS_ON_DEMAND) {
  895. err = -EINVAL;
  896. pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
  897. goto error;
  898. }
  899. if (!mr)
  900. mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
  901. access_flags);
  902. if (IS_ERR(mr)) {
  903. err = PTR_ERR(mr);
  904. goto error;
  905. }
  906. mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
  907. mr->umem = umem;
  908. mr->npages = npages;
  909. atomic_add(npages, &dev->mdev->priv.reg_pages);
  910. mr->ibmr.lkey = mr->mmr.key;
  911. mr->ibmr.rkey = mr->mmr.key;
  912. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  913. if (umem->odp_data) {
  914. /*
  915. * This barrier prevents the compiler from moving the
  916. * setting of umem->odp_data->private to point to our
  917. * MR, before reg_umr finished, to ensure that the MR
  918. * initialization have finished before starting to
  919. * handle invalidations.
  920. */
  921. smp_wmb();
  922. mr->umem->odp_data->private = mr;
  923. /*
  924. * Make sure we will see the new
  925. * umem->odp_data->private value in the invalidation
  926. * routines, before we can get page faults on the
  927. * MR. Page faults can happen once we put the MR in
  928. * the tree, below this line. Without the barrier,
  929. * there can be a fault handling and an invalidation
  930. * before umem->odp_data->private == mr is visible to
  931. * the invalidation handler.
  932. */
  933. smp_wmb();
  934. }
  935. #endif
  936. return &mr->ibmr;
  937. error:
  938. ib_umem_release(umem);
  939. return ERR_PTR(err);
  940. }
  941. static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  942. {
  943. struct umr_common *umrc = &dev->umrc;
  944. struct mlx5_ib_umr_context umr_context;
  945. struct ib_send_wr wr, *bad;
  946. int err;
  947. memset(&wr, 0, sizeof(wr));
  948. wr.wr_id = (u64)(unsigned long)&umr_context;
  949. prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
  950. mlx5_ib_init_umr_context(&umr_context);
  951. down(&umrc->sem);
  952. err = ib_post_send(umrc->qp, &wr, &bad);
  953. if (err) {
  954. up(&umrc->sem);
  955. mlx5_ib_dbg(dev, "err %d\n", err);
  956. goto error;
  957. } else {
  958. wait_for_completion(&umr_context.done);
  959. up(&umrc->sem);
  960. }
  961. if (umr_context.status != IB_WC_SUCCESS) {
  962. mlx5_ib_warn(dev, "unreg umr failed\n");
  963. err = -EFAULT;
  964. goto error;
  965. }
  966. return 0;
  967. error:
  968. return err;
  969. }
  970. static int clean_mr(struct mlx5_ib_mr *mr)
  971. {
  972. struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
  973. int umred = mr->umred;
  974. int err;
  975. if (mr->sig) {
  976. if (mlx5_core_destroy_psv(dev->mdev,
  977. mr->sig->psv_memory.psv_idx))
  978. mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
  979. mr->sig->psv_memory.psv_idx);
  980. if (mlx5_core_destroy_psv(dev->mdev,
  981. mr->sig->psv_wire.psv_idx))
  982. mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
  983. mr->sig->psv_wire.psv_idx);
  984. kfree(mr->sig);
  985. mr->sig = NULL;
  986. }
  987. if (!umred) {
  988. err = destroy_mkey(dev, mr);
  989. if (err) {
  990. mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
  991. mr->mmr.key, err);
  992. return err;
  993. }
  994. } else {
  995. err = unreg_umr(dev, mr);
  996. if (err) {
  997. mlx5_ib_warn(dev, "failed unregister\n");
  998. return err;
  999. }
  1000. free_cached_mr(dev, mr);
  1001. }
  1002. if (!umred)
  1003. kfree(mr);
  1004. return 0;
  1005. }
  1006. int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
  1007. {
  1008. struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
  1009. struct mlx5_ib_mr *mr = to_mmr(ibmr);
  1010. int npages = mr->npages;
  1011. struct ib_umem *umem = mr->umem;
  1012. #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  1013. if (umem && umem->odp_data) {
  1014. /* Prevent new page faults from succeeding */
  1015. mr->live = 0;
  1016. /* Wait for all running page-fault handlers to finish. */
  1017. synchronize_srcu(&dev->mr_srcu);
  1018. /* Destroy all page mappings */
  1019. mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
  1020. ib_umem_end(umem));
  1021. /*
  1022. * We kill the umem before the MR for ODP,
  1023. * so that there will not be any invalidations in
  1024. * flight, looking at the *mr struct.
  1025. */
  1026. ib_umem_release(umem);
  1027. atomic_sub(npages, &dev->mdev->priv.reg_pages);
  1028. /* Avoid double-freeing the umem. */
  1029. umem = NULL;
  1030. }
  1031. #endif
  1032. clean_mr(mr);
  1033. if (umem) {
  1034. ib_umem_release(umem);
  1035. atomic_sub(npages, &dev->mdev->priv.reg_pages);
  1036. }
  1037. return 0;
  1038. }
  1039. struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
  1040. enum ib_mr_type mr_type,
  1041. u32 max_num_sg)
  1042. {
  1043. struct mlx5_ib_dev *dev = to_mdev(pd->device);
  1044. struct mlx5_create_mkey_mbox_in *in;
  1045. struct mlx5_ib_mr *mr;
  1046. int access_mode, err;
  1047. int ndescs = roundup(max_num_sg, 4);
  1048. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  1049. if (!mr)
  1050. return ERR_PTR(-ENOMEM);
  1051. in = kzalloc(sizeof(*in), GFP_KERNEL);
  1052. if (!in) {
  1053. err = -ENOMEM;
  1054. goto err_free;
  1055. }
  1056. in->seg.status = MLX5_MKEY_STATUS_FREE;
  1057. in->seg.xlt_oct_size = cpu_to_be32(ndescs);
  1058. in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
  1059. in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
  1060. if (mr_type == IB_MR_TYPE_MEM_REG) {
  1061. access_mode = MLX5_ACCESS_MODE_MTT;
  1062. in->seg.log2_page_size = PAGE_SHIFT;
  1063. } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
  1064. u32 psv_index[2];
  1065. in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
  1066. MLX5_MKEY_BSF_EN);
  1067. in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
  1068. mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
  1069. if (!mr->sig) {
  1070. err = -ENOMEM;
  1071. goto err_free_in;
  1072. }
  1073. /* create mem & wire PSVs */
  1074. err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
  1075. 2, psv_index);
  1076. if (err)
  1077. goto err_free_sig;
  1078. access_mode = MLX5_ACCESS_MODE_KLM;
  1079. mr->sig->psv_memory.psv_idx = psv_index[0];
  1080. mr->sig->psv_wire.psv_idx = psv_index[1];
  1081. mr->sig->sig_status_checked = true;
  1082. mr->sig->sig_err_exists = false;
  1083. /* Next UMR, Arm SIGERR */
  1084. ++mr->sig->sigerr_count;
  1085. } else {
  1086. mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
  1087. err = -EINVAL;
  1088. goto err_free_in;
  1089. }
  1090. in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
  1091. err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
  1092. NULL, NULL, NULL);
  1093. if (err)
  1094. goto err_destroy_psv;
  1095. mr->ibmr.lkey = mr->mmr.key;
  1096. mr->ibmr.rkey = mr->mmr.key;
  1097. mr->umem = NULL;
  1098. kfree(in);
  1099. return &mr->ibmr;
  1100. err_destroy_psv:
  1101. if (mr->sig) {
  1102. if (mlx5_core_destroy_psv(dev->mdev,
  1103. mr->sig->psv_memory.psv_idx))
  1104. mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
  1105. mr->sig->psv_memory.psv_idx);
  1106. if (mlx5_core_destroy_psv(dev->mdev,
  1107. mr->sig->psv_wire.psv_idx))
  1108. mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
  1109. mr->sig->psv_wire.psv_idx);
  1110. }
  1111. err_free_sig:
  1112. kfree(mr->sig);
  1113. err_free_in:
  1114. kfree(in);
  1115. err_free:
  1116. kfree(mr);
  1117. return ERR_PTR(err);
  1118. }
  1119. struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
  1120. int page_list_len)
  1121. {
  1122. struct mlx5_ib_fast_reg_page_list *mfrpl;
  1123. int size = page_list_len * sizeof(u64);
  1124. mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
  1125. if (!mfrpl)
  1126. return ERR_PTR(-ENOMEM);
  1127. mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
  1128. if (!mfrpl->ibfrpl.page_list)
  1129. goto err_free;
  1130. mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
  1131. size, &mfrpl->map,
  1132. GFP_KERNEL);
  1133. if (!mfrpl->mapped_page_list)
  1134. goto err_free;
  1135. WARN_ON(mfrpl->map & 0x3f);
  1136. return &mfrpl->ibfrpl;
  1137. err_free:
  1138. kfree(mfrpl->ibfrpl.page_list);
  1139. kfree(mfrpl);
  1140. return ERR_PTR(-ENOMEM);
  1141. }
  1142. void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
  1143. {
  1144. struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
  1145. struct mlx5_ib_dev *dev = to_mdev(page_list->device);
  1146. int size = page_list->max_page_list_len * sizeof(u64);
  1147. dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
  1148. mfrpl->map);
  1149. kfree(mfrpl->ibfrpl.page_list);
  1150. kfree(mfrpl);
  1151. }
  1152. int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
  1153. struct ib_mr_status *mr_status)
  1154. {
  1155. struct mlx5_ib_mr *mmr = to_mmr(ibmr);
  1156. int ret = 0;
  1157. if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
  1158. pr_err("Invalid status check mask\n");
  1159. ret = -EINVAL;
  1160. goto done;
  1161. }
  1162. mr_status->fail_status = 0;
  1163. if (check_mask & IB_MR_CHECK_SIG_STATUS) {
  1164. if (!mmr->sig) {
  1165. ret = -EINVAL;
  1166. pr_err("signature status check requested on a non-signature enabled MR\n");
  1167. goto done;
  1168. }
  1169. mmr->sig->sig_status_checked = true;
  1170. if (!mmr->sig->sig_err_exists)
  1171. goto done;
  1172. if (ibmr->lkey == mmr->sig->err_item.key)
  1173. memcpy(&mr_status->sig_err, &mmr->sig->err_item,
  1174. sizeof(mr_status->sig_err));
  1175. else {
  1176. mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
  1177. mr_status->sig_err.sig_err_offset = 0;
  1178. mr_status->sig_err.key = mmr->sig->err_item.key;
  1179. }
  1180. mmr->sig->sig_err_exists = false;
  1181. mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
  1182. }
  1183. done:
  1184. return ret;
  1185. }