mr.c

/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048

static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);

static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled);
}

static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}

static bool use_umr(struct mlx5_ib_dev *dev, int order)
{
	return order <= mr_cache_max_order(dev) &&
	       umr_can_modify_entity_size(dev);
}

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
{
	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
	if (mr->umem->is_odp) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR before reg_umr has finished, to ensure that MR
		 * initialization has completed before we start to
		 * handle invalidations.
		 */
		smp_wmb();
		to_ib_umem_odp(mr->umem)->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line. Without the barrier,
		 * a page fault could be handled and an invalidation
		 * could run before umem->odp_data->private == mr is
		 * visible to the invalidation handler.
		 */
		smp_wmb();
	}
}
#endif
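
/*
 * Completion callback for the asynchronous mkey creation requests issued by
 * add_keys(). On success the new MR is added to its cache entry and the mkey
 * is inserted into the device mkey radix tree; on failure the MR is freed and
 * further cache filling is briefly delayed via the delay timer.
 */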
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	mr->mmkey.type = MLX5_MKEY_MR;
	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	write_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
				&mr->mmkey);
	if (err)
		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
	write_unlock_irqrestore(&table->lock, flags);

	if (!completion_done(&ent->compl))
		complete(&ent->compl);
}
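
/*
 * Asynchronously create up to @num mkeys for cache entry @c. Creation is
 * throttled to MAX_PENDING_REG_MR outstanding requests per entry; results
 * are delivered to reg_mr_callback().
 */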
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->allocated_from_cache = 1;
		mr->dev = dev;

		MLX5_SET(mkc, mkc, free, 1);
		MLX5_SET(mkc, mkc, umr_en, 1);
		MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
		MLX5_SET(mkc, mkc, access_mode_4_2,
			 (ent->access_mode >> 2) & 0x7);
		MLX5_SET(mkc, mkc, qpn, 0xffffff);
		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
		MLX5_SET(mkc, mkc, log_page_size, ent->page);

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
					       in, inlen,
					       mr->out, sizeof(mr->out),
					       reg_mr_callback, mr);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *tmp_mr;
	struct mlx5_ib_mr *mr;
	LIST_HEAD(del_list);
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			break;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_move(&mr->list, &del_list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
	}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	synchronize_srcu(&dev->mr_srcu);
#endif

	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
		list_del(&mr->list);
		kfree(mr);
	}
}
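
/*
 * debugfs interface for the MR cache: each entry exposes writable "size"
 * (grow/shrink via add_keys()/remove_keys()) and "limit" files, plus "cur"
 * and "miss" counters.
 */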
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20] = {0};
	u32 var;
	int err;
	int c;

	count = min(count, sizeof(lbuf) - 1);
	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	return simple_read_from_buffer(buf, count, pos, lbuf, err);
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20] = {0};
	u32 var;
	int err;
	int c;

	count = min(count, sizeof(lbuf) - 1);
	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	return simple_read_from_buffer(buf, count, pos, lbuf, err);
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}
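
/*
 * Background maintenance for one cache entry: top the entry up towards
 * 2 * limit when it runs low, and lazily shrink it via remove_keys() once
 * it has grown well past its limit and the system looks idle.
 */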
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as a garbage
		 * collection task. Such a task is intended to run only when
		 * no other active processes are running.
		 *
		 * need_resched() returns TRUE if there are user tasks to be
		 * activated in the near future.
		 *
		 * In that case, we don't execute remove_keys() and postpone
		 * the garbage collection work to the next cycle, in order to
		 * free CPU resources to other tasks.
		 */
		if (!need_resched() && !someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}
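
/*
 * Allocate an MR directly from a specific cache entry, topping the entry up
 * and waiting for an asynchronous mkey creation to complete if it is empty.
 */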
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	struct mlx5_ib_mr *mr;
	int err;

	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
		return NULL;
	}

	ent = &cache->ent[entry];
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);

			err = add_keys(dev, entry, 1);
			if (err && err != -EAGAIN)
				return ERR_PTR(err);

			wait_for_completion(&ent->compl);
		} else {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			return mr;
		}
	}
}
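
/*
 * Try to take an MR of at least @order from the cache, searching from the
 * matching entry up to the largest UMR-capable entry. A miss is counted
 * against the requested entry and NULL is returned.
 */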
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int last_umr_cache_entry;
	int c;
	int i;

	c = order2idx(dev, order);
	last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
	if (c < 0 || c > last_umr_cache_entry) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i <= last_umr_cache_entry; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	if (!mr->allocated_from_cache)
		return;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}

	if (unreg_umr(dev, mr))
		return;

	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *tmp_mr;
	struct mlx5_ib_mr *mr;
	LIST_HEAD(del_list);

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			break;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_move(&mr->list, &del_list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
	}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	synchronize_srcu(&dev->mr_srcu);
#endif

	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
		list_del(&mr->list);
		kfree(mr);
	}
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root || dev->rep)
		return;

	debugfs_remove_recursive(dev->cache.root);
	dev->cache.root = NULL;
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root || dev->rep)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			goto err;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			goto err;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			goto err;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			goto err;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			goto err;
	}

	return 0;
err:
	mlx5_mr_cache_debugfs_cleanup(dev);

	return -ENOMEM;
}

static void delay_time_func(struct timer_list *t)
{
	struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);

	dev->fill_delay = 0;
}
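
/*
 * Initialize the MR cache: one ordered workqueue plus MAX_MR_CACHE_ENTRIES
 * entries, each holding mkeys of a fixed order (i + 2). Standard entries are
 * sized from the device profile; entries above MR_CACHE_LAST_STD_ENTRY are
 * set up for ODP via mlx5_odp_init_mr_cache_entry().
 */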
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int err;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	timer_setup(&dev->delay_timer, delay_time_func, 0);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;
		ent->limit = 0;

		init_completion(&ent->compl);
		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

		if (i > MR_CACHE_LAST_STD_ENTRY) {
			mlx5_odp_init_mr_cache_entry(ent);
			continue;
		}

		if (ent->order > mr_cache_max_order(dev))
			continue;

		ent->page = PAGE_SHIFT;
		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
			   MLX5_IB_UMR_OCTOWORD;
		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
		    !dev->rep &&
		    mlx5_core_is_pf(dev->mdev))
			ent->limit = dev->mdev->profile->mr_cache[i].limit;
		else
			ent->limit = 0;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	/*
	 * We don't want to fail the driver if debugfs failed to initialize,
	 * so we are not forwarding the error to the user.
	 */

	return 0;
}

static void wait_for_async_commands(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int total = 0;
	int i;
	int j;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		for (j = 0; j < 1000; j++) {
			if (!ent->pending)
				break;
			msleep(50);
		}
	}
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		total += ent->pending;
	}

	if (total)
		mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
	else
		mlx5_ib_warn(dev, "done with all pending requests\n");
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	if (!dev->cache.wq)
		return 0;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);
	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	wait_for_async_commands(dev);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, 0);

	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> page_shift;
	return (npages + 1) / 2;
}

static int mr_cache_max_order(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
		return MR_CACHE_LAST_STD_ENTRY + 2;
	return MLX5_MAX_UMR_SHIFT;
}
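
/*
 * Pin the user memory for [start, start + length) and report its page
 * layout: total pages, best page shift, number of compound pages at that
 * shift, and the corresponding cache order.
 */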
static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
		       int access_flags, struct ib_umem **umem,
		       int *npages, int *page_shift, int *ncont,
		       int *order)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *u;
	int err;

	*umem = NULL;

	u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
	err = PTR_ERR_OR_ZERO(u);
	if (err) {
		mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
		return err;
	}

	mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
			   page_shift, ncont, order);
	if (!*npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		ib_umem_release(u);
		return -EINVAL;
	}

	*umem = u;

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    *npages, *ncont, *order, *page_shift);

	return 0;
}

static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
	context->cqe.done = mlx5_ib_umr_done;
	context->status = -1;
	init_completion(&context->done);
}
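
/*
 * Post a UMR work request on the driver's internal UMR QP and block until
 * its completion arrives; the umrc semaphore bounds the number of
 * outstanding UMR operations.
 */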
static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
				  struct mlx5_umr_wr *umrwr)
{
	struct umr_common *umrc = &dev->umrc;
	const struct ib_send_wr *bad;
	int err;
	struct mlx5_ib_umr_context umr_context;

	mlx5_ib_init_umr_context(&umr_context);
	umrwr->wr.wr_cqe = &umr_context.cqe;

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     umr_context.status);
			err = -EFAULT;
		}
	}
	up(&umrc->sem);
	return err;
}

static struct mlx5_ib_mr *alloc_mr_from_cache(
				  struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mr->ibmr.pd = pd;
	mr->umem = umem;
	mr->access_flags = access_flags;
	mr->desc_size = sizeof(struct mlx5_mtt);
	mr->mmkey.iova = virt_addr;
	mr->mmkey.size = len;
	mr->mmkey.pd = to_mpd(pd)->pdn;

	return mr;
}
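
/*
 * Fill one chunk of the XLT buffer: KLMs via mlx5_odp_populate_klm() for
 * indirect mkeys, or MTTs taken from the umem, zero-padding the unused tail
 * of the chunk.
 */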
static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
			       void *xlt, int page_shift, size_t size,
			       int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct ib_umem *umem = mr->umem;

	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
		if (!umr_can_use_indirect_mkey(dev))
			return -EPERM;
		mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
		return npages;
	}

	npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);

	if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
		__mlx5_ib_populate_pas(dev, umem, page_shift,
				       idx, npages, xlt,
				       MLX5_IB_MTT_PRESENT);
		/*
		 * Clear padding after the pages
		 * brought from the umem.
		 */
		memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
		       size - npages * sizeof(struct mlx5_mtt));
	}

	return npages;
}

#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
			    MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
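
/*
 * Update @npages translation entries of an mkey, starting at @idx, by
 * DMA-mapping a bounce buffer and posting one UMR per chunk. Falls back to
 * a smaller spare chunk and finally to the per-driver emergency page when
 * memory is tight.
 */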
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
		       int page_shift, int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dev.parent;
	int size;
	void *xlt;
	dma_addr_t dma;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			? sizeof(struct mlx5_klm)
			: sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	const int page_mask = page_align - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	gfp_t gfp;
	bool use_emergency_page = false;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	/*
	 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly.
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}

	gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
	gfp |= __GFP_ZERO | __GFP_NOWARN;

	pages_to_map = ALIGN(npages, page_align);
	size = desc_size * pages_to_map;
	size = min_t(int, size, MLX5_MAX_UMR_CHUNK);

	xlt = (void *)__get_free_pages(gfp, get_order(size));
	if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
		mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation of %d bytes\n",
			    size, get_order(size), MLX5_SPARE_UMR_CHUNK);

		size = MLX5_SPARE_UMR_CHUNK;
		xlt = (void *)__get_free_pages(gfp, get_order(size));
	}

	if (!xlt) {
		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
		xlt = (void *)mlx5_ib_get_xlt_emergency_page();
		size = PAGE_SIZE;
		memset(xlt, 0, size);
		use_emergency_page = true;
	}
	pages_iter = size / desc_size;
	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		err = -ENOMEM;
		goto free_xlt;
	}

	sg.addr = dma;
	sg.lkey = dev->umrc.pd->local_dma_lkey;

	memset(&wr, 0, sizeof(wr));
	wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr.wr.sg_list = &sg;
	wr.wr.num_sge = 1;
	wr.wr.opcode = MLX5_IB_WR_UMR;

	wr.pd = mr->ibmr.pd;
	wr.mkey = mr->mmkey.key;
	wr.length = mr->mmkey.size;
	wr.virt_addr = mr->mmkey.iova;
	wr.access_flags = mr->access_flags;
	wr.page_shift = page_shift;

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
		npages = populate_xlt(mr, idx, npages, xlt,
				      page_shift, size, flags);

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		sg.length = ALIGN(npages * desc_size,
				  MLX5_UMR_MTT_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map) {
			if (flags & MLX5_IB_UPD_XLT_ENABLE)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_ENABLE_MR |
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
			if (flags & MLX5_IB_UPD_XLT_PD ||
			    flags & MLX5_IB_UPD_XLT_ACCESS)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
			if (flags & MLX5_IB_UPD_XLT_ADDR)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
		}

		wr.offset = idx * desc_size;
		wr.xlt_size = sg.length;

		err = mlx5_ib_post_send_wait(dev, &wr);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_xlt:
	if (use_emergency_page)
		mlx5_ib_put_xlt_emergency_page();
	else
		free_pages((unsigned long)xlt, get_order(size));

	return err;
}

/*
 * If ibmr is NULL it will be allocated by reg_create.
 * Else, the given ibmr will be used.
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     u64 virt_addr, u64 length,
				     struct ib_umem *umem, int npages,
				     int page_shift, int access_flags,
				     bool populate)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	int inlen;
	u32 *in;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->ibmr.pd = pd;
	mr->access_flags = access_flags;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	if (populate)
		inlen += sizeof(*pas) * roundup(npages, 2);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
		mlx5_ib_populate_pas(dev, umem, page_shift, pas,
				     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/*
	 * The pg_access bit allows setting the access flags
	 * in the page list submitted with the command.
	 */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, free, !populate);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, umr_en, 1);

	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(virt_addr, length, page_shift));
	MLX5_SET(mkc, mkc, log_page_size, page_shift);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	if (populate) {
		MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
			 get_octo_len(virt_addr, length, page_shift));
	}

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->desc_size = sizeof(struct mlx5_mtt);
	mr->dev = dev;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	if (!ibmr)
		kfree(mr);

	return ERR_PTR(err);
}

static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  int npages, u64 length, int access_flags)
{
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->access_flags = access_flags;
}

static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr,
					  u64 length, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2,
		 (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr,
		   memic_addr - pci_resource_start(dev->mdev->pdev, 0));

	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);

	mr->umem = NULL;
	set_mr_fileds(dev, mr, 0, length, acc);

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
				struct ib_dm_mr_attr *attr,
				struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_dm *mdm = to_mdm(dm);
	u64 memic_addr;

	if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS)
		return ERR_PTR(-EINVAL);

	memic_addr = mdm->dev_addr + attr->offset;

	return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length,
				    attr->access_flags);
}
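
/*
 * Register a user memory region. Implicit ODP registrations (zero start,
 * U64_MAX length) are handled separately; otherwise the MR is taken from
 * the UMR cache when possible, falling back to a FW create command under
 * slow_path_mutex, with the translation table filled in by a UMR when it
 * was not populated at creation time.
 */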
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	bool populate_mtts = false;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
		return ERR_PTR(-EOPNOTSUPP);

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (!start && length == U64_MAX) {
		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
			return ERR_PTR(-EINVAL);

		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
		if (IS_ERR(mr))
			return ERR_CAST(mr);
		return &mr->ibmr;
	}
#endif

	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
			  &page_shift, &ncont, &order);

	if (err < 0)
		return ERR_PTR(err);

	if (use_umr(dev, order)) {
		mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
					 page_shift, order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
		populate_mtts = false;
	} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
		if (access_flags & IB_ACCESS_ON_DEMAND) {
			err = -EINVAL;
			pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
			goto error;
		}
		populate_mtts = true;
	}

	if (!mr) {
		if (!umr_can_modify_entity_size(dev))
			populate_mtts = true;
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
				page_shift, access_flags, populate_mtts);
		mutex_unlock(&dev->slow_path_mutex);
	}

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	mr->umem = umem;
	set_mr_fileds(dev, mr, npages, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	if (!populate_mtts) {
		int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;

		if (access_flags & IB_ACCESS_ON_DEMAND)
			update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;

		err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
					 update_xlt_flags);

		if (err) {
			dereg_mr(dev, mr);
			return ERR_PTR(err);
		}
	}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	mr->live = 1;
#endif
	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}
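
/*
 * Use a UMR to move a cached mkey back to the "free" state so it can be
 * returned to the cache without being destroyed in firmware; skipped when
 * the device is in internal error state.
 */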
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_umr_wr umrwr = {};

	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
			      MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	umrwr.wr.opcode = MLX5_IB_WR_UMR;
	umrwr.mkey = mr->mmkey.key;

	return mlx5_ib_post_send_wait(dev, &umrwr);
}

static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
		     int access_flags, int flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_umr_wr umrwr = {};
	int err;

	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;

	umrwr.wr.opcode = MLX5_IB_WR_UMR;
	umrwr.mkey = mr->mmkey.key;

	if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
		umrwr.pd = pd;
		umrwr.access_flags = access_flags;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
	}

	err = mlx5_ib_post_send_wait(dev, &umrwr);

	return err;
}

int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
			  u64 length, u64 virt_addr, int new_access_flags,
			  struct ib_pd *new_pd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
	int access_flags = flags & IB_MR_REREG_ACCESS ?
			    new_access_flags :
			    mr->access_flags;
	int page_shift = 0;
	int upd_flags = 0;
	int npages = 0;
	int ncont = 0;
	int order = 0;
	u64 addr, len;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);

	atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);

	if (!mr->umem)
		return -EINVAL;

	if (flags & IB_MR_REREG_TRANS) {
		addr = virt_addr;
		len = length;
	} else {
		addr = mr->umem->address;
		len = mr->umem->length;
	}

	if (flags != IB_MR_REREG_PD) {
		/*
		 * Replace umem. This needs to be done whether or not UMR is
		 * used.
		 */
		flags |= IB_MR_REREG_TRANS;
		ib_umem_release(mr->umem);
		mr->umem = NULL;
		err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
				  &npages, &page_shift, &ncont, &order);
		if (err)
			goto err;
	}

	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
		/*
		 * UMR can't be used - MKey needs to be replaced.
		 */
		if (mr->allocated_from_cache)
			err = unreg_umr(dev, mr);
		else
			err = destroy_mkey(dev, mr);
		if (err)
			goto err;

		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
				page_shift, access_flags, true);

		if (IS_ERR(mr)) {
			err = PTR_ERR(mr);
			mr = to_mmr(ib_mr);
			goto err;
		}

		mr->allocated_from_cache = 0;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
		mr->live = 1;
#endif
	} else {
		/*
		 * Send a UMR WQE
		 */
		mr->ibmr.pd = pd;
		mr->access_flags = access_flags;
		mr->mmkey.iova = addr;
		mr->mmkey.size = len;
		mr->mmkey.pd = to_mpd(pd)->pdn;

		if (flags & IB_MR_REREG_TRANS) {
			upd_flags = MLX5_IB_UPD_XLT_ADDR;
			if (flags & IB_MR_REREG_PD)
				upd_flags |= MLX5_IB_UPD_XLT_PD;
			if (flags & IB_MR_REREG_ACCESS)
				upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
			err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
						 upd_flags);
		} else {
			err = rereg_umr(pd, mr, access_flags, flags);
		}

		if (err)
			goto err;
	}

	set_mr_fileds(dev, mr, npages, len, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif
	return 0;

err:
	if (mr->umem) {
		ib_umem_release(mr->umem);
		mr->umem = NULL;
	}
	clean_mr(dev, mr);
	return err;
}

static int
mlx5_alloc_priv_descs(struct ib_device *device,
		      struct mlx5_ib_mr *mr,
		      int ndescs,
		      int desc_size)
{
	int size = ndescs * desc_size;
	int add_size;
	int ret;

	add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);

	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
	if (!mr->descs_alloc)
		return -ENOMEM;

	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);

	mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
				      size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;
err:
	kfree(mr->descs_alloc);

	return ret;
}

static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
	if (mr->descs) {
		struct ib_device *device = mr->ibmr.device;
		int size = mr->max_descs * mr->desc_size;

		dma_unmap_single(device->dev.parent, mr->desc_map,
				 size, DMA_TO_DEVICE);
		kfree(mr->descs_alloc);
		mr->descs = NULL;
	}
}
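
/*
 * Release the HW resources of an MR: signature PSVs, private descriptors,
 * and the mkey itself for non-cached MRs. The surrounding dereg path frees
 * the umem and the mr structure.
 */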
static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int allocated_from_cache = mr->allocated_from_cache;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
		mr->sig = NULL;
	}

	mlx5_free_priv_descs(mr);

	if (!allocated_from_cache)
		destroy_mkey(dev, mr);
}

static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int npages = mr->npages;
	struct ib_umem *umem = mr->umem;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem && umem->is_odp) {
		struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);

		/* Prevent new page faults from succeeding */
		mr->live = 0;
		/* Wait for all running page-fault handlers to finish. */
		synchronize_srcu(&dev->mr_srcu);
		/* Destroy all page mappings */
		if (umem_odp->page_list)
			mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
						 ib_umem_end(umem));
		else
			mlx5_ib_free_implicit_mr(mr);
		/*
		 * We kill the umem before the MR for ODP,
		 * so that there will not be any invalidations in
		 * flight, looking at the *mr struct.
		 */
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);

		/* Avoid double-freeing the umem. */
		umem = NULL;
	}
#endif

	clean_mr(dev, mr);

	/*
	 * We should unregister the DMA address from the HCA before
	 * we remove the DMA mapping.
	 */
	mlx5_mr_cache_free(dev, mr);
	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}
	if (!mr->allocated_from_cache)
		kfree(mr);
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
	return 0;
}
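
/*
 * Allocate a kernel MR for fast registration: MTT-based for
 * IB_MR_TYPE_MEM_REG, KLM-based for IB_MR_TYPE_SG_GAPS, and KLM plus
 * memory/wire PSVs for IB_MR_TYPE_SIGNATURE.
 */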
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
                               u32 max_num_sg)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        int ndescs = ALIGN(max_num_sg, 4);
        struct mlx5_ib_mr *mr;
        void *mkc;
        u32 *in;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        MLX5_SET(mkc, mkc, free, 1);
        MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
        MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);

        if (mr_type == IB_MR_TYPE_MEM_REG) {
                mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
                MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
                err = mlx5_alloc_priv_descs(pd->device, mr,
                                            ndescs, sizeof(struct mlx5_mtt));
                if (err)
                        goto err_free_in;

                mr->desc_size = sizeof(struct mlx5_mtt);
                mr->max_descs = ndescs;
        } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
                mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;

                err = mlx5_alloc_priv_descs(pd->device, mr,
                                            ndescs, sizeof(struct mlx5_klm));
                if (err)
                        goto err_free_in;
                mr->desc_size = sizeof(struct mlx5_klm);
                mr->max_descs = ndescs;
        } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
                u32 psv_index[2];

                MLX5_SET(mkc, mkc, bsf_en, 1);
                MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
                mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
                if (!mr->sig) {
                        err = -ENOMEM;
                        goto err_free_in;
                }

                /* create mem & wire PSVs */
                err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
                                           2, psv_index);
                if (err)
                        goto err_free_sig;

                mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
                mr->sig->psv_memory.psv_idx = psv_index[0];
                mr->sig->psv_wire.psv_idx = psv_index[1];

                mr->sig->sig_status_checked = true;
                mr->sig->sig_err_exists = false;
                /* Next UMR, Arm SIGERR */
                ++mr->sig->sigerr_count;
        } else {
                mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
                err = -EINVAL;
                goto err_free_in;
        }

        MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
        MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
        MLX5_SET(mkc, mkc, umr_en, 1);

        mr->ibmr.device = pd->device;
        err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
        if (err)
                goto err_destroy_psv;

        mr->mmkey.type = MLX5_MKEY_MR;
        mr->ibmr.lkey = mr->mmkey.key;
        mr->ibmr.rkey = mr->mmkey.key;
        mr->umem = NULL;

        kfree(in);

        return &mr->ibmr;

err_destroy_psv:
        if (mr->sig) {
                if (mlx5_core_destroy_psv(dev->mdev,
                                          mr->sig->psv_memory.psv_idx))
                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
                                     mr->sig->psv_memory.psv_idx);
                if (mlx5_core_destroy_psv(dev->mdev,
                                          mr->sig->psv_wire.psv_idx))
                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
                                     mr->sig->psv_wire.psv_idx);
        }
        mlx5_free_priv_descs(mr);
err_free_sig:
        kfree(mr->sig);
err_free_in:
        kfree(in);
err_free:
        kfree(mr);
        return ERR_PTR(err);
}
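
/*
 * ib_device verb: allocate a memory window.  The KLM count comes from
 * the user's mlx5_ib_alloc_mw request, rounded up to a multiple of 4;
 * en_rinval is set for type 2 windows.
 */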
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
                               struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_ib_mw *mw = NULL;
        u32 *in = NULL;
        void *mkc;
        int ndescs;
        int err;
        struct mlx5_ib_alloc_mw req = {};
        struct {
                __u32   comp_mask;
                __u32   response_length;
        } resp = {};

        err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
        if (err)
                return ERR_PTR(err);

        if (req.comp_mask || req.reserved1 || req.reserved2)
                return ERR_PTR(-EOPNOTSUPP);

        if (udata->inlen > sizeof(req) &&
            !ib_is_udata_cleared(udata, sizeof(req),
                                 udata->inlen - sizeof(req)))
                return ERR_PTR(-EOPNOTSUPP);

        ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);

        mw = kzalloc(sizeof(*mw), GFP_KERNEL);
        in = kzalloc(inlen, GFP_KERNEL);
        if (!mw || !in) {
                err = -ENOMEM;
                goto free;
        }

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

        MLX5_SET(mkc, mkc, free, 1);
        MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
        MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
        MLX5_SET(mkc, mkc, umr_en, 1);
        MLX5_SET(mkc, mkc, lr, 1);
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
        MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
        MLX5_SET(mkc, mkc, qpn, 0xffffff);

        err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
        if (err)
                goto free;

        mw->mmkey.type = MLX5_MKEY_MW;
        mw->ibmw.rkey = mw->mmkey.key;
        mw->ndescs = ndescs;

        resp.response_length = min(offsetof(typeof(resp), response_length) +
                                   sizeof(resp.response_length), udata->outlen);
        if (resp.response_length) {
                err = ib_copy_to_udata(udata, &resp, resp.response_length);
                if (err) {
                        mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
                        goto free;
                }
        }

        kfree(in);
        return &mw->ibmw;

free:
        kfree(mw);
        kfree(in);
        return ERR_PTR(err);
}
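
/* ib_device verb: destroy a memory window's mkey and free its state. */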
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
        struct mlx5_ib_mw *mmw = to_mmw(mw);
        int err;

        err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
                                     &mmw->mmkey);
        if (!err)
                kfree(mmw);
        return err;
}
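
/*
 * ib_device verb: report the signature status of an MR.  Only
 * IB_MR_CHECK_SIG_STATUS is supported; a latched signature error is
 * copied into mr_status and then cleared.
 */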
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
                            struct ib_mr_status *mr_status)
{
        struct mlx5_ib_mr *mmr = to_mmr(ibmr);
        int ret = 0;

        if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
                pr_err("Invalid status check mask\n");
                ret = -EINVAL;
                goto done;
        }

        mr_status->fail_status = 0;
        if (check_mask & IB_MR_CHECK_SIG_STATUS) {
                if (!mmr->sig) {
                        ret = -EINVAL;
                        pr_err("signature status check requested on a non-signature enabled MR\n");
                        goto done;
                }

                mmr->sig->sig_status_checked = true;
                if (!mmr->sig->sig_err_exists)
                        goto done;

                if (ibmr->lkey == mmr->sig->err_item.key)
                        memcpy(&mr_status->sig_err, &mmr->sig->err_item,
                               sizeof(mr_status->sig_err));
                else {
                        mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
                        mr_status->sig_err.sig_err_offset = 0;
                        mr_status->sig_err.key = mmr->sig->err_item.key;
                }

                mmr->sig->sig_err_exists = false;
                mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
        }

done:
        return ret;
}
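
/*
 * Translate a scatterlist into KLM descriptors, one per SG entry,
 * all referenced through the PD's local DMA lkey.  Returns the number
 * of entries mapped (bounded by mr->max_descs).
 */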
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
                   struct scatterlist *sgl,
                   unsigned short sg_nents,
                   unsigned int *sg_offset_p)
{
        struct scatterlist *sg = sgl;
        struct mlx5_klm *klms = mr->descs;
        unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
        u32 lkey = mr->ibmr.pd->local_dma_lkey;
        int i;

        mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
        mr->ibmr.length = 0;

        for_each_sg(sgl, sg, sg_nents, i) {
                if (unlikely(i >= mr->max_descs))
                        break;
                klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
                klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
                klms[i].key = cpu_to_be32(lkey);
                mr->ibmr.length += sg_dma_len(sg) - sg_offset;

                sg_offset = 0;
        }
        mr->ndescs = i;

        if (sg_offset_p)
                *sg_offset_p = sg_offset;

        return i;
}
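
/*
 * ib_sg_to_pages() callback: store one page address as an MTT entry
 * with the read/write enable bits set, failing once the descriptor
 * array is full.
 */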
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        __be64 *descs;

        if (unlikely(mr->ndescs == mr->max_descs))
                return -ENOMEM;

        descs = mr->descs;
        descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

        return 0;
}
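
/*
 * ib_device verb: map a scatterlist onto the MR's descriptor buffer.
 * The buffer is synced for CPU access, filled with KLMs or MTT page
 * entries depending on the MR's access mode, then synced back for the
 * device.  Returns the number of SG entries mapped.
 */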
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
                      unsigned int *sg_offset)
{
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        int n;

        mr->ndescs = 0;

        ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
                                   mr->desc_size * mr->max_descs,
                                   DMA_TO_DEVICE);

        if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
                n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
        else
                n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
                                   mlx5_set_page);

        ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
                                      mr->desc_size * mr->max_descs,
                                      DMA_TO_DEVICE);

        return n;
}