mr.c

/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <rdma/ib_user_verbs.h>

#include "mlx4_ib.h"
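
/*
 * Translate IB verbs access flags into mlx4 MPT permission bits;
 * local read access is always granted.
 */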
static u32 convert_access(int acc)
{
        return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
               (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
               (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
               (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
               (acc & IB_ACCESS_MW_BIND       ? MLX4_PERM_BIND_MW      : 0) |
               MLX4_PERM_LOCAL_READ;
}

static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
{
        switch (type) {
        case IB_MW_TYPE_1:      return MLX4_MW_TYPE_1;
        case IB_MW_TYPE_2:      return MLX4_MW_TYPE_2;
        default:                return -1;
        }
}
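
/*
 * Allocate and enable a DMA MR covering the whole address space
 * (iova 0, length ~0ull) with the requested access rights.
 */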
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mlx4_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
                            ~0ull, convert_access(acc), 0, 0, &mr->mmr);
        if (err)
                goto err_free;

        err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
        if (err)
                goto err_mr;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_mr:
        (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}

enum {
        MLX4_MAX_MTT_SHIFT = 31
};
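
/*
 * Write one physically contiguous block of a umem into the MTT as
 * mtt_size-sized entries.  Entries are staged in 'pages' and flushed to
 * the device a page's worth at a time via mlx4_write_mtt().
 */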
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
                                        struct mlx4_mtt *mtt,
                                        u64 mtt_size, u64 mtt_shift, u64 len,
                                        u64 cur_start_addr, u64 *pages,
                                        int *start_index, int *npages)
{
        u64 cur_end_addr = cur_start_addr + len;
        u64 cur_end_addr_aligned = 0;
        u64 mtt_entries;
        int err = 0;
        int k;

        len += (cur_start_addr & (mtt_size - 1ULL));
        cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
        len += (cur_end_addr_aligned - cur_end_addr);
        if (len & (mtt_size - 1ULL)) {
                pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
                        len, mtt_size);
                return -EINVAL;
        }

        mtt_entries = (len >> mtt_shift);

        /*
         * Align the MTT start address to the mtt_size.
         * Required to handle cases when the MR starts in the middle of an MTT
         * record. Was not required in old code since the physical addresses
         * provided by the dma subsystem were page aligned, which was also the
         * MTT size.
         */
        cur_start_addr = round_down(cur_start_addr, mtt_size);
        /* A new block is started ... */
        for (k = 0; k < mtt_entries; ++k) {
                pages[*npages] = cur_start_addr + (mtt_size * k);
                (*npages)++;
                /*
                 * Be friendly to mlx4_write_mtt() and pass it chunks of
                 * appropriate size.
                 */
                if (*npages == PAGE_SIZE / sizeof(u64)) {
                        err = mlx4_write_mtt(dev->dev, mtt, *start_index,
                                             *npages, pages);
                        if (err)
                                return err;

                        (*start_index) += *npages;
                        *npages = 0;
                }
        }

        return 0;
}
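
/* Return log2 of the lowest set bit of 'ptr', i.e. its natural alignment. */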
static inline u64 alignment_of(u64 ptr)
{
        return ilog2(ptr & (~(ptr - 1)));
}

static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
                                       u64 current_block_end,
                                       u64 block_shift)
{
        /* Check whether the alignment of the new block is aligned as well as
         * the previous block.
         * Block address must start with zeros till size of entity_size.
         */
        if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
                /*
                 * It is not as well aligned as the previous block - reduce the
                 * mtt size accordingly.  Here we take the last right bit which
                 * is 1.
                 */
                block_shift = alignment_of(next_block_start);

        /*
         * Check whether the alignment of the end of previous block - is it
         * aligned as well as the start of the block
         */
        if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
                /*
                 * It is not as well aligned as the start of the block -
                 * reduce the mtt size accordingly.
                 */
                block_shift = alignment_of(current_block_end);

        return block_shift;
}
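
/*
 * Walk the umem scatterlist, merging physically contiguous entries into
 * blocks, and write each block into the MTT.
 */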
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
                           struct ib_umem *umem)
{
        u64 *pages;
        u64 len = 0;
        int err = 0;
        u64 mtt_size;
        u64 cur_start_addr = 0;
        u64 mtt_shift;
        int start_index = 0;
        int npages = 0;
        struct scatterlist *sg;
        int i;

        pages = (u64 *) __get_free_page(GFP_KERNEL);
        if (!pages)
                return -ENOMEM;

        mtt_shift = mtt->page_shift;
        mtt_size = 1ULL << mtt_shift;

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
                if (cur_start_addr + len == sg_dma_address(sg)) {
                        /* still the same block */
                        len += sg_dma_len(sg);
                        continue;
                }
                /*
                 * A new block is started ...
                 * If len is malaligned, write an extra mtt entry to cover the
                 * misaligned area (round up the division)
                 */
                err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
                                                   mtt_shift, len,
                                                   cur_start_addr,
                                                   pages, &start_index,
                                                   &npages);
                if (err)
                        goto out;

                cur_start_addr = sg_dma_address(sg);
                len = sg_dma_len(sg);
        }

        /* Handle the last block */
        if (len > 0) {
                /*
                 * If len is malaligned, write an extra mtt entry to cover
                 * the misaligned area (round up the division)
                 */
                err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
                                                   mtt_shift, len,
                                                   cur_start_addr, pages,
                                                   &start_index, &npages);
                if (err)
                        goto out;
        }

        if (npages)
                err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);

out:
        free_page((unsigned long) pages);
        return err;
}

/*
 * Calculate optimal mtt size based on contiguous pages.
 * Function will return also the number of pages that are not aligned to the
 * calculated mtt_size to be added to total number of pages. For that we should
 * check the first chunk length & last chunk length and if not aligned to
 * mtt_size we should increment the non_aligned_pages number. All chunks in the
 * middle already handled as part of mtt shift calculation for both their start
 * & end addresses.
 */
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
                                       int *num_of_mtts)
{
        u64 block_shift = MLX4_MAX_MTT_SHIFT;
        u64 min_shift = umem->page_shift;
        u64 last_block_aligned_end = 0;
        u64 current_block_start = 0;
        u64 first_block_start = 0;
        u64 current_block_len = 0;
        u64 last_block_end = 0;
        struct scatterlist *sg;
        u64 current_block_end;
        u64 misalignment_bits;
        u64 next_block_start;
        u64 total_len = 0;
        int i;

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
                /*
                 * Initialization - save the first chunk start as the
                 * current_block_start - block means contiguous pages.
                 */
                if (current_block_len == 0 && current_block_start == 0) {
                        current_block_start = sg_dma_address(sg);
                        first_block_start = current_block_start;
                        /*
                         * Find the bits that are different between the physical
                         * address and the virtual address for the start of the
                         * MR.
                         * umem_get aligned the start_va to a page boundary.
                         * Therefore, we need to align the start va to the same
                         * boundary.
                         * misalignment_bits is needed to handle the case of a
                         * single memory region. In this case, the rest of the
                         * logic will not reduce the block size.  If we use a
                         * block size which is bigger than the alignment of the
                         * misalignment bits, we might use the virtual page
                         * number instead of the physical page number, resulting
                         * in access to the wrong data.
                         */
                        misalignment_bits =
                                (start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL)))
                                ^ current_block_start;
                        block_shift = min(alignment_of(misalignment_bits),
                                          block_shift);
                }

                /*
                 * Go over the scatter entries and check if they continue the
                 * previous scatter entry.
                 */
                next_block_start = sg_dma_address(sg);
                current_block_end = current_block_start + current_block_len;
                /* If we have a split (non-contig.) between two blocks */
                if (current_block_end != next_block_start) {
                        block_shift = mlx4_ib_umem_calc_block_mtt
                                        (next_block_start,
                                         current_block_end,
                                         block_shift);

                        /*
                         * If we reached the minimum shift for 4k page we stop
                         * the loop.
                         */
                        if (block_shift <= min_shift)
                                goto end;

                        /*
                         * If not saved yet we are in first block - we save the
                         * length of first block to calculate the
                         * non_aligned_pages number at the end.
                         */
                        total_len += current_block_len;

                        /* Start a new block */
                        current_block_start = next_block_start;
                        current_block_len = sg_dma_len(sg);
                        continue;
                }
                /* The scatter entry is another part of the current block,
                 * increase the block size.
                 * An entry in the scatter can be larger than 4k (page) as of
                 * dma mapping which merge some blocks together.
                 */
                current_block_len += sg_dma_len(sg);
        }

        /* Account for the last block in the total len */
        total_len += current_block_len;
        /* Add to the first block the misalignment that it suffers from. */
        total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
        last_block_end = current_block_start + current_block_len;
        last_block_aligned_end = round_up(last_block_end, 1 << block_shift);
        total_len += (last_block_aligned_end - last_block_end);

        if (total_len & ((1ULL << block_shift) - 1ULL))
                pr_warn("misaligned total length detected (%llu, %llu)!",
                        total_len, block_shift);

        *num_of_mtts = total_len >> block_shift;
end:
        if (block_shift < min_shift) {
                /*
                 * If shift is less than the min we set a warning and return the
                 * min shift.
                 */
                pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift);

                block_shift = min_shift;
        }
        return block_shift;
}
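
/*
 * Register a user memory region: pin the pages with ib_umem_get(),
 * pick the optimal MTT page size for the layout, then allocate the MR
 * and write its translation entries.
 */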
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_mr *mr;
        int shift;
        int err;
        int n;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        /* Force registering the memory as writable. */
        /* Used for memory re-registration. HCA protects the access */
        mr->umem = ib_umem_get(pd->uobject->context, start, length,
                               access_flags | IB_ACCESS_LOCAL_WRITE, 0);
        if (IS_ERR(mr->umem)) {
                err = PTR_ERR(mr->umem);
                goto err_free;
        }

        n = ib_umem_page_count(mr->umem);
        shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);

        err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
                            convert_access(access_flags), n, shift, &mr->mmr);
        if (err)
                goto err_umem;

        err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
        if (err)
                goto err_mr;

        err = mlx4_mr_enable(dev->dev, &mr->mmr);
        if (err)
                goto err_mr;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.length = length;
        mr->ibmr.iova = virt_addr;
        mr->ibmr.page_size = 1U << shift;

        return &mr->ibmr;

err_mr:
        (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
        ib_umem_release(mr->umem);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}
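
/*
 * Re-register an existing user MR.  Depending on 'flags', this changes
 * the PD, the access rights and/or the translation (new start, length
 * and virt_addr), operating on the MPT entry obtained from
 * mlx4_mr_hw_get_mpt().
 */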
int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
                          u64 start, u64 length, u64 virt_addr,
                          int mr_access_flags, struct ib_pd *pd,
                          struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(mr->device);
        struct mlx4_ib_mr *mmr = to_mmr(mr);
        struct mlx4_mpt_entry *mpt_entry;
        struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
        int err;

        /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
         * we assume that the calls can't run concurrently. Otherwise, a
         * race exists.
         */
        err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
        if (err)
                return err;

        if (flags & IB_MR_REREG_PD) {
                err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
                                           to_mpd(pd)->pdn);
                if (err)
                        goto release_mpt_entry;
        }

        if (flags & IB_MR_REREG_ACCESS) {
                err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
                                               convert_access(mr_access_flags));
                if (err)
                        goto release_mpt_entry;
        }

        if (flags & IB_MR_REREG_TRANS) {
                int shift;
                int n;

                mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
                ib_umem_release(mmr->umem);
                mmr->umem = ib_umem_get(mr->uobject->context, start, length,
                                        mr_access_flags |
                                        IB_ACCESS_LOCAL_WRITE,
                                        0);
                if (IS_ERR(mmr->umem)) {
                        err = PTR_ERR(mmr->umem);
                        /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
                        mmr->umem = NULL;
                        goto release_mpt_entry;
                }
                n = ib_umem_page_count(mmr->umem);
                shift = mmr->umem->page_shift;

                err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
                                              virt_addr, length, n, shift,
                                              *pmpt_entry);
                if (err) {
                        ib_umem_release(mmr->umem);
                        goto release_mpt_entry;
                }
                mmr->mmr.iova = virt_addr;
                mmr->mmr.size = length;

                err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
                if (err) {
                        mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
                        ib_umem_release(mmr->umem);
                        goto release_mpt_entry;
                }
        }

        /* If we couldn't transfer the MR to the HCA, just remember to
         * return a failure. But dereg_mr will free the resources.
         */
        err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
        if (!err && flags & IB_MR_REREG_ACCESS)
                mmr->mmr.access = mr_access_flags;

release_mpt_entry:
        mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);

        return err;
}
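
/*
 * Allocate the private page list used by fast-register MRs and DMA-map
 * it once; mlx4_ib_map_mr_sg() syncs it around each update.
 */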
static int
mlx4_alloc_priv_pages(struct ib_device *device,
                      struct mlx4_ib_mr *mr,
                      int max_pages)
{
        int ret;

        /* Ensure that size is aligned to DMA cacheline
         * requirements.
         * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
         * so page_map_size will never cross PAGE_SIZE.
         */
        mr->page_map_size = roundup(max_pages * sizeof(u64),
                                    MLX4_MR_PAGES_ALIGN);

        /* Prevent cross page boundary allocation. */
        mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
        if (!mr->pages)
                return -ENOMEM;

        mr->page_map = dma_map_single(device->dev.parent, mr->pages,
                                      mr->page_map_size, DMA_TO_DEVICE);

        if (dma_mapping_error(device->dev.parent, mr->page_map)) {
                ret = -ENOMEM;
                goto err;
        }

        return 0;

err:
        free_page((unsigned long)mr->pages);
        return ret;
}

static void
mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
{
        if (mr->pages) {
                struct ib_device *device = mr->ibmr.device;

                dma_unmap_single(device->dev.parent, mr->page_map,
                                 mr->page_map_size, DMA_TO_DEVICE);
                free_page((unsigned long)mr->pages);
                mr->pages = NULL;
        }
}

int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
        struct mlx4_ib_mr *mr = to_mmr(ibmr);
        int ret;

        mlx4_free_priv_pages(mr);

        ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
        if (ret)
                return ret;
        if (mr->umem)
                ib_umem_release(mr->umem);
        kfree(mr);

        return 0;
}
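
/*
 * Allocate a memory window (type 1 or type 2) on the given PD.
 */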
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
                               struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_mw *mw;
        int err;

        mw = kmalloc(sizeof(*mw), GFP_KERNEL);
        if (!mw)
                return ERR_PTR(-ENOMEM);

        err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
                            to_mlx4_type(type), &mw->mmw);
        if (err)
                goto err_free;

        err = mlx4_mw_enable(dev->dev, &mw->mmw);
        if (err)
                goto err_mw;

        mw->ibmw.rkey = mw->mmw.key;

        return &mw->ibmw;

err_mw:
        mlx4_mw_free(dev->dev, &mw->mmw);

err_free:
        kfree(mw);

        return ERR_PTR(err);
}

int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
        struct mlx4_ib_mw *mw = to_mmw(ibmw);

        mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
        kfree(mw);

        return 0;
}
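
/*
 * Allocate a fast-register MR (IB_MR_TYPE_MEM_REG only) that can map up
 * to max_num_sg pages; the page list comes from mlx4_alloc_priv_pages().
 */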
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
                               u32 max_num_sg)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_mr *mr;
        int err;

        if (mr_type != IB_MR_TYPE_MEM_REG ||
            max_num_sg > MLX4_MAX_FAST_REG_PAGES)
                return ERR_PTR(-EINVAL);

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
                            max_num_sg, 0, &mr->mmr);
        if (err)
                goto err_free;

        err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
        if (err)
                goto err_free_mr;

        mr->max_pages = max_num_sg;
        err = mlx4_mr_enable(dev->dev, &mr->mmr);
        if (err)
                goto err_free_pl;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_free_pl:
        mr->ibmr.device = pd->device;
        mlx4_free_priv_pages(mr);
err_free_mr:
        (void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
        kfree(mr);
        return ERR_PTR(err);
}
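
/*
 * FMR (fast memory region) support: allocation, mapping, unmapping and
 * freeing.
 */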
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
                                 struct ib_fmr_attr *fmr_attr)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_fmr *fmr;
        int err = -ENOMEM;

        fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
        if (!fmr)
                return ERR_PTR(-ENOMEM);

        err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
                             fmr_attr->max_pages, fmr_attr->max_maps,
                             fmr_attr->page_shift, &fmr->mfmr);
        if (err)
                goto err_free;

        err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
        if (err)
                goto err_mr;

        fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;

        return &fmr->ibfmr;

err_mr:
        (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);

err_free:
        kfree(fmr);

        return ERR_PTR(err);
}

int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
                         int npages, u64 iova)
{
        struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
        struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);

        return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
                                 &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
}

int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
{
        struct ib_fmr *ibfmr;
        int err;
        struct mlx4_dev *mdev = NULL;

        list_for_each_entry(ibfmr, fmr_list, list) {
                if (mdev && to_mdev(ibfmr->device)->dev != mdev)
                        return -EINVAL;
                mdev = to_mdev(ibfmr->device)->dev;
        }

        if (!mdev)
                return 0;

        list_for_each_entry(ibfmr, fmr_list, list) {
                struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);

                mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
        }

        /*
         * Make sure all MPT status updates are visible before issuing
         * SYNC_TPT firmware command.
         */
        wmb();

        err = mlx4_SYNC_TPT(mdev);
        if (err)
                pr_warn("SYNC_TPT error %d when unmapping FMRs\n", err);

        return 0;
}

int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
{
        struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
        struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
        int err;

        err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);

        if (!err)
                kfree(ifmr);

        return err;
}
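
/*
 * Page-list callback and ib_map_mr_sg() handler for fast-register MRs:
 * page addresses are collected into the DMA-mapped private page list,
 * which is synced for CPU access before the update and back to the
 * device afterwards.
 */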
static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct mlx4_ib_mr *mr = to_mmr(ibmr);

        if (unlikely(mr->npages == mr->max_pages))
                return -ENOMEM;

        mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);

        return 0;
}

int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
                      unsigned int *sg_offset)
{
        struct mlx4_ib_mr *mr = to_mmr(ibmr);
        int rc;

        mr->npages = 0;

        ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
                                   mr->page_map_size, DMA_TO_DEVICE);

        rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);

        ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
                                      mr->page_map_size, DMA_TO_DEVICE);

        return rc;
}