rdma_core.c

/*
 * Copyright (c) 2016, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/sched/mm.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <linux/rcupdate.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/rdma_user_ioctl.h>
#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

int uverbs_ns_idx(u16 *id, unsigned int ns_count)
{
        int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT;

        if (ret >= ns_count)
                return -EINVAL;

        *id &= ~UVERBS_ID_NS_MASK;
        return ret;
}
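
/*
 * Usage sketch (illustrative, not part of this file): an attribute id
 * encodes a namespace index in its UVERBS_ID_NS_MASK bits.  Callers such as
 * uverbs_get_object() below decode it roughly like this, where 'id' and
 * 'num_buckets' are hypothetical local names:
 *
 *      u16 id = requested_id;
 *      int ns = uverbs_ns_idx(&id, num_buckets);
 *
 *      if (ns < 0)
 *              return NULL;    // id names an unknown namespace
 *      // 'ns' selects the bucket; 'id' is now the index within that bucket
 */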

const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
                                                   uint16_t object)
{
        const struct uverbs_root_spec *object_hash = ufile->device->specs_root;
        const struct uverbs_object_spec_hash *objects;
        int ret = uverbs_ns_idx(&object, object_hash->num_buckets);

        if (ret < 0)
                return NULL;

        objects = object_hash->object_buckets[ret];

        if (object >= objects->num_objects)
                return NULL;

        return objects->objects[object];
}

const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
                                                   uint16_t method)
{
        const struct uverbs_method_spec_hash *methods;
        int ret = uverbs_ns_idx(&method, object->num_buckets);

        if (ret < 0)
                return NULL;

        methods = object->method_buckets[ret];
        if (method >= methods->num_methods)
                return NULL;

        return methods->methods[method];
}

void uverbs_uobject_get(struct ib_uobject *uobject)
{
        kref_get(&uobject->ref);
}

static void uverbs_uobject_free(struct kref *ref)
{
        struct ib_uobject *uobj =
                container_of(ref, struct ib_uobject, ref);

        if (uobj->uapi_object->type_class->needs_kfree_rcu)
                kfree_rcu(uobj, rcu);
        else
                kfree(uobj);
}

void uverbs_uobject_put(struct ib_uobject *uobject)
{
        kref_put(&uobject->ref, uverbs_uobject_free);
}

static int uverbs_try_lock_object(struct ib_uobject *uobj,
                                  enum rdma_lookup_mode mode)
{
        /*
         * When shared access is required, we use a positive counter. Each
         * shared access request checks that the value != -1 and increments it.
         * Exclusive access is required for operations like write or destroy.
         * In exclusive access mode, we check that the counter is zero (nobody
         * claimed this object) and we set it to -1. Releasing a shared access
         * lock is done simply by decreasing the counter. As for exclusive
         * access locks, since only a single one of them is allowed
         * concurrently, setting the counter to zero is enough for releasing
         * this lock.
         */
        switch (mode) {
        case UVERBS_LOOKUP_READ:
                return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
                        -EBUSY : 0;
        case UVERBS_LOOKUP_WRITE:
                /* lock is exclusive */
                return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
        case UVERBS_LOOKUP_DESTROY:
                return 0;
        }
        return 0;
}
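
/*
 * Illustrative sketch (assumed usage, not verbatim from any handler): within
 * this file the lock above is always paired through rdma_lookup_get_uobject()
 * and rdma_lookup_put_uobject(), which amounts to:
 *
 *      if (!uverbs_try_lock_object(uobj, UVERBS_LOOKUP_READ)) {
 *              ... read-only use of uobj->object ...
 *              atomic_dec(&uobj->usecnt);      // drop shared access
 *      }
 *
 *      if (!uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE)) {
 *              ... exclusive use (e.g. destroy) ...
 *              atomic_set(&uobj->usecnt, 0);   // drop exclusive access
 *      }
 */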

static void assert_uverbs_usecnt(struct ib_uobject *uobj,
                                 enum rdma_lookup_mode mode)
{
#ifdef CONFIG_LOCKDEP
        switch (mode) {
        case UVERBS_LOOKUP_READ:
                WARN_ON(atomic_read(&uobj->usecnt) <= 0);
                break;
        case UVERBS_LOOKUP_WRITE:
                WARN_ON(atomic_read(&uobj->usecnt) != -1);
                break;
        case UVERBS_LOOKUP_DESTROY:
                break;
        }
#endif
}

/*
 * This must be called with the hw_destroy_rwsem locked for read or write,
 * also the uobject itself must be locked for write.
 *
 * Upon return the HW object is guaranteed to be destroyed.
 *
 * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
 * however the type's alloc_commit function cannot have been called and the
 * uobject cannot be on the uobjects list.
 *
 * For RDMA_REMOVE_DESTROY the caller should be holding a kref (eg via
 * rdma_lookup_get_uobject) and the object is left in a state where the caller
 * needs to call rdma_lookup_put_uobject.
 *
 * For all other destroy modes this function internally unlocks the uobject
 * and consumes the kref on the uobj.
 */
static int uverbs_destroy_uobject(struct ib_uobject *uobj,
                                  enum rdma_remove_reason reason)
{
        struct ib_uverbs_file *ufile = uobj->ufile;
        unsigned long flags;
        int ret;

        lockdep_assert_held(&ufile->hw_destroy_rwsem);
        assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);

        if (uobj->object) {
                ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason);
                if (ret) {
                        if (ib_is_destroy_retryable(ret, reason, uobj))
                                return ret;

                        /* Nothing to be done, dangle the memory and move on */
                        WARN(true,
                             "ib_uverbs: failed to remove uobject id %d, driver err=%d",
                             uobj->id, ret);
                }

                uobj->object = NULL;
        }

        if (reason == RDMA_REMOVE_ABORT) {
                WARN_ON(!list_empty(&uobj->list));
                WARN_ON(!uobj->context);
                uobj->uapi_object->type_class->alloc_abort(uobj);
        }

        uobj->context = NULL;

        /*
         * For DESTROY the usecnt is held write locked, the caller is expected
         * to unlock it and put the object when done with it. Only DESTROY
         * can remove the IDR handle.
         */
        if (reason != RDMA_REMOVE_DESTROY)
                atomic_set(&uobj->usecnt, 0);
        else
                uobj->uapi_object->type_class->remove_handle(uobj);

        if (!list_empty(&uobj->list)) {
                spin_lock_irqsave(&ufile->uobjects_lock, flags);
                list_del_init(&uobj->list);
                spin_unlock_irqrestore(&ufile->uobjects_lock, flags);

                /*
                 * Pairs with the get in rdma_alloc_commit_uobject(), could
                 * destroy uobj.
                 */
                uverbs_uobject_put(uobj);
        }

        /*
         * When aborting the stack kref remains owned by the core code, and is
         * not transferred into the type. Pairs with the get in alloc_uobj.
         */
        if (reason == RDMA_REMOVE_ABORT)
                uverbs_uobject_put(uobj);

        return 0;
}

/*
 * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
 * sequence. It should only be used from command callbacks. On success the
 * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
 * version requires the caller to have already obtained a
 * LOOKUP_DESTROY uobject kref.
 */
int uobj_destroy(struct ib_uobject *uobj)
{
        struct ib_uverbs_file *ufile = uobj->ufile;
        int ret;

        down_read(&ufile->hw_destroy_rwsem);

        ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
        if (ret)
                goto out_unlock;

        ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY);
        if (ret) {
                atomic_set(&uobj->usecnt, 0);
                goto out_unlock;
        }

out_unlock:
        up_read(&ufile->hw_destroy_rwsem);
        return ret;
}

/*
 * uobj_get_destroy destroys the HW object and returns a handle to the uobj
 * with a NULL object pointer. The caller must pair this with
 * rdma_lookup_put_uobject(LOOKUP_WRITE).
 */
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
                                      u32 id, struct ib_uverbs_file *ufile)
{
        struct ib_uobject *uobj;
        int ret;

        uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
        if (IS_ERR(uobj))
                return uobj;

        ret = uobj_destroy(uobj);
        if (ret) {
                rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
                return ERR_PTR(ret);
        }

        return uobj;
}

/*
 * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res
 * on success (negative errno on failure). For use by callers that do not need
 * the uobj.
 */
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
                           struct ib_uverbs_file *ufile, int success_res)
{
        struct ib_uobject *uobj;

        uobj = __uobj_get_destroy(obj, id, ufile);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);

        rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
        return success_res;
}
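
/*
 * Handler-side sketch (hedged; 'handle' and 'resp.events_reported' are
 * illustrative names, not taken from this file): a destroy command that
 * still needs data from the uobject uses the get/put pair, while one that
 * does not can use the perform variant:
 *
 *      uobj = __uobj_get_destroy(obj, handle, ufile);
 *      if (IS_ERR(uobj))
 *              return PTR_ERR(uobj);
 *      resp.events_reported = ...;     // read leftover uobj state
 *      rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 *
 *      // or, when nothing needs to be read back:
 *      return __uobj_perform_destroy(obj, handle, ufile, 0);
 */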

/* alloc_uobj must be undone by uverbs_destroy_uobject() */
static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
                                     const struct uverbs_api_object *obj)
{
        struct ib_uobject *uobj;
        struct ib_ucontext *ucontext;

        ucontext = ib_uverbs_get_ucontext(ufile);
        if (IS_ERR(ucontext))
                return ERR_CAST(ucontext);

        uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
        if (!uobj)
                return ERR_PTR(-ENOMEM);
        /*
         * user_handle should be filled by the handler.
         * The object is added to the list in the commit stage.
         */
        uobj->ufile = ufile;
        uobj->context = ucontext;
        INIT_LIST_HEAD(&uobj->list);
        uobj->uapi_object = obj;
        /*
         * Allocated objects start out as write locked to deny any other
         * syscalls from accessing them until they are committed. See
         * rdma_alloc_commit_uobject.
         */
        atomic_set(&uobj->usecnt, -1);
        kref_init(&uobj->ref);

        return uobj;
}

static int idr_add_uobj(struct ib_uobject *uobj)
{
        int ret;

        idr_preload(GFP_KERNEL);
        spin_lock(&uobj->ufile->idr_lock);

        /*
         * We start with allocating an idr pointing to NULL. This represents an
         * object which isn't initialized yet. We'll replace it later on with
         * the real object once we commit.
         */
        ret = idr_alloc(&uobj->ufile->idr, NULL, 0,
                        min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
        if (ret >= 0)
                uobj->id = ret;

        spin_unlock(&uobj->ufile->idr_lock);
        idr_preload_end();

        return ret < 0 ? ret : 0;
}

/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
static struct ib_uobject *
lookup_get_idr_uobject(const struct uverbs_api_object *obj,
                       struct ib_uverbs_file *ufile, s64 id,
                       enum rdma_lookup_mode mode)
{
        struct ib_uobject *uobj;
        unsigned long idrno = id;

        if (id < 0 || id > ULONG_MAX)
                return ERR_PTR(-EINVAL);

        rcu_read_lock();
        /* object won't be released as we're protected in rcu */
        uobj = idr_find(&ufile->idr, idrno);
        if (!uobj) {
                uobj = ERR_PTR(-ENOENT);
                goto free;
        }

        /*
         * The idr_find is guaranteed to return a pointer to something that
         * isn't freed yet, or NULL, as the free after idr_remove goes through
         * kfree_rcu(). However the object may still have been released and
         * kfree() could be called at any time.
         */
        if (!kref_get_unless_zero(&uobj->ref))
                uobj = ERR_PTR(-ENOENT);

free:
        rcu_read_unlock();
        return uobj;
}

static struct ib_uobject *
lookup_get_fd_uobject(const struct uverbs_api_object *obj,
                      struct ib_uverbs_file *ufile, s64 id,
                      enum rdma_lookup_mode mode)
{
        const struct uverbs_obj_fd_type *fd_type;
        struct file *f;
        struct ib_uobject *uobject;
        int fdno = id;

        if (fdno != id)
                return ERR_PTR(-EINVAL);

        if (mode != UVERBS_LOOKUP_READ)
                return ERR_PTR(-EOPNOTSUPP);

        if (!obj->type_attrs)
                return ERR_PTR(-EIO);
        fd_type =
                container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);

        f = fget(fdno);
        if (!f)
                return ERR_PTR(-EBADF);

        uobject = f->private_data;
        /*
         * fget(id) ensures we are not currently running uverbs_close_fd,
         * and the caller is expected to ensure that uverbs_close_fd is never
         * done while a call to lookup is possible.
         */
        if (f->f_op != fd_type->fops) {
                fput(f);
                return ERR_PTR(-EBADF);
        }

        uverbs_uobject_get(uobject);
        return uobject;
}

struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
                                           struct ib_uverbs_file *ufile, s64 id,
                                           enum rdma_lookup_mode mode)
{
        struct ib_uobject *uobj;
        int ret;

        if (!obj)
                return ERR_PTR(-EINVAL);

        uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
        if (IS_ERR(uobj))
                return uobj;

        if (uobj->uapi_object != obj) {
                ret = -EINVAL;
                goto free;
        }

        /*
         * If we have been disassociated block every command except for
         * DESTROY based commands.
         */
        if (mode != UVERBS_LOOKUP_DESTROY &&
            !srcu_dereference(ufile->device->ib_dev,
                              &ufile->device->disassociate_srcu)) {
                ret = -EIO;
                goto free;
        }

        ret = uverbs_try_lock_object(uobj, mode);
        if (ret)
                goto free;

        return uobj;
free:
        obj->type_class->lookup_put(uobj, mode);
        uverbs_uobject_put(uobj);
        return ERR_PTR(ret);
}
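
/*
 * Lookup sketch (assumed caller, for illustration only): a read-only command
 * pairs the get above with rdma_lookup_put_uobject(), defined later in this
 * file; 'obj' and 'handle' are hypothetical:
 *
 *      uobj = rdma_lookup_get_uobject(obj, ufile, handle, UVERBS_LOOKUP_READ);
 *      if (IS_ERR(uobj))
 *              return PTR_ERR(uobj);
 *
 *      ... use uobj->object under the shared lock ...
 *
 *      rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
 */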

static struct ib_uobject *
alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
                        struct ib_uverbs_file *ufile)
{
        int ret;
        struct ib_uobject *uobj;

        uobj = alloc_uobj(ufile, obj);
        if (IS_ERR(uobj))
                return uobj;

        ret = idr_add_uobj(uobj);
        if (ret)
                goto uobj_put;

        ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
                                   RDMACG_RESOURCE_HCA_OBJECT);
        if (ret)
                goto idr_remove;

        return uobj;

idr_remove:
        spin_lock(&ufile->idr_lock);
        idr_remove(&ufile->idr, uobj->id);
        spin_unlock(&ufile->idr_lock);
uobj_put:
        uverbs_uobject_put(uobj);
        return ERR_PTR(ret);
}

static struct ib_uobject *
alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
                       struct ib_uverbs_file *ufile)
{
        int new_fd;
        struct ib_uobject *uobj;

        new_fd = get_unused_fd_flags(O_CLOEXEC);
        if (new_fd < 0)
                return ERR_PTR(new_fd);

        uobj = alloc_uobj(ufile, obj);
        if (IS_ERR(uobj)) {
                put_unused_fd(new_fd);
                return uobj;
        }

        uobj->id = new_fd;
        uobj->ufile = ufile;

        return uobj;
}

struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
                                            struct ib_uverbs_file *ufile)
{
        struct ib_uobject *ret;

        if (!obj)
                return ERR_PTR(-EINVAL);

        /*
         * The hw_destroy_rwsem is held across the entire object creation and
         * released during rdma_alloc_commit_uobject or
         * rdma_alloc_abort_uobject.
         */
        if (!down_read_trylock(&ufile->hw_destroy_rwsem))
                return ERR_PTR(-EIO);

        ret = obj->type_class->alloc_begin(obj, ufile);
        if (IS_ERR(ret)) {
                up_read(&ufile->hw_destroy_rwsem);
                return ret;
        }
        return ret;
}

static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
{
        ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
                           RDMACG_RESOURCE_HCA_OBJECT);

        spin_lock(&uobj->ufile->idr_lock);
        idr_remove(&uobj->ufile->idr, uobj->id);
        spin_unlock(&uobj->ufile->idr_lock);
}

static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
                                               enum rdma_remove_reason why)
{
        const struct uverbs_obj_idr_type *idr_type =
                container_of(uobj->uapi_object->type_attrs,
                             struct uverbs_obj_idr_type, type);
        int ret = idr_type->destroy_object(uobj, why);

        /*
         * We can only fail gracefully if the user requested to destroy the
         * object or when a retry may be called upon an error.
         * In the rest of the cases, just remove whatever you can.
         */
        if (ib_is_destroy_retryable(ret, why, uobj))
                return ret;

        if (why == RDMA_REMOVE_ABORT)
                return 0;

        ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
                           RDMACG_RESOURCE_HCA_OBJECT);

        return 0;
}

static void remove_handle_idr_uobject(struct ib_uobject *uobj)
{
        spin_lock(&uobj->ufile->idr_lock);
        idr_remove(&uobj->ufile->idr, uobj->id);
        spin_unlock(&uobj->ufile->idr_lock);
        /* Matches the kref in alloc_commit_idr_uobject */
        uverbs_uobject_put(uobj);
}

static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
        put_unused_fd(uobj->id);
}

static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
                                              enum rdma_remove_reason why)
{
        const struct uverbs_obj_fd_type *fd_type = container_of(
                uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
        int ret = fd_type->context_closed(uobj, why);

        if (ib_is_destroy_retryable(ret, why, uobj))
                return ret;

        return 0;
}

static void remove_handle_fd_uobject(struct ib_uobject *uobj)
{
}

static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
        struct ib_uverbs_file *ufile = uobj->ufile;

        spin_lock(&ufile->idr_lock);
        /*
         * We already allocated this IDR with a NULL object, so
         * this shouldn't fail.
         *
         * NOTE: Once we set the IDR we lose ownership of our kref on uobj.
         * It will be put by remove_handle_idr_uobject()
         */
        WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
        spin_unlock(&ufile->idr_lock);

        return 0;
}

static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
        const struct uverbs_obj_fd_type *fd_type = container_of(
                uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
        int fd = uobj->id;
        struct file *filp;

        /*
         * The kref for uobj is moved into filp->private_data and put in
         * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
         * must be guaranteed to be called from the provided fops release
         * callback.
         */
        filp = anon_inode_getfile(fd_type->name,
                                  fd_type->fops,
                                  uobj,
                                  fd_type->flags);
        if (IS_ERR(filp))
                return PTR_ERR(filp);

        uobj->object = filp;

        /* Matching put will be done in uverbs_close_fd() */
        kref_get(&uobj->ufile->ref);

        /* This shouldn't be used anymore. Use the file object instead */
        uobj->id = 0;

        /*
         * NOTE: Once we install the file we lose ownership of our kref on
         * uobj. It will be put by uverbs_close_fd()
         */
        fd_install(fd, filp);
        return 0;
}

/*
 * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
 * caller can no longer assume uobj is valid. If this function fails it
 * destroys the uobject, including the attached HW object.
 */
int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
{
        struct ib_uverbs_file *ufile = uobj->ufile;
        int ret;

        /* alloc_commit consumes the uobj kref */
        ret = uobj->uapi_object->type_class->alloc_commit(uobj);
        if (ret) {
                uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
                up_read(&ufile->hw_destroy_rwsem);
                return ret;
        }

        /* kref is held so long as the uobj is on the uobj list. */
        uverbs_uobject_get(uobj);
        spin_lock_irq(&ufile->uobjects_lock);
        list_add(&uobj->list, &ufile->uobjects);
        spin_unlock_irq(&ufile->uobjects_lock);

        /* matches atomic_set(-1) in alloc_uobj */
        atomic_set(&uobj->usecnt, 0);

        /* Matches the down_read in rdma_alloc_begin_uobject */
        up_read(&ufile->hw_destroy_rwsem);

        return 0;
}

/*
 * This consumes the kref for uobj. It is up to the caller to unwind the HW
 * object and anything else connected to uobj before calling this.
 */
void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
{
        struct ib_uverbs_file *ufile = uobj->ufile;

        uobj->object = NULL;
        uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);

        /* Matches the down_read in rdma_alloc_begin_uobject */
        up_read(&ufile->hw_destroy_rwsem);
}
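
/*
 * Creation sketch (hedged; real handlers normally reach these functions
 * through higher-level helpers such as uobj_alloc() and the ioctl layer,
 * and 'driver_create_thing' is a hypothetical driver call):
 *
 *      uobj = rdma_alloc_begin_uobject(obj, ufile);
 *      if (IS_ERR(uobj))
 *              return PTR_ERR(uobj);
 *
 *      thing = driver_create_thing(...);
 *      if (IS_ERR(thing)) {
 *              rdma_alloc_abort_uobject(uobj);         // consumes the kref
 *              return PTR_ERR(thing);
 *      }
 *      uobj->object = thing;
 *
 *      return rdma_alloc_commit_uobject(uobj);         // publishes the handle
 */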

static void lookup_put_idr_uobject(struct ib_uobject *uobj,
                                   enum rdma_lookup_mode mode)
{
}

static void lookup_put_fd_uobject(struct ib_uobject *uobj,
                                  enum rdma_lookup_mode mode)
{
        struct file *filp = uobj->object;

        WARN_ON(mode != UVERBS_LOOKUP_READ);
        /* This indirectly calls uverbs_close_fd and frees the object */
        fput(filp);
}

void rdma_lookup_put_uobject(struct ib_uobject *uobj,
                             enum rdma_lookup_mode mode)
{
        assert_uverbs_usecnt(uobj, mode);
        uobj->uapi_object->type_class->lookup_put(uobj, mode);
        /*
         * In order to unlock an object, either decrease its usecnt for
         * read access or zero it in case of exclusive access. See
         * uverbs_try_lock_object for locking schema information.
         */
        switch (mode) {
        case UVERBS_LOOKUP_READ:
                atomic_dec(&uobj->usecnt);
                break;
        case UVERBS_LOOKUP_WRITE:
                atomic_set(&uobj->usecnt, 0);
                break;
        case UVERBS_LOOKUP_DESTROY:
                break;
        }

        /* Pairs with the kref obtained by type->lookup_get */
        uverbs_uobject_put(uobj);
}

void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
{
        spin_lock_init(&ufile->idr_lock);
        idr_init(&ufile->idr);
}

void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
{
        struct ib_uobject *entry;
        int id;

        /*
         * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
         * there are no HW objects left, however the IDR is still populated
         * with anything that has not been cleaned up by userspace. Since the
         * kref on ufile is 0, nothing is allowed to call lookup_get.
         *
         * This is an optimized equivalent to remove_handle_idr_uobject
         */
        idr_for_each_entry(&ufile->idr, entry, id) {
                WARN_ON(entry->object);
                uverbs_uobject_put(entry);
        }

        idr_destroy(&ufile->idr);
}

const struct uverbs_obj_type_class uverbs_idr_class = {
        .alloc_begin = alloc_begin_idr_uobject,
        .lookup_get = lookup_get_idr_uobject,
        .alloc_commit = alloc_commit_idr_uobject,
        .alloc_abort = alloc_abort_idr_uobject,
        .lookup_put = lookup_put_idr_uobject,
        .destroy_hw = destroy_hw_idr_uobject,
        .remove_handle = remove_handle_idr_uobject,
        /*
         * When we destroy an object, we first just lock it for WRITE and
         * actually DESTROY it in the finalize stage. So, the problematic
         * scenario is when we just started the finalize stage of the
         * destruction (nothing was executed yet). Now, the other thread
         * fetched the object for READ access, but it didn't lock it yet.
         * The DESTROY thread continues and starts destroying the object.
         * When the other thread continues - without the RCU, it would
         * access freed memory. However, the rcu_read_lock delays the free
         * until the rcu_read_lock of the READ operation quits. Since the
         * exclusive lock of the object is still taken by the DESTROY flow, the
         * READ operation will get -EBUSY and it'll just bail out.
         */
        .needs_kfree_rcu = true,
};
EXPORT_SYMBOL(uverbs_idr_class);

void uverbs_close_fd(struct file *f)
{
        struct ib_uobject *uobj = f->private_data;
        struct ib_uverbs_file *ufile = uobj->ufile;

        if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
                /*
                 * lookup_get_fd_uobject holds the kref on the struct file any
                 * time a FD uobj is locked, which prevents this release
                 * method from being invoked. Meaning we can always get the
                 * write lock here, or we have a kernel bug.
                 */
                WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
                uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE);
                up_read(&ufile->hw_destroy_rwsem);
        }

        /* Matches the get in alloc_begin_fd_uobject */
        kref_put(&ufile->ref, ib_uverbs_release_file);

        /* Pairs with filp->private_data in alloc_begin_fd_uobject */
        uverbs_uobject_put(uobj);
}

static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
        struct ib_device *ib_dev = ibcontext->device;
        struct task_struct *owning_process = NULL;
        struct mm_struct *owning_mm = NULL;

        owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
        if (!owning_process)
                return;

        owning_mm = get_task_mm(owning_process);
        if (!owning_mm) {
                pr_info("no mm, disassociate ucontext is pending task termination\n");
                while (1) {
                        put_task_struct(owning_process);
                        usleep_range(1000, 2000);
                        owning_process = get_pid_task(ibcontext->tgid,
                                                      PIDTYPE_PID);
                        if (!owning_process ||
                            owning_process->state == TASK_DEAD) {
                                pr_info("disassociate ucontext done, task was terminated\n");
                                /* in case task was dead need to release the
                                 * task struct.
                                 */
                                if (owning_process)
                                        put_task_struct(owning_process);
                                return;
                        }
                }
        }

        down_write(&owning_mm->mmap_sem);
        ib_dev->disassociate_ucontext(ibcontext);
        up_write(&owning_mm->mmap_sem);
        mmput(owning_mm);
        put_task_struct(owning_process);
}

/*
 * Drop the ucontext off the ufile and completely disconnect it from the
 * ib_device
 */
static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
                                   enum rdma_remove_reason reason)
{
        struct ib_ucontext *ucontext = ufile->ucontext;
        int ret;

        if (reason == RDMA_REMOVE_DRIVER_REMOVE)
                ufile_disassociate_ucontext(ucontext);

        put_pid(ucontext->tgid);
        ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
                           RDMACG_RESOURCE_HCA_HANDLE);

        /*
         * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
         * the error return.
         */
        ret = ucontext->device->dealloc_ucontext(ucontext);
        WARN_ON(ret);

        ufile->ucontext = NULL;
}

static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
                                  enum rdma_remove_reason reason)
{
        struct ib_uobject *obj, *next_obj;
        int ret = -EINVAL;

        /*
         * This shouldn't run while executing other commands on this
         * context. Thus, the only thing we should take care of is
         * releasing a FD while traversing this list. The FD could be
         * closed and released from the _release fop of this FD.
         * In order to mitigate this, we add a lock.
         * We take and release the lock per traversal in order to give
         * other threads (which might still use the FDs) a chance to run.
         */
        list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
                /*
                 * if we hit this WARN_ON, that means we are
                 * racing with a lookup_get.
                 */
                WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
                if (!uverbs_destroy_uobject(obj, reason))
                        ret = 0;
        }
        return ret;
}

/*
 * Destroy the ucontext and every uobject associated with it. If called with
 * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
 * been completed and ufile->ucontext is NULL.
 *
 * This is internally locked and can be called in parallel from multiple
 * contexts.
 */
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
                             enum rdma_remove_reason reason)
{
        if (reason == RDMA_REMOVE_CLOSE) {
                /*
                 * During destruction we might trigger something that
                 * synchronously calls release on any file descriptor. For
                 * this reason all paths that come from file_operations
                 * release must use try_lock. They can progress knowing that
                 * there is an ongoing uverbs_destroy_ufile_hw that will clean
                 * up the driver resources.
                 */
                if (!mutex_trylock(&ufile->ucontext_lock))
                        return;
        } else {
                mutex_lock(&ufile->ucontext_lock);
        }

        down_write(&ufile->hw_destroy_rwsem);

        /*
         * If a ucontext was never created then we can't have any uobjects to
         * cleanup, nothing to do.
         */
        if (!ufile->ucontext)
                goto done;

        ufile->ucontext->closing = true;
        ufile->ucontext->cleanup_retryable = true;
        while (!list_empty(&ufile->uobjects))
                if (__uverbs_cleanup_ufile(ufile, reason)) {
                        /*
                         * No entry was cleaned-up successfully during this
                         * iteration
                         */
                        break;
                }

        ufile->ucontext->cleanup_retryable = false;
        if (!list_empty(&ufile->uobjects))
                __uverbs_cleanup_ufile(ufile, reason);

        ufile_destroy_ucontext(ufile, reason);

done:
        up_write(&ufile->hw_destroy_rwsem);
        mutex_unlock(&ufile->ucontext_lock);
}
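
/*
 * Caller sketch (an assumption about the surrounding uverbs code, shown only
 * to clarify the two reasons handled above): the file release path and the
 * device hot-unplug path both funnel into uverbs_destroy_ufile_hw():
 *
 *      // on close(2) of the uverbs char device
 *      uverbs_destroy_ufile_hw(ufile, RDMA_REMOVE_CLOSE);
 *
 *      // when the ib_device is being unregistered out from under users
 *      uverbs_destroy_ufile_hw(ufile, RDMA_REMOVE_DRIVER_REMOVE);
 */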

const struct uverbs_obj_type_class uverbs_fd_class = {
        .alloc_begin = alloc_begin_fd_uobject,
        .lookup_get = lookup_get_fd_uobject,
        .alloc_commit = alloc_commit_fd_uobject,
        .alloc_abort = alloc_abort_fd_uobject,
        .lookup_put = lookup_put_fd_uobject,
        .destroy_hw = destroy_hw_fd_uobject,
        .remove_handle = remove_handle_fd_uobject,
        .needs_kfree_rcu = false,
};
EXPORT_SYMBOL(uverbs_fd_class);

struct ib_uobject *
uverbs_get_uobject_from_file(u16 object_id,
                             struct ib_uverbs_file *ufile,
                             enum uverbs_obj_access access, s64 id)
{
        const struct uverbs_api_object *obj =
                uapi_get_object(ufile->device->uapi, object_id);

        switch (access) {
        case UVERBS_ACCESS_READ:
                return rdma_lookup_get_uobject(obj, ufile, id,
                                               UVERBS_LOOKUP_READ);
        case UVERBS_ACCESS_DESTROY:
                /* Actual destruction is done inside uverbs_handle_method */
                return rdma_lookup_get_uobject(obj, ufile, id,
                                               UVERBS_LOOKUP_DESTROY);
        case UVERBS_ACCESS_WRITE:
                return rdma_lookup_get_uobject(obj, ufile, id,
                                               UVERBS_LOOKUP_WRITE);
        case UVERBS_ACCESS_NEW:
                return rdma_alloc_begin_uobject(obj, ufile);
        default:
                WARN_ON(true);
                return ERR_PTR(-EOPNOTSUPP);
        }
}

int uverbs_finalize_object(struct ib_uobject *uobj,
                           enum uverbs_obj_access access,
                           bool commit)
{
        int ret = 0;

        /*
         * refcounts should be handled at the object level and not at the
         * uobject level. Refcounts of the objects themselves are done in
         * handlers.
         */
        switch (access) {
        case UVERBS_ACCESS_READ:
                rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
                break;
        case UVERBS_ACCESS_WRITE:
                rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
                break;
        case UVERBS_ACCESS_DESTROY:
                if (uobj)
                        rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
                break;
        case UVERBS_ACCESS_NEW:
                if (commit)
                        ret = rdma_alloc_commit_uobject(uobj);
                else
                        rdma_alloc_abort_uobject(uobj);
                break;
        default:
                WARN_ON(true);
                ret = -EOPNOTSUPP;
        }

        return ret;
}
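
/*
 * Finalize sketch (a hedged view of the ioctl dispatcher, which lives outside
 * this file): every uobject handed to a method handler via
 * uverbs_get_uobject_from_file() is expected to be passed back through
 * uverbs_finalize_object(), with 'commit' reflecting whether the handler
 * succeeded:
 *
 *      uobj = uverbs_get_uobject_from_file(object_id, ufile,
 *                                          UVERBS_ACCESS_NEW, id);
 *      ret = handler(...);             // hypothetical method handler
 *      uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, !ret);
 */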