waitqueue.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. /*
  2. * (C) 2001 Clemson University and The University of Chicago
  3. * (C) 2011 Omnibond Systems
  4. *
  5. * Changes by Acxiom Corporation to implement generic service_operation()
  6. * function, Copyright Acxiom Corporation, 2005.
  7. *
  8. * See COPYING in top-level directory.
  9. */
  10. /*
  11. * In-kernel waitqueue operations.
  12. */
  13. #include "protocol.h"
  14. #include "orangefs-kernel.h"
  15. #include "orangefs-bufmap.h"
  16. /*
  17. * What we do in this function is to walk the list of operations that are
  18. * present in the request queue and mark them as purged.
  19. * NOTE: This is called from the device close after client-core has
  20. * guaranteed that no new operations could appear on the list since the
  21. * client-core is anyway going to exit.
  22. */
  23. void purge_waiting_ops(void)
  24. {
  25. struct orangefs_kernel_op_s *op;
  26. spin_lock(&orangefs_request_list_lock);
  27. list_for_each_entry(op, &orangefs_request_list, list) {
  28. gossip_debug(GOSSIP_WAIT_DEBUG,
  29. "pvfs2-client-core: purging op tag %llu %s\n",
  30. llu(op->tag),
  31. get_opname_string(op));
  32. spin_lock(&op->lock);
  33. set_op_state_purged(op);
  34. spin_unlock(&op->lock);
  35. wake_up_interruptible(&op->waitq);
  36. }
  37. spin_unlock(&orangefs_request_list_lock);
  38. }
  39. static inline void
  40. add_op_to_request_list(struct orangefs_kernel_op_s *op)
  41. {
  42. spin_lock(&orangefs_request_list_lock);
  43. spin_lock(&op->lock);
  44. set_op_state_waiting(op);
  45. list_add_tail(&op->list, &orangefs_request_list);
  46. spin_unlock(&orangefs_request_list_lock);
  47. spin_unlock(&op->lock);
  48. wake_up_interruptible(&orangefs_request_list_waitq);
  49. }
  50. static inline
  51. void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op)
  52. {
  53. spin_lock(&orangefs_request_list_lock);
  54. spin_lock(&op->lock);
  55. set_op_state_waiting(op);
  56. list_add(&op->list, &orangefs_request_list);
  57. spin_unlock(&orangefs_request_list_lock);
  58. spin_unlock(&op->lock);
  59. wake_up_interruptible(&orangefs_request_list_waitq);
  60. }
  61. /*
  62. * submits a ORANGEFS operation and waits for it to complete
  63. *
  64. * Note op->downcall.status will contain the status of the operation (in
  65. * errno format), whether provided by pvfs2-client or a result of failure to
  66. * service the operation. If the caller wishes to distinguish, then
  67. * op->state can be checked to see if it was serviced or not.
  68. *
  69. * Returns contents of op->downcall.status for convenience
  70. */
  71. int service_operation(struct orangefs_kernel_op_s *op,
  72. const char *op_name,
  73. int flags)
  74. {
  75. /* flags to modify behavior */
  76. sigset_t orig_sigset;
  77. int ret = 0;
  78. /* irqflags and wait_entry are only used IF the client-core aborts */
  79. unsigned long irqflags;
  80. DEFINE_WAIT(wait_entry);
  81. op->upcall.tgid = current->tgid;
  82. op->upcall.pid = current->pid;
  83. retry_servicing:
  84. op->downcall.status = 0;
  85. gossip_debug(GOSSIP_WAIT_DEBUG,
  86. "orangefs: service_operation: %s %p\n",
  87. op_name,
  88. op);
  89. gossip_debug(GOSSIP_WAIT_DEBUG,
  90. "orangefs: operation posted by process: %s, pid: %i\n",
  91. current->comm,
  92. current->pid);
  93. /* mask out signals if this operation is not to be interrupted */
  94. if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
  95. orangefs_block_signals(&orig_sigset);
  96. if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) {
  97. ret = mutex_lock_interruptible(&request_mutex);
  98. /*
  99. * check to see if we were interrupted while waiting for
  100. * semaphore
  101. */
  102. if (ret < 0) {
  103. if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
  104. orangefs_set_signals(&orig_sigset);
  105. op->downcall.status = ret;
  106. gossip_debug(GOSSIP_WAIT_DEBUG,
  107. "orangefs: service_operation interrupted.\n");
  108. return ret;
  109. }
  110. }
  111. gossip_debug(GOSSIP_WAIT_DEBUG,
  112. "%s:About to call is_daemon_in_service().\n",
  113. __func__);
  114. if (is_daemon_in_service() < 0) {
  115. /*
  116. * By incrementing the per-operation attempt counter, we
  117. * directly go into the timeout logic while waiting for
  118. * the matching downcall to be read
  119. */
  120. gossip_debug(GOSSIP_WAIT_DEBUG,
  121. "%s:client core is NOT in service(%d).\n",
  122. __func__,
  123. is_daemon_in_service());
  124. op->attempts++;
  125. }
  126. /* queue up the operation */
  127. if (flags & ORANGEFS_OP_PRIORITY) {
  128. add_priority_op_to_request_list(op);
  129. } else {
  130. gossip_debug(GOSSIP_WAIT_DEBUG,
  131. "%s:About to call add_op_to_request_list().\n",
  132. __func__);
  133. add_op_to_request_list(op);
  134. }
  135. if (!(flags & ORANGEFS_OP_NO_SEMAPHORE))
  136. mutex_unlock(&request_mutex);
  137. /*
  138. * If we are asked to service an asynchronous operation from
  139. * VFS perspective, we are done.
  140. */
  141. if (flags & ORANGEFS_OP_ASYNC)
  142. return 0;
  143. if (flags & ORANGEFS_OP_CANCELLATION) {
  144. gossip_debug(GOSSIP_WAIT_DEBUG,
  145. "%s:"
  146. "About to call wait_for_cancellation_downcall.\n",
  147. __func__);
  148. ret = wait_for_cancellation_downcall(op);
  149. } else {
  150. ret = wait_for_matching_downcall(op);
  151. }
  152. if (ret < 0) {
  153. /* failed to get matching downcall */
  154. if (ret == -ETIMEDOUT) {
  155. gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n",
  156. op_name);
  157. }
  158. op->downcall.status = ret;
  159. } else {
  160. /* got matching downcall; make sure status is in errno format */
  161. op->downcall.status =
  162. orangefs_normalize_to_errno(op->downcall.status);
  163. ret = op->downcall.status;
  164. }
  165. if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
  166. orangefs_set_signals(&orig_sigset);
  167. BUG_ON(ret != op->downcall.status);
  168. /* retry if operation has not been serviced and if requested */
  169. if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) {
  170. gossip_debug(GOSSIP_WAIT_DEBUG,
  171. "orangefs: tag %llu (%s)"
  172. " -- operation to be retried (%d attempt)\n",
  173. llu(op->tag),
  174. op_name,
  175. op->attempts + 1);
  176. if (!op->uses_shared_memory)
  177. /*
  178. * this operation doesn't use the shared memory
  179. * system
  180. */
  181. goto retry_servicing;
  182. /* op uses shared memory */
  183. if (orangefs_get_bufmap_init() == 0) {
  184. /*
  185. * This operation uses the shared memory system AND
  186. * the system is not yet ready. This situation occurs
  187. * when the client-core is restarted AND there were
  188. * operations waiting to be processed or were already
  189. * in process.
  190. */
  191. gossip_debug(GOSSIP_WAIT_DEBUG,
  192. "uses_shared_memory is true.\n");
  193. gossip_debug(GOSSIP_WAIT_DEBUG,
  194. "Client core in-service status(%d).\n",
  195. is_daemon_in_service());
  196. gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n",
  197. orangefs_get_bufmap_init());
  198. gossip_debug(GOSSIP_WAIT_DEBUG,
  199. "operation's status is 0x%0x.\n",
  200. op->op_state);
  201. /*
  202. * let process sleep for a few seconds so shared
  203. * memory system can be initialized.
  204. */
  205. spin_lock_irqsave(&op->lock, irqflags);
  206. prepare_to_wait(&orangefs_bufmap_init_waitq,
  207. &wait_entry,
  208. TASK_INTERRUPTIBLE);
  209. spin_unlock_irqrestore(&op->lock, irqflags);
  210. /*
  211. * Wait for orangefs_bufmap_initialize() to wake me up
  212. * within the allotted time.
  213. */
  214. ret = schedule_timeout(MSECS_TO_JIFFIES
  215. (1000 * ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS));
  216. gossip_debug(GOSSIP_WAIT_DEBUG,
  217. "Value returned from schedule_timeout:"
  218. "%d.\n",
  219. ret);
  220. gossip_debug(GOSSIP_WAIT_DEBUG,
  221. "Is shared memory available? (%d).\n",
  222. orangefs_get_bufmap_init());
  223. spin_lock_irqsave(&op->lock, irqflags);
  224. finish_wait(&orangefs_bufmap_init_waitq, &wait_entry);
  225. spin_unlock_irqrestore(&op->lock, irqflags);
  226. if (orangefs_get_bufmap_init() == 0) {
  227. gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted. Aborting user's request(%s).\n",
  228. __func__,
  229. ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS,
  230. get_opname_string(op));
  231. return -EIO;
  232. }
  233. /*
  234. * Return to the calling function and re-populate a
  235. * shared memory buffer.
  236. */
  237. return -EAGAIN;
  238. }
  239. }
  240. gossip_debug(GOSSIP_WAIT_DEBUG,
  241. "orangefs: service_operation %s returning: %d for %p.\n",
  242. op_name,
  243. ret,
  244. op);
  245. return ret;
  246. }
  247. static inline void remove_op_from_request_list(struct orangefs_kernel_op_s *op)
  248. {
  249. struct list_head *tmp = NULL;
  250. struct list_head *tmp_safe = NULL;
  251. struct orangefs_kernel_op_s *tmp_op = NULL;
  252. spin_lock(&orangefs_request_list_lock);
  253. list_for_each_safe(tmp, tmp_safe, &orangefs_request_list) {
  254. tmp_op = list_entry(tmp,
  255. struct orangefs_kernel_op_s,
  256. list);
  257. if (tmp_op && (tmp_op == op)) {
  258. list_del(&tmp_op->list);
  259. break;
  260. }
  261. }
  262. spin_unlock(&orangefs_request_list_lock);
  263. }
  264. void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
  265. {
  266. /*
  267. * handle interrupted cases depending on what state we were in when
  268. * the interruption is detected. there is a coarse grained lock
  269. * across the operation.
  270. *
  271. * NOTE: be sure not to reverse lock ordering by locking an op lock
  272. * while holding the request_list lock. Here, we first lock the op
  273. * and then lock the appropriate list.
  274. */
  275. if (!op) {
  276. gossip_debug(GOSSIP_WAIT_DEBUG,
  277. "%s: op is null, ignoring\n",
  278. __func__);
  279. return;
  280. }
  281. /*
  282. * one more sanity check, make sure it's in one of the possible states
  283. * or don't try to cancel it
  284. */
  285. if (!(op_state_waiting(op) ||
  286. op_state_in_progress(op) ||
  287. op_state_serviced(op) ||
  288. op_state_purged(op))) {
  289. gossip_debug(GOSSIP_WAIT_DEBUG,
  290. "%s: op %p not in a valid state (%0x), "
  291. "ignoring\n",
  292. __func__,
  293. op,
  294. op->op_state);
  295. return;
  296. }
  297. spin_lock(&op->lock);
  298. if (op_state_waiting(op)) {
  299. /*
  300. * upcall hasn't been read; remove op from upcall request
  301. * list.
  302. */
  303. spin_unlock(&op->lock);
  304. remove_op_from_request_list(op);
  305. gossip_debug(GOSSIP_WAIT_DEBUG,
  306. "Interrupted: Removed op %p from request_list\n",
  307. op);
  308. } else if (op_state_in_progress(op)) {
  309. /* op must be removed from the in progress htable */
  310. spin_unlock(&op->lock);
  311. spin_lock(&htable_ops_in_progress_lock);
  312. list_del(&op->list);
  313. spin_unlock(&htable_ops_in_progress_lock);
  314. gossip_debug(GOSSIP_WAIT_DEBUG,
  315. "Interrupted: Removed op %p"
  316. " from htable_ops_in_progress\n",
  317. op);
  318. } else if (!op_state_serviced(op)) {
  319. spin_unlock(&op->lock);
  320. gossip_err("interrupted operation is in a weird state 0x%x\n",
  321. op->op_state);
  322. } else {
  323. /*
  324. * It is not intended for execution to flow here,
  325. * but having this unlock here makes sparse happy.
  326. */
  327. gossip_err("%s: can't get here.\n", __func__);
  328. spin_unlock(&op->lock);
  329. }
  330. }
  331. /*
  332. * sleeps on waitqueue waiting for matching downcall.
  333. * if client-core finishes servicing, then we are good to go.
  334. * else if client-core exits, we get woken up here, and retry with a timeout
  335. *
  336. * Post when this call returns to the caller, the specified op will no
  337. * longer be on any list or htable.
  338. *
  339. * Returns 0 on success and -errno on failure
  340. * Errors are:
  341. * EAGAIN in case we want the caller to requeue and try again..
  342. * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
  343. * operation since client-core seems to be exiting too often
  344. * or if we were interrupted.
  345. */
  346. int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
  347. {
  348. int ret = -EINVAL;
  349. DEFINE_WAIT(wait_entry);
  350. while (1) {
  351. spin_lock(&op->lock);
  352. prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
  353. if (op_state_serviced(op)) {
  354. spin_unlock(&op->lock);
  355. ret = 0;
  356. break;
  357. }
  358. spin_unlock(&op->lock);
  359. if (!signal_pending(current)) {
  360. /*
  361. * if this was our first attempt and client-core
  362. * has not purged our operation, we are happy to
  363. * simply wait
  364. */
  365. spin_lock(&op->lock);
  366. if (op->attempts == 0 && !op_state_purged(op)) {
  367. spin_unlock(&op->lock);
  368. schedule();
  369. } else {
  370. spin_unlock(&op->lock);
  371. /*
  372. * subsequent attempts, we retry exactly once
  373. * with timeouts
  374. */
  375. if (!schedule_timeout(MSECS_TO_JIFFIES
  376. (1000 * op_timeout_secs))) {
  377. gossip_debug(GOSSIP_WAIT_DEBUG,
  378. "*** %s:"
  379. " operation timed out (tag"
  380. " %llu, %p, att %d)\n",
  381. __func__,
  382. llu(op->tag),
  383. op,
  384. op->attempts);
  385. ret = -ETIMEDOUT;
  386. orangefs_clean_up_interrupted_operation
  387. (op);
  388. break;
  389. }
  390. }
  391. spin_lock(&op->lock);
  392. op->attempts++;
  393. /*
  394. * if the operation was purged in the meantime, it
  395. * is better to requeue it afresh but ensure that
  396. * we have not been purged repeatedly. This could
  397. * happen if client-core crashes when an op
  398. * is being serviced, so we requeue the op, client
  399. * core crashes again so we requeue the op, client
  400. * core starts, and so on...
  401. */
  402. if (op_state_purged(op)) {
  403. ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
  404. -EAGAIN :
  405. -EIO;
  406. spin_unlock(&op->lock);
  407. gossip_debug(GOSSIP_WAIT_DEBUG,
  408. "*** %s:"
  409. " operation purged (tag "
  410. "%llu, %p, att %d)\n",
  411. __func__,
  412. llu(op->tag),
  413. op,
  414. op->attempts);
  415. orangefs_clean_up_interrupted_operation(op);
  416. break;
  417. }
  418. spin_unlock(&op->lock);
  419. continue;
  420. }
  421. gossip_debug(GOSSIP_WAIT_DEBUG,
  422. "*** %s:"
  423. " operation interrupted by a signal (tag "
  424. "%llu, op %p)\n",
  425. __func__,
  426. llu(op->tag),
  427. op);
  428. orangefs_clean_up_interrupted_operation(op);
  429. ret = -EINTR;
  430. break;
  431. }
  432. spin_lock(&op->lock);
  433. finish_wait(&op->waitq, &wait_entry);
  434. spin_unlock(&op->lock);
  435. return ret;
  436. }
  437. /*
  438. * similar to wait_for_matching_downcall(), but used in the special case
  439. * of I/O cancellations.
  440. *
  441. * Note we need a special wait function because if this is called we already
  442. * know that a signal is pending in current and need to service the
  443. * cancellation upcall anyway. the only way to exit this is to either
  444. * timeout or have the cancellation be serviced properly.
  445. */
  446. int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op)
  447. {
  448. int ret = -EINVAL;
  449. DEFINE_WAIT(wait_entry);
  450. while (1) {
  451. spin_lock(&op->lock);
  452. prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
  453. if (op_state_serviced(op)) {
  454. gossip_debug(GOSSIP_WAIT_DEBUG,
  455. "%s:op-state is SERVICED.\n",
  456. __func__);
  457. spin_unlock(&op->lock);
  458. ret = 0;
  459. break;
  460. }
  461. spin_unlock(&op->lock);
  462. if (signal_pending(current)) {
  463. gossip_debug(GOSSIP_WAIT_DEBUG,
  464. "%s:operation interrupted by a signal (tag"
  465. " %llu, op %p)\n",
  466. __func__,
  467. llu(op->tag),
  468. op);
  469. orangefs_clean_up_interrupted_operation(op);
  470. ret = -EINTR;
  471. break;
  472. }
  473. gossip_debug(GOSSIP_WAIT_DEBUG,
  474. "%s:About to call schedule_timeout.\n",
  475. __func__);
  476. ret =
  477. schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs));
  478. gossip_debug(GOSSIP_WAIT_DEBUG,
  479. "%s:Value returned from schedule_timeout(%d).\n",
  480. __func__,
  481. ret);
  482. if (!ret) {
  483. gossip_debug(GOSSIP_WAIT_DEBUG,
  484. "%s:*** operation timed out: %p\n",
  485. __func__,
  486. op);
  487. orangefs_clean_up_interrupted_operation(op);
  488. ret = -ETIMEDOUT;
  489. break;
  490. }
  491. gossip_debug(GOSSIP_WAIT_DEBUG,
  492. "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
  493. __func__);
  494. ret = -ETIMEDOUT;
  495. break;
  496. }
  497. spin_lock(&op->lock);
  498. finish_wait(&op->waitq, &wait_entry);
  499. spin_unlock(&op->lock);
  500. gossip_debug(GOSSIP_WAIT_DEBUG,
  501. "%s:returning ret(%d)\n",
  502. __func__,
  503. ret);
  504. return ret;
  505. }