/*
 * Copyright (C) 2006-2009 Red Hat, Inc.
 *
 * This file is released under the LGPL.
 */

#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/dm-dirty-log.h>
#include <linux/device-mapper.h>
#include <linux/dm-log-userspace.h>
#include <linux/module.h>
#include <linux/workqueue.h>

#include "dm-log-userspace-transfer.h"

#define DM_LOG_USERSPACE_VSN "1.3.0"

struct flush_entry {
	int type;
	region_t region;
	struct list_head list;
};

/*
 * This limit on the number of mark and clear requests is, to a degree,
 * arbitrary.  However, there is some basis for the choice in the limits
 * imposed on the size of data payload by dm-log-userspace-transfer.c:
 * dm_consult_userspace().
 */
#define MAX_FLUSH_GROUP_COUNT 32

struct log_c {
	struct dm_target *ti;
	struct dm_dev *log_dev;
	uint32_t region_size;
	region_t region_count;
	uint64_t luid;
	char uuid[DM_UUID_LEN];

	char *usr_argv_str;
	uint32_t usr_argc;

	/*
	 * in_sync_hint gets set when doing is_remote_recovering.  It
	 * represents the first region that needs recovery.  IOW, the
	 * first zero bit of sync_bits.  This can be useful to limit
	 * traffic for calls like is_remote_recovering and get_resync_work,
	 * but take care in its use for anything else.
	 */
	uint64_t in_sync_hint;

	/*
	 * Mark and clear requests are held until a flush is issued
	 * so that we can group, and thereby limit, the amount of
	 * network traffic between kernel and userspace.  The 'flush_lock'
	 * is used to protect these lists.
	 */
	spinlock_t flush_lock;
	struct list_head mark_list;
	struct list_head clear_list;

	/*
	 * Workqueue for flush of clear region requests.
	 */
	struct workqueue_struct *dmlog_wq;
	struct delayed_work flush_log_work;
	atomic_t sched_flush;

	/*
	 * Combine userspace flush and mark requests for efficiency.
	 */
	uint32_t integrated_flush;
};

static mempool_t *flush_entry_pool;

static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
{
	return kmalloc(sizeof(struct flush_entry), gfp_mask);
}

static void flush_entry_free(void *element, void *pool_data)
{
	kfree(element);
}

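/*
 * userspace_do_request
 *
 * Relay a request to the userspace log server, retrying if the
 * server has gone away (-ESRCH).  On reconnect, the server-side log
 * is re-created from the saved constructor string and resumed before
 * the original request is retried.
 */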
static int userspace_do_request(struct log_c *lc, const char *uuid,
				int request_type, char *data, size_t data_size,
				char *rdata, size_t *rdata_size)
{
	int r;

	/*
	 * If the server isn't there, -ESRCH is returned,
	 * and we must keep trying until the server is
	 * restored.
	 */
retry:
	r = dm_consult_userspace(uuid, lc->luid, request_type, data,
				 data_size, rdata, rdata_size);

	if (r != -ESRCH)
		return r;

	DMERR(" Userspace log server not found.");
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(2*HZ);
		DMWARN("Attempting to contact userspace log server...");
		r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
					 lc->usr_argv_str,
					 strlen(lc->usr_argv_str) + 1,
					 NULL, NULL);
		if (!r)
			break;
	}
	DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
	r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
				 0, NULL, NULL);
	if (!r)
		goto retry;

	DMERR("Error trying to resume userspace log: %d", r);

	return -ESRCH;
}

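/*
 * build_constructor_string
 *
 * Flatten the target length and the remaining constructor arguments
 * into one space-separated string for the userspace server.  With
 * purely illustrative values, the result looks like:
 *	"409600 clustered-disk 3 253:4 1024 nosync"
 * The caller takes ownership of the allocation returned in *ctr_str.
 *
 * Returns: length of the string on success, -ENOMEM on failure
 */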
static int build_constructor_string(struct dm_target *ti,
				    unsigned argc, char **argv,
				    char **ctr_str)
{
	int i, str_size;
	char *str = NULL;

	*ctr_str = NULL;

	/*
	 * Determine overall size of the string.
	 */
	for (i = 0, str_size = 0; i < argc; i++)
		str_size += strlen(argv[i]) + 1; /* +1 for space between args */

	str_size += 20; /* Max number of chars in a printed u64 number */

	str = kzalloc(str_size, GFP_KERNEL);
	if (!str) {
		DMWARN("Unable to allocate memory for constructor string");
		return -ENOMEM;
	}

	str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
	for (i = 0; i < argc; i++)
		str_size += sprintf(str + str_size, " %s", argv[i]);

	*ctr_str = str;
	return str_size;
}

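/*
 * do_flush
 *
 * Delayed-work handler for the integrated-flush case: issue the
 * DM_ULOG_FLUSH that userspace_flush() deferred.  On failure, signal
 * an event on the table so the error is noticed.
 */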
static void do_flush(struct work_struct *work)
{
	int r;
	struct log_c *lc = container_of(work, struct log_c, flush_log_work.work);

	atomic_set(&lc->sched_flush, 0);

	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL);

	if (r)
		dm_table_event(lc->ti->table);
}

/*
 * userspace_ctr
 *
 * argv contains:
 *	<UUID> [integrated_flush] <other args>
 * Where 'other args' are the userspace implementation-specific log
 * arguments.
 *
 * Example:
 *	<UUID> [integrated_flush] clustered-disk <arg count> <log dev>
 *	<region_size> [[no]sync]
 *
 * This module strips off the <UUID> and uses it for identification
 * purposes when communicating with userspace about a log.
 *
 * If integrated_flush is defined, the kernel combines flush
 * and mark requests.
 *
 * The rest of the line, beginning with 'clustered-disk', is passed
 * to the userspace ctr function.
 */
static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
			 unsigned argc, char **argv)
{
	int r = 0;
	int str_size;
	char *ctr_str = NULL;
	struct log_c *lc = NULL;
	uint64_t rdata;
	size_t rdata_size = sizeof(rdata);
	char *devices_rdata = NULL;
	size_t devices_rdata_size = DM_NAME_LEN;

	if (argc < 3) {
		DMWARN("Too few arguments to userspace dirty log");
		return -EINVAL;
	}

	lc = kzalloc(sizeof(*lc), GFP_KERNEL);
	if (!lc) {
		DMWARN("Unable to allocate userspace log context.");
		return -ENOMEM;
	}

	/* The ptr value is sufficient for local unique id */
	lc->luid = (unsigned long)lc;

	lc->ti = ti;

	if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
		DMWARN("UUID argument too long.");
		kfree(lc);
		return -EINVAL;
	}

	lc->usr_argc = argc;

	strncpy(lc->uuid, argv[0], DM_UUID_LEN);
	argc--;
	argv++;
	spin_lock_init(&lc->flush_lock);
	INIT_LIST_HEAD(&lc->mark_list);
	INIT_LIST_HEAD(&lc->clear_list);

	if (!strcasecmp(argv[0], "integrated_flush")) {
		lc->integrated_flush = 1;
		argc--;
		argv++;
	}

	str_size = build_constructor_string(ti, argc, argv, &ctr_str);
	if (str_size < 0) {
		kfree(lc);
		return str_size;
	}

	devices_rdata = kzalloc(devices_rdata_size, GFP_KERNEL);
	if (!devices_rdata) {
		DMERR("Failed to allocate memory for device information");
		r = -ENOMEM;
		goto out;
	}

	/*
	 * Send table string and get back any opened device.
	 */
	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
				 ctr_str, str_size,
				 devices_rdata, &devices_rdata_size);

	if (r < 0) {
		if (r == -ESRCH)
			DMERR("Userspace log server not found");
		else
			DMERR("Userspace log server failed to create log");
		goto out;
	}

	/* Since the region size does not change, get it now */
	rdata_size = sizeof(rdata);
	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
				 NULL, 0, (char *)&rdata, &rdata_size);

	if (r) {
		DMERR("Failed to get region size of dirty log");
		goto out;
	}

	lc->region_size = (uint32_t)rdata;
	lc->region_count = dm_sector_div_up(ti->len, lc->region_size);

	if (devices_rdata_size) {
		if (devices_rdata[devices_rdata_size - 1] != '\0') {
			DMERR("DM_ULOG_CTR device return string not properly terminated");
			r = -EINVAL;
			goto out;
		}
		r = dm_get_device(ti, devices_rdata,
				  dm_table_get_mode(ti->table), &lc->log_dev);
		if (r)
			DMERR("Failed to register %s with device-mapper",
			      devices_rdata);
	}

	if (lc->integrated_flush) {
		lc->dmlog_wq = alloc_workqueue("dmlogd", WQ_MEM_RECLAIM, 0);
		if (!lc->dmlog_wq) {
			DMERR("couldn't start dmlogd");
			r = -ENOMEM;
			goto out;
		}

		INIT_DELAYED_WORK(&lc->flush_log_work, do_flush);
		atomic_set(&lc->sched_flush, 0);
	}

out:
	kfree(devices_rdata);
	if (r) {
		kfree(lc);
		kfree(ctr_str);
	} else {
		lc->usr_argv_str = ctr_str;
		log->context = lc;
	}

	return r;
}

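/*
 * userspace_dtr
 *
 * Complete any scheduled integrated flush, tear down the workqueue,
 * tell the server to destroy its log, and release the kernel-side
 * resources taken in the constructor.
 */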
static void userspace_dtr(struct dm_dirty_log *log)
{
	struct log_c *lc = log->context;

	if (lc->integrated_flush) {
		/* flush workqueue */
		if (atomic_read(&lc->sched_flush))
			flush_delayed_work(&lc->flush_log_work);

		destroy_workqueue(lc->dmlog_wq);
	}

	(void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
				    NULL, 0, NULL, NULL);

	if (lc->log_dev)
		dm_put_device(lc->ti, lc->log_dev);

	kfree(lc->usr_argv_str);
	kfree(lc);

	return;
}

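/*
 * The suspend/resume hooks simply forward the state change to the
 * userspace server; postsuspend additionally completes any integrated
 * flush still pending, and resume resets in_sync_hint.
 */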
static int userspace_presuspend(struct dm_dirty_log *log)
{
	int r;
	struct log_c *lc = log->context;

	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
				 NULL, 0, NULL, NULL);

	return r;
}

static int userspace_postsuspend(struct dm_dirty_log *log)
{
	int r;
	struct log_c *lc = log->context;

	/*
	 * Run any scheduled flush now, rather than waiting for the timer.
	 */
	if (lc->integrated_flush && atomic_read(&lc->sched_flush))
		flush_delayed_work(&lc->flush_log_work);

	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
				 NULL, 0, NULL, NULL);

	return r;
}

static int userspace_resume(struct dm_dirty_log *log)
{
	int r;
	struct log_c *lc = log->context;

	lc->in_sync_hint = 0;
	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
				 NULL, 0, NULL, NULL);

	return r;
}

static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
{
	struct log_c *lc = log->context;

	return lc->region_size;
}

/*
 * userspace_is_clean
 *
 * Check whether a region is clean.  If there is any sort of
 * failure when consulting the server, we return not clean.
 *
 * Returns: 1 if clean, 0 otherwise
 */
static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
{
	int r;
	uint64_t region64 = (uint64_t)region;
	int64_t is_clean;
	size_t rdata_size;
	struct log_c *lc = log->context;

	rdata_size = sizeof(is_clean);
	r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
				 (char *)&region64, sizeof(region64),
				 (char *)&is_clean, &rdata_size);

	return (r) ? 0 : (int)is_clean;
}

/*
 * userspace_in_sync
 *
 * Check if the region is in-sync.  If there is any sort
 * of failure when consulting the server, we assume that
 * the region is not in sync.
 *
 * If 'can_block' is not set, -EWOULDBLOCK is returned immediately
 * rather than consulting the server.
 *
 * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK if !can_block
 */
static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
			     int can_block)
{
	int r;
	uint64_t region64 = region;
	int64_t in_sync;
	size_t rdata_size;
	struct log_c *lc = log->context;

	/*
	 * We can never respond directly - even if in_sync_hint is
	 * set.  This is because another machine could see a device
	 * failure and mark the region out-of-sync.  If we don't go
	 * to userspace to ask, we might think the region is in-sync
	 * and allow a read to pick up data that is stale.  (This is
	 * very unlikely if a device actually fails; but it is very
	 * likely if a connection to one device from one machine fails.)
	 *
	 * There still might be a problem if the mirror caches the region
	 * state as in-sync... but then this call would not be made.  So,
	 * that is a mirror problem.
	 */
	if (!can_block)
		return -EWOULDBLOCK;

	rdata_size = sizeof(in_sync);
	r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
				 (char *)&region64, sizeof(region64),
				 (char *)&in_sync, &rdata_size);
	return (r) ? 0 : (int)in_sync;
}

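/*
 * flush_one_by_one
 *
 * Fallback path: send each queued mark/clear request to the server
 * individually, stopping at the first failure.
 */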
static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
{
	int r = 0;
	struct flush_entry *fe;

	list_for_each_entry(fe, flush_list, list) {
		r = userspace_do_request(lc, lc->uuid, fe->type,
					 (char *)&fe->region,
					 sizeof(fe->region),
					 NULL, NULL);
		if (r)
			break;
	}

	return r;
}

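/*
 * flush_by_group
 *
 * Batch queued requests into groups of up to MAX_FLUSH_GROUP_COUNT
 * regions and send each group as a single message.  If
 * 'flush_with_payload' is set, the group is sent as the payload of a
 * DM_ULOG_FLUSH request (the integrated-flush case); otherwise the
 * queued request type (mark or clear) is used, falling back to
 * one-by-one sends if the group send fails.
 */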
static int flush_by_group(struct log_c *lc, struct list_head *flush_list,
			  int flush_with_payload)
{
	int r = 0;
	int count;
	uint32_t type = 0;
	struct flush_entry *fe, *tmp_fe;
	LIST_HEAD(tmp_list);
	uint64_t group[MAX_FLUSH_GROUP_COUNT];

	/*
	 * Group process the requests
	 */
	while (!list_empty(flush_list)) {
		count = 0;

		list_for_each_entry_safe(fe, tmp_fe, flush_list, list) {
			group[count] = fe->region;
			count++;

			list_move(&fe->list, &tmp_list);

			type = fe->type;
			if (count >= MAX_FLUSH_GROUP_COUNT)
				break;
		}

		if (flush_with_payload) {
			r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
						 (char *)(group),
						 count * sizeof(uint64_t),
						 NULL, NULL);
			/*
			 * Integrated flush failed.
			 */
			if (r)
				break;
		} else {
			r = userspace_do_request(lc, lc->uuid, type,
						 (char *)(group),
						 count * sizeof(uint64_t),
						 NULL, NULL);
			if (r) {
				/*
				 * Group send failed.  Attempt one-by-one.
				 */
				list_splice_init(&tmp_list, flush_list);
				r = flush_one_by_one(lc, flush_list);
				break;
			}
		}
	}

	/*
	 * Must collect the flush_entry structures that were successfully
	 * processed as a group so that they will be freed by the caller.
	 */
	list_splice_init(&tmp_list, flush_list);

	return r;
}

/*
 * userspace_flush
 *
 * This function is ok to block.
 * The flush happens in two stages.  First, it sends all
 * clear/mark requests that are on the list.  Then it
 * tells the server to commit them.  This gives the
 * server a chance to optimise the commit, instead of
 * doing it for every request.
 *
 * Additionally, we could implement another thread that
 * sends the requests up to the server - reducing the
 * load on flush.  Then the flush would have less in
 * the list and be responsible for the finishing commit.
 *
 * Returns: 0 on success, < 0 on failure
 */
static int userspace_flush(struct dm_dirty_log *log)
{
	int r = 0;
	unsigned long flags;
	struct log_c *lc = log->context;
	LIST_HEAD(mark_list);
	LIST_HEAD(clear_list);
	int mark_list_is_empty;
	int clear_list_is_empty;
	struct flush_entry *fe, *tmp_fe;

	spin_lock_irqsave(&lc->flush_lock, flags);
	list_splice_init(&lc->mark_list, &mark_list);
	list_splice_init(&lc->clear_list, &clear_list);
	spin_unlock_irqrestore(&lc->flush_lock, flags);

	mark_list_is_empty = list_empty(&mark_list);
	clear_list_is_empty = list_empty(&clear_list);

	if (mark_list_is_empty && clear_list_is_empty)
		return 0;

	r = flush_by_group(lc, &clear_list, 0);
	if (r)
		goto out;

	if (!lc->integrated_flush) {
		r = flush_by_group(lc, &mark_list, 0);
		if (r)
			goto out;
		r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
					 NULL, 0, NULL, NULL);
		goto out;
	}

	/*
	 * Send integrated flush request with mark_list as payload.
	 */
	r = flush_by_group(lc, &mark_list, 1);
	if (r)
		goto out;

	if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) {
		/*
		 * When there are only clear region requests,
		 * we schedule a flush in the future.
		 */
		queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ);
		atomic_set(&lc->sched_flush, 1);
	} else {
		/*
		 * Cancel pending flush because we
		 * have already flushed in mark_region.
		 */
		cancel_delayed_work(&lc->flush_log_work);
		atomic_set(&lc->sched_flush, 0);
	}

out:
	/*
	 * We can safely remove these entries, even after failure.
	 * Calling code will receive an error and will know that
	 * the log facility has failed.
	 */
	list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) {
		list_del(&fe->list);
		mempool_free(fe, flush_entry_pool);
	}
	list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) {
		list_del(&fe->list);
		mempool_free(fe, flush_entry_pool);
	}

	if (r)
		dm_table_event(lc->ti->table);

	return r;
}

/*
 * userspace_mark_region
 *
 * This function should avoid blocking unless absolutely required.
 * (Memory allocation is valid for blocking.)
 */
static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
{
	unsigned long flags;
	struct log_c *lc = log->context;
	struct flush_entry *fe;

	/* Wait for an allocation, but _never_ fail */
	fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
	BUG_ON(!fe);

	spin_lock_irqsave(&lc->flush_lock, flags);
	fe->type = DM_ULOG_MARK_REGION;
	fe->region = region;
	list_add(&fe->list, &lc->mark_list);
	spin_unlock_irqrestore(&lc->flush_lock, flags);

	return;
}

/*
 * userspace_clear_region
 *
 * This function must not block.
 * So, the alloc can't block.  In the worst case, it is ok to
 * fail.  It would simply mean we can't clear the region.
 * Does nothing to current sync context, but does mean
 * the region will be re-sync'ed on a reload of the mirror
 * even though it is in-sync.
 */
static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
{
	unsigned long flags;
	struct log_c *lc = log->context;
	struct flush_entry *fe;

	/*
	 * If we fail to allocate, we skip the clearing of
	 * the region.  This doesn't hurt us in any way, except
	 * to cause the region to be resync'ed when the
	 * device is activated next time.
	 */
	fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
	if (!fe) {
		DMERR("Failed to allocate memory to clear region.");
		return;
	}

	spin_lock_irqsave(&lc->flush_lock, flags);
	fe->type = DM_ULOG_CLEAR_REGION;
	fe->region = region;
	list_add(&fe->list, &lc->clear_list);
	spin_unlock_irqrestore(&lc->flush_lock, flags);

	return;
}

/*
 * userspace_get_resync_work
 *
 * Get a region that needs recovery.  It is valid to return
 * an error for this function.
 *
 * Returns: 1 if region filled, 0 if no work, <0 on error
 */
static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
{
	int r;
	size_t rdata_size;
	struct log_c *lc = log->context;
	struct {
		int64_t i; /* 64-bit for mixed-arch compatibility */
		region_t r;
	} pkg;

	if (lc->in_sync_hint >= lc->region_count)
		return 0;

	rdata_size = sizeof(pkg);
	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
				 NULL, 0, (char *)&pkg, &rdata_size);

	*region = pkg.r;
	return (r) ? r : (int)pkg.i;
}

/*
 * userspace_set_region_sync
 *
 * Set the sync status of a given region.  This function
 * must not fail.
 */
static void userspace_set_region_sync(struct dm_dirty_log *log,
				      region_t region, int in_sync)
{
	int r;
	struct log_c *lc = log->context;
	struct {
		region_t r;
		int64_t i;
	} pkg;

	pkg.r = region;
	pkg.i = (int64_t)in_sync;

	r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
				 (char *)&pkg, sizeof(pkg), NULL, NULL);

	/*
	 * It would be nice to be able to report failures.
	 * However, it is easy enough to detect and resolve.
	 */
	return;
}

/*
 * userspace_get_sync_count
 *
 * If there is any sort of failure when consulting the server,
 * we assume that the sync count is zero.
 *
 * Returns: sync count on success, 0 on failure
 */
static region_t userspace_get_sync_count(struct dm_dirty_log *log)
{
	int r;
	size_t rdata_size;
	uint64_t sync_count;
	struct log_c *lc = log->context;

	rdata_size = sizeof(sync_count);
	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
				 NULL, 0, (char *)&sync_count, &rdata_size);

	if (r)
		return 0;

	if (sync_count >= lc->region_count)
		lc->in_sync_hint = lc->region_count;

	return (region_t)sync_count;
}

/*
 * userspace_status
 *
 * Returns: amount of space consumed
 */
static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
			    char *result, unsigned maxlen)
{
	int r = 0;
	char *table_args;
	size_t sz = (size_t)maxlen;
	struct log_c *lc = log->context;

	switch (status_type) {
	case STATUSTYPE_INFO:
		r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
					 NULL, 0, result, &sz);

		if (r) {
			sz = 0;
			DMEMIT("%s 1 COM_FAILURE", log->type->name);
		}
		break;
	case STATUSTYPE_TABLE:
		sz = 0;
		table_args = strchr(lc->usr_argv_str, ' ');
		BUG_ON(!table_args); /* There will always be a ' ' */
		table_args++;

		DMEMIT("%s %u %s ", log->type->name, lc->usr_argc, lc->uuid);
		if (lc->integrated_flush)
			DMEMIT("integrated_flush ");
		DMEMIT("%s ", table_args);
		break;
	}
	return (r) ? 0 : (int)sz;
}

/*
 * userspace_is_remote_recovering
 *
 * Queries to the server are throttled to at most one per HZ/4; in
 * between, regions below in_sync_hint are reported as not recovering
 * and everything else is conservatively reported as recovering.
 *
 * Returns: 1 if region recovering, 0 otherwise
 */
static int userspace_is_remote_recovering(struct dm_dirty_log *log,
					  region_t region)
{
	int r;
	uint64_t region64 = region;
	struct log_c *lc = log->context;
	static unsigned long long limit;
	struct {
		int64_t is_recovering;
		uint64_t in_sync_hint;
	} pkg;
	size_t rdata_size = sizeof(pkg);

	/*
	 * Once the mirror has been reported to be in-sync,
	 * it will never again ask for recovery work.  So,
	 * we can safely say there is not a remote machine
	 * recovering if the device is in-sync.  (in_sync_hint
	 * must be reset at resume time.)
	 */
	if (region < lc->in_sync_hint)
		return 0;
	else if (jiffies < limit)
		return 1;

	limit = jiffies + (HZ / 4);
	r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
				 (char *)&region64, sizeof(region64),
				 (char *)&pkg, &rdata_size);
	if (r)
		return 1;

	lc->in_sync_hint = pkg.in_sync_hint;

	return (int)pkg.is_recovering;
}

static struct dm_dirty_log_type _userspace_type = {
	.name = "userspace",
	.module = THIS_MODULE,
	.ctr = userspace_ctr,
	.dtr = userspace_dtr,
	.presuspend = userspace_presuspend,
	.postsuspend = userspace_postsuspend,
	.resume = userspace_resume,
	.get_region_size = userspace_get_region_size,
	.is_clean = userspace_is_clean,
	.in_sync = userspace_in_sync,
	.flush = userspace_flush,
	.mark_region = userspace_mark_region,
	.clear_region = userspace_clear_region,
	.get_resync_work = userspace_get_resync_work,
	.set_region_sync = userspace_set_region_sync,
	.get_sync_count = userspace_get_sync_count,
	.status = userspace_status,
	.is_remote_recovering = userspace_is_remote_recovering,
};

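/*
 * Module init: create the mempool used for mark/clear flush entries,
 * bring up the userspace transfer layer, then register the log type;
 * each step is unwound in reverse order on failure.
 */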
static int __init userspace_dirty_log_init(void)
{
	int r = 0;

	flush_entry_pool = mempool_create(100, flush_entry_alloc,
					  flush_entry_free, NULL);

	if (!flush_entry_pool) {
		DMWARN("Unable to create flush_entry_pool: No memory.");
		return -ENOMEM;
	}

	r = dm_ulog_tfr_init();
	if (r) {
		DMWARN("Unable to initialize userspace log communications");
		mempool_destroy(flush_entry_pool);
		return r;
	}

	r = dm_dirty_log_type_register(&_userspace_type);
	if (r) {
		DMWARN("Couldn't register userspace dirty log type");
		dm_ulog_tfr_exit();
		mempool_destroy(flush_entry_pool);
		return r;
	}

	DMINFO("version " DM_LOG_USERSPACE_VSN " loaded");
	return 0;
}

static void __exit userspace_dirty_log_exit(void)
{
	dm_dirty_log_type_unregister(&_userspace_type);
	dm_ulog_tfr_exit();
	mempool_destroy(flush_entry_pool);
	DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded");
	return;
}

module_init(userspace_dirty_log_init);
module_exit(userspace_dirty_log_exit);

MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");