dm-log-userspace-base.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931
  1. /*
  2. * Copyright (C) 2006-2009 Red Hat, Inc.
  3. *
  4. * This file is released under the LGPL.
  5. */
  6. #include <linux/bio.h>
  7. #include <linux/slab.h>
  8. #include <linux/jiffies.h>
  9. #include <linux/dm-dirty-log.h>
  10. #include <linux/device-mapper.h>
  11. #include <linux/dm-log-userspace.h>
  12. #include <linux/module.h>
  13. #include <linux/workqueue.h>
  14. #include "dm-log-userspace-transfer.h"
  15. #define DM_LOG_USERSPACE_VSN "1.3.0"
  16. struct flush_entry {
  17. int type;
  18. region_t region;
  19. struct list_head list;
  20. };
/*
 * This limit on the number of mark and clear requests is, to a degree,
 * arbitrary. However, there is some basis for the choice in the limits
 * imposed on the size of the data payload by dm-log-userspace-transfer.c:
 * dm_consult_userspace().
 */
  27. #define MAX_FLUSH_GROUP_COUNT 32
  28. struct log_c {
  29. struct dm_target *ti;
  30. struct dm_dev *log_dev;
  31. uint32_t region_size;
  32. region_t region_count;
  33. uint64_t luid;
  34. char uuid[DM_UUID_LEN];
  35. char *usr_argv_str;
  36. uint32_t usr_argc;
  37. /*
  38. * in_sync_hint gets set when doing is_remote_recovering. It
  39. * represents the first region that needs recovery. IOW, the
  40. * first zero bit of sync_bits. This can be useful for to limit
  41. * traffic for calls like is_remote_recovering and get_resync_work,
  42. * but be take care in its use for anything else.
  43. */
  44. uint64_t in_sync_hint;
  45. /*
  46. * Mark and clear requests are held until a flush is issued
  47. * so that we can group, and thereby limit, the amount of
  48. * network traffic between kernel and userspace. The 'flush_lock'
  49. * is used to protect these lists.
  50. */
  51. spinlock_t flush_lock;
  52. struct list_head mark_list;
  53. struct list_head clear_list;
  54. /*
  55. * Workqueue for flush of clear region requests.
  56. */
  57. struct workqueue_struct *dmlog_wq;
  58. struct delayed_work flush_log_work;
  59. atomic_t sched_flush;
  60. /*
  61. * Combine userspace flush and mark requests for efficiency.
  62. */
  63. uint32_t integrated_flush;
  64. };
  65. static mempool_t *flush_entry_pool;
  66. static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
  67. {
  68. return kmalloc(sizeof(struct flush_entry), gfp_mask);
  69. }
  70. static void flush_entry_free(void *element, void *pool_data)
  71. {
  72. kfree(element);
  73. }
  74. static int userspace_do_request(struct log_c *lc, const char *uuid,
  75. int request_type, char *data, size_t data_size,
  76. char *rdata, size_t *rdata_size)
  77. {
  78. int r;
  79. /*
  80. * If the server isn't there, -ESRCH is returned,
  81. * and we must keep trying until the server is
  82. * restored.
  83. */
  84. retry:
  85. r = dm_consult_userspace(uuid, lc->luid, request_type, data,
  86. data_size, rdata, rdata_size);
  87. if (r != -ESRCH)
  88. return r;
  89. DMERR(" Userspace log server not found.");
  90. while (1) {
  91. set_current_state(TASK_INTERRUPTIBLE);
  92. schedule_timeout(2*HZ);
  93. DMWARN("Attempting to contact userspace log server...");
  94. r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
  95. lc->usr_argv_str,
  96. strlen(lc->usr_argv_str) + 1,
  97. NULL, NULL);
  98. if (!r)
  99. break;
  100. }
  101. DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
  102. r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
  103. 0, NULL, NULL);
  104. if (!r)
  105. goto retry;
  106. DMERR("Error trying to resume userspace log: %d", r);
  107. return -ESRCH;
  108. }
  109. static int build_constructor_string(struct dm_target *ti,
  110. unsigned argc, char **argv,
  111. char **ctr_str)
  112. {
  113. int i, str_size;
  114. char *str = NULL;
  115. *ctr_str = NULL;
  116. /*
  117. * Determine overall size of the string.
  118. */
  119. for (i = 0, str_size = 0; i < argc; i++)
  120. str_size += strlen(argv[i]) + 1; /* +1 for space between args */
  121. str_size += 20; /* Max number of chars in a printed u64 number */
  122. str = kzalloc(str_size, GFP_KERNEL);
  123. if (!str) {
  124. DMWARN("Unable to allocate memory for constructor string");
  125. return -ENOMEM;
  126. }
  127. str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
  128. for (i = 0; i < argc; i++)
  129. str_size += sprintf(str + str_size, " %s", argv[i]);
  130. *ctr_str = str;
  131. return str_size;
  132. }
  133. static void do_flush(struct work_struct *work)
  134. {
  135. int r;
  136. struct log_c *lc = container_of(work, struct log_c, flush_log_work.work);
  137. atomic_set(&lc->sched_flush, 0);
  138. r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL);
  139. if (r)
  140. dm_table_event(lc->ti->table);
  141. }
  142. /*
  143. * userspace_ctr
  144. *
  145. * argv contains:
  146. * <UUID> [integrated_flush] <other args>
  147. * Where 'other args' are the userspace implementation-specific log
  148. * arguments.
  149. *
  150. * Example:
  151. * <UUID> [integrated_flush] clustered-disk <arg count> <log dev>
  152. * <region_size> [[no]sync]
  153. *
  154. * This module strips off the <UUID> and uses it for identification
  155. * purposes when communicating with userspace about a log.
  156. *
  157. * If integrated_flush is defined, the kernel combines flush
  158. * and mark requests.
  159. *
  160. * The rest of the line, beginning with 'clustered-disk', is passed
  161. * to the userspace ctr function.
  162. */
  163. static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
  164. unsigned argc, char **argv)
  165. {
  166. int r = 0;
  167. int str_size;
  168. char *ctr_str = NULL;
  169. struct log_c *lc = NULL;
  170. uint64_t rdata;
  171. size_t rdata_size = sizeof(rdata);
  172. char *devices_rdata = NULL;
  173. size_t devices_rdata_size = DM_NAME_LEN;
  174. if (argc < 3) {
  175. DMWARN("Too few arguments to userspace dirty log");
  176. return -EINVAL;
  177. }
  178. lc = kzalloc(sizeof(*lc), GFP_KERNEL);
  179. if (!lc) {
  180. DMWARN("Unable to allocate userspace log context.");
  181. return -ENOMEM;
  182. }
  183. /* The ptr value is sufficient for local unique id */
  184. lc->luid = (unsigned long)lc;
  185. lc->ti = ti;
  186. if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
  187. DMWARN("UUID argument too long.");
  188. kfree(lc);
  189. return -EINVAL;
  190. }
  191. lc->usr_argc = argc;
  192. strncpy(lc->uuid, argv[0], DM_UUID_LEN);
  193. argc--;
  194. argv++;
  195. spin_lock_init(&lc->flush_lock);
  196. INIT_LIST_HEAD(&lc->mark_list);
  197. INIT_LIST_HEAD(&lc->clear_list);
  198. if (!strcasecmp(argv[0], "integrated_flush")) {
  199. lc->integrated_flush = 1;
  200. argc--;
  201. argv++;
  202. }
  203. str_size = build_constructor_string(ti, argc, argv, &ctr_str);
  204. if (str_size < 0) {
  205. kfree(lc);
  206. return str_size;
  207. }
  208. devices_rdata = kzalloc(devices_rdata_size, GFP_KERNEL);
  209. if (!devices_rdata) {
  210. DMERR("Failed to allocate memory for device information");
  211. r = -ENOMEM;
  212. goto out;
  213. }
  214. /*
  215. * Send table string and get back any opened device.
  216. */
  217. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
  218. ctr_str, str_size,
  219. devices_rdata, &devices_rdata_size);
  220. if (r < 0) {
  221. if (r == -ESRCH)
  222. DMERR("Userspace log server not found");
  223. else
  224. DMERR("Userspace log server failed to create log");
  225. goto out;
  226. }
  227. /* Since the region size does not change, get it now */
  228. rdata_size = sizeof(rdata);
  229. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
  230. NULL, 0, (char *)&rdata, &rdata_size);
  231. if (r) {
  232. DMERR("Failed to get region size of dirty log");
  233. goto out;
  234. }
  235. lc->region_size = (uint32_t)rdata;
  236. lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
  237. if (devices_rdata_size) {
  238. if (devices_rdata[devices_rdata_size - 1] != '\0') {
  239. DMERR("DM_ULOG_CTR device return string not properly terminated");
  240. r = -EINVAL;
  241. goto out;
  242. }
  243. r = dm_get_device(ti, devices_rdata,
  244. dm_table_get_mode(ti->table), &lc->log_dev);
  245. if (r)
  246. DMERR("Failed to register %s with device-mapper",
  247. devices_rdata);
  248. }
  249. if (lc->integrated_flush) {
  250. lc->dmlog_wq = alloc_workqueue("dmlogd", WQ_MEM_RECLAIM, 0);
  251. if (!lc->dmlog_wq) {
  252. DMERR("couldn't start dmlogd");
  253. r = -ENOMEM;
  254. goto out;
  255. }
  256. INIT_DELAYED_WORK(&lc->flush_log_work, do_flush);
  257. atomic_set(&lc->sched_flush, 0);
  258. }
  259. out:
  260. kfree(devices_rdata);
  261. if (r) {
  262. kfree(lc);
  263. kfree(ctr_str);
  264. } else {
  265. lc->usr_argv_str = ctr_str;
  266. log->context = lc;
  267. }
  268. return r;
  269. }
  270. static void userspace_dtr(struct dm_dirty_log *log)
  271. {
  272. struct log_c *lc = log->context;
  273. if (lc->integrated_flush) {
  274. /* flush workqueue */
  275. if (atomic_read(&lc->sched_flush))
  276. flush_delayed_work(&lc->flush_log_work);
  277. destroy_workqueue(lc->dmlog_wq);
  278. }
  279. (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
  280. NULL, 0, NULL, NULL);
  281. if (lc->log_dev)
  282. dm_put_device(lc->ti, lc->log_dev);
  283. kfree(lc->usr_argv_str);
  284. kfree(lc);
  285. return;
  286. }
  287. static int userspace_presuspend(struct dm_dirty_log *log)
  288. {
  289. int r;
  290. struct log_c *lc = log->context;
  291. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
  292. NULL, 0, NULL, NULL);
  293. return r;
  294. }
  295. static int userspace_postsuspend(struct dm_dirty_log *log)
  296. {
  297. int r;
  298. struct log_c *lc = log->context;
  299. /*
  300. * Run planned flush earlier.
  301. */
  302. if (lc->integrated_flush && atomic_read(&lc->sched_flush))
  303. flush_delayed_work(&lc->flush_log_work);
  304. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
  305. NULL, 0, NULL, NULL);
  306. return r;
  307. }
  308. static int userspace_resume(struct dm_dirty_log *log)
  309. {
  310. int r;
  311. struct log_c *lc = log->context;
  312. lc->in_sync_hint = 0;
  313. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
  314. NULL, 0, NULL, NULL);
  315. return r;
  316. }
  317. static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
  318. {
  319. struct log_c *lc = log->context;
  320. return lc->region_size;
  321. }
  322. /*
  323. * userspace_is_clean
  324. *
  325. * Check whether a region is clean. If there is any sort of
  326. * failure when consulting the server, we return not clean.
  327. *
  328. * Returns: 1 if clean, 0 otherwise
  329. */
  330. static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
  331. {
  332. int r;
  333. uint64_t region64 = (uint64_t)region;
  334. int64_t is_clean;
  335. size_t rdata_size;
  336. struct log_c *lc = log->context;
  337. rdata_size = sizeof(is_clean);
  338. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
  339. (char *)&region64, sizeof(region64),
  340. (char *)&is_clean, &rdata_size);
  341. return (r) ? 0 : (int)is_clean;
  342. }
  343. /*
  344. * userspace_in_sync
  345. *
  346. * Check if the region is in-sync. If there is any sort
  347. * of failure when consulting the server, we assume that
  348. * the region is not in sync.
  349. *
  350. * If 'can_block' is set, return immediately
  351. *
  352. * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
  353. */
  354. static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
  355. int can_block)
  356. {
  357. int r;
  358. uint64_t region64 = region;
  359. int64_t in_sync;
  360. size_t rdata_size;
  361. struct log_c *lc = log->context;
  362. /*
  363. * We can never respond directly - even if in_sync_hint is
  364. * set. This is because another machine could see a device
  365. * failure and mark the region out-of-sync. If we don't go
  366. * to userspace to ask, we might think the region is in-sync
  367. * and allow a read to pick up data that is stale. (This is
  368. * very unlikely if a device actually fails; but it is very
  369. * likely if a connection to one device from one machine fails.)
  370. *
  371. * There still might be a problem if the mirror caches the region
  372. * state as in-sync... but then this call would not be made. So,
  373. * that is a mirror problem.
  374. */
  375. if (!can_block)
  376. return -EWOULDBLOCK;
  377. rdata_size = sizeof(in_sync);
  378. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
  379. (char *)&region64, sizeof(region64),
  380. (char *)&in_sync, &rdata_size);
  381. return (r) ? 0 : (int)in_sync;
  382. }
  383. static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
  384. {
  385. int r = 0;
  386. struct flush_entry *fe;
  387. list_for_each_entry(fe, flush_list, list) {
  388. r = userspace_do_request(lc, lc->uuid, fe->type,
  389. (char *)&fe->region,
  390. sizeof(fe->region),
  391. NULL, NULL);
  392. if (r)
  393. break;
  394. }
  395. return r;
  396. }
  397. static int flush_by_group(struct log_c *lc, struct list_head *flush_list,
  398. int flush_with_payload)
  399. {
  400. int r = 0;
  401. int count;
  402. uint32_t type = 0;
  403. struct flush_entry *fe, *tmp_fe;
  404. LIST_HEAD(tmp_list);
  405. uint64_t group[MAX_FLUSH_GROUP_COUNT];
  406. /*
  407. * Group process the requests
  408. */
  409. while (!list_empty(flush_list)) {
  410. count = 0;
  411. list_for_each_entry_safe(fe, tmp_fe, flush_list, list) {
  412. group[count] = fe->region;
  413. count++;
  414. list_move(&fe->list, &tmp_list);
  415. type = fe->type;
  416. if (count >= MAX_FLUSH_GROUP_COUNT)
  417. break;
  418. }
  419. if (flush_with_payload) {
  420. r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
  421. (char *)(group),
  422. count * sizeof(uint64_t),
  423. NULL, NULL);
  424. /*
  425. * Integrated flush failed.
  426. */
  427. if (r)
  428. break;
  429. } else {
  430. r = userspace_do_request(lc, lc->uuid, type,
  431. (char *)(group),
  432. count * sizeof(uint64_t),
  433. NULL, NULL);
  434. if (r) {
  435. /*
  436. * Group send failed. Attempt one-by-one.
  437. */
  438. list_splice_init(&tmp_list, flush_list);
  439. r = flush_one_by_one(lc, flush_list);
  440. break;
  441. }
  442. }
  443. }
  444. /*
  445. * Must collect flush_entrys that were successfully processed
  446. * as a group so that they will be free'd by the caller.
  447. */
  448. list_splice_init(&tmp_list, flush_list);
  449. return r;
  450. }
  451. /*
  452. * userspace_flush
  453. *
  454. * This function is ok to block.
  455. * The flush happens in two stages. First, it sends all
  456. * clear/mark requests that are on the list. Then it
  457. * tells the server to commit them. This gives the
  458. * server a chance to optimise the commit, instead of
  459. * doing it for every request.
  460. *
  461. * Additionally, we could implement another thread that
  462. * sends the requests up to the server - reducing the
  463. * load on flush. Then the flush would have less in
  464. * the list and be responsible for the finishing commit.
  465. *
  466. * Returns: 0 on success, < 0 on failure
  467. */
  468. static int userspace_flush(struct dm_dirty_log *log)
  469. {
  470. int r = 0;
  471. unsigned long flags;
  472. struct log_c *lc = log->context;
  473. LIST_HEAD(mark_list);
  474. LIST_HEAD(clear_list);
  475. int mark_list_is_empty;
  476. int clear_list_is_empty;
  477. struct flush_entry *fe, *tmp_fe;
  478. spin_lock_irqsave(&lc->flush_lock, flags);
  479. list_splice_init(&lc->mark_list, &mark_list);
  480. list_splice_init(&lc->clear_list, &clear_list);
  481. spin_unlock_irqrestore(&lc->flush_lock, flags);
  482. mark_list_is_empty = list_empty(&mark_list);
  483. clear_list_is_empty = list_empty(&clear_list);
  484. if (mark_list_is_empty && clear_list_is_empty)
  485. return 0;
  486. r = flush_by_group(lc, &clear_list, 0);
  487. if (r)
  488. goto out;
  489. if (!lc->integrated_flush) {
  490. r = flush_by_group(lc, &mark_list, 0);
  491. if (r)
  492. goto out;
  493. r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
  494. NULL, 0, NULL, NULL);
  495. goto out;
  496. }
  497. /*
  498. * Send integrated flush request with mark_list as payload.
  499. */
  500. r = flush_by_group(lc, &mark_list, 1);
  501. if (r)
  502. goto out;
  503. if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) {
  504. /*
  505. * When there are only clear region requests,
  506. * we schedule a flush in the future.
  507. */
  508. queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ);
  509. atomic_set(&lc->sched_flush, 1);
  510. } else {
  511. /*
  512. * Cancel pending flush because we
  513. * have already flushed in mark_region.
  514. */
  515. cancel_delayed_work(&lc->flush_log_work);
  516. atomic_set(&lc->sched_flush, 0);
  517. }
  518. out:
  519. /*
  520. * We can safely remove these entries, even after failure.
  521. * Calling code will receive an error and will know that
  522. * the log facility has failed.
  523. */
  524. list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) {
  525. list_del(&fe->list);
  526. mempool_free(fe, flush_entry_pool);
  527. }
  528. list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) {
  529. list_del(&fe->list);
  530. mempool_free(fe, flush_entry_pool);
  531. }
  532. if (r)
  533. dm_table_event(lc->ti->table);
  534. return r;
  535. }
  536. /*
  537. * userspace_mark_region
  538. *
  539. * This function should avoid blocking unless absolutely required.
  540. * (Memory allocation is valid for blocking.)
  541. */
  542. static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
  543. {
  544. unsigned long flags;
  545. struct log_c *lc = log->context;
  546. struct flush_entry *fe;
  547. /* Wait for an allocation, but _never_ fail */
  548. fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
  549. BUG_ON(!fe);
  550. spin_lock_irqsave(&lc->flush_lock, flags);
  551. fe->type = DM_ULOG_MARK_REGION;
  552. fe->region = region;
  553. list_add(&fe->list, &lc->mark_list);
  554. spin_unlock_irqrestore(&lc->flush_lock, flags);
  555. return;
  556. }
  557. /*
  558. * userspace_clear_region
  559. *
  560. * This function must not block.
  561. * So, the alloc can't block. In the worst case, it is ok to
  562. * fail. It would simply mean we can't clear the region.
  563. * Does nothing to current sync context, but does mean
  564. * the region will be re-sync'ed on a reload of the mirror
  565. * even though it is in-sync.
  566. */
  567. static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
  568. {
  569. unsigned long flags;
  570. struct log_c *lc = log->context;
  571. struct flush_entry *fe;
  572. /*
  573. * If we fail to allocate, we skip the clearing of
  574. * the region. This doesn't hurt us in any way, except
  575. * to cause the region to be resync'ed when the
  576. * device is activated next time.
  577. */
  578. fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
  579. if (!fe) {
  580. DMERR("Failed to allocate memory to clear region.");
  581. return;
  582. }
  583. spin_lock_irqsave(&lc->flush_lock, flags);
  584. fe->type = DM_ULOG_CLEAR_REGION;
  585. fe->region = region;
  586. list_add(&fe->list, &lc->clear_list);
  587. spin_unlock_irqrestore(&lc->flush_lock, flags);
  588. return;
  589. }
  590. /*
  591. * userspace_get_resync_work
  592. *
  593. * Get a region that needs recovery. It is valid to return
  594. * an error for this function.
  595. *
  596. * Returns: 1 if region filled, 0 if no work, <0 on error
  597. */
  598. static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
  599. {
  600. int r;
  601. size_t rdata_size;
  602. struct log_c *lc = log->context;
  603. struct {
  604. int64_t i; /* 64-bit for mix arch compatibility */
  605. region_t r;
  606. } pkg;
  607. if (lc->in_sync_hint >= lc->region_count)
  608. return 0;
  609. rdata_size = sizeof(pkg);
  610. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
  611. NULL, 0, (char *)&pkg, &rdata_size);
  612. *region = pkg.r;
  613. return (r) ? r : (int)pkg.i;
  614. }
  615. /*
  616. * userspace_set_region_sync
  617. *
  618. * Set the sync status of a given region. This function
  619. * must not fail.
  620. */
  621. static void userspace_set_region_sync(struct dm_dirty_log *log,
  622. region_t region, int in_sync)
  623. {
  624. int r;
  625. struct log_c *lc = log->context;
  626. struct {
  627. region_t r;
  628. int64_t i;
  629. } pkg;
  630. pkg.r = region;
  631. pkg.i = (int64_t)in_sync;
  632. r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
  633. (char *)&pkg, sizeof(pkg), NULL, NULL);
  634. /*
  635. * It would be nice to be able to report failures.
  636. * However, it is easy emough to detect and resolve.
  637. */
  638. return;
  639. }
  640. /*
  641. * userspace_get_sync_count
  642. *
  643. * If there is any sort of failure when consulting the server,
  644. * we assume that the sync count is zero.
  645. *
  646. * Returns: sync count on success, 0 on failure
  647. */
  648. static region_t userspace_get_sync_count(struct dm_dirty_log *log)
  649. {
  650. int r;
  651. size_t rdata_size;
  652. uint64_t sync_count;
  653. struct log_c *lc = log->context;
  654. rdata_size = sizeof(sync_count);
  655. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
  656. NULL, 0, (char *)&sync_count, &rdata_size);
  657. if (r)
  658. return 0;
  659. if (sync_count >= lc->region_count)
  660. lc->in_sync_hint = lc->region_count;
  661. return (region_t)sync_count;
  662. }
  663. /*
  664. * userspace_status
  665. *
  666. * Returns: amount of space consumed
  667. */
  668. static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
  669. char *result, unsigned maxlen)
  670. {
  671. int r = 0;
  672. char *table_args;
  673. size_t sz = (size_t)maxlen;
  674. struct log_c *lc = log->context;
  675. switch (status_type) {
  676. case STATUSTYPE_INFO:
  677. r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
  678. NULL, 0, result, &sz);
  679. if (r) {
  680. sz = 0;
  681. DMEMIT("%s 1 COM_FAILURE", log->type->name);
  682. }
  683. break;
  684. case STATUSTYPE_TABLE:
  685. sz = 0;
  686. table_args = strchr(lc->usr_argv_str, ' ');
  687. BUG_ON(!table_args); /* There will always be a ' ' */
  688. table_args++;
  689. DMEMIT("%s %u %s ", log->type->name, lc->usr_argc, lc->uuid);
  690. if (lc->integrated_flush)
  691. DMEMIT("integrated_flush ");
  692. DMEMIT("%s ", table_args);
  693. break;
  694. }
  695. return (r) ? 0 : (int)sz;
  696. }
  697. /*
  698. * userspace_is_remote_recovering
  699. *
  700. * Returns: 1 if region recovering, 0 otherwise
  701. */
  702. static int userspace_is_remote_recovering(struct dm_dirty_log *log,
  703. region_t region)
  704. {
  705. int r;
  706. uint64_t region64 = region;
  707. struct log_c *lc = log->context;
  708. static unsigned long limit;
  709. struct {
  710. int64_t is_recovering;
  711. uint64_t in_sync_hint;
  712. } pkg;
  713. size_t rdata_size = sizeof(pkg);
  714. /*
  715. * Once the mirror has been reported to be in-sync,
  716. * it will never again ask for recovery work. So,
  717. * we can safely say there is not a remote machine
  718. * recovering if the device is in-sync. (in_sync_hint
  719. * must be reset at resume time.)
  720. */
  721. if (region < lc->in_sync_hint)
  722. return 0;
  723. else if (time_after(limit, jiffies))
  724. return 1;
  725. limit = jiffies + (HZ / 4);
  726. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
  727. (char *)&region64, sizeof(region64),
  728. (char *)&pkg, &rdata_size);
  729. if (r)
  730. return 1;
  731. lc->in_sync_hint = pkg.in_sync_hint;
  732. return (int)pkg.is_recovering;
  733. }
  734. static struct dm_dirty_log_type _userspace_type = {
  735. .name = "userspace",
  736. .module = THIS_MODULE,
  737. .ctr = userspace_ctr,
  738. .dtr = userspace_dtr,
  739. .presuspend = userspace_presuspend,
  740. .postsuspend = userspace_postsuspend,
  741. .resume = userspace_resume,
  742. .get_region_size = userspace_get_region_size,
  743. .is_clean = userspace_is_clean,
  744. .in_sync = userspace_in_sync,
  745. .flush = userspace_flush,
  746. .mark_region = userspace_mark_region,
  747. .clear_region = userspace_clear_region,
  748. .get_resync_work = userspace_get_resync_work,
  749. .set_region_sync = userspace_set_region_sync,
  750. .get_sync_count = userspace_get_sync_count,
  751. .status = userspace_status,
  752. .is_remote_recovering = userspace_is_remote_recovering,
  753. };
  754. static int __init userspace_dirty_log_init(void)
  755. {
  756. int r = 0;
  757. flush_entry_pool = mempool_create(100, flush_entry_alloc,
  758. flush_entry_free, NULL);
  759. if (!flush_entry_pool) {
  760. DMWARN("Unable to create flush_entry_pool: No memory.");
  761. return -ENOMEM;
  762. }
  763. r = dm_ulog_tfr_init();
  764. if (r) {
  765. DMWARN("Unable to initialize userspace log communications");
  766. mempool_destroy(flush_entry_pool);
  767. return r;
  768. }
  769. r = dm_dirty_log_type_register(&_userspace_type);
  770. if (r) {
  771. DMWARN("Couldn't register userspace dirty log type");
  772. dm_ulog_tfr_exit();
  773. mempool_destroy(flush_entry_pool);
  774. return r;
  775. }
  776. DMINFO("version " DM_LOG_USERSPACE_VSN " loaded");
  777. return 0;
  778. }
  779. static void __exit userspace_dirty_log_exit(void)
  780. {
  781. dm_dirty_log_type_unregister(&_userspace_type);
  782. dm_ulog_tfr_exit();
  783. mempool_destroy(flush_entry_pool);
  784. DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded");
  785. return;
  786. }
  787. module_init(userspace_dirty_log_init);
  788. module_exit(userspace_dirty_log_exit);
  789. MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
  790. MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
  791. MODULE_LICENSE("GPL");