ubd_kern.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555
  1. /*
  2. * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
  3. * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
  4. * Licensed under the GPL
  5. */
  6. /* 2001-09-28...2002-04-17
  7. * Partition stuff by James_McMechan@hotmail.com
  8. * old style ubd by setting UBD_SHIFT to 0
  9. * 2002-09-27...2002-10-18 massive tinkering for 2.5
  10. * partitions have changed in 2.5
  11. * 2003-01-29 more tinkering for 2.5.59-1
  12. * This should now address the sysfs problems and has
  13. * the symlink for devfs to allow for booting with
  14. * the common /dev/ubd/discX/... names rather than
  15. * only /dev/ubdN/discN this version also has lots of
  16. * clean ups preparing for ubd-many.
  17. * James McMechan
  18. */
  19. #define UBD_SHIFT 4
  20. #include <linux/module.h>
  21. #include <linux/init.h>
  22. #include <linux/blkdev.h>
  23. #include <linux/blk-mq.h>
  24. #include <linux/ata.h>
  25. #include <linux/hdreg.h>
  26. #include <linux/cdrom.h>
  27. #include <linux/proc_fs.h>
  28. #include <linux/seq_file.h>
  29. #include <linux/ctype.h>
  30. #include <linux/slab.h>
  31. #include <linux/vmalloc.h>
  32. #include <linux/platform_device.h>
  33. #include <linux/scatterlist.h>
  34. #include <asm/tlbflush.h>
  35. #include <kern_util.h>
  36. #include "mconsole_kern.h"
  37. #include <init.h>
  38. #include <irq_kern.h>
  39. #include "ubd.h"
  40. #include <os.h>
  41. #include "cow.h"
/* I/O operation types shipped between the UML kernel side and the I/O thread. */
enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };

/*
 * One request as passed over the pipe to the helper I/O thread and back.
 * The fds/offsets pairs and the bitmap fields belong to the COW machinery;
 * their exact pairing is defined on the ubd_user side (not visible here).
 */
struct io_thread_req {
	struct request *req;		/* originating blk-mq request */
	enum ubd_req op;		/* read, write or flush */
	int fds[2];			/* file descriptors (both used for COW setups) */
	unsigned long offsets[2];	/* per-fd data offsets */
	unsigned long long offset;	/* byte offset of this transfer */
	unsigned long length;		/* transfer length in bytes */
	char *buffer;			/* kernel buffer for the data */
	int sectorsize;
	unsigned long sector_mask;
	unsigned long long cow_offset;	/* offset of COW bitmap words to update */
	unsigned long bitmap_words[2];	/* COW bitmap words to write back */
	int error;			/* result reported by the I/O thread */
};
/*
 * Buffers for reading batches of completed request pointers from the I/O
 * thread pipe, plus per-context "remainder" storage for a partially read
 * pointer (see bulk_req_safe_read()).  The irq_* set is used in interrupt
 * context by ubd_handler(); the io_* set is presumably used by the I/O
 * thread itself (its user is outside this part of the file).
 */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
  63. static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  64. {
  65. __u64 n;
  66. int bits, off;
  67. bits = sizeof(data[0]) * 8;
  68. n = bit / bits;
  69. off = bit % bits;
  70. return (data[n] & (1 << off)) != 0;
  71. }
  72. static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  73. {
  74. __u64 n;
  75. int bits, off;
  76. bits = sizeof(data[0]) * 8;
  77. n = bit / bits;
  78. off = bit % bits;
  79. data[n] |= (1 << off);
  80. }
/*End stuff from ubd_user.h*/

#define DRIVER_NAME "uml-blkdev"

/* Protects device configuration: ubd_devs, fake_major, gendisk arrays. */
static DEFINE_MUTEX(ubd_lock);
static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */

static int ubd_open(struct block_device *bdev, fmode_t mode);
static void ubd_release(struct gendisk *disk, fmode_t mode);
static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Maximum number of ubd devices. */
#define MAX_DEV (16)

/* Block device operations for /dev/ubd*. */
static const struct block_device_operations ubd_blops = {
	.owner		= THIS_MODULE,
	.open		= ubd_open,
	.release	= ubd_release,
	.ioctl		= ubd_ioctl,
	.getgeo		= ubd_getgeo,
};
/* Protected by ubd_lock */
static int fake_major = UBD_MAJOR;

static struct gendisk *ubd_gendisk[MAX_DEV];
static struct gendisk *fake_gendisk[MAX_DEV];

/*
 * Default open flags: read/write, don't create; the 's' (sync) flag is on
 * when CONFIG_BLK_DEV_UBD_SYNC is set.
 */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif

static struct openflags global_openflags = OPEN_FLAGS;
/* Per-device COW state. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap (vmalloc'ed in ubd_open_dev) */
	unsigned long *bitmap;
	unsigned long bitmap_len;
	/* file offsets of the bitmap and of the data area in the COW file */
	int bitmap_offset;
	int data_offset;
};
/* Maximum number of scatter/gather segments per request. */
#define MAX_SG 64

/* Per-device state; one slot per possible ubd device. */
struct ubd {
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. */
	char *file;
	int count;
	int fd;
	__u64 size;				/* device size in bytes */
	struct openflags boot_openflags;	/* flags parsed from the command line */
	struct openflags openflags;		/* flags actually used on open */
	unsigned shared:1;			/* 'c' flag: no host file locking */
	unsigned no_cow:1;			/* 'd' flag: never treat file as COW */
	struct cow cow;
	struct platform_device pdev;
	struct request_queue *queue;
	struct blk_mq_tag_set tag_set;
	spinlock_t lock;
};
/* COW state of a device with no COW file attached. */
#define DEFAULT_COW { \
	.file = NULL, \
	.fd = -1, \
	.bitmap = NULL, \
	.bitmap_offset = 0, \
	.data_offset = 0, \
}

/* An unconfigured device slot. */
#define DEFAULT_UBD { \
	.file = NULL, \
	.count = 0, \
	.fd = -1, \
	.size = -1, \
	.boot_openflags = OPEN_FLAGS, \
	.openflags = OPEN_FLAGS, \
	.no_cow = 0, \
	.shared = 0, \
	.cow = DEFAULT_COW, \
	.lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
}

/* Protected by ubd_lock */
static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
/* Only changed by fake_ide_setup which is a setup */
static int fake_ide = 0;
/* /proc/ide and /proc/ide/ide0, created lazily by make_proc_ide(). */
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;

static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd);
  165. static void make_proc_ide(void)
  166. {
  167. proc_ide_root = proc_mkdir("ide", NULL);
  168. proc_ide = proc_mkdir("ide0", proc_ide_root);
  169. }
/* seq_file show routine for the fake /proc/ide/.../media entry. */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");
	return 0;
}
/*
 * Create /proc/ide/ide0/<dev_name>/media plus a /proc/ide/<dev_name>
 * symlink, so tools scanning the classic IDE proc layout see the device.
 * Failures are silently ignored — these entries are purely cosmetic.
 */
static void make_ide_entries(const char *dev_name)
{
	struct proc_dir_entry *dir, *ent;
	char name[64];

	if(proc_ide_root == NULL) make_proc_ide();

	dir = proc_mkdir(dev_name, proc_ide);
	if(!dir) return;

	ent = proc_create_single("media", S_IRUGO, dir,
				 fake_ide_media_proc_show);
	if(!ent) return;
	snprintf(name, sizeof(name), "ide0/%s", dev_name);
	proc_symlink(dev_name, proc_ide_root, name);
}
/* Handle the "fake_ide" kernel command line option: enable /proc/ide entries. */
static int fake_ide_setup(char *str)
{
	fake_ide = 1;
	return 1;
}

__setup("fake_ide", fake_ide_setup);

__uml_help(fake_ide_setup,
"fake_ide\n"
" Create ide0 entries that map onto ubd devices.\n\n"
);
  198. static int parse_unit(char **ptr)
  199. {
  200. char *str = *ptr, *end;
  201. int n = -1;
  202. if(isdigit(*str)) {
  203. n = simple_strtoul(str, &end, 0);
  204. if(end == str)
  205. return -1;
  206. *ptr = end;
  207. }
  208. else if (('a' <= *str) && (*str <= 'z')) {
  209. n = *str - 'a';
  210. str++;
  211. *ptr = str;
  212. }
  213. return n;
  214. }
  215. /* If *index_out == -1 at exit, the passed option was a general one;
  216. * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
  217. * should not be freed on exit.
  218. */
  219. static int ubd_setup_common(char *str, int *index_out, char **error_out)
  220. {
  221. struct ubd *ubd_dev;
  222. struct openflags flags = global_openflags;
  223. char *backing_file;
  224. int n, err = 0, i;
  225. if(index_out) *index_out = -1;
  226. n = *str;
  227. if(n == '='){
  228. char *end;
  229. int major;
  230. str++;
  231. if(!strcmp(str, "sync")){
  232. global_openflags = of_sync(global_openflags);
  233. goto out1;
  234. }
  235. err = -EINVAL;
  236. major = simple_strtoul(str, &end, 0);
  237. if((*end != '\0') || (end == str)){
  238. *error_out = "Didn't parse major number";
  239. goto out1;
  240. }
  241. mutex_lock(&ubd_lock);
  242. if (fake_major != UBD_MAJOR) {
  243. *error_out = "Can't assign a fake major twice";
  244. goto out1;
  245. }
  246. fake_major = major;
  247. printk(KERN_INFO "Setting extra ubd major number to %d\n",
  248. major);
  249. err = 0;
  250. out1:
  251. mutex_unlock(&ubd_lock);
  252. return err;
  253. }
  254. n = parse_unit(&str);
  255. if(n < 0){
  256. *error_out = "Couldn't parse device number";
  257. return -EINVAL;
  258. }
  259. if(n >= MAX_DEV){
  260. *error_out = "Device number out of range";
  261. return 1;
  262. }
  263. err = -EBUSY;
  264. mutex_lock(&ubd_lock);
  265. ubd_dev = &ubd_devs[n];
  266. if(ubd_dev->file != NULL){
  267. *error_out = "Device is already configured";
  268. goto out;
  269. }
  270. if (index_out)
  271. *index_out = n;
  272. err = -EINVAL;
  273. for (i = 0; i < sizeof("rscd="); i++) {
  274. switch (*str) {
  275. case 'r':
  276. flags.w = 0;
  277. break;
  278. case 's':
  279. flags.s = 1;
  280. break;
  281. case 'd':
  282. ubd_dev->no_cow = 1;
  283. break;
  284. case 'c':
  285. ubd_dev->shared = 1;
  286. break;
  287. case '=':
  288. str++;
  289. goto break_loop;
  290. default:
  291. *error_out = "Expected '=' or flag letter "
  292. "(r, s, c, or d)";
  293. goto out;
  294. }
  295. str++;
  296. }
  297. if (*str == '=')
  298. *error_out = "Too many flags specified";
  299. else
  300. *error_out = "Missing '='";
  301. goto out;
  302. break_loop:
  303. backing_file = strchr(str, ',');
  304. if (backing_file == NULL)
  305. backing_file = strchr(str, ':');
  306. if(backing_file != NULL){
  307. if(ubd_dev->no_cow){
  308. *error_out = "Can't specify both 'd' and a cow file";
  309. goto out;
  310. }
  311. else {
  312. *backing_file = '\0';
  313. backing_file++;
  314. }
  315. }
  316. err = 0;
  317. ubd_dev->file = str;
  318. ubd_dev->cow.file = backing_file;
  319. ubd_dev->boot_openflags = flags;
  320. out:
  321. mutex_unlock(&ubd_lock);
  322. return err;
  323. }
/* Handle a "ubd..." kernel command line option; errors are only logged. */
static int ubd_setup(char *str)
{
	char *error;
	int err;

	err = ubd_setup_common(str, NULL, &error);
	if(err)
		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
		       "%s\n", str, error);
	return 1;
}

__setup("ubd", ubd_setup);
__uml_help(ubd_setup,
"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
" This is used to associate a device with a file in the underlying\n"
" filesystem. When specifying two filenames, the first one is the\n"
" COW name and the second is the backing file name. As separator you can\n"
" use either a ':' or a ',': the first one allows writing things like;\n"
" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
" while with a ',' the shell would not expand the 2nd '~'.\n"
" When using only one filename, UML will detect whether to treat it like\n"
" a COW file or a backing file. To override this detection, add the 'd'\n"
" flag:\n"
" ubd0d=BackingFile\n"
" Usually, there is a filesystem in the file, but \n"
" that's not required. Swap devices containing swap files can be\n"
" specified like this. Also, a file which doesn't contain a\n"
" filesystem can have its contents read in the virtual \n"
" machine by running 'dd' on the device. <n> must be in the range\n"
" 0 to 7. Appending an 'r' to the number will cause that device\n"
" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
" an 's' will cause data to be written to disk on the host immediately.\n"
" 'c' will cause the device to be treated as being shared between multiple\n"
" UMLs and file locking will be turned off - this is appropriate for a\n"
" cluster filesystem and inappropriate at almost all other times.\n\n"
);
  359. static int udb_setup(char *str)
  360. {
  361. printk("udb%s specified on command line is almost certainly a ubd -> "
  362. "udb TYPO\n", str);
  363. return 1;
  364. }
  365. __setup("udb", udb_setup);
  366. __uml_help(udb_setup,
  367. "udb\n"
  368. " This option is here solely to catch ubd -> udb typos, which can be\n"
  369. " to impossible to catch visually unless you specifically look for\n"
  370. " them. The only result of any option starting with 'udb' is an error\n"
  371. " in the boot output.\n\n"
  372. );
/* Only changed by ubd_init, which is an initcall. */
static int thread_fd = -1;

/* Function to read several request pointers at a time
 * handling fractional reads if (and as) needed
 *
 * Returns the number of bytes of *complete* pointers now in
 * *request_buffer (a multiple of sizeof(struct io_thread_req *)), or the
 * negative error/zero returned by os_read_file().  Bytes of a torn
 * trailing pointer are parked in *remainder / *remainder_size and
 * prepended on the next call.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	int n = 0;
	int res = 0;

	/* Prepend bytes left over from the previous (partial) read. */
	if (*remainder_size > 0) {
		memmove(
			(char *) request_buffer,
			(char *) remainder, *remainder_size
		);
		n = *remainder_size;
	}

	res = os_read_file(
			fd,
			((char *) request_buffer) + *remainder_size,
			sizeof(struct io_thread_req *)*max_recs
				- *remainder_size
	);
	if (res > 0) {
		n += res;
		if ((n % sizeof(struct io_thread_req *)) > 0) {
			/*
			 * Read somehow returned not a multiple of dword
			 * theoretically possible, but never observed in the
			 * wild, so read routine must be able to handle it
			 */
			*remainder_size = n % sizeof(struct io_thread_req *);
			WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
			/* Park the torn trailing fragment for the next call. */
			memmove(
				remainder,
				((char *) request_buffer) +
					(n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
				*remainder_size
			);
			n = n - *remainder_size;
		}
	} else {
		/* Error or EOF: *remainder is untouched, so nothing is lost. */
		n = res;
	}
	return n;
}
  424. /* Called without dev->lock held, and only in interrupt context. */
  425. static void ubd_handler(void)
  426. {
  427. int n;
  428. int count;
  429. while(1){
  430. n = bulk_req_safe_read(
  431. thread_fd,
  432. irq_req_buffer,
  433. &irq_remainder,
  434. &irq_remainder_size,
  435. UBD_REQ_BUFFER_SIZE
  436. );
  437. if (n < 0) {
  438. if(n == -EAGAIN)
  439. break;
  440. printk(KERN_ERR "spurious interrupt in ubd_handler, "
  441. "err = %d\n", -n);
  442. return;
  443. }
  444. for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
  445. struct io_thread_req *io_req = (*irq_req_buffer)[count];
  446. int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK;
  447. if (!blk_update_request(io_req->req, err, io_req->length))
  448. __blk_mq_end_request(io_req->req, err);
  449. kfree(io_req);
  450. }
  451. }
  452. reactivate_fd(thread_fd, UBD_IRQ);
  453. }
/* IRQ handler: drain completed requests from the I/O thread pipe. */
static irqreturn_t ubd_intr(int irq, void *dev)
{
	ubd_handler();
	return IRQ_HANDLED;
}
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the helper I/O thread (a host process) if it was started. */
static void kill_io_thread(void)
{
	if(io_pid != -1)
		os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
/*
 * Determine the size of the device behind ubd_dev, in *size_out.
 * With an explicitly configured COW backing file, that file is sized;
 * otherwise the device file is probed for a COW header to locate the
 * real backing file.  Returns 0 or a negative error.
 */
static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
{
	char *file;
	int fd;
	int err;

	__u32 version;
	__u32 align;
	char *backing_file;
	time_t mtime;
	unsigned long long size;
	int sector_size;
	int bitmap_offset;

	if (ubd_dev->file && ubd_dev->cow.file) {
		file = ubd_dev->cow.file;
		goto out;
	}

	fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
	if (fd < 0)
		return fd;

	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
		&mtime, &size, &sector_size, &align, &bitmap_offset);
	os_close_file(fd);

	/*
	 * -EINVAL means "no COW header": size the file itself.
	 * NOTE(review): for any other error this relies on backing_file
	 * having been set by read_cow_header() — confirm against cow.c,
	 * otherwise an uninitialized pointer would be used here.
	 */
	if(err == -EINVAL)
		file = ubd_dev->file;
	else
		file = backing_file;

out:
	return os_file_size(file, size_out);
}
  496. static int read_cow_bitmap(int fd, void *buf, int offset, int len)
  497. {
  498. int err;
  499. err = os_pread_file(fd, buf, len, offset);
  500. if (err < 0)
  501. return err;
  502. return 0;
  503. }
/*
 * Sanity-check a COW header's recorded view of its backing file against
 * the real file on the host: both the size and the modification time must
 * match.  Returns 0 when they match, a negative error otherwise.
 */
static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
{
	unsigned long modtime;
	unsigned long long actual;
	int err;

	err = os_file_modtime(file, &modtime);
	if (err < 0) {
		printk(KERN_ERR "Failed to get modification time of backing "
		       "file \"%s\", err = %d\n", file, -err);
		return err;
	}

	err = os_file_size(file, &actual);
	if (err < 0) {
		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
		       "err = %d\n", file, -err);
		return err;
	}

	if (actual != size) {
		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
		 * the typecast.*/
		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
		       "vs backing file\n", (unsigned long long) size, actual);
		return -EINVAL;
	}
	if (modtime != mtime) {
		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
		       "backing file\n", mtime, modtime);
		return -EINVAL;
	}
	return 0;
}
/*
 * Decide whether the backing file recorded in the COW header should be
 * switched to the path given on the command line.  Returns 0 (no switch)
 * when no command-line path was given, the paths are textually equal, the
 * command-line path cannot be stat'ed, or both paths name the same inode
 * on the same device; returns 1 when they genuinely differ (or only the
 * COW-recorded path cannot be stat'ed).
 */
static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
{
	struct uml_stat buf1, buf2;
	int err;

	if (from_cmdline == NULL)
		return 0;
	if (!strcmp(from_cmdline, from_cow))
		return 0;

	err = os_stat_file(from_cmdline, &buf1);
	if (err < 0) {
		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
		       -err);
		return 0;
	}
	err = os_stat_file(from_cow, &buf2);
	if (err < 0) {
		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
		       -err);
		return 1;
	}
	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
		return 0;

	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
	       "\"%s\" specified in COW header of \"%s\"\n",
	       from_cmdline, from_cow, cow);
	return 1;
}
/*
 * Open a ubd file, handling read-only fallback, host file locking and COW
 * header processing.
 *
 * Returns an open fd on success or a negative error.  When
 * backing_file_out is non-NULL and the file carries a COW header, the
 * backing file name, bitmap offset/length and data offset are filled in;
 * the recorded backing file may be switched to the command-line path when
 * it names a different file that still matches the header's size/mtime.
 * If the file does not exist and create_cow_out is non-NULL,
 * *create_cow_out is set so the caller can create a fresh COW file.
 */
static int open_ubd_file(char *file, struct openflags *openflags, int shared,
			 char **backing_file_out, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out,
			 int *create_cow_out)
{
	time_t mtime;
	unsigned long long size;
	__u32 version, align;
	char *backing_file;
	int fd, err, sectorsize, asked_switch, mode = 0644;

	fd = os_open_file(file, *openflags, mode);
	if (fd < 0) {
		if ((fd == -ENOENT) && (create_cow_out != NULL))
			*create_cow_out = 1;
		if (!openflags->w ||
		    ((fd != -EROFS) && (fd != -EACCES)))
			return fd;
		/* Writable open failed with EROFS/EACCES: retry read-only. */
		openflags->w = 0;
		fd = os_open_file(file, *openflags, mode);
		if (fd < 0)
			return fd;
	}

	if (shared)
		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
	else {
		err = os_lock_file(fd, openflags->w);
		if (err < 0) {
			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
			       file, -err);
			goto out_close;
		}
	}

	/* Successful return case! */
	if (backing_file_out == NULL)
		return fd;

	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
			      &size, &sectorsize, &align, bitmap_offset_out);
	if (err && (*backing_file_out != NULL)) {
		/* A backing file was configured, so a readable COW header
		 * was expected — treat the failure as fatal. */
		printk(KERN_ERR "Failed to read COW header from COW file "
		       "\"%s\", errno = %d\n", file, -err);
		goto out_close;
	}
	if (err)
		/* No COW header and none expected: plain backing file. */
		return fd;

	asked_switch = path_requires_switch(*backing_file_out, backing_file,
					    file);

	/* Allow switching only if no mismatch. */
	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
						   mtime)) {
		printk(KERN_ERR "Switching backing file to '%s'\n",
		       *backing_file_out);
		err = write_cow_header(file, fd, *backing_file_out,
				       sectorsize, align, &size);
		if (err) {
			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
			goto out_close;
		}
	} else {
		*backing_file_out = backing_file;
		err = backing_file_mismatch(*backing_file_out, size, mtime);
		if (err)
			goto out_close;
	}

	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
		  bitmap_len_out, data_offset_out);

	return fd;
 out_close:
	os_close_file(fd);
	return err;
}
  632. static int create_cow_file(char *cow_file, char *backing_file,
  633. struct openflags flags,
  634. int sectorsize, int alignment, int *bitmap_offset_out,
  635. unsigned long *bitmap_len_out, int *data_offset_out)
  636. {
  637. int err, fd;
  638. flags.c = 1;
  639. fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
  640. if (fd < 0) {
  641. err = fd;
  642. printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
  643. cow_file, -err);
  644. goto out;
  645. }
  646. err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
  647. bitmap_offset_out, bitmap_len_out,
  648. data_offset_out);
  649. if (!err)
  650. return fd;
  651. os_close_file(fd);
  652. out:
  653. return err;
  654. }
  655. static void ubd_close_dev(struct ubd *ubd_dev)
  656. {
  657. os_close_file(ubd_dev->fd);
  658. if(ubd_dev->cow.file == NULL)
  659. return;
  660. os_close_file(ubd_dev->cow.fd);
  661. vfree(ubd_dev->cow.bitmap);
  662. ubd_dev->cow.bitmap = NULL;
  663. }
  664. static int ubd_open_dev(struct ubd *ubd_dev)
  665. {
  666. struct openflags flags;
  667. char **back_ptr;
  668. int err, create_cow, *create_ptr;
  669. int fd;
  670. ubd_dev->openflags = ubd_dev->boot_openflags;
  671. create_cow = 0;
  672. create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
  673. back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
  674. fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
  675. back_ptr, &ubd_dev->cow.bitmap_offset,
  676. &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
  677. create_ptr);
  678. if((fd == -ENOENT) && create_cow){
  679. fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
  680. ubd_dev->openflags, 1 << 9, PAGE_SIZE,
  681. &ubd_dev->cow.bitmap_offset,
  682. &ubd_dev->cow.bitmap_len,
  683. &ubd_dev->cow.data_offset);
  684. if(fd >= 0){
  685. printk(KERN_INFO "Creating \"%s\" as COW file for "
  686. "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
  687. }
  688. }
  689. if(fd < 0){
  690. printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
  691. -fd);
  692. return fd;
  693. }
  694. ubd_dev->fd = fd;
  695. if(ubd_dev->cow.file != NULL){
  696. blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
  697. err = -ENOMEM;
  698. ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
  699. if(ubd_dev->cow.bitmap == NULL){
  700. printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
  701. goto error;
  702. }
  703. flush_tlb_kernel_vm();
  704. err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
  705. ubd_dev->cow.bitmap_offset,
  706. ubd_dev->cow.bitmap_len);
  707. if(err < 0)
  708. goto error;
  709. flags = ubd_dev->openflags;
  710. flags.w = 0;
  711. err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
  712. NULL, NULL, NULL, NULL);
  713. if(err < 0) goto error;
  714. ubd_dev->cow.fd = err;
  715. }
  716. return 0;
  717. error:
  718. os_close_file(ubd_dev->fd);
  719. return err;
  720. }
/* platform_device release hook: tear down queue/tag set and reset the slot. */
static void ubd_device_release(struct device *dev)
{
	struct ubd *ubd_dev = dev_get_drvdata(dev);

	blk_cleanup_queue(ubd_dev->queue);
	blk_mq_free_tag_set(&ubd_dev->tag_set);
	*ubd_dev = ((struct ubd) DEFAULT_UBD);
}
/*
 * Allocate and register a gendisk for device 'unit' under 'major'.  Real
 * ubd devices (UBD_MAJOR) also get a platform device so they appear in
 * sysfs; the fake IDE major registers only the disk.
 * NOTE(review): the platform_device_register() return value is ignored —
 * confirm that failure here is acceptable.
 */
static int ubd_disk_register(int major, u64 size, int unit,
			     struct gendisk **disk_out)
{
	struct device *parent = NULL;
	struct gendisk *disk;

	disk = alloc_disk(1 << UBD_SHIFT);
	if(disk == NULL)
		return -ENOMEM;

	disk->major = major;
	disk->first_minor = unit << UBD_SHIFT;
	disk->fops = &ubd_blops;
	set_capacity(disk, size / 512);	/* capacity is in 512-byte sectors */
	if (major == UBD_MAJOR)
		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
	else
		sprintf(disk->disk_name, "ubd_fake%d", unit);

	/* sysfs register (not for ide fake devices) */
	if (major == UBD_MAJOR) {
		ubd_devs[unit].pdev.id = unit;
		ubd_devs[unit].pdev.name = DRIVER_NAME;
		ubd_devs[unit].pdev.dev.release = ubd_device_release;
		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
		platform_device_register(&ubd_devs[unit].pdev);
		parent = &ubd_devs[unit].pdev.dev;
	}

	disk->private_data = &ubd_devs[unit];
	disk->queue = ubd_devs[unit].queue;
	device_add_disk(parent, disk, NULL);

	*disk_out = disk;
	return 0;
}
  759. #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
/* blk-mq operations: a single queue_rq hook, no timeout/poll handlers. */
static const struct blk_mq_ops ubd_mq_ops = {
	.queue_rq = ubd_queue_rq,
};
  763. static int ubd_add(int n, char **error_out)
  764. {
  765. struct ubd *ubd_dev = &ubd_devs[n];
  766. int err = 0;
  767. if(ubd_dev->file == NULL)
  768. goto out;
  769. err = ubd_file_size(ubd_dev, &ubd_dev->size);
  770. if(err < 0){
  771. *error_out = "Couldn't determine size of device's file";
  772. goto out;
  773. }
  774. ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
  775. ubd_dev->tag_set.ops = &ubd_mq_ops;
  776. ubd_dev->tag_set.queue_depth = 64;
  777. ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
  778. ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
  779. ubd_dev->tag_set.driver_data = ubd_dev;
  780. ubd_dev->tag_set.nr_hw_queues = 1;
  781. err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
  782. if (err)
  783. goto out;
  784. ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
  785. if (IS_ERR(ubd_dev->queue)) {
  786. err = PTR_ERR(ubd_dev->queue);
  787. goto out_cleanup;
  788. }
  789. ubd_dev->queue->queuedata = ubd_dev;
  790. blk_queue_write_cache(ubd_dev->queue, true, false);
  791. blk_queue_max_segments(ubd_dev->queue, MAX_SG);
  792. err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
  793. if(err){
  794. *error_out = "Failed to register device";
  795. goto out_cleanup_tags;
  796. }
  797. if (fake_major != UBD_MAJOR)
  798. ubd_disk_register(fake_major, ubd_dev->size, n,
  799. &fake_gendisk[n]);
  800. /*
  801. * Perhaps this should also be under the "if (fake_major)" above
  802. * using the fake_disk->disk_name
  803. */
  804. if (fake_ide)
  805. make_ide_entries(ubd_gendisk[n]->disk_name);
  806. err = 0;
  807. out:
  808. return err;
  809. out_cleanup_tags:
  810. blk_mq_free_tag_set(&ubd_dev->tag_set);
  811. out_cleanup:
  812. blk_cleanup_queue(ubd_dev->queue);
  813. goto out;
  814. }
/*
 * mconsole "config" handler: parse a "ubd<n>...=..." string and bring the
 * device up.  On success the duplicated string is owned by ubd_devs[n];
 * it is freed when parsing fails or only general options were set.
 * NOTE(review): when ubd_add() fails, the slot's file pointer is cleared
 * but the duplicated string is not freed — apparent leak; confirm.
 */
static int ubd_config(char *str, char **error_out)
{
	int n, ret;

	/* This string is possibly broken up and stored, so it's only
	 * freed if ubd_setup_common fails, or if only general options
	 * were set.
	 */
	str = kstrdup(str, GFP_KERNEL);
	if (str == NULL) {
		*error_out = "Failed to allocate memory";
		return -ENOMEM;
	}

	ret = ubd_setup_common(str, &n, error_out);
	if (ret)
		goto err_free;

	if (n == -1) {
		/* A general option: nothing stored, nothing to add. */
		ret = 0;
		goto err_free;
	}

	mutex_lock(&ubd_lock);
	ret = ubd_add(n, error_out);
	if (ret)
		ubd_devs[n].file = NULL;
	mutex_unlock(&ubd_lock);

out:
	return ret;

err_free:
	kfree(str);
	goto out;
}
/*
 * mconsole "config ubd<n>" query: format the unit's backing file (plus the
 * COW file, if any, separated by ',') into str.  Returns the number of
 * bytes emitted, or -1 when the unit number cannot be parsed / is out of
 * range.  CONFIG_CHUNK appends to str and advances len; its final argument
 * flags the last chunk of the output.
 */
static int ubd_get_config(char *name, char *str, int size, char **error_out)
{
	struct ubd *ubd_dev;
	int n, len = 0;

	n = parse_unit(&name);
	if((n >= MAX_DEV) || (n < 0)){
		*error_out = "ubd_get_config : device number out of range";
		return -1;
	}

	ubd_dev = &ubd_devs[n];
	mutex_lock(&ubd_lock);

	if(ubd_dev->file == NULL){
		/* Unconfigured unit: report an empty string. */
		CONFIG_CHUNK(str, size, len, "", 1);
		goto out;
	}

	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);

	if(ubd_dev->cow.file != NULL){
		CONFIG_CHUNK(str, size, len, ",", 0);
		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
	}
	else CONFIG_CHUNK(str, size, len, "", 1);

out:
	mutex_unlock(&ubd_lock);
	return len;
}
  870. static int ubd_id(char **str, int *start_out, int *end_out)
  871. {
  872. int n;
  873. n = parse_unit(str);
  874. *start_out = 0;
  875. *end_out = MAX_DEV - 1;
  876. return n;
  877. }
/*
 * mconsole "remove ubd<n>": tear down an idle unit.  Returns -ENODEV if
 * the unit is not configured, -EBUSY while it is still open, 0 on success.
 */
static int ubd_remove(int n, char **error_out)
{
	struct gendisk *disk = ubd_gendisk[n];
	struct ubd *ubd_dev;
	int err = -ENODEV;

	mutex_lock(&ubd_lock);

	ubd_dev = &ubd_devs[n];

	if(ubd_dev->file == NULL)
		goto out;

	/* you cannot remove an open disk */
	err = -EBUSY;
	if(ubd_dev->count > 0)
		goto out;

	ubd_gendisk[n] = NULL;
	if(disk != NULL){
		del_gendisk(disk);
		put_disk(disk);
	}

	if(fake_gendisk[n] != NULL){
		del_gendisk(fake_gendisk[n]);
		put_disk(fake_gendisk[n]);
		fake_gendisk[n] = NULL;
	}

	err = 0;
	/* NOTE(review): the request queue and tag set allocated in ubd_add()
	 * are presumably released by the platform device's release callback
	 * triggered here — confirm, otherwise they leak. */
	platform_device_unregister(&ubd_dev->pdev);
out:
	mutex_unlock(&ubd_lock);
	return err;
}
/* All these are called by mconsole in process context and without
 * ubd-specific locks.  The structure itself is const except for .list,
 * which mconsole links into its device list.
 */
static struct mc_device ubd_mc = {
	.list = LIST_HEAD_INIT(ubd_mc.list),
	.name = "ubd",
	.config = ubd_config,
	.get_config = ubd_get_config,
	.id = ubd_id,
	.remove = ubd_remove,
};
/* Register the ubd device class with the management console. */
static int __init ubd_mc_init(void)
{
	mconsole_register_dev(&ubd_mc);
	return 0;
}

__initcall(ubd_mc_init);
  924. static int __init ubd0_init(void)
  925. {
  926. struct ubd *ubd_dev = &ubd_devs[0];
  927. mutex_lock(&ubd_lock);
  928. if(ubd_dev->file == NULL)
  929. ubd_dev->file = "root_fs";
  930. mutex_unlock(&ubd_lock);
  931. return 0;
  932. }
  933. __initcall(ubd0_init);
/* Used in ubd_init, which is an initcall; only the driver name is needed
 * since per-device setup happens in ubd_add(). */
static struct platform_driver ubd_driver = {
	.driver = {
		.name = DRIVER_NAME,
	},
};
  940. static int __init ubd_init(void)
  941. {
  942. char *error;
  943. int i, err;
  944. if (register_blkdev(UBD_MAJOR, "ubd"))
  945. return -1;
  946. if (fake_major != UBD_MAJOR) {
  947. char name[sizeof("ubd_nnn\0")];
  948. snprintf(name, sizeof(name), "ubd_%d", fake_major);
  949. if (register_blkdev(fake_major, "ubd"))
  950. return -1;
  951. }
  952. irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
  953. sizeof(struct io_thread_req *),
  954. GFP_KERNEL
  955. );
  956. irq_remainder = 0;
  957. if (irq_req_buffer == NULL) {
  958. printk(KERN_ERR "Failed to initialize ubd buffering\n");
  959. return -1;
  960. }
  961. io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
  962. sizeof(struct io_thread_req *),
  963. GFP_KERNEL
  964. );
  965. io_remainder = 0;
  966. if (io_req_buffer == NULL) {
  967. printk(KERN_ERR "Failed to initialize ubd buffering\n");
  968. return -1;
  969. }
  970. platform_driver_register(&ubd_driver);
  971. mutex_lock(&ubd_lock);
  972. for (i = 0; i < MAX_DEV; i++){
  973. err = ubd_add(i, &error);
  974. if(err)
  975. printk(KERN_ERR "Failed to initialize ubd device %d :"
  976. "%s\n", i, error);
  977. }
  978. mutex_unlock(&ubd_lock);
  979. return 0;
  980. }
  981. late_initcall(ubd_init);
  982. static int __init ubd_driver_init(void){
  983. unsigned long stack;
  984. int err;
  985. /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
  986. if(global_openflags.s){
  987. printk(KERN_INFO "ubd: Synchronous mode\n");
  988. /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
  989. * enough. So use anyway the io thread. */
  990. }
  991. stack = alloc_stack(0, 0);
  992. io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
  993. &thread_fd);
  994. if(io_pid < 0){
  995. printk(KERN_ERR
  996. "ubd : Failed to start I/O thread (errno = %d) - "
  997. "falling back to synchronous I/O\n", -io_pid);
  998. io_pid = -1;
  999. return 0;
  1000. }
  1001. err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
  1002. 0, "ubd", ubd_devs);
  1003. if(err != 0)
  1004. printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
  1005. return 0;
  1006. }
  1007. device_initcall(ubd_driver_init);
  1008. static int ubd_open(struct block_device *bdev, fmode_t mode)
  1009. {
  1010. struct gendisk *disk = bdev->bd_disk;
  1011. struct ubd *ubd_dev = disk->private_data;
  1012. int err = 0;
  1013. mutex_lock(&ubd_mutex);
  1014. if(ubd_dev->count == 0){
  1015. err = ubd_open_dev(ubd_dev);
  1016. if(err){
  1017. printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
  1018. disk->disk_name, ubd_dev->file, -err);
  1019. goto out;
  1020. }
  1021. }
  1022. ubd_dev->count++;
  1023. set_disk_ro(disk, !ubd_dev->openflags.w);
  1024. /* This should no more be needed. And it didn't work anyway to exclude
  1025. * read-write remounting of filesystems.*/
  1026. /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
  1027. if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
  1028. err = -EROFS;
  1029. }*/
  1030. out:
  1031. mutex_unlock(&ubd_mutex);
  1032. return err;
  1033. }
  1034. static void ubd_release(struct gendisk *disk, fmode_t mode)
  1035. {
  1036. struct ubd *ubd_dev = disk->private_data;
  1037. mutex_lock(&ubd_mutex);
  1038. if(--ubd_dev->count == 0)
  1039. ubd_close_dev(ubd_dev);
  1040. mutex_unlock(&ubd_mutex);
  1041. }
/*
 * Record a write of `length` bytes at `io_offset` in the in-memory COW
 * bitmap and stage the words that must be flushed back to the COW file.
 * When any bit was newly set, *cow_offset is left as the byte offset in
 * the COW file of the first bitmap word to write and bitmap_words[] holds
 * the two words to write; otherwise they are untouched (the caller
 * pre-sets cow_offset to -1, meaning "no bitmap update needed").
 * cow_mask, if non-NULL, gets one bit per sector of this request so
 * do_io() directs the data at the COW file.
 */
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
			  __u64 *cow_offset, unsigned long *bitmap,
			  __u64 bitmap_offset, unsigned long *bitmap_words,
			  __u64 bitmap_len)
{
	__u64 sector = io_offset >> 9;	/* first 512-byte sector of the I/O */
	int i, update_bitmap = 0;

	for(i = 0; i < length >> 9; i++){
		if(cow_mask != NULL)
			ubd_set_bit(i, (unsigned char *) cow_mask);
		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
			continue;	/* sector already in the COW file */
		update_bitmap = 1;
		ubd_set_bit(sector + i, (unsigned char *) bitmap);
	}

	if(!update_bitmap)
		return;

	/* Word index of the first bitmap word touched by this request. */
	*cow_offset = sector / (sizeof(unsigned long) * 8);

	/* This takes care of the case where we're exactly at the end of the
	 * device, and *cow_offset + 1 is off the end. So, just back it up
	 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
	 * for the original diagnosis.
	 */
	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
					 sizeof(unsigned long)) - 1))
		(*cow_offset)--;

	/* Snapshot two consecutive words for the I/O thread to write out. */
	bitmap_words[0] = bitmap[*cow_offset];
	bitmap_words[1] = bitmap[*cow_offset + 1];

	/* Convert the word index into a byte offset inside the COW file. */
	*cow_offset *= sizeof(unsigned long);
	*cow_offset += bitmap_offset;
}
/*
 * Prepare a request on a COW-backed device.  For reads: set a bit in
 * req->sector_mask for every sector already present in the COW file, so
 * do_io() fetches those sectors from the COW file and the rest from the
 * backing image.  For writes: delegate to cowify_bitmap(), which marks
 * all written sectors and stages the bitmap update.
 */
static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
		       __u64 bitmap_offset, __u64 bitmap_len)
{
	__u64 sector = req->offset >> 9;
	int i;

	/* sector_mask has one bit per sector; larger requests cannot be
	 * represented. */
	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
		panic("Operation too long");

	if(req->op == UBD_READ) {
		for(i = 0; i < req->length >> 9; i++){
			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
				ubd_set_bit(i, (unsigned char *)
					    &req->sector_mask);
		}
	}
	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
			   &req->cow_offset, bitmap, bitmap_offset,
			   req->bitmap_words, bitmap_len);
}
/*
 * Package one segment of a request (or a flush) as an io_thread_req and
 * send its pointer to the I/O thread over thread_fd.  Runs under
 * ubd_dev->lock, hence GFP_ATOMIC.  Returns os_write_file()'s result:
 * sizeof(io_req) on success, negative errno on failure (the caller then
 * requeues the request).  On success the io_req is presumably freed by
 * the completion path once the I/O thread answers — not visible in this
 * chunk; confirm.
 */
static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
			     u64 off, struct bio_vec *bvec)
{
	struct ubd *dev = hctx->queue->queuedata;
	struct io_thread_req *io_req;
	int ret;

	io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);
	if (!io_req)
		return -ENOMEM;

	io_req->req = req;
	/* fds[0] is what a flush syncs: the COW file when one is
	 * configured, otherwise the image itself. */
	if (dev->cow.file)
		io_req->fds[0] = dev->cow.fd;
	else
		io_req->fds[0] = dev->fd;
	io_req->error = 0;

	if (req_op(req) == REQ_OP_FLUSH) {
		io_req->op = UBD_FLUSH;
	} else {
		io_req->fds[1] = dev->fd;
		io_req->cow_offset = -1;	/* "no bitmap update" sentinel */
		io_req->offset = off;
		io_req->length = bvec->bv_len;
		io_req->sector_mask = 0;
		io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
		io_req->offsets[0] = 0;
		io_req->offsets[1] = dev->cow.data_offset;
		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
		io_req->sectorsize = 1 << 9;

		if (dev->cow.file) {
			cowify_req(io_req, dev->cow.bitmap,
				   dev->cow.bitmap_offset, dev->cow.bitmap_len);
		}
	}

	/* Hand the pointer itself to the I/O thread. */
	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
	if (ret != sizeof(io_req)) {
		if (ret != -EAGAIN)
			pr_err("write to io thread failed: %d\n", -ret);
		kfree(io_req);
	}

	return ret;
}
/*
 * blk-mq ->queue_rq: split the request into bvecs and hand each one to
 * the I/O thread.  Always returns BLK_STS_OK — submission failures are
 * handled by requeueing the request instead of reporting an error to the
 * block layer.
 */
static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd)
{
	struct ubd *ubd_dev = hctx->queue->queuedata;
	struct request *req = bd->rq;
	int ret = 0;

	blk_mq_start_request(req);

	spin_lock_irq(&ubd_dev->lock);

	if (req_op(req) == REQ_OP_FLUSH) {
		ret = ubd_queue_one_vec(hctx, req, 0, NULL);
	} else {
		struct req_iterator iter;
		struct bio_vec bvec;
		/* Byte offset of the request (sector count << 9). */
		u64 off = (u64)blk_rq_pos(req) << 9;

		rq_for_each_segment(bvec, req, iter) {
			ret = ubd_queue_one_vec(hctx, req, off, &bvec);
			if (ret < 0)
				goto out;
			off += bvec.bv_len;
		}
	}
out:
	spin_unlock_irq(&ubd_dev->lock);

	if (ret < 0)
		blk_mq_requeue_request(req, true);

	return BLK_STS_OK;
}
  1159. static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
  1160. {
  1161. struct ubd *ubd_dev = bdev->bd_disk->private_data;
  1162. geo->heads = 128;
  1163. geo->sectors = 32;
  1164. geo->cylinders = ubd_dev->size / (128 * 32 * 512);
  1165. return 0;
  1166. }
  1167. static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
  1168. unsigned int cmd, unsigned long arg)
  1169. {
  1170. struct ubd *ubd_dev = bdev->bd_disk->private_data;
  1171. u16 ubd_id[ATA_ID_WORDS];
  1172. switch (cmd) {
  1173. struct cdrom_volctrl volume;
  1174. case HDIO_GET_IDENTITY:
  1175. memset(&ubd_id, 0, ATA_ID_WORDS * 2);
  1176. ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
  1177. ubd_id[ATA_ID_HEADS] = 128;
  1178. ubd_id[ATA_ID_SECTORS] = 32;
  1179. if(copy_to_user((char __user *) arg, (char *) &ubd_id,
  1180. sizeof(ubd_id)))
  1181. return -EFAULT;
  1182. return 0;
  1183. case CDROMVOLREAD:
  1184. if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
  1185. return -EFAULT;
  1186. volume.channel0 = 255;
  1187. volume.channel1 = 255;
  1188. volume.channel2 = 255;
  1189. volume.channel3 = 255;
  1190. if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
  1191. return -EFAULT;
  1192. return 0;
  1193. }
  1194. return -EINVAL;
  1195. }
  1196. static int update_bitmap(struct io_thread_req *req)
  1197. {
  1198. int n;
  1199. if(req->cow_offset == -1)
  1200. return 0;
  1201. n = os_pwrite_file(req->fds[1], &req->bitmap_words,
  1202. sizeof(req->bitmap_words), req->cow_offset);
  1203. if(n != sizeof(req->bitmap_words)){
  1204. printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
  1205. req->fds[1]);
  1206. return 1;
  1207. }
  1208. return 0;
  1209. }
/*
 * Execute one request synchronously in the I/O thread.  A flush syncs
 * fds[0] (the COW file when present, otherwise the image).  Data requests
 * are processed in runs of consecutive sectors whose sector_mask bit
 * agrees: the bit selects which fd and base offset to use, so
 * COW-resident sectors go to the COW file and the rest to the backing
 * image.  req->error is set on failure.
 */
static void do_io(struct io_thread_req *req)
{
	char *buf;
	unsigned long len;
	int n, nsectors, start, end, bit;
	__u64 off;

	if (req->op == UBD_FLUSH) {
		/* fds[0] is always either the rw image or our cow file */
		n = os_sync_file(req->fds[0]);
		if (n != 0) {
			printk("do_io - sync failed err = %d "
			       "fd = %d\n", -n, req->fds[0]);
			req->error = 1;
		}
		return;
	}

	nsectors = req->length / req->sectorsize;
	start = 0;
	do {
		/* Extend the run while the mask bit stays the same. */
		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
		end = start;
		while((end < nsectors) &&
		      (ubd_test_bit(end, (unsigned char *)
				    &req->sector_mask) == bit))
			end++;

		off = req->offset + req->offsets[bit] +
			start * req->sectorsize;
		len = (end - start) * req->sectorsize;
		buf = &req->buffer[start * req->sectorsize];

		if(req->op == UBD_READ){
			n = 0;
			do {
				buf = &buf[n];
				len -= n;
				/* NOTE(review): off is not advanced on a
				 * partial read, so a retry re-reads the same
				 * file offset into an advanced buffer —
				 * confirm os_pread_file only returns short
				 * at EOF. */
				n = os_pread_file(req->fds[bit], buf, len, off);
				if (n < 0) {
					printk("do_io - read failed, err = %d "
					       "fd = %d\n", -n, req->fds[bit]);
					req->error = 1;
					return;
				}
			} while((n < len) && (n != 0));
			/* Short read (e.g. past EOF): zero-fill the rest. */
			if (n < len) memset(&buf[n], 0, len - n);
		} else {
			n = os_pwrite_file(req->fds[bit], buf, len, off);
			if(n != len){
				printk("do_io - write failed err = %d "
				       "fd = %d\n", -n, req->fds[bit]);
				req->error = 1;
				return;
			}
		}

		start = end;
	} while(start < nsectors);

	/* Persist any COW bitmap bits this write turned on. */
	req->error = update_bitmap(req);
}
/* I/O-thread side of the socketpair to the kernel.  Changed in
 * start_io_thread, which is serialized by being called only from
 * ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/* Statistics counter; only changed by the io thread. XXX: currently unused. */
static int io_count = 0;
  1272. int io_thread(void *arg)
  1273. {
  1274. int n, count, written, res;
  1275. os_fix_helper_signals();
  1276. while(1){
  1277. n = bulk_req_safe_read(
  1278. kernel_fd,
  1279. io_req_buffer,
  1280. &io_remainder,
  1281. &io_remainder_size,
  1282. UBD_REQ_BUFFER_SIZE
  1283. );
  1284. if (n < 0) {
  1285. if (n == -EAGAIN) {
  1286. ubd_read_poll(-1);
  1287. continue;
  1288. } else {
  1289. printk("io_thread - read failed, fd = %d, "
  1290. "err = %d,"
  1291. "reminder = %d\n",
  1292. kernel_fd, -n, io_remainder_size);
  1293. }
  1294. }
  1295. for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
  1296. io_count++;
  1297. do_io((*io_req_buffer)[count]);
  1298. }
  1299. written = 0;
  1300. do {
  1301. res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
  1302. if (res >= 0) {
  1303. written += res;
  1304. } else {
  1305. if (res != -EAGAIN) {
  1306. printk("io_thread - write failed, fd = %d, "
  1307. "err = %d\n", kernel_fd, -n);
  1308. }
  1309. }
  1310. if (written < n) {
  1311. ubd_write_poll(-1);
  1312. }
  1313. } while (written < n);
  1314. }
  1315. return 0;
  1316. }