genhd.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685
  1. /*
  2. * gendisk handling
  3. */
  4. #include <linux/config.h>
  5. #include <linux/module.h>
  6. #include <linux/fs.h>
  7. #include <linux/genhd.h>
  8. #include <linux/kernel.h>
  9. #include <linux/blkdev.h>
  10. #include <linux/init.h>
  11. #include <linux/spinlock.h>
  12. #include <linux/seq_file.h>
  13. #include <linux/slab.h>
  14. #include <linux/kmod.h>
  15. #include <linux/kobj_map.h>
  16. #define MAX_PROBE_HASH 255 /* random */
  17. static struct subsystem block_subsys;
  18. static DECLARE_MUTEX(block_subsys_sem);
  19. /*
  20. * Can be deleted altogether. Later.
  21. *
  22. */
  23. static struct blk_major_name {
  24. struct blk_major_name *next;
  25. int major;
  26. char name[16];
  27. } *major_names[MAX_PROBE_HASH];
  28. /* index in the above - for now: assume no multimajor ranges */
  29. static inline int major_to_index(int major)
  30. {
  31. return major % MAX_PROBE_HASH;
  32. }
  33. #ifdef CONFIG_PROC_FS
  34. /* get block device names in somewhat random order */
  35. int get_blkdev_list(char *p)
  36. {
  37. struct blk_major_name *n;
  38. int i, len;
  39. len = sprintf(p, "\nBlock devices:\n");
  40. down(&block_subsys_sem);
  41. for (i = 0; i < ARRAY_SIZE(major_names); i++) {
  42. for (n = major_names[i]; n; n = n->next)
  43. len += sprintf(p+len, "%3d %s\n",
  44. n->major, n->name);
  45. }
  46. up(&block_subsys_sem);
  47. return len;
  48. }
  49. #endif
  50. int register_blkdev(unsigned int major, const char *name)
  51. {
  52. struct blk_major_name **n, *p;
  53. int index, ret = 0;
  54. down(&block_subsys_sem);
  55. /* temporary */
  56. if (major == 0) {
  57. for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
  58. if (major_names[index] == NULL)
  59. break;
  60. }
  61. if (index == 0) {
  62. printk("register_blkdev: failed to get major for %s\n",
  63. name);
  64. ret = -EBUSY;
  65. goto out;
  66. }
  67. major = index;
  68. ret = major;
  69. }
  70. p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
  71. if (p == NULL) {
  72. ret = -ENOMEM;
  73. goto out;
  74. }
  75. p->major = major;
  76. strlcpy(p->name, name, sizeof(p->name));
  77. p->next = NULL;
  78. index = major_to_index(major);
  79. for (n = &major_names[index]; *n; n = &(*n)->next) {
  80. if ((*n)->major == major)
  81. break;
  82. }
  83. if (!*n)
  84. *n = p;
  85. else
  86. ret = -EBUSY;
  87. if (ret < 0) {
  88. printk("register_blkdev: cannot get major %d for %s\n",
  89. major, name);
  90. kfree(p);
  91. }
  92. out:
  93. up(&block_subsys_sem);
  94. return ret;
  95. }
  96. EXPORT_SYMBOL(register_blkdev);
  97. /* todo: make void - error printk here */
  98. int unregister_blkdev(unsigned int major, const char *name)
  99. {
  100. struct blk_major_name **n;
  101. struct blk_major_name *p = NULL;
  102. int index = major_to_index(major);
  103. int ret = 0;
  104. down(&block_subsys_sem);
  105. for (n = &major_names[index]; *n; n = &(*n)->next)
  106. if ((*n)->major == major)
  107. break;
  108. if (!*n || strcmp((*n)->name, name))
  109. ret = -EINVAL;
  110. else {
  111. p = *n;
  112. *n = p->next;
  113. }
  114. up(&block_subsys_sem);
  115. kfree(p);
  116. return ret;
  117. }
  118. EXPORT_SYMBOL(unregister_blkdev);
  119. static struct kobj_map *bdev_map;
  120. /*
  121. * Register device numbers dev..(dev+range-1)
  122. * range must be nonzero
  123. * The hash chain is sorted on range, so that subranges can override.
  124. */
  125. void blk_register_region(dev_t dev, unsigned long range, struct module *module,
  126. struct kobject *(*probe)(dev_t, int *, void *),
  127. int (*lock)(dev_t, void *), void *data)
  128. {
  129. kobj_map(bdev_map, dev, range, module, probe, lock, data);
  130. }
  131. EXPORT_SYMBOL(blk_register_region);
  132. void blk_unregister_region(dev_t dev, unsigned long range)
  133. {
  134. kobj_unmap(bdev_map, dev, range);
  135. }
  136. EXPORT_SYMBOL(blk_unregister_region);
  137. static struct kobject *exact_match(dev_t dev, int *part, void *data)
  138. {
  139. struct gendisk *p = data;
  140. return &p->kobj;
  141. }
  142. static int exact_lock(dev_t dev, void *data)
  143. {
  144. struct gendisk *p = data;
  145. if (!get_disk(p))
  146. return -1;
  147. return 0;
  148. }
  149. /**
  150. * add_disk - add partitioning information to kernel list
  151. * @disk: per-device partitioning information
  152. *
  153. * This function registers the partitioning information in @disk
  154. * with the kernel.
  155. */
  156. void add_disk(struct gendisk *disk)
  157. {
  158. disk->flags |= GENHD_FL_UP;
  159. blk_register_region(MKDEV(disk->major, disk->first_minor),
  160. disk->minors, NULL, exact_match, exact_lock, disk);
  161. register_disk(disk);
  162. blk_register_queue(disk);
  163. }
  164. EXPORT_SYMBOL(add_disk);
  165. EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
  166. void unlink_gendisk(struct gendisk *disk)
  167. {
  168. blk_unregister_queue(disk);
  169. blk_unregister_region(MKDEV(disk->major, disk->first_minor),
  170. disk->minors);
  171. }
  172. #define to_disk(obj) container_of(obj,struct gendisk,kobj)
  173. /**
  174. * get_gendisk - get partitioning information for a given device
  175. * @dev: device to get partitioning information for
  176. *
  177. * This function gets the structure containing partitioning
  178. * information for the given device @dev.
  179. */
  180. struct gendisk *get_gendisk(dev_t dev, int *part)
  181. {
  182. struct kobject *kobj = kobj_lookup(bdev_map, dev, part);
  183. return kobj ? to_disk(kobj) : NULL;
  184. }
  185. #ifdef CONFIG_PROC_FS
  186. /* iterator */
  187. static void *part_start(struct seq_file *part, loff_t *pos)
  188. {
  189. struct list_head *p;
  190. loff_t l = *pos;
  191. down(&block_subsys_sem);
  192. list_for_each(p, &block_subsys.kset.list)
  193. if (!l--)
  194. return list_entry(p, struct gendisk, kobj.entry);
  195. return NULL;
  196. }
  197. static void *part_next(struct seq_file *part, void *v, loff_t *pos)
  198. {
  199. struct list_head *p = ((struct gendisk *)v)->kobj.entry.next;
  200. ++*pos;
  201. return p==&block_subsys.kset.list ? NULL :
  202. list_entry(p, struct gendisk, kobj.entry);
  203. }
  204. static void part_stop(struct seq_file *part, void *v)
  205. {
  206. up(&block_subsys_sem);
  207. }
  208. static int show_partition(struct seq_file *part, void *v)
  209. {
  210. struct gendisk *sgp = v;
  211. int n;
  212. char buf[BDEVNAME_SIZE];
  213. if (&sgp->kobj.entry == block_subsys.kset.list.next)
  214. seq_puts(part, "major minor #blocks name\n\n");
  215. /* Don't show non-partitionable removeable devices or empty devices */
  216. if (!get_capacity(sgp) ||
  217. (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE)))
  218. return 0;
  219. if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
  220. return 0;
  221. /* show the full disk and all non-0 size partitions of it */
  222. seq_printf(part, "%4d %4d %10llu %s\n",
  223. sgp->major, sgp->first_minor,
  224. (unsigned long long)get_capacity(sgp) >> 1,
  225. disk_name(sgp, 0, buf));
  226. for (n = 0; n < sgp->minors - 1; n++) {
  227. if (!sgp->part[n])
  228. continue;
  229. if (sgp->part[n]->nr_sects == 0)
  230. continue;
  231. seq_printf(part, "%4d %4d %10llu %s\n",
  232. sgp->major, n + 1 + sgp->first_minor,
  233. (unsigned long long)sgp->part[n]->nr_sects >> 1 ,
  234. disk_name(sgp, n + 1, buf));
  235. }
  236. return 0;
  237. }
  238. struct seq_operations partitions_op = {
  239. .start =part_start,
  240. .next = part_next,
  241. .stop = part_stop,
  242. .show = show_partition
  243. };
  244. #endif
  245. extern int blk_dev_init(void);
  246. static struct kobject *base_probe(dev_t dev, int *part, void *data)
  247. {
  248. if (request_module("block-major-%d-%d", MAJOR(dev), MINOR(dev)) > 0)
  249. /* Make old-style 2.4 aliases work */
  250. request_module("block-major-%d", MAJOR(dev));
  251. return NULL;
  252. }
  253. static int __init genhd_device_init(void)
  254. {
  255. bdev_map = kobj_map_init(base_probe, &block_subsys_sem);
  256. blk_dev_init();
  257. subsystem_register(&block_subsys);
  258. return 0;
  259. }
  260. subsys_initcall(genhd_device_init);
  261. /*
  262. * kobject & sysfs bindings for block devices
  263. */
  264. static ssize_t disk_attr_show(struct kobject *kobj, struct attribute *attr,
  265. char *page)
  266. {
  267. struct gendisk *disk = to_disk(kobj);
  268. struct disk_attribute *disk_attr =
  269. container_of(attr,struct disk_attribute,attr);
  270. ssize_t ret = 0;
  271. if (disk_attr->show)
  272. ret = disk_attr->show(disk,page);
  273. return ret;
  274. }
  275. static struct sysfs_ops disk_sysfs_ops = {
  276. .show = &disk_attr_show,
  277. };
  278. static ssize_t disk_dev_read(struct gendisk * disk, char *page)
  279. {
  280. dev_t base = MKDEV(disk->major, disk->first_minor);
  281. return print_dev_t(page, base);
  282. }
  283. static ssize_t disk_range_read(struct gendisk * disk, char *page)
  284. {
  285. return sprintf(page, "%d\n", disk->minors);
  286. }
  287. static ssize_t disk_removable_read(struct gendisk * disk, char *page)
  288. {
  289. return sprintf(page, "%d\n",
  290. (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
  291. }
  292. static ssize_t disk_size_read(struct gendisk * disk, char *page)
  293. {
  294. return sprintf(page, "%llu\n", (unsigned long long)get_capacity(disk));
  295. }
  296. static ssize_t disk_stats_read(struct gendisk * disk, char *page)
  297. {
  298. preempt_disable();
  299. disk_round_stats(disk);
  300. preempt_enable();
  301. return sprintf(page,
  302. "%8u %8u %8llu %8u "
  303. "%8u %8u %8llu %8u "
  304. "%8u %8u %8u"
  305. "\n",
  306. disk_stat_read(disk, reads), disk_stat_read(disk, read_merges),
  307. (unsigned long long)disk_stat_read(disk, read_sectors),
  308. jiffies_to_msecs(disk_stat_read(disk, read_ticks)),
  309. disk_stat_read(disk, writes),
  310. disk_stat_read(disk, write_merges),
  311. (unsigned long long)disk_stat_read(disk, write_sectors),
  312. jiffies_to_msecs(disk_stat_read(disk, write_ticks)),
  313. disk->in_flight,
  314. jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
  315. jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
  316. }
  317. static struct disk_attribute disk_attr_dev = {
  318. .attr = {.name = "dev", .mode = S_IRUGO },
  319. .show = disk_dev_read
  320. };
  321. static struct disk_attribute disk_attr_range = {
  322. .attr = {.name = "range", .mode = S_IRUGO },
  323. .show = disk_range_read
  324. };
  325. static struct disk_attribute disk_attr_removable = {
  326. .attr = {.name = "removable", .mode = S_IRUGO },
  327. .show = disk_removable_read
  328. };
  329. static struct disk_attribute disk_attr_size = {
  330. .attr = {.name = "size", .mode = S_IRUGO },
  331. .show = disk_size_read
  332. };
  333. static struct disk_attribute disk_attr_stat = {
  334. .attr = {.name = "stat", .mode = S_IRUGO },
  335. .show = disk_stats_read
  336. };
  337. static struct attribute * default_attrs[] = {
  338. &disk_attr_dev.attr,
  339. &disk_attr_range.attr,
  340. &disk_attr_removable.attr,
  341. &disk_attr_size.attr,
  342. &disk_attr_stat.attr,
  343. NULL,
  344. };
  345. static void disk_release(struct kobject * kobj)
  346. {
  347. struct gendisk *disk = to_disk(kobj);
  348. kfree(disk->random);
  349. kfree(disk->part);
  350. free_disk_stats(disk);
  351. kfree(disk);
  352. }
  353. static struct kobj_type ktype_block = {
  354. .release = disk_release,
  355. .sysfs_ops = &disk_sysfs_ops,
  356. .default_attrs = default_attrs,
  357. };
  358. extern struct kobj_type ktype_part;
  359. static int block_hotplug_filter(struct kset *kset, struct kobject *kobj)
  360. {
  361. struct kobj_type *ktype = get_ktype(kobj);
  362. return ((ktype == &ktype_block) || (ktype == &ktype_part));
  363. }
  364. static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
  365. int num_envp, char *buffer, int buffer_size)
  366. {
  367. struct kobj_type *ktype = get_ktype(kobj);
  368. struct device *physdev;
  369. struct gendisk *disk;
  370. struct hd_struct *part;
  371. int length = 0;
  372. int i = 0;
  373. if (ktype == &ktype_block) {
  374. disk = container_of(kobj, struct gendisk, kobj);
  375. add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
  376. &length, "MINOR=%u", disk->first_minor);
  377. } else if (ktype == &ktype_part) {
  378. disk = container_of(kobj->parent, struct gendisk, kobj);
  379. part = container_of(kobj, struct hd_struct, kobj);
  380. add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
  381. &length, "MINOR=%u",
  382. disk->first_minor + part->partno);
  383. } else
  384. return 0;
  385. add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length,
  386. "MAJOR=%u", disk->major);
  387. /* add physical device, backing this device */
  388. physdev = disk->driverfs_dev;
  389. if (physdev) {
  390. char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL);
  391. add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
  392. &length, "PHYSDEVPATH=%s", path);
  393. kfree(path);
  394. if (physdev->bus)
  395. add_hotplug_env_var(envp, num_envp, &i,
  396. buffer, buffer_size, &length,
  397. "PHYSDEVBUS=%s",
  398. physdev->bus->name);
  399. if (physdev->driver)
  400. add_hotplug_env_var(envp, num_envp, &i,
  401. buffer, buffer_size, &length,
  402. "PHYSDEVDRIVER=%s",
  403. physdev->driver->name);
  404. }
  405. /* terminate, set to next free slot, shrink available space */
  406. envp[i] = NULL;
  407. envp = &envp[i];
  408. num_envp -= i;
  409. buffer = &buffer[length];
  410. buffer_size -= length;
  411. return 0;
  412. }
  413. static struct kset_hotplug_ops block_hotplug_ops = {
  414. .filter = block_hotplug_filter,
  415. .hotplug = block_hotplug,
  416. };
  417. /* declare block_subsys. */
  418. static decl_subsys(block, &ktype_block, &block_hotplug_ops);
  419. /*
  420. * aggregate disk stat collector. Uses the same stats that the sysfs
  421. * entries do, above, but makes them available through one seq_file.
  422. * Watching a few disks may be efficient through sysfs, but watching
  423. * all of them will be more efficient through this interface.
  424. *
  425. * The output looks suspiciously like /proc/partitions with a bunch of
  426. * extra fields.
  427. */
  428. /* iterator */
  429. static void *diskstats_start(struct seq_file *part, loff_t *pos)
  430. {
  431. loff_t k = *pos;
  432. struct list_head *p;
  433. down(&block_subsys_sem);
  434. list_for_each(p, &block_subsys.kset.list)
  435. if (!k--)
  436. return list_entry(p, struct gendisk, kobj.entry);
  437. return NULL;
  438. }
  439. static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos)
  440. {
  441. struct list_head *p = ((struct gendisk *)v)->kobj.entry.next;
  442. ++*pos;
  443. return p==&block_subsys.kset.list ? NULL :
  444. list_entry(p, struct gendisk, kobj.entry);
  445. }
  446. static void diskstats_stop(struct seq_file *part, void *v)
  447. {
  448. up(&block_subsys_sem);
  449. }
  450. static int diskstats_show(struct seq_file *s, void *v)
  451. {
  452. struct gendisk *gp = v;
  453. char buf[BDEVNAME_SIZE];
  454. int n = 0;
  455. /*
  456. if (&sgp->kobj.entry == block_subsys.kset.list.next)
  457. seq_puts(s, "major minor name"
  458. " rio rmerge rsect ruse wio wmerge "
  459. "wsect wuse running use aveq"
  460. "\n\n");
  461. */
  462. preempt_disable();
  463. disk_round_stats(gp);
  464. preempt_enable();
  465. seq_printf(s, "%4d %4d %s %u %u %llu %u %u %u %llu %u %u %u %u\n",
  466. gp->major, n + gp->first_minor, disk_name(gp, n, buf),
  467. disk_stat_read(gp, reads), disk_stat_read(gp, read_merges),
  468. (unsigned long long)disk_stat_read(gp, read_sectors),
  469. jiffies_to_msecs(disk_stat_read(gp, read_ticks)),
  470. disk_stat_read(gp, writes), disk_stat_read(gp, write_merges),
  471. (unsigned long long)disk_stat_read(gp, write_sectors),
  472. jiffies_to_msecs(disk_stat_read(gp, write_ticks)),
  473. gp->in_flight,
  474. jiffies_to_msecs(disk_stat_read(gp, io_ticks)),
  475. jiffies_to_msecs(disk_stat_read(gp, time_in_queue)));
  476. /* now show all non-0 size partitions of it */
  477. for (n = 0; n < gp->minors - 1; n++) {
  478. struct hd_struct *hd = gp->part[n];
  479. if (hd && hd->nr_sects)
  480. seq_printf(s, "%4d %4d %s %u %u %u %u\n",
  481. gp->major, n + gp->first_minor + 1,
  482. disk_name(gp, n + 1, buf),
  483. hd->reads, hd->read_sectors,
  484. hd->writes, hd->write_sectors);
  485. }
  486. return 0;
  487. }
  488. struct seq_operations diskstats_op = {
  489. .start = diskstats_start,
  490. .next = diskstats_next,
  491. .stop = diskstats_stop,
  492. .show = diskstats_show
  493. };
  494. struct gendisk *alloc_disk(int minors)
  495. {
  496. struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
  497. if (disk) {
  498. memset(disk, 0, sizeof(struct gendisk));
  499. if (!init_disk_stats(disk)) {
  500. kfree(disk);
  501. return NULL;
  502. }
  503. if (minors > 1) {
  504. int size = (minors - 1) * sizeof(struct hd_struct *);
  505. disk->part = kmalloc(size, GFP_KERNEL);
  506. if (!disk->part) {
  507. kfree(disk);
  508. return NULL;
  509. }
  510. memset(disk->part, 0, size);
  511. }
  512. disk->minors = minors;
  513. kobj_set_kset_s(disk,block_subsys);
  514. kobject_init(&disk->kobj);
  515. rand_initialize_disk(disk);
  516. }
  517. return disk;
  518. }
  519. EXPORT_SYMBOL(alloc_disk);
  520. struct kobject *get_disk(struct gendisk *disk)
  521. {
  522. struct module *owner;
  523. struct kobject *kobj;
  524. if (!disk->fops)
  525. return NULL;
  526. owner = disk->fops->owner;
  527. if (owner && !try_module_get(owner))
  528. return NULL;
  529. kobj = kobject_get(&disk->kobj);
  530. if (kobj == NULL) {
  531. module_put(owner);
  532. return NULL;
  533. }
  534. return kobj;
  535. }
  536. EXPORT_SYMBOL(get_disk);
  537. void put_disk(struct gendisk *disk)
  538. {
  539. if (disk)
  540. kobject_put(&disk->kobj);
  541. }
  542. EXPORT_SYMBOL(put_disk);
  543. void set_device_ro(struct block_device *bdev, int flag)
  544. {
  545. if (bdev->bd_contains != bdev)
  546. bdev->bd_part->policy = flag;
  547. else
  548. bdev->bd_disk->policy = flag;
  549. }
  550. EXPORT_SYMBOL(set_device_ro);
  551. void set_disk_ro(struct gendisk *disk, int flag)
  552. {
  553. int i;
  554. disk->policy = flag;
  555. for (i = 0; i < disk->minors - 1; i++)
  556. if (disk->part[i]) disk->part[i]->policy = flag;
  557. }
  558. EXPORT_SYMBOL(set_disk_ro);
  559. int bdev_read_only(struct block_device *bdev)
  560. {
  561. if (!bdev)
  562. return 0;
  563. else if (bdev->bd_contains != bdev)
  564. return bdev->bd_part->policy;
  565. else
  566. return bdev->bd_disk->policy;
  567. }
  568. EXPORT_SYMBOL(bdev_read_only);
  569. int invalidate_partition(struct gendisk *disk, int index)
  570. {
  571. int res = 0;
  572. struct block_device *bdev = bdget_disk(disk, index);
  573. if (bdev) {
  574. res = __invalidate_device(bdev, 1);
  575. bdput(bdev);
  576. }
  577. return res;
  578. }
  579. EXPORT_SYMBOL(invalidate_partition);