
Merge branch 'block' of git://brick.kernel.dk/data/git/linux-2.6-block

* 'block' of git://brick.kernel.dk/data/git/linux-2.6-block: (67 commits)
  [PATCH] blk_queue_start_tag() shared map race fix
  [PATCH] Update axboe@suse.de email address
  [PATCH] fix creating zero sized bio mempools in low memory system
  [PATCH] CONFIG_BLOCK: blk_congestion_wait() fix
  [PATCH] CONFIG_BLOCK internal.h cleanups
  [PATCH] BLOCK: Make USB storage depend on SCSI rather than selecting it [try #6]
  [PATCH] BLOCK: Make it possible to disable the block layer [try #6]
  [PATCH] BLOCK: Remove no-longer necessary linux/buffer_head.h inclusions [try #6]
  [PATCH] BLOCK: Remove no-longer necessary linux/mpage.h inclusions [try #6]
  [PATCH] BLOCK: Move the msdos device ioctl compat stuff to the msdos driver [try #6]
  [PATCH] BLOCK: Move the Ext3 device ioctl compat stuff to the Ext3 driver [try #6]
  [PATCH] BLOCK: Move the Ext2 device ioctl compat stuff to the Ext2 driver [try #6]
  [PATCH] BLOCK: Move the ReiserFS device ioctl compat stuff to the ReiserFS driver [try #6]
  [PATCH] BLOCK: Move common FS-specific ioctls to linux/fs.h [try #6]
  [PATCH] BLOCK: Move the loop device ioctl compat stuff to the loop driver [try #6]
  [PATCH] BLOCK: Move __invalidate_device() to block_dev.c [try #6]
  [PATCH] BLOCK: Dissociate generic_writepages() from mpage stuff [try #6]
  [PATCH] BLOCK: Remove dependence on existence of blockdev_superblock [try #6]
  [PATCH] BLOCK: Move extern declarations out of fs/*.c into header files [try #6]
  [PATCH] BLOCK: Don't call block_sync_page() from AFS [try #6]
  ...
Linus Torvalds, 19 years ago
commit 56f29d7fe4
100 files changed, 1620 insertions(+), 2345 deletions(-)
MAINTAINERS  +4 -4
arch/mips/kernel/signal_n32.c  +2 -2
arch/um/drivers/ubd_kern.c  +0 -2
block/Kconfig  +20 -0
block/Kconfig.iosched  +3 -0
block/Makefile  +1 -1
block/as-iosched.c  +161 -511
block/blktrace.c  +9 -17
block/cfq-iosched.c  +281 -584
block/deadline-iosched.c  +74 -390
block/elevator.c  +263 -52
block/ll_rw_blk.c  +103 -133
block/noop-iosched.c  +1 -1
block/scsi_ioctl.c  +3 -3
drivers/block/DAC960.c  +1 -1
drivers/block/Kconfig  +4 -0
drivers/block/cciss.c  +0 -1
drivers/block/cpqarray.c  +0 -1
drivers/block/floppy.c  +2 -2
drivers/block/loop.c  +160 -0
drivers/block/nbd.c  +4 -4
drivers/block/paride/pd.c  +2 -4
drivers/block/pktcdvd.c  +4 -4
drivers/block/swim3.c  +2 -2
drivers/block/swim_iop.c  +2 -2
drivers/block/xd.c  +1 -1
drivers/cdrom/Kconfig  +1 -1
drivers/cdrom/cdrom.c  +1 -1
drivers/cdrom/cdu31a.c  +3 -1
drivers/char/Kconfig  +1 -0
drivers/char/random.c  +4 -0
drivers/fc4/fc.c  +0 -1
drivers/ide/Kconfig  +4 -0
drivers/ide/ide-cd.c  +35 -34
drivers/ide/ide-disk.c  +3 -2
drivers/ide/ide-dma.c  +1 -1
drivers/ide/ide-floppy.c  +8 -9
drivers/ide/ide-io.c  +24 -26
drivers/ide/ide-lib.c  +3 -2
drivers/ide/ide-tape.c  +7 -7
drivers/ide/ide-taskfile.c  +4 -4
drivers/ide/ide.c  +4 -4
drivers/ide/legacy/hd.c  +1 -1
drivers/md/Kconfig  +3 -0
drivers/md/dm-emc.c  +2 -1
drivers/message/i2o/Kconfig  +1 -1
drivers/message/i2o/i2o_block.c  +4 -3
drivers/mmc/Kconfig  +1 -1
drivers/mmc/Makefile  +2 -1
drivers/mmc/mmc_queue.c  +3 -3
drivers/mtd/Kconfig  +6 -6
drivers/mtd/devices/Kconfig  +1 -1
drivers/mtd/mtd_blkdevs.c  +1 -1
drivers/s390/block/Kconfig  +1 -1
drivers/s390/block/dasd_diag.c  +1 -1
drivers/s390/block/dasd_eckd.c  +1 -1
drivers/s390/block/dasd_fba.c  +1 -1
drivers/scsi/Kconfig  +2 -0
drivers/scsi/aic7xxx_old.c  +2 -2
drivers/scsi/ide-scsi.c  +8 -8
drivers/scsi/pluto.c  +3 -3
drivers/scsi/scsi.c  +2 -11
drivers/scsi/scsi_lib.c  +19 -18
drivers/scsi/sd.c  +2 -3
drivers/scsi/sun3_NCR5380.c  +1 -1
drivers/scsi/sun3_scsi.c  +1 -1
drivers/scsi/sun3_scsi_vme.c  +1 -1
drivers/usb/storage/Kconfig  +2 -3
fs/Kconfig  +24 -7
fs/Makefile  +10 -4
fs/afs/file.c  +0 -2
fs/binfmt_elf.c  +0 -1
fs/bio.c  +2 -2
fs/block_dev.c  +23 -0
fs/buffer.c  +0 -174
fs/char_dev.c  +1 -0
fs/cifs/file.c  +0 -1
fs/cifs/inode.c  +0 -1
fs/cifs/ioctl.c  +3 -4
fs/compat.c  +3 -7
fs/compat_ioctl.c  +18 -190
fs/dcache.c  +1 -3
fs/ext2/dir.c  +3 -0
fs/ext2/ext2.h  +1 -0
fs/ext2/file.c  +6 -0
fs/ext2/ioctl.c  +32 -0
fs/ext3/dir.c  +3 -0
fs/ext3/file.c  +3 -0
fs/ext3/inode.c  +3 -2
fs/ext3/ioctl.c  +54 -1
fs/ext3/namei.c  +2 -1
fs/fat/dir.c  +56 -0
fs/fs-writeback.c  +4 -5
fs/hfsplus/hfsplus_fs.h  +2 -6
fs/hfsplus/ioctl.c  +8 -9
fs/inode.c  +0 -21
fs/internal.h  +55 -0
fs/ioprio.c  +12 -7
fs/jfs/ioctl.c  +7 -8
fs/mpage.c  +2 -0

+ 4 - 4
MAINTAINERS

@@ -501,7 +501,7 @@ S:	Maintained
 
 BLOCK LAYER
 P:	Jens Axboe
-M:	axboe@suse.de
+M:	axboe@kernel.dk
 L:	linux-kernel@vger.kernel.org
 T:	git kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
 S:	Maintained
@@ -1380,7 +1380,7 @@ S:	Maintained
 
 IDE/ATAPI CDROM DRIVER
 P:	Jens Axboe
-M:	axboe@suse.de
+M:	axboe@kernel.dk
 L:	linux-kernel@vger.kernel.org
 W:	http://www.kernel.dk
 S:	Maintained
@@ -2531,7 +2531,7 @@ S:	Maintained
 
 SCSI CDROM DRIVER
 P:	Jens Axboe
-M:	axboe@suse.de
+M:	axboe@kernel.dk
 L:	linux-scsi@vger.kernel.org
 W:	http://www.kernel.dk
 S:	Maintained
@@ -2976,7 +2976,7 @@ S:	Maintained
 
 UNIFORM CDROM DRIVER
 P:	Jens Axboe
-M:	axboe@suse.de
+M:	axboe@kernel.dk
 L:	linux-kernel@vger.kernel.org
 W:	http://www.kernel.dk
 S:	Maintained

+ 2 - 2
arch/mips/kernel/signal_n32.c

@@ -42,6 +42,8 @@
 
 #include "signal-common.h"
 
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
 /*
  * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
  */
@@ -81,8 +83,6 @@ struct rt_sigframe_n32 {
 #endif
 };
 
-extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
-
 save_static_function(sysn32_rt_sigsuspend);
 __attribute_used__ noinline static int
 _sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs)

+ 0 - 2
arch/um/drivers/ubd_kern.c

@@ -981,8 +981,6 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req)
 	__u64 offset;
 	int len;
 
-	if(req->rq_status == RQ_INACTIVE) return(1);
-
 	/* This should be impossible now */
 	if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
 		printk("Write attempted on readonly ubd device %s\n",

+ 20 - 0
block/Kconfig

@@ -1,6 +1,24 @@
 #
 # Block layer core configuration
 #
+config BLOCK
+       bool "Enable the block layer"
+       default y
+       help
+	 This permits the block layer to be removed from the kernel if it's not
+	 needed (on some embedded devices for example).  If this option is
+	 disabled, then blockdev files will become unusable and some
+	 filesystems (such as ext3) will become unavailable.
+
+	 This option will also disable SCSI character devices and USB storage
+	 since they make use of various block layer definitions and
+	 facilities.
+
+	 Say Y here unless you know you really don't want to mount disks and
+	 suchlike.
+
+if BLOCK
+
 #XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64
 #for instance.
 config LBD
@@ -33,4 +51,6 @@ config LSF
 
 	  If unsure, say Y.
 
+endif
+
 source block/Kconfig.iosched
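
On the C side, CONFIG_BLOCK acts as an ordinary compile-time guard; this is how the SCSI character-device and USB-storage users mentioned in the help text drop out of the build. A minimal sketch of the guard pattern, with a hypothetical helper that is not taken from this commit:

    #include <linux/errno.h>

    /* Hypothetical helper: report whether block-layer-backed features exist. */
    static int example_need_block_layer(void)
    {
    #ifdef CONFIG_BLOCK
    	return 0;		/* block layer built in */
    #else
    	return -ENOSYS;		/* CONFIG_BLOCK=n: feature compiled out */
    #endif
    }

The [try #6] patches in the series above do this kind of work in bulk, moving ioctl compat code and buffer_head users out of generic paths so everything behind CONFIG_BLOCK can be compiled out cleanly.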

+ 3 - 0
block/Kconfig.iosched

@@ -1,3 +1,4 @@
+if BLOCK
 
 menu "IO Schedulers"
 
@@ -67,3 +68,5 @@ config DEFAULT_IOSCHED
 	default "noop" if DEFAULT_NOOP
 
 endmenu
+
+endif

+ 1 - 1
block/Makefile

@@ -2,7 +2,7 @@
 # Makefile for the kernel block layer
 #
 
-obj-y	:= elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
+obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_AS)	+= as-iosched.o

+ 161 - 511
block/as-iosched.c

@@ -1,7 +1,7 @@
 /*
  *  Anticipatory & deadline i/o scheduler.
  *
- *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
+ *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
  *                     Nick Piggin <nickpiggin@yahoo.com.au>
  *
  */
@@ -14,7 +14,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/hash.h>
 #include <linux/rbtree.h>
 #include <linux/interrupt.h>
 
@@ -93,9 +92,8 @@ struct as_data {
 	struct rb_root sort_list[2];
 	struct list_head fifo_list[2];
 
-	struct as_rq *next_arq[2];	/* next in sort order */
+	struct request *next_rq[2];	/* next in sort order */
 	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
-	struct hlist_head *hash;	/* request hash */
 
 	unsigned long exit_prob;	/* probability a task will exit while
 					   being waited on */
@@ -115,7 +113,6 @@ struct as_data {
 	int write_batch_count;		/* max # of reqs in a write batch */
 	int current_write_count;	/* how many requests left this batch */
 	int write_batch_idled;		/* has the write batch gone idle? */
-	mempool_t *arq_pool;
 
 	enum anticipation_status antic_status;
 	unsigned long antic_start;	/* jiffies: when it started */
@@ -133,8 +130,6 @@ struct as_data {
 	unsigned long antic_expire;
 };
 
-#define list_entry_fifo(ptr)	list_entry((ptr), struct as_rq, fifo)
-
 /*
  * per-request data.
  */
@@ -150,40 +145,14 @@ enum arq_state {
 	AS_RQ_POSTSCHED,	/* when they shouldn't be */
 };
 
-struct as_rq {
-	/*
-	 * rbtree index, key is the starting offset
-	 */
-	struct rb_node rb_node;
-	sector_t rb_key;
-
-	struct request *request;
-
-	struct io_context *io_context;	/* The submitting task */
-
-	/*
-	 * request hash, key is the ending offset (for back merge lookup)
-	 */
-	struct hlist_node hash;
-
-	/*
-	 * expire fifo
-	 */
-	struct list_head fifo;
-	unsigned long expires;
+#define RQ_IOC(rq)	((struct io_context *) (rq)->elevator_private)
+#define RQ_STATE(rq)	((enum arq_state)(rq)->elevator_private2)
+#define RQ_SET_STATE(rq, state)	((rq)->elevator_private2 = (void *) state)
 
-	unsigned int is_sync;
-	enum arq_state state;
-};
-
-#define RQ_DATA(rq)	((struct as_rq *) (rq)->elevator_private)
-
-static kmem_cache_t *arq_pool;
-
-static atomic_t ioc_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(unsigned long, ioc_count);
 static struct completion *ioc_gone;
 
-static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq);
+static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
 static void as_antic_stop(struct as_data *ad);
 
 /*
@@ -194,7 +163,8 @@ static void as_antic_stop(struct as_data *ad);
 static void free_as_io_context(struct as_io_context *aic)
 {
 	kfree(aic);
-	if (atomic_dec_and_test(&ioc_count) && ioc_gone)
+	elv_ioc_count_dec(ioc_count);
+	if (ioc_gone && !elv_ioc_count_read(ioc_count))
 		complete(ioc_gone);
 }
 
@@ -230,7 +200,7 @@ static struct as_io_context *alloc_as_io_context(void)
 		ret->seek_total = 0;
 		ret->seek_samples = 0;
 		ret->seek_mean = 0;
-		atomic_inc(&ioc_count);
+		elv_ioc_count_inc(ioc_count);
 	}
 
 	return ret;
@@ -240,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void)
  * If the current task has no AS IO context then create one and initialise it.
  * Then take a ref on the task's io context and return it.
  */
-static struct io_context *as_get_io_context(void)
+static struct io_context *as_get_io_context(int node)
 {
-	struct io_context *ioc = get_io_context(GFP_ATOMIC);
+	struct io_context *ioc = get_io_context(GFP_ATOMIC, node);
 	if (ioc && !ioc->aic) {
 		ioc->aic = alloc_as_io_context();
 		if (!ioc->aic) {
@@ -253,194 +223,43 @@ static struct io_context *as_get_io_context(void)
 	return ioc;
 }
 
-static void as_put_io_context(struct as_rq *arq)
+static void as_put_io_context(struct request *rq)
 {
 	struct as_io_context *aic;
 
-	if (unlikely(!arq->io_context))
+	if (unlikely(!RQ_IOC(rq)))
 		return;
 
-	aic = arq->io_context->aic;
+	aic = RQ_IOC(rq)->aic;
 
-	if (arq->is_sync == REQ_SYNC && aic) {
+	if (rq_is_sync(rq) && aic) {
 		spin_lock(&aic->lock);
 		set_bit(AS_TASK_IORUNNING, &aic->state);
 		aic->last_end_request = jiffies;
 		spin_unlock(&aic->lock);
 	}
 
-	put_io_context(arq->io_context);
-}
-
-/*
- * the back merge hash support functions
- */
-static const int as_hash_shift = 6;
-#define AS_HASH_BLOCK(sec)	((sec) >> 3)
-#define AS_HASH_FN(sec)		(hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
-#define AS_HASH_ENTRIES		(1 << as_hash_shift)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-
-static inline void __as_del_arq_hash(struct as_rq *arq)
-{
-	hlist_del_init(&arq->hash);
-}
-
-static inline void as_del_arq_hash(struct as_rq *arq)
-{
-	if (!hlist_unhashed(&arq->hash))
-		__as_del_arq_hash(arq);
-}
-
-static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
-	struct request *rq = arq->request;
-
-	BUG_ON(!hlist_unhashed(&arq->hash));
-
-	hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
-	struct request *rq = arq->request;
-	struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
-
-	if (hlist_unhashed(&arq->hash)) {
-		WARN_ON(1);
-		return;
-	}
-
-	if (&arq->hash != head->first) {
-		hlist_del(&arq->hash);
-		hlist_add_head(&arq->hash, head);
-	}
-}
-
-static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
-{
-	struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
-	struct hlist_node *entry, *next;
-	struct as_rq *arq;
-
-	hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) {
-		struct request *__rq = arq->request;
-
-		BUG_ON(hlist_unhashed(&arq->hash));
-
-		if (!rq_mergeable(__rq)) {
-			as_del_arq_hash(arq);
-			continue;
-		}
-
-		if (rq_hash_key(__rq) == offset)
-			return __rq;
-	}
-
-	return NULL;
+	put_io_context(RQ_IOC(rq));
 }
 
 /*
  * rb tree support functions
  */
-#define rb_entry_arq(node)	rb_entry((node), struct as_rq, rb_node)
-#define ARQ_RB_ROOT(ad, arq)	(&(ad)->sort_list[(arq)->is_sync])
-#define rq_rb_key(rq)		(rq)->sector
-
-/*
- * as_find_first_arq finds the first (lowest sector numbered) request
- * for the specified data_dir. Used to sweep back to the start of the disk
- * (1-way elevator) after we process the last (highest sector) request.
- */
-static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
-{
-	struct rb_node *n = ad->sort_list[data_dir].rb_node;
-
-	if (n == NULL)
-		return NULL;
-
-	for (;;) {
-		if (n->rb_left == NULL)
-			return rb_entry_arq(n);
-
-		n = n->rb_left;
-	}
-}
-
-/*
- * Add the request to the rb tree if it is unique.  If there is an alias (an
- * existing request against the same sector), which can happen when using
- * direct IO, then return the alias.
- */
-static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
-{
-	struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
-	struct rb_node *parent = NULL;
-	struct as_rq *__arq;
-	struct request *rq = arq->request;
-
-	arq->rb_key = rq_rb_key(rq);
-
-	while (*p) {
-		parent = *p;
-		__arq = rb_entry_arq(parent);
-
-		if (arq->rb_key < __arq->rb_key)
-			p = &(*p)->rb_left;
-		else if (arq->rb_key > __arq->rb_key)
-			p = &(*p)->rb_right;
-		else
-			return __arq;
-	}
-
-	rb_link_node(&arq->rb_node, parent, p);
-	rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
-
-	return NULL;
-}
+#define RQ_RB_ROOT(ad, rq)	(&(ad)->sort_list[rq_is_sync((rq))])
 
-static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
+static void as_add_rq_rb(struct as_data *ad, struct request *rq)
 {
-	struct as_rq *alias;
+	struct request *alias;
 
-	while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) {
+	while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) {
 		as_move_to_dispatch(ad, alias);
 		as_antic_stop(ad);
 	}
 }
 
-static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
-{
-	if (!RB_EMPTY_NODE(&arq->rb_node)) {
-		WARN_ON(1);
-		return;
-	}
-
-	rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
-	RB_CLEAR_NODE(&arq->rb_node);
-}
-
-static struct request *
-as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
+static inline void as_del_rq_rb(struct as_data *ad, struct request *rq)
 {
-	struct rb_node *n = ad->sort_list[data_dir].rb_node;
-	struct as_rq *arq;
-
-	while (n) {
-		arq = rb_entry_arq(n);
-
-		if (sector < arq->rb_key)
-			n = n->rb_left;
-		else if (sector > arq->rb_key)
-			n = n->rb_right;
-		else
-			return arq->request;
-	}
-
-	return NULL;
+	elv_rb_del(RQ_RB_ROOT(ad, rq), rq);
 }
 
 /*
@@ -458,26 +277,26 @@ as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
 * as_choose_req selects the preferred one of two requests of the same data_dir
 * ignoring time - eg. timeouts, which is the job of as_dispatch_request
 */
-static struct as_rq *
-as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
+static struct request *
+as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2)
 {
 	int data_dir;
 	sector_t last, s1, s2, d1, d2;
 	int r1_wrap=0, r2_wrap=0;	/* requests are behind the disk head */
 	const sector_t maxback = MAXBACK;
 
-	if (arq1 == NULL || arq1 == arq2)
-		return arq2;
-	if (arq2 == NULL)
-		return arq1;
+	if (rq1 == NULL || rq1 == rq2)
+		return rq2;
+	if (rq2 == NULL)
+		return rq1;
 
-	data_dir = arq1->is_sync;
+	data_dir = rq_is_sync(rq1);
 
 	last = ad->last_sector[data_dir];
-	s1 = arq1->request->sector;
-	s2 = arq2->request->sector;
+	s1 = rq1->sector;
+	s2 = rq2->sector;
 
-	BUG_ON(data_dir != arq2->is_sync);
+	BUG_ON(data_dir != rq_is_sync(rq2));
 
 	/*
 	 * Strict one way elevator _except_ in the case where we allow
@@ -504,61 +323,58 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
 
 	/* Found required data */
 	if (!r1_wrap && r2_wrap)
-		return arq1;
+		return rq1;
 	else if (!r2_wrap && r1_wrap)
-		return arq2;
+		return rq2;
 	else if (r1_wrap && r2_wrap) {
 		/* both behind the head */
 		if (s1 <= s2)
-			return arq1;
+			return rq1;
 		else
-			return arq2;
+			return rq2;
 	}
 
 	/* Both requests in front of the head */
 	if (d1 < d2)
-		return arq1;
+		return rq1;
 	else if (d2 < d1)
-		return arq2;
+		return rq2;
 	else {
 		if (s1 >= s2)
-			return arq1;
+			return rq1;
 		else
-			return arq2;
+			return rq2;
 	}
 }
 
 /*
- * as_find_next_arq finds the next request after @prev in elevator order.
+ * as_find_next_rq finds the next request after @prev in elevator order.
 * this with as_choose_req form the basis for how the scheduler chooses
 * what request to process next. Anticipation works on top of this.
 */
-static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last)
+static struct request *
+as_find_next_rq(struct as_data *ad, struct request *last)
 {
-	const int data_dir = last->is_sync;
-	struct as_rq *ret;
 	struct rb_node *rbnext = rb_next(&last->rb_node);
 	struct rb_node *rbprev = rb_prev(&last->rb_node);
-	struct as_rq *arq_next, *arq_prev;
+	struct request *next = NULL, *prev = NULL;
 
-	BUG_ON(!RB_EMPTY_NODE(&last->rb_node));
+	BUG_ON(RB_EMPTY_NODE(&last->rb_node));
 
 	if (rbprev)
-		arq_prev = rb_entry_arq(rbprev);
-	else
-		arq_prev = NULL;
+		prev = rb_entry_rq(rbprev);
 
 	if (rbnext)
-		arq_next = rb_entry_arq(rbnext);
+		next = rb_entry_rq(rbnext);
 	else {
-		arq_next = as_find_first_arq(ad, data_dir);
-		if (arq_next == last)
-			arq_next = NULL;
-	}
+		const int data_dir = rq_is_sync(last);
 
-	ret = as_choose_req(ad,	arq_next, arq_prev);
+		rbnext = rb_first(&ad->sort_list[data_dir]);
+		if (rbnext && rbnext != &last->rb_node)
+			next = rb_entry_rq(rbnext);
+	}
 
-	return ret;
+	return as_choose_req(ad, next, prev);
 }
 
 /*
@@ -712,8 +528,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
 static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
 				struct request *rq)
 {
-	struct as_rq *arq = RQ_DATA(rq);
-	int data_dir = arq->is_sync;
+	int data_dir = rq_is_sync(rq);
 	unsigned long thinktime = 0;
 	sector_t seek_dist;
 
@@ -752,11 +567,11 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
 * previous one issued.
 */
 static int as_close_req(struct as_data *ad, struct as_io_context *aic,
-				struct as_rq *arq)
+			struct request *rq)
 {
 	unsigned long delay;	/* milliseconds */
 	sector_t last = ad->last_sector[ad->batch_data_dir];
-	sector_t next = arq->request->sector;
+	sector_t next = rq->sector;
 	sector_t delta; /* acceptable close offset (in sectors) */
 	sector_t s;
 
@@ -813,7 +628,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic,
 *
 * If this task has queued some other IO, do not enter enticipation.
 */
-static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
+static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
 {
 	struct io_context *ioc;
 	struct as_io_context *aic;
@@ -821,7 +636,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
 	ioc = ad->io_context;
 	BUG_ON(!ioc);
 
-	if (arq && ioc == arq->io_context) {
+	if (rq && ioc == RQ_IOC(rq)) {
 		/* request from same process */
 		return 1;
 	}
@@ -848,7 +663,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
 		return 1;
 	}
 
-	if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) {
+	if (rq && rq_is_sync(rq) && as_close_req(ad, aic, rq)) {
 		/*
 		 * Found a close request that is not one of ours.
 		 *
@@ -864,7 +679,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
 			ad->exit_no_coop = (7*ad->exit_no_coop)/8;
 		}
 
-		as_update_iohist(ad, aic, arq->request);
+		as_update_iohist(ad, aic, rq);
 		return 1;
 	}
 
@@ -891,10 +706,10 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
 }
 
 /*
- * as_can_anticipate indicates whether we should either run arq
+ * as_can_anticipate indicates whether we should either run rq
 * or keep anticipating a better request.
 */
-static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
+static int as_can_anticipate(struct as_data *ad, struct request *rq)
 {
 	if (!ad->io_context)
 		/*
@@ -908,7 +723,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
 		 */
 		return 0;
 
-	if (as_can_break_anticipation(ad, arq))
+	if (as_can_break_anticipation(ad, rq))
 		/*
 		 * This request is a good candidate. Don't keep anticipating,
 		 * run it.
@@ -926,16 +741,16 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
 }
 
 /*
- * as_update_arq must be called whenever a request (arq) is added to
+ * as_update_rq must be called whenever a request (rq) is added to
 * the sort_list. This function keeps caches up to date, and checks if the
 * request might be one we are "anticipating"
 */
-static void as_update_arq(struct as_data *ad, struct as_rq *arq)
+static void as_update_rq(struct as_data *ad, struct request *rq)
 {
-	const int data_dir = arq->is_sync;
+	const int data_dir = rq_is_sync(rq);
 
-	/* keep the next_arq cache up to date */
-	ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);
+	/* keep the next_rq cache up to date */
+	ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]);
 
 	/*
 	 * have we been anticipating this request?
@@ -944,7 +759,7 @@ static void as_update_arq(struct as_data *ad, struct as_rq *arq)
 	 */
 	if (ad->antic_status == ANTIC_WAIT_REQ
 			|| ad->antic_status == ANTIC_WAIT_NEXT) {
-		if (as_can_break_anticipation(ad, arq))
+		if (as_can_break_anticipation(ad, rq))
 			as_antic_stop(ad);
 	}
 }
@@ -984,12 +799,11 @@ static void update_write_batch(struct as_data *ad)
 static void as_completed_request(request_queue_t *q, struct request *rq)
 {
 	struct as_data *ad = q->elevator->elevator_data;
-	struct as_rq *arq = RQ_DATA(rq);
 
 	WARN_ON(!list_empty(&rq->queuelist));
 
-	if (arq->state != AS_RQ_REMOVED) {
-		printk("arq->state %d\n", arq->state);
+	if (RQ_STATE(rq) != AS_RQ_REMOVED) {
+		printk("rq->state %d\n", RQ_STATE(rq));
 		WARN_ON(1);
 		goto out;
 	}
@@ -1009,14 +823,14 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 	 * actually serviced. This should help devices with big TCQ windows
 	 * and writeback caches
 	 */
-	if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
+	if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
 		update_write_batch(ad);
 		ad->current_batch_expires = jiffies +
 				ad->batch_expire[REQ_SYNC];
 		ad->new_batch = 0;
 	}
 
-	if (ad->io_context == arq->io_context && ad->io_context) {
+	if (ad->io_context == RQ_IOC(rq) && ad->io_context) {
 		ad->antic_start = jiffies;
 		ad->ioc_finished = 1;
 		if (ad->antic_status == ANTIC_WAIT_REQ) {
@@ -1028,9 +842,9 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 		}
 	}
 
-	as_put_io_context(arq);
+	as_put_io_context(rq);
 out:
-	arq->state = AS_RQ_POSTSCHED;
+	RQ_SET_STATE(rq, AS_RQ_POSTSCHED);
 }
 
 /*
@@ -1041,27 +855,27 @@ out:
 */
 static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 {
-	struct as_rq *arq = RQ_DATA(rq);
-	const int data_dir = arq->is_sync;
+	const int data_dir = rq_is_sync(rq);
 	struct as_data *ad = q->elevator->elevator_data;
+	struct io_context *ioc;
 
-	WARN_ON(arq->state != AS_RQ_QUEUED);
+	WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
 
-	if (arq->io_context && arq->io_context->aic) {
-		BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued));
-		atomic_dec(&arq->io_context->aic->nr_queued);
+	ioc = RQ_IOC(rq);
+	if (ioc && ioc->aic) {
+		BUG_ON(!atomic_read(&ioc->aic->nr_queued));
+		atomic_dec(&ioc->aic->nr_queued);
 	}
 
 	/*
-	 * Update the "next_arq" cache if we are about to remove its
+	 * Update the "next_rq" cache if we are about to remove its
 	 * entry
 	 */
-	if (ad->next_arq[data_dir] == arq)
-		ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
+	if (ad->next_rq[data_dir] == rq)
+		ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
 
-	list_del_init(&arq->fifo);
-	as_del_arq_hash(arq);
-	as_del_arq_rb(ad, arq);
+	rq_fifo_clear(rq);
+	as_del_rq_rb(ad, rq);
 }
 
 /*
@@ -1074,7 +888,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 */
 static int as_fifo_expired(struct as_data *ad, int adir)
 {
-	struct as_rq *arq;
+	struct request *rq;
 	long delta_jif;
 
 	delta_jif = jiffies - ad->last_check_fifo[adir];
@@ -1088,9 +902,9 @@ static int as_fifo_expired(struct as_data *ad, int adir)
 	if (list_empty(&ad->fifo_list[adir]))
 		return 0;
 
-	arq = list_entry_fifo(ad->fifo_list[adir].next);
+	rq = rq_entry_fifo(ad->fifo_list[adir].next);
 
-	return time_after(jiffies, arq->expires);
+	return time_after(jiffies, rq_fifo_time(rq));
 }
 
 /*
@@ -1113,25 +927,25 @@ static inline int as_batch_expired(struct as_data *ad)
 /*
 * move an entry to dispatch queue
 */
-static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
+static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
 {
-	struct request *rq = arq->request;
-	const int data_dir = arq->is_sync;
+	const int data_dir = rq_is_sync(rq);
 
-	BUG_ON(!RB_EMPTY_NODE(&arq->rb_node));
+	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
 
 	as_antic_stop(ad);
 	ad->antic_status = ANTIC_OFF;
 
 	/*
 	 * This has to be set in order to be correctly updated by
-	 * as_find_next_arq
+	 * as_find_next_rq
 	 */
 	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
 
 	if (data_dir == REQ_SYNC) {
+		struct io_context *ioc = RQ_IOC(rq);
 		/* In case we have to anticipate after this */
-		copy_io_context(&ad->io_context, &arq->io_context);
+		copy_io_context(&ad->io_context, &ioc);
 	} else {
 		if (ad->io_context) {
 			put_io_context(ad->io_context);
@@ -1143,19 +957,19 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	}
 	ad->ioc_finished = 0;
 
-	ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
+	ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
 
 	/*
 	 * take it off the sort and fifo list, add to dispatch queue
 	 */
 	as_remove_queued_request(ad->q, rq);
-	WARN_ON(arq->state != AS_RQ_QUEUED);
+	WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
 
 	elv_dispatch_sort(ad->q, rq);
 
-	arq->state = AS_RQ_DISPATCHED;
-	if (arq->io_context && arq->io_context->aic)
-		atomic_inc(&arq->io_context->aic->nr_dispatched);
+	RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
+	if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
+		atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
 	ad->nr_dispatched++;
 }
 
@@ -1167,9 +981,9 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 static int as_dispatch_request(request_queue_t *q, int force)
 {
 	struct as_data *ad = q->elevator->elevator_data;
-	struct as_rq *arq;
 	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
 	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
+	struct request *rq;
 
 	if (unlikely(force)) {
 		/*
@@ -1185,14 +999,14 @@ static int as_dispatch_request(request_queue_t *q, int force)
 		ad->changed_batch = 0;
 		ad->new_batch = 0;
 
-		while (ad->next_arq[REQ_SYNC]) {
-			as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
+		while (ad->next_rq[REQ_SYNC]) {
+			as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
 			dispatched++;
 		}
 		ad->last_check_fifo[REQ_SYNC] = jiffies;
 
-		while (ad->next_arq[REQ_ASYNC]) {
-			as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
+		while (ad->next_rq[REQ_ASYNC]) {
+			as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
 			dispatched++;
 		}
 		ad->last_check_fifo[REQ_ASYNC] = jiffies;
@@ -1216,19 +1030,19 @@ static int as_dispatch_request(request_queue_t *q, int force)
 		/*
 		 * batch is still running or no reads or no writes
 		 */
-		arq = ad->next_arq[ad->batch_data_dir];
+		rq = ad->next_rq[ad->batch_data_dir];
 
 		if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
 			if (as_fifo_expired(ad, REQ_SYNC))
 				goto fifo_expired;
 
-			if (as_can_anticipate(ad, arq)) {
+			if (as_can_anticipate(ad, rq)) {
 				as_antic_waitreq(ad);
 				return 0;
 			}
 		}
 
-		if (arq) {
+		if (rq) {
 			/* we have a "next request" */
 			if (reads && !writes)
 				ad->current_batch_expires =
@@ -1256,7 +1070,7 @@ static int as_dispatch_request(request_queue_t *q, int force)
 			ad->changed_batch = 1;
 		}
 		ad->batch_data_dir = REQ_SYNC;
-		arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
+		rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
 		ad->last_check_fifo[ad->batch_data_dir] = jiffies;
 		goto dispatch_request;
 	}
@@ -1282,7 +1096,7 @@ dispatch_writes:
 		ad->batch_data_dir = REQ_ASYNC;
 		ad->current_write_count = ad->write_batch_count;
 		ad->write_batch_idled = 0;
-		arq = ad->next_arq[ad->batch_data_dir];
+		rq = ad->next_rq[ad->batch_data_dir];
 		goto dispatch_request;
 	}
 
@@ -1296,8 +1110,7 @@ dispatch_request:
 
 	if (as_fifo_expired(ad, ad->batch_data_dir)) {
 fifo_expired:
-		arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
-		BUG_ON(arq == NULL);
+		rq = rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
 	}
 
 	if (ad->changed_batch) {
@@ -1316,70 +1129,58 @@ fifo_expired:
 	}
 
 	/*
-	 * arq is the selected appropriate request.
+	 * rq is the selected appropriate request.
 	 */
-	as_move_to_dispatch(ad, arq);
+	as_move_to_dispatch(ad, rq);
 
 	return 1;
 }
 
 /*
- * add arq to rbtree and fifo
+ * add rq to rbtree and fifo
 */
 static void as_add_request(request_queue_t *q, struct request *rq)
 {
 	struct as_data *ad = q->elevator->elevator_data;
-	struct as_rq *arq = RQ_DATA(rq);
 	int data_dir;
 
-	arq->state = AS_RQ_NEW;
+	RQ_SET_STATE(rq, AS_RQ_NEW);
 
-	if (rq_data_dir(arq->request) == READ
-			|| (arq->request->flags & REQ_RW_SYNC))
-		arq->is_sync = 1;
-	else
-		arq->is_sync = 0;
-	data_dir = arq->is_sync;
+	data_dir = rq_is_sync(rq);
 
-	arq->io_context = as_get_io_context();
+	rq->elevator_private = as_get_io_context(q->node);
 
-	if (arq->io_context) {
-		as_update_iohist(ad, arq->io_context->aic, arq->request);
-		atomic_inc(&arq->io_context->aic->nr_queued);
+	if (RQ_IOC(rq)) {
+		as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
+		atomic_inc(&RQ_IOC(rq)->aic->nr_queued);
 	}
 
-	as_add_arq_rb(ad, arq);
-	if (rq_mergeable(arq->request))
-		as_add_arq_hash(ad, arq);
+	as_add_rq_rb(ad, rq);
 
 	/*
 	 * set expire time (only used for reads) and add to fifo list
 	 */
-	arq->expires = jiffies + ad->fifo_expire[data_dir];
-	list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
+	rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]);
+	list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]);
 
-	as_update_arq(ad, arq); /* keep state machine up to date */
-	arq->state = AS_RQ_QUEUED;
+	as_update_rq(ad, rq); /* keep state machine up to date */
+	RQ_SET_STATE(rq, AS_RQ_QUEUED);
 }
 
 static void as_activate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_rq *arq = RQ_DATA(rq);
-
-	WARN_ON(arq->state != AS_RQ_DISPATCHED);
-	arq->state = AS_RQ_REMOVED;
-	if (arq->io_context && arq->io_context->aic)
-		atomic_dec(&arq->io_context->aic->nr_dispatched);
+	WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED);
+	RQ_SET_STATE(rq, AS_RQ_REMOVED);
+	if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
+		atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched);
}
 
 static void as_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_rq *arq = RQ_DATA(rq);
-
-	WARN_ON(arq->state != AS_RQ_REMOVED);
-	arq->state = AS_RQ_DISPATCHED;
-	if (arq->io_context && arq->io_context->aic)
-		atomic_inc(&arq->io_context->aic->nr_dispatched);
+	WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED);
+	RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
+	if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
+		atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
 }
 
 /*
@@ -1396,93 +1197,35 @@ static int as_queue_empty(request_queue_t *q)
 		&& list_empty(&ad->fifo_list[REQ_SYNC]);
 }
 
-static struct request *as_former_request(request_queue_t *q,
-					struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-	struct rb_node *rbprev = rb_prev(&arq->rb_node);
-	struct request *ret = NULL;
-
-	if (rbprev)
-		ret = rb_entry_arq(rbprev)->request;
-
-	return ret;
-}
-
-static struct request *as_latter_request(request_queue_t *q,
-					struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-	struct rb_node *rbnext = rb_next(&arq->rb_node);
-	struct request *ret = NULL;
-
-	if (rbnext)
-		ret = rb_entry_arq(rbnext)->request;
-
-	return ret;
-}
-
 static int
 as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	sector_t rb_key = bio->bi_sector + bio_sectors(bio);
 	struct request *__rq;
-	int ret;
-
-	/*
-	 * see if the merge hash can satisfy a back merge
-	 */
-	__rq = as_find_arq_hash(ad, bio->bi_sector);
-	if (__rq) {
-		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_BACK_MERGE;
-			goto out;
-		}
-	}
 
 	/*
 	 * check for front merge
 	 */
-	__rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
-	if (__rq) {
-		BUG_ON(rb_key != rq_rb_key(__rq));
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_FRONT_MERGE;
-			goto out;
-		}
+	__rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key);
+	if (__rq && elv_rq_merge_ok(__rq, bio)) {
+		*req = __rq;
+		return ELEVATOR_FRONT_MERGE;
 	}
 
 	return ELEVATOR_NO_MERGE;
-out:
-	if (ret) {
-		if (rq_mergeable(__rq))
-			as_hot_arq_hash(ad, RQ_DATA(__rq));
-	}
-	*req = __rq;
-	return ret;
 }
 
-static void as_merged_request(request_queue_t *q, struct request *req)
+static void as_merged_request(request_queue_t *q, struct request *req, int type)
 {
 	struct as_data *ad = q->elevator->elevator_data;
-	struct as_rq *arq = RQ_DATA(req);
-
-	/*
-	 * hash always needs to be repositioned, key is end sector
-	 */
-	as_del_arq_hash(arq);
-	as_add_arq_hash(ad, arq);
 
 	/*
 	 * if the merge was a front merge, we need to reposition request
 	 */
-	if (rq_rb_key(req) != arq->rb_key) {
-		as_del_arq_rb(ad, arq);
-		as_add_arq_rb(ad, arq);
+	if (type == ELEVATOR_FRONT_MERGE) {
+		as_del_rq_rb(ad, req);
+		as_add_rq_rb(ad, req);
 		/*
 		 * Note! At this stage of this and the next function, our next
 		 * request may not be optimal - eg the request may have "grown"
@@ -1494,38 +1237,22 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
 static void as_merged_requests(request_queue_t *q, struct request *req,
 			 	struct request *next)
 {
-	struct as_data *ad = q->elevator->elevator_data;
-	struct as_rq *arq = RQ_DATA(req);
-	struct as_rq *anext = RQ_DATA(next);
-
-	BUG_ON(!arq);
-	BUG_ON(!anext);
-
 	/*
-	 * reposition arq (this is the merged request) in hash, and in rbtree
-	 * in case of a front merge
+	 * if next expires before rq, assign its expire time to arq
+	 * and move into next position (next will be deleted) in fifo
 	 */
-	as_del_arq_hash(arq);
-	as_add_arq_hash(ad, arq);
-
-	if (rq_rb_key(req) != arq->rb_key) {
-		as_del_arq_rb(ad, arq);
-		as_add_arq_rb(ad, arq);
-	}
+	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
+		if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
+			struct io_context *rioc = RQ_IOC(req);
+			struct io_context *nioc = RQ_IOC(next);
 
-	/*
-	 * if anext expires before arq, assign its expire time to arq
-	 * and move into anext position (anext will be deleted) in fifo
-	 */
-	if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
-		if (time_before(anext->expires, arq->expires)) {
-			list_move(&arq->fifo, &anext->fifo);
-			arq->expires = anext->expires;
+			list_move(&req->queuelist, &next->queuelist);
+			rq_set_fifo_time(req, rq_fifo_time(next));
 			/*
 			 * Don't copy here but swap, because when anext is
 			 * removed below, it must contain the unused context
 			 */
-			swap_io_context(&arq->io_context, &anext->io_context);
+			swap_io_context(&rioc, &nioc);
 		}
 	}
 
@@ -1533,9 +1260,9 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
 	 * kill knowledge of next, this one is a goner
 	 */
 	as_remove_queued_request(q, next);
-	as_put_io_context(anext);
+	as_put_io_context(next);
 
-	anext->state = AS_RQ_MERGED;
+	RQ_SET_STATE(next, AS_RQ_MERGED);
 }
 
 /*
@@ -1553,61 +1280,18 @@ static void as_work_handler(void *data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (!as_queue_empty(q))
-		q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void as_put_request(request_queue_t *q, struct request *rq)
-{
-	struct as_data *ad = q->elevator->elevator_data;
-	struct as_rq *arq = RQ_DATA(rq);
-
-	if (!arq) {
-		WARN_ON(1);
-		return;
-	}
-
-	if (unlikely(arq->state != AS_RQ_POSTSCHED &&
-		     arq->state != AS_RQ_PRESCHED &&
-		     arq->state != AS_RQ_MERGED)) {
-		printk("arq->state %d\n", arq->state);
-		WARN_ON(1);
-	}
-
-	mempool_free(arq, ad->arq_pool);
-	rq->elevator_private = NULL;
-}
-
-static int as_set_request(request_queue_t *q, struct request *rq,
-			  struct bio *bio, gfp_t gfp_mask)
-{
-	struct as_data *ad = q->elevator->elevator_data;
-	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
-
-	if (arq) {
-		memset(arq, 0, sizeof(*arq));
-		RB_CLEAR_NODE(&arq->rb_node);
-		arq->request = rq;
-		arq->state = AS_RQ_PRESCHED;
-		arq->io_context = NULL;
-		INIT_HLIST_NODE(&arq->hash);
-		INIT_LIST_HEAD(&arq->fifo);
-		rq->elevator_private = arq;
-		return 0;
-	}
-
-	return 1;
-}
-
-static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
+static int as_may_queue(request_queue_t *q, int rw)
 {
 	int ret = ELV_MQUEUE_MAY;
 	struct as_data *ad = q->elevator->elevator_data;
 	struct io_context *ioc;
 	if (ad->antic_status == ANTIC_WAIT_REQ ||
 			ad->antic_status == ANTIC_WAIT_NEXT) {
-		ioc = as_get_io_context();
+		ioc = as_get_io_context(q->node);
 		if (ad->io_context == ioc)
 			ret = ELV_MQUEUE_MUST;
 		put_io_context(ioc);
@@ -1626,23 +1310,16 @@ static void as_exit_queue(elevator_t *e)
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
 
-	mempool_destroy(ad->arq_pool);
 	put_io_context(ad->io_context);
-	kfree(ad->hash);
 	kfree(ad);
 }
 
 /*
- * initialize elevator private data (as_data), and alloc a arq for
- * each request on the free lists
+ * initialize elevator private data (as_data).
 */
 static void *as_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct as_data *ad;
-	int i;
-
-	if (!arq_pool)
-		return NULL;
 
 	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
 	if (!ad)
@@ -1651,30 +1328,12 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
 
 	ad->q = q; /* Identify what queue the data belongs to */
 
-	ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES,
-				GFP_KERNEL, q->node);
-	if (!ad->hash) {
-		kfree(ad);
-		return NULL;
-	}
-
-	ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-				mempool_free_slab, arq_pool, q->node);
-	if (!ad->arq_pool) {
-		kfree(ad->hash);
-		kfree(ad);
-		return NULL;
-	}
-
 	/* anticipatory scheduling helpers */
 	ad->antic_timer.function = as_antic_timeout;
 	ad->antic_timer.data = (unsigned long)q;
 	init_timer(&ad->antic_timer);
 	INIT_WORK(&ad->antic_work, as_work_handler, q);
 
-	for (i = 0; i < AS_HASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&ad->hash[i]);
-
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
 	ad->sort_list[REQ_SYNC] = RB_ROOT;
@@ -1787,10 +1446,8 @@ static struct elevator_type iosched_as = {
 		.elevator_deactivate_req_fn = 	as_deactivate_request,
 		.elevator_queue_empty_fn =	as_queue_empty,
 		.elevator_completed_req_fn =	as_completed_request,
-		.elevator_former_req_fn =	as_former_request,
-		.elevator_latter_req_fn =	as_latter_request,
-		.elevator_set_req_fn =		as_set_request,
-		.elevator_put_req_fn =		as_put_request,
+		.elevator_former_req_fn =	elv_rb_former_request,
+		.elevator_latter_req_fn =	elv_rb_latter_request,
 		.elevator_may_queue_fn =	as_may_queue,
 		.elevator_init_fn =		as_init_queue,
 		.elevator_exit_fn =		as_exit_queue,
@@ -1806,11 +1463,6 @@ static int __init as_init(void)
 {
 	int ret;
 
-	arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
-				     0, 0, NULL, NULL);
-	if (!arq_pool)
-		return -ENOMEM;
-
 	ret = elv_register(&iosched_as);
 	if (!ret) {
 		/*
@@ -1822,7 +1474,6 @@ static int __init as_init(void)
 		return 0;
 	}
 
-	kmem_cache_destroy(arq_pool);
 	return ret;
 }
 
@@ -1833,10 +1484,9 @@ static void __exit as_exit(void)
 	ioc_gone = &all_gone;
 	/* ioc_gone's update must be visible before reading ioc_count */
 	smp_wmb();
-	if (atomic_read(&ioc_count))
+	if (elv_ioc_count_read(ioc_count))
 		wait_for_completion(ioc_gone);
 	synchronize_rcu();
-	kmem_cache_destroy(arq_pool);
 }
 
 module_init(as_init);
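
The bulk of this file's shrinkage follows from the RQ_IOC/RQ_STATE macros near the top of the diff: instead of allocating a separate struct as_rq from a mempool for every request, the scheduler stashes its two remaining pieces of per-request state (the submitting io_context and the arq_state) directly in the request's elevator_private fields, while the rbtree and fifo linkage move into struct request and are handled by the shared elevator core (elv_rb_add/elv_rb_del, rq_fifo_time). A minimal sketch of the stashing pattern, with a simplified stand-in structure (types and names here are illustrative, not the kernel's):

    #include <stdio.h>

    enum arq_state { AS_RQ_NEW, AS_RQ_QUEUED, AS_RQ_DISPATCHED };

    struct io_context { int refcount; };

    /* Simplified stand-in for struct request's two opaque elevator slots. */
    struct request {
    	void *elevator_private;		/* holds the io_context pointer */
    	void *elevator_private2;	/* holds the small state enum */
    };

    #define RQ_IOC(rq)	((struct io_context *)(rq)->elevator_private)
    #define RQ_STATE(rq)	((enum arq_state)(long)(rq)->elevator_private2)
    #define RQ_SET_STATE(rq, state) \
    	((rq)->elevator_private2 = (void *)(long)(state))

    int main(void)
    {
    	struct io_context ioc = { 1 };
    	struct request rq = { &ioc, 0 };

    	RQ_SET_STATE(&rq, AS_RQ_QUEUED);
    	printf("state=%d refs=%d\n", RQ_STATE(&rq), RQ_IOC(&rq)->refcount);
    	return 0;
    }

This is why as_set_request/as_put_request and the arq mempool disappear entirely: there is nothing left to allocate per request.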

+ 9 - 17
block/blktrace.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
@@ -69,7 +69,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
 /*
 * Bio action bits of interest
 */
-static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) };
+static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
 
 /*
 * More could be added as needed, taking care to increment the decrementer
@@ -81,6 +81,8 @@ static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_AC
 	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
 #define trace_ahead_bit(rw)	\
 	(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
+#define trace_meta_bit(rw)	\
+	(((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
 
 /*
 * The worker for the various blk_add_trace*() types. Fills out a
@@ -103,6 +105,7 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 	what |= bio_act[trace_barrier_bit(rw)];
 	what |= bio_act[trace_sync_bit(rw)];
 	what |= bio_act[trace_ahead_bit(rw)];
+	what |= bio_act[trace_meta_bit(rw)];
 
 	pid = tsk->pid;
 	if (unlikely(act_log_check(bt, what, sector, pid)))
@@ -473,6 +476,9 @@ static void blk_check_time(unsigned long long *t)
 	*t -= (a + b) / 2;
 }
 
+/*
+ * calibrate our inter-CPU timings
+ */
 static void blk_trace_check_cpu_time(void *data)
 {
 	unsigned long long *t;
@@ -490,20 +496,6 @@ static void blk_trace_check_cpu_time(void *data)
 	put_cpu();
 }
 
-/*
- * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU
- * timings
- */
-static void blk_trace_calibrate_offsets(void)
-{
-	unsigned long flags;
-
-	smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
-	local_irq_save(flags);
-	blk_trace_check_cpu_time(NULL);
-	local_irq_restore(flags);
-}
-
 static void blk_trace_set_ht_offsets(void)
 {
 #if defined(CONFIG_SCHED_SMT)
@@ -532,7 +524,7 @@ static void blk_trace_set_ht_offsets(void)
 static __init int blk_trace_init(void)
 {
 	mutex_init(&blk_tree_mutex);
-	blk_trace_calibrate_offsets();
+	on_each_cpu(blk_trace_check_cpu_time, NULL, 1, 1);
 	blk_trace_set_ht_offsets();
 
 	return 0;

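Why bio_act[] grows from 5 to 9 entries: each trace_*_bit() macro shifts one rw flag to a distinct power-of-two index into the table, so the |= lookups in __blk_add_trace() compose the action mask without branches. Per the new initializer, barrier sits at index 1, sync at 2, ahead at 4, and the new meta entry at 8 — hence nine slots. A standalone check of the index arithmetic; the BIO_RW_* bit positions below are assumed from that era's linux/bio.h, not shown in this diff:

#include <assert.h>
#include <stdio.h>

/* assumed flag bit positions (not part of this patch) */
#define BIO_RW_AHEAD	1
#define BIO_RW_SYNC	4
#define BIO_RW_META	5

/* shift macros as in the hunk above */
#define trace_sync_bit(rw)	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
#define trace_ahead_bit(rw)	(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
#define trace_meta_bit(rw)	(((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))

int main(void)
{
	assert(trace_sync_bit(1 << BIO_RW_SYNC) == 2);		/* bio_act[2] = SYNC  */
	assert(trace_ahead_bit(1 << BIO_RW_AHEAD) == 4);	/* bio_act[4] = AHEAD */
	assert(trace_meta_bit(1 << BIO_RW_META) == 8);		/* bio_act[8] = META  */
	assert(trace_meta_bit(0) == 0);				/* flag clear -> index 0 */
	printf("META lands at index 8, hence bio_act[9]\n");
	return 0;
}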
+ 281 - 584
block/cfq-iosched.c

@@ -4,7 +4,7 @@
  *  Based on ideas from a previously unfinished io
  *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
  *
- *  Copyright (C) 2003 Jens Axboe <axboe@suse.de>
+ *  Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
  */
 #include <linux/module.h>
 #include <linux/blkdev.h>
@@ -17,7 +17,6 @@
  * tunables
  */
 static const int cfq_quantum = 4;		/* max queue in one round of service */
-static const int cfq_queued = 8;		/* minimum rq allocate limit per-queue*/
 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
 static const int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
 static const int cfq_back_penalty = 2;		/* penalty of a backwards seek */
@@ -32,8 +31,6 @@ static int cfq_slice_idle = HZ / 125;
 
 #define CFQ_KEY_ASYNC		(0)
 
-static DEFINE_SPINLOCK(cfq_exit_lock);
-
 /*
  * for the hash of cfqq inside the cfqd
  */
@@ -41,37 +38,19 @@ static DEFINE_SPINLOCK(cfq_exit_lock);
 #define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)
 #define list_entry_qhash(entry)	hlist_entry((entry), struct cfq_queue, cfq_hash)
 
-/*
- * for the hash of crq inside the cfqq
- */
-#define CFQ_MHASH_SHIFT		6
-#define CFQ_MHASH_BLOCK(sec)	((sec) >> 3)
-#define CFQ_MHASH_ENTRIES	(1 << CFQ_MHASH_SHIFT)
-#define CFQ_MHASH_FN(sec)	hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-#define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)
-
 #define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
-#define list_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 
-#define RQ_DATA(rq)		(rq)->elevator_private
+#define RQ_CIC(rq)		((struct cfq_io_context*)(rq)->elevator_private)
+#define RQ_CFQQ(rq)		((rq)->elevator_private2)
 
-/*
- * rb-tree defines
- */
-#define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
-#define rq_rb_key(rq)		(rq)->sector
-
-static kmem_cache_t *crq_pool;
 static kmem_cache_t *cfq_pool;
 static kmem_cache_t *cfq_ioc_pool;
 
-static atomic_t ioc_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(unsigned long, ioc_count);
 static struct completion *ioc_gone;
 
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
-#define cfq_class_be(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
 
 #define ASYNC			(0)
@@ -102,29 +81,14 @@ struct cfq_data {
 	struct list_head idle_rr;
 	unsigned int busy_queues;
 
-	/*
-	 * non-ordered list of empty cfqq's
-	 */
-	struct list_head empty_list;
-
 	/*
 	 * cfqq lookup hash
 	 */
 	struct hlist_head *cfq_hash;
 
-	/*
-	 * global crq hash for all queues
-	 */
-	struct hlist_head *crq_hash;
-
-	mempool_t *crq_pool;
-
 	int rq_in_driver;
 	int hw_tag;
 
-	/*
-	 * schedule slice state info
-	 */
 	/*
 	 * idle window management
 	 */
@@ -141,13 +105,10 @@ struct cfq_data {
 	sector_t last_sector;
 	unsigned long last_end_request;
 
-	unsigned int rq_starved;
-
 	/*
 	 * tunables, see top of file
 	 */
 	unsigned int cfq_quantum;
-	unsigned int cfq_queued;
 	unsigned int cfq_fifo_expire[2];
 	unsigned int cfq_back_penalty;
 	unsigned int cfq_back_max;
@@ -170,23 +131,24 @@ struct cfq_queue {
 	struct hlist_node cfq_hash;
 	/* hash key */
 	unsigned int key;
-	/* on either rr or empty list of cfqd */
+	/* member of the rr/busy/cur/idle cfqd list */
 	struct list_head cfq_list;
 	/* sorted list of pending requests */
 	struct rb_root sort_list;
 	/* if fifo isn't expired, next request to serve */
-	struct cfq_rq *next_crq;
+	struct request *next_rq;
 	/* requests queued in sort_list */
 	int queued[2];
 	/* currently allocated requests */
 	int allocated[2];
+	/* pending metadata requests */
+	int meta_pending;
 	/* fifo list of requests in sort_list */
 	struct list_head fifo;
 
 	unsigned long slice_start;
 	unsigned long slice_end;
 	unsigned long slice_left;
-	unsigned long service_last;
 
 	/* number of requests that are on the dispatch list */
 	int on_dispatch[2];
@@ -199,18 +161,6 @@ struct cfq_queue {
 	unsigned int flags;
 };
 
-struct cfq_rq {
-	struct rb_node rb_node;
-	sector_t rb_key;
-	struct request *request;
-	struct hlist_node hash;
-
-	struct cfq_queue *cfq_queue;
-	struct cfq_io_context *io_context;
-
-	unsigned int crq_flags;
-};
-
 enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_on_rr = 0,
 	CFQ_CFQQ_FLAG_wait_request,
@@ -220,6 +170,7 @@ enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_fifo_expire,
 	CFQ_CFQQ_FLAG_idle_window,
 	CFQ_CFQQ_FLAG_prio_changed,
+	CFQ_CFQQ_FLAG_queue_new,
 };
 
 #define CFQ_CFQQ_FNS(name)						\
@@ -244,69 +195,13 @@ CFQ_CFQQ_FNS(must_dispatch);
 CFQ_CFQQ_FNS(fifo_expire);
 CFQ_CFQQ_FNS(idle_window);
 CFQ_CFQQ_FNS(prio_changed);
+CFQ_CFQQ_FNS(queue_new);
 #undef CFQ_CFQQ_FNS
 
-enum cfq_rq_state_flags {
-	CFQ_CRQ_FLAG_is_sync = 0,
-};
-
-#define CFQ_CRQ_FNS(name)						\
-static inline void cfq_mark_crq_##name(struct cfq_rq *crq)		\
-{									\
-	crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name);			\
-}									\
-static inline void cfq_clear_crq_##name(struct cfq_rq *crq)		\
-{									\
-	crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name);			\
-}									\
-static inline int cfq_crq_##name(const struct cfq_rq *crq)		\
-{									\
-	return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;	\
-}
-
-CFQ_CRQ_FNS(is_sync);
-#undef CFQ_CRQ_FNS
-
 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
-static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
+static void cfq_dispatch_insert(request_queue_t *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
 
-/*
- * lots of deadline iosched dupes, can be abstracted later...
- */
-static inline void cfq_del_crq_hash(struct cfq_rq *crq)
-{
-	hlist_del_init(&crq->hash);
-}
-
-static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
-{
-	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
-
-	hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
-}
-
-static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
-{
-	struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
-	struct hlist_node *entry, *next;
-
-	hlist_for_each_safe(entry, next, hash_list) {
-		struct cfq_rq *crq = list_entry_hash(entry);
-		struct request *__rq = crq->request;
-
-		if (!rq_mergeable(__rq)) {
-			cfq_del_crq_hash(crq);
-			continue;
-		}
-
-		if (rq_hash_key(__rq) == offset)
-			return __rq;
-	}
-
-	return NULL;
-}
-
 /*
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
@@ -333,12 +228,12 @@ static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
 }
 
 /*
- * Lifted from AS - choose which of crq1 and crq2 that is best served now.
+ * Lifted from AS - choose which of rq1 and rq2 that is best served now.
  * We choose the request that is closest to the head right now. Distance
  * behind the head is penalized and only allowed to a certain extent.
  */
-static struct cfq_rq *
-cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
+static struct request *
+cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
 {
 	sector_t last, s1, s2, d1 = 0, d2 = 0;
 	unsigned long back_max;
@@ -346,18 +241,22 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 #define CFQ_RQ2_WRAP	0x02 /* request 2 wraps */
 	unsigned wrap = 0; /* bit mask: requests behind the disk head? */
 
-	if (crq1 == NULL || crq1 == crq2)
-		return crq2;
-	if (crq2 == NULL)
-		return crq1;
+	if (rq1 == NULL || rq1 == rq2)
+		return rq2;
+	if (rq2 == NULL)
+		return rq1;
 
-	if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
-		return crq1;
-	else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
-		return crq2;
+	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
+		return rq1;
+	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
+		return rq2;
+	if (rq_is_meta(rq1) && !rq_is_meta(rq2))
+		return rq1;
+	else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
+		return rq2;
 
-	s1 = crq1->request->sector;
-	s2 = crq2->request->sector;
+	s1 = rq1->sector;
+	s2 = rq2->sector;
 
 	last = cfqd->last_sector;
 
@@ -392,23 +291,23 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 	 * check two variables for all permutations: --> faster!
 	 */
 	switch (wrap) {
-	case 0: /* common case for CFQ: crq1 and crq2 not wrapped */
+	case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
 		if (d1 < d2)
-			return crq1;
+			return rq1;
 		else if (d2 < d1)
-			return crq2;
+			return rq2;
 		else {
 			if (s1 >= s2)
-				return crq1;
+				return rq1;
 			else
-				return crq2;
+				return rq2;
 		}
 
 	case CFQ_RQ2_WRAP:
-		return crq1;
+		return rq1;
 	case CFQ_RQ1_WRAP:
-		return crq2;
-	case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */
+		return rq2;
+	case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
 	default:
 		/*
 		 * Since both rqs are wrapped,
@@ -417,50 +316,43 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 		 * since back seek takes more time than forward.
 		 */
 		if (s1 <= s2)
-			return crq1;
+			return rq1;
 		else
-			return crq2;
 	}
 }
 
 /*
  * would be nice to take fifo expire time into account as well
  */
-static struct cfq_rq *
-cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-		  struct cfq_rq *last)
+static struct request *
+cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		  struct request *last)
 {
-	struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
-	struct rb_node *rbnext, *rbprev;
-
-	if (!(rbnext = rb_next(&last->rb_node))) {
-		rbnext = rb_first(&cfqq->sort_list);
-		if (rbnext == &last->rb_node)
-			rbnext = NULL;
-	}
+	struct rb_node *rbnext = rb_next(&last->rb_node);
+	struct rb_node *rbprev = rb_prev(&last->rb_node);
+	struct request *next = NULL, *prev = NULL;
 
-	rbprev = rb_prev(&last->rb_node);
+	BUG_ON(RB_EMPTY_NODE(&last->rb_node));
 
 	if (rbprev)
-		crq_prev = rb_entry_crq(rbprev);
-	if (rbnext)
-		crq_next = rb_entry_crq(rbnext);
-
-	return cfq_choose_req(cfqd, crq_next, crq_prev);
-}
+		prev = rb_entry_rq(rbprev);
 
-static void cfq_update_next_crq(struct cfq_rq *crq)
-{
-	struct cfq_queue *cfqq = crq->cfq_queue;
+	if (rbnext)
+		next = rb_entry_rq(rbnext);
+	else {
+		rbnext = rb_first(&cfqq->sort_list);
+		if (rbnext && rbnext != &last->rb_node)
+			next = rb_entry_rq(rbnext);
+	}
 
-	if (cfqq->next_crq == crq)
-		cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
+	return cfq_choose_req(cfqd, next, prev);
 }
 
 static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 {
 	struct cfq_data *cfqd = cfqq->cfqd;
-	struct list_head *list, *entry;
+	struct list_head *list;
 
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
 
@@ -485,31 +377,26 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 	}
 
 	/*
-	 * if queue was preempted, just add to front to be fair. busy_rr
-	 * isn't sorted, but insert at the back for fairness.
+	 * If this queue was preempted or is new (never been serviced), let
+	 * it be added first for fairness but behind other new queues.
+	 * Otherwise, just add to the back of the list.
 	 */
-	if (preempted || list == &cfqd->busy_rr) {
-		if (preempted)
-			list = list->prev;
+	if (preempted || cfq_cfqq_queue_new(cfqq)) {
+		struct list_head *n = list;
+		struct cfq_queue *__cfqq;
 
-		list_add_tail(&cfqq->cfq_list, list);
-		return;
-	}
+		while (n->next != list) {
+			__cfqq = list_entry_cfqq(n->next);
+			if (!cfq_cfqq_queue_new(__cfqq))
+				break;
 
-	/*
-	 * sort by when queue was last serviced
-	 */
-	entry = list;
-	while ((entry = entry->prev) != list) {
-		struct cfq_queue *__cfqq = list_entry_cfqq(entry);
+			n = n->next;
+		}
 
-		if (!__cfqq->service_last)
-			break;
-		if (time_before(__cfqq->service_last, cfqq->service_last))
-			break;
+		list = n;
 	}
 
-	list_add(&cfqq->cfq_list, entry);
+	list_add_tail(&cfqq->cfq_list, list);
 }
 
 /*
@@ -531,7 +418,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
 	cfq_clear_cfqq_on_rr(cfqq);
-	list_move(&cfqq->cfq_list, &cfqd->empty_list);
+	list_del_init(&cfqq->cfq_list);
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
@@ -540,81 +427,43 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 /*
  * rb tree support functions
  */
-static inline void cfq_del_crq_rb(struct cfq_rq *crq)
+static inline void cfq_del_rq_rb(struct request *rq)
 {
-	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 	struct cfq_data *cfqd = cfqq->cfqd;
-	const int sync = cfq_crq_is_sync(crq);
+	const int sync = rq_is_sync(rq);
 
 	BUG_ON(!cfqq->queued[sync]);
 	cfqq->queued[sync]--;
 
-	cfq_update_next_crq(crq);
-
-	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	elv_rb_del(&cfqq->sort_list, rq);
 
 	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
 		cfq_del_cfqq_rr(cfqd, cfqq);
 }
 
-static struct cfq_rq *
-__cfq_add_crq_rb(struct cfq_rq *crq)
+static void cfq_add_rq_rb(struct request *rq)
 {
-	struct rb_node **p = &crq->cfq_queue->sort_list.rb_node;
-	struct rb_node *parent = NULL;
-	struct cfq_rq *__crq;
-
-	while (*p) {
-		parent = *p;
-		__crq = rb_entry_crq(parent);
-
-		if (crq->rb_key < __crq->rb_key)
-			p = &(*p)->rb_left;
-		else if (crq->rb_key > __crq->rb_key)
-			p = &(*p)->rb_right;
-		else
-			return __crq;
-	}
-
-	rb_link_node(&crq->rb_node, parent, p);
-	return NULL;
-}
-
-static void cfq_add_crq_rb(struct cfq_rq *crq)
-{
-	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 	struct cfq_data *cfqd = cfqq->cfqd;
-	struct request *rq = crq->request;
-	struct cfq_rq *__alias;
+	struct request *__alias;
 
-	crq->rb_key = rq_rb_key(rq);
-	cfqq->queued[cfq_crq_is_sync(crq)]++;
+	cfqq->queued[rq_is_sync(rq)]++;
 
 	/*
 	 * looks a little odd, but the first insert might return an alias.
 	 * if that happens, put the alias on the dispatch list
 	 */
-	while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
+	while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
 		cfq_dispatch_insert(cfqd->queue, __alias);
-
-	rb_insert_color(&crq->rb_node, &cfqq->sort_list);
-
-	if (!cfq_cfqq_on_rr(cfqq))
-		cfq_add_cfqq_rr(cfqd, cfqq);
-
-	/*
-	 * check if this request is a better next-serve candidate
-	 */
-	cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
 }
 
 static inline void
-cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
+cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
 {
-	rb_erase(&crq->rb_node, &cfqq->sort_list);
-	cfqq->queued[cfq_crq_is_sync(crq)]--;
-
-	cfq_add_crq_rb(crq);
+	elv_rb_del(&cfqq->sort_list, rq);
+	cfqq->queued[rq_is_sync(rq)]--;
+	cfq_add_rq_rb(rq);
 }
 
 static struct request *
@@ -623,27 +472,14 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
 	struct task_struct *tsk = current;
 	pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
 	struct cfq_queue *cfqq;
-	struct rb_node *n;
-	sector_t sector;
 
 	cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
-	if (!cfqq)
-		goto out;
-
-	sector = bio->bi_sector + bio_sectors(bio);
-	n = cfqq->sort_list.rb_node;
-	while (n) {
-		struct cfq_rq *crq = rb_entry_crq(n);
+	if (cfqq) {
+		sector_t sector = bio->bi_sector + bio_sectors(bio);
 
-		if (sector < crq->rb_key)
-			n = n->rb_left;
-		else if (sector > crq->rb_key)
-			n = n->rb_right;
-		else
-			return crq->request;
+		return elv_rb_find(&cfqq->sort_list, sector);
 	}
 
-out:
 	return NULL;
 }
 
@@ -673,11 +509,18 @@ static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
 
 static void cfq_remove_request(struct request *rq)
 {
-	struct cfq_rq *crq = RQ_DATA(rq);
+	struct cfq_queue *cfqq = RQ_CFQQ(rq);
+
+	if (cfqq->next_rq == rq)
+		cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
 
 	list_del_init(&rq->queuelist);
-	cfq_del_crq_rb(crq);
-	cfq_del_crq_hash(crq);
+	cfq_del_rq_rb(rq);
+
+	if (rq_is_meta(rq)) {
+		WARN_ON(!cfqq->meta_pending);
+		cfqq->meta_pending--;
+	}
 }
 
 static int
@@ -685,39 +528,23 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct request *__rq;
-	int ret;
-
-	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
-	if (__rq && elv_rq_merge_ok(__rq, bio)) {
-		ret = ELEVATOR_BACK_MERGE;
-		goto out;
-	}
 
 	__rq = cfq_find_rq_fmerge(cfqd, bio);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
-		ret = ELEVATOR_FRONT_MERGE;
-		goto out;
+		*req = __rq;
+		return ELEVATOR_FRONT_MERGE;
 	}
 
 	return ELEVATOR_NO_MERGE;
-out:
-	*req = __rq;
-	return ret;
 }
 
-static void cfq_merged_request(request_queue_t *q, struct request *req)
+static void cfq_merged_request(request_queue_t *q, struct request *req,
+			       int type)
 {
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_rq *crq = RQ_DATA(req);
-
-	cfq_del_crq_hash(crq);
-	cfq_add_crq_hash(cfqd, crq);
-
-	if (rq_rb_key(req) != crq->rb_key) {
-		struct cfq_queue *cfqq = crq->cfq_queue;
+	if (type == ELEVATOR_FRONT_MERGE) {
+		struct cfq_queue *cfqq = RQ_CFQQ(req);
 
-		cfq_update_next_crq(crq);
-		cfq_reposition_crq_rb(cfqq, crq);
+		cfq_reposition_rq_rb(cfqq, req);
 	}
 }
 
@@ -725,8 +552,6 @@ static void
 cfq_merged_requests(request_queue_t *q, struct request *rq,
 		    struct request *next)
 {
-	cfq_merged_request(q, rq);
-
 	/*
 	 * reposition in fifo if next is older than rq
 	 */
@@ -768,13 +593,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	if (cfq_cfqq_wait_request(cfqq))
 		del_timer(&cfqd->idle_slice_timer);
 
-	if (!preempted && !cfq_cfqq_dispatched(cfqq)) {
-		cfqq->service_last = now;
+	if (!preempted && !cfq_cfqq_dispatched(cfqq))
 		cfq_schedule_dispatch(cfqd);
-	}
 
 	cfq_clear_cfqq_must_dispatch(cfqq);
 	cfq_clear_cfqq_wait_request(cfqq);
+	cfq_clear_cfqq_queue_new(cfqq);
 
 	/*
 	 * store what was left of this slice, if the queue idled out
@@ -868,26 +692,25 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq = NULL;
 
-	/*
-	 * if current list is non-empty, grab first entry. if it is empty,
-	 * get next prio level and grab first entry then if any are spliced
-	 */
-	if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
+	if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) {
+		/*
+		 * if current list is non-empty, grab first entry. if it is
+		 * empty, get next prio level and grab first entry then if any
+		 * are spliced
+		 */
 		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
-
-	/*
-	 * If no new queues are available, check if the busy list has some
-	 * before falling back to idle io.
-	 */
-	if (!cfqq && !list_empty(&cfqd->busy_rr))
+	} else if (!list_empty(&cfqd->busy_rr)) {
+		/*
+		 * If no new queues are available, check if the busy list has
+		 * some before falling back to idle io.
+		 */
 		cfqq = list_entry_cfqq(cfqd->busy_rr.next);
-
-	/*
-	 * if we have idle queues and no rt or be queues had pending
-	 * requests, either allow immediate service if the grace period
-	 * has passed or arm the idle grace timer
-	 */
-	if (!cfqq && !list_empty(&cfqd->idle_rr)) {
+	} else if (!list_empty(&cfqd->idle_rr)) {
+		/*
+		 * if we have idle queues and no rt or be queues had pending
+		 * requests, either allow immediate service if the grace period
+		 * has passed or arm the idle grace timer
+		 */
 		unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
 
 		if (time_after_eq(jiffies, end))
@@ -942,16 +765,14 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return 1;
 }
 
-static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
+static void cfq_dispatch_insert(request_queue_t *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_queue *cfqq = crq->cfq_queue;
-	struct request *rq;
+	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
-	cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
-	cfq_remove_request(crq->request);
-	cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
-	elv_dispatch_sort(q, crq->request);
+	cfq_remove_request(rq);
+	cfqq->on_dispatch[rq_is_sync(rq)]++;
+	elv_dispatch_sort(q, rq);
 
 	rq = list_entry(q->queue_head.prev, struct request, queuelist);
 	cfqd->last_sector = rq->sector + rq->nr_sectors;
@@ -960,24 +781,23 @@ static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
 /*
  * return expired entry, or NULL to just start from scratch in rbtree
 */
-static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
+static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
 {
 	struct cfq_data *cfqd = cfqq->cfqd;
 	struct request *rq;
-	struct cfq_rq *crq;
+	int fifo;
 
 	if (cfq_cfqq_fifo_expire(cfqq))
 		return NULL;
+	if (list_empty(&cfqq->fifo))
+		return NULL;
 
-	if (!list_empty(&cfqq->fifo)) {
-		int fifo = cfq_cfqq_class_sync(cfqq);
+	fifo = cfq_cfqq_class_sync(cfqq);
+	rq = rq_entry_fifo(cfqq->fifo.next);
 
-		crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
-		rq = crq->request;
-		if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
-			cfq_mark_cfqq_fifo_expire(cfqq);
-			return crq;
-		}
+	if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
+		cfq_mark_cfqq_fifo_expire(cfqq);
+		return rq;
 	}
 
 	return NULL;
@@ -1063,25 +883,25 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
 
 	do {
-		struct cfq_rq *crq;
+		struct request *rq;
 
 		/*
 		 * follow expired path, else get first next available
 		 */
-		if ((crq = cfq_check_fifo(cfqq)) == NULL)
-			crq = cfqq->next_crq;
+		if ((rq = cfq_check_fifo(cfqq)) == NULL)
+			rq = cfqq->next_rq;
 
 		/*
 		 * finally, insert request into driver dispatch list
 		 */
-		cfq_dispatch_insert(cfqd->queue, crq);
+		cfq_dispatch_insert(cfqd->queue, rq);
 
 		cfqd->dispatch_slice++;
 		dispatched++;
 
 		if (!cfqd->active_cic) {
-			atomic_inc(&crq->io_context->ioc->refcount);
-			cfqd->active_cic = crq->io_context;
+			atomic_inc(&RQ_CIC(rq)->ioc->refcount);
+			cfqd->active_cic = RQ_CIC(rq);
 		}
 
 		if (RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -1112,13 +932,12 @@ static int
 cfq_forced_dispatch_cfqqs(struct list_head *list)
 {
 	struct cfq_queue *cfqq, *next;
-	struct cfq_rq *crq;
 	int dispatched;
 
 	dispatched = 0;
 	list_for_each_entry_safe(cfqq, next, list, cfq_list) {
-		while ((crq = cfqq->next_crq)) {
-			cfq_dispatch_insert(cfqq->cfqd->queue, crq);
+		while (cfqq->next_rq) {
+			cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
 			dispatched++;
 		}
 		BUG_ON(!list_empty(&cfqq->fifo));
@@ -1194,8 +1013,8 @@ cfq_dispatch_requests(request_queue_t *q, int force)
 }
 
 /*
- * task holds one reference to the queue, dropped when task exits. each crq
- * in-flight on this queue also holds a reference, dropped when crq is freed.
+ * task holds one reference to the queue, dropped when task exits. each rq
+ * in-flight on this queue also holds a reference, dropped when rq is freed.
  *
 * queue lock must be held here.
 */
@@ -1223,7 +1042,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	kmem_cache_free(cfq_pool, cfqq);
 }
 
-static inline struct cfq_queue *
+static struct cfq_queue *
 __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
 		    const int hashval)
 {
@@ -1260,62 +1079,63 @@ static void cfq_free_io_context(struct io_context *ioc)
 		freed++;
 	}
 
-	if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone)
+	elv_ioc_count_mod(ioc_count, -freed);
+
+	if (ioc_gone && !elv_ioc_count_read(ioc_count))
 		complete(ioc_gone);
 }
 
-static void cfq_trim(struct io_context *ioc)
+static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	ioc->set_ioprio = NULL;
-	cfq_free_io_context(ioc);
+	if (unlikely(cfqq == cfqd->active_queue))
+		__cfq_slice_expired(cfqd, cfqq, 0);
+
+	cfq_put_queue(cfqq);
 }
 
-/*
- * Called with interrupts disabled
- */
-static void cfq_exit_single_io_context(struct cfq_io_context *cic)
+static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
+					 struct cfq_io_context *cic)
 {
-	struct cfq_data *cfqd = cic->key;
-	request_queue_t *q;
-
-	if (!cfqd)
-		return;
-
-	q = cfqd->queue;
-
-	WARN_ON(!irqs_disabled());
-
-	spin_lock(q->queue_lock);
+	list_del_init(&cic->queue_list);
+	smp_wmb();
+	cic->key = NULL;
 
 	if (cic->cfqq[ASYNC]) {
-		if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue))
-			__cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0);
-		cfq_put_queue(cic->cfqq[ASYNC]);
+		cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
 		cic->cfqq[ASYNC] = NULL;
 	}
 
 	if (cic->cfqq[SYNC]) {
-		if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue))
-			__cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0);
-		cfq_put_queue(cic->cfqq[SYNC]);
+		cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
 		cic->cfqq[SYNC] = NULL;
 	}
+}
 
-	cic->key = NULL;
-	list_del_init(&cic->queue_list);
-	spin_unlock(q->queue_lock);
+
+/*
+ * Called with interrupts disabled
+ */
+static void cfq_exit_single_io_context(struct cfq_io_context *cic)
+{
+	struct cfq_data *cfqd = cic->key;
+
+	if (cfqd) {
+		request_queue_t *q = cfqd->queue;
+
+		spin_lock_irq(q->queue_lock);
+		__cfq_exit_single_io_context(cfqd, cic);
+		spin_unlock_irq(q->queue_lock);
+	}
 }
 
 static void cfq_exit_io_context(struct io_context *ioc)
 {
 	struct cfq_io_context *__cic;
-	unsigned long flags;
 	struct rb_node *n;
 
 	/*
	 * put the reference this task is holding to the various queues
	 */
-	spin_lock_irqsave(&cfq_exit_lock, flags);
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
@@ -1324,22 +1144,21 @@ static void cfq_exit_io_context(struct io_context *ioc)
 		cfq_exit_single_io_context(__cic);
 		n = rb_next(n);
 	}
-
-	spin_unlock_irqrestore(&cfq_exit_lock, flags);
 }
 
 static struct cfq_io_context *
 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
+	struct cfq_io_context *cic;
 
+	cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node);
 	if (cic) {
 		memset(cic, 0, sizeof(*cic));
 		cic->last_end_request = jiffies;
 		INIT_LIST_HEAD(&cic->queue_list);
 		cic->dtor = cfq_free_io_context;
 		cic->exit = cfq_exit_io_context;
-		atomic_inc(&ioc_count);
+		elv_ioc_count_inc(ioc_count);
 	}
 
 	return cic;
@@ -1420,15 +1239,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
 	spin_unlock(cfqd->queue->queue_lock);
 }
 
-/*
- * callback from sys_ioprio_set, irqs are disabled
- */
-static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
+static void cfq_ioc_set_ioprio(struct io_context *ioc)
 {
 	struct cfq_io_context *cic;
 	struct rb_node *n;
 
-	spin_lock(&cfq_exit_lock);
+	ioc->ioprio_changed = 0;
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
@@ -1437,10 +1253,6 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 		changed_ioprio(cic);
 		n = rb_next(n);
 	}
-
-	spin_unlock(&cfq_exit_lock);
-
-	return 0;
 }
 
 static struct cfq_queue *
@@ -1460,12 +1272,18 @@ retry:
 			cfqq = new_cfqq;
 			new_cfqq = NULL;
 		} else if (gfp_mask & __GFP_WAIT) {
+			/*
+			 * Inform the allocator of the fact that we will
+			 * just repeat this allocation if it fails, to allow
+			 * the allocator to do whatever it needs to attempt to
+			 * free memory.
+			 */
 			spin_unlock_irq(cfqd->queue->queue_lock);
-			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+			new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node);
 			spin_lock_irq(cfqd->queue->queue_lock);
 			goto retry;
 		} else {
-			cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+			cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node);
 			if (!cfqq)
 				goto out;
 		}
@@ -1480,13 +1298,13 @@ retry:
 		hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
 		atomic_set(&cfqq->ref, 0);
 		cfqq->cfqd = cfqd;
-		cfqq->service_last = 0;
 		/*
		 * set ->slice_left to allow preemption for a new process
		 */
 		cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
 		cfq_mark_cfqq_idle_window(cfqq);
 		cfq_mark_cfqq_prio_changed(cfqq);
+		cfq_mark_cfqq_queue_new(cfqq);
 		cfq_init_prio_data(cfqq);
 	}
 
@@ -1502,12 +1320,10 @@ out:
 static void
 cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
 {
-	spin_lock(&cfq_exit_lock);
+	WARN_ON(!list_empty(&cic->queue_list));
 	rb_erase(&cic->rb_node, &ioc->cic_root);
-	list_del_init(&cic->queue_list);
-	spin_unlock(&cfq_exit_lock);
 	kmem_cache_free(cfq_ioc_pool, cic);
-	atomic_dec(&ioc_count);
+	elv_ioc_count_dec(ioc_count);
 }
 
 static struct cfq_io_context *
@@ -1551,7 +1367,6 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 	cic->ioc = ioc;
 	cic->key = cfqd;
 
-	ioc->set_ioprio = cfq_ioc_set_ioprio;
 restart:
 	parent = NULL;
 	p = &ioc->cic_root.rb_node;
@@ -1573,11 +1388,12 @@ restart:
 			BUG();
 	}
 
-	spin_lock(&cfq_exit_lock);
 	rb_link_node(&cic->rb_node, parent, p);
 	rb_insert_color(&cic->rb_node, &ioc->cic_root);
+
+	spin_lock_irq(cfqd->queue->queue_lock);
 	list_add(&cic->queue_list, &cfqd->cic_list);
-	spin_unlock(&cfq_exit_lock);
+	spin_unlock_irq(cfqd->queue->queue_lock);
 }
 
 /*
@@ -1593,7 +1409,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
-	ioc = get_io_context(gfp_mask);
+	ioc = get_io_context(gfp_mask, cfqd->queue->node);
 	if (!ioc)
 		return NULL;
 
@@ -1607,6 +1423,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 	cfq_cic_link(cfqd, ioc, cic);
 out:
+	smp_read_barrier_depends();
+	if (unlikely(ioc->ioprio_changed))
+		cfq_ioc_set_ioprio(ioc);
+
 	return cic;
 err:
 	put_io_context(ioc);
@@ -1640,15 +1460,15 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
 
 static void
 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
-		       struct cfq_rq *crq)
+		       struct request *rq)
 {
 	sector_t sdist;
 	u64 total;
 
-	if (cic->last_request_pos < crq->request->sector)
-		sdist = crq->request->sector - cic->last_request_pos;
+	if (cic->last_request_pos < rq->sector)
+		sdist = rq->sector - cic->last_request_pos;
 	else
-		sdist = cic->last_request_pos - crq->request->sector;
+		sdist = cic->last_request_pos - rq->sector;
 
 	/*
	 * Don't allow the seek distance to get too large from the
@@ -1699,7 +1519,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 */
 static int
 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
-		   struct cfq_rq *crq)
+		   struct request *rq)
 {
 	struct cfq_queue *cfqq = cfqd->active_queue;
 
@@ -1718,7 +1538,17 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
	 */
 	if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
 		return 0;
-	if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq))
+	/*
+	 * if the new request is sync, but the currently running queue is
+	 * not, let the sync request have priority.
+	 */
+	if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
+		return 1;
+	/*
+	 * So both queues are sync. Let the new request get disk time if
+	 * it's a metadata request and the current queue is doing regular IO.
+	 */
+	if (rq_is_meta(rq) && !cfqq->meta_pending)
 		return 1;
 
 	return 0;
@@ -1730,47 +1560,45 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 */
 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	struct cfq_queue *__cfqq, *next;
-
-	list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
-		cfq_resort_rr_list(__cfqq, 1);
+	cfq_slice_expired(cfqd, 1);
 
 	if (!cfqq->slice_left)
 		cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
 
-	cfqq->slice_end = cfqq->slice_left + jiffies;
-	cfq_slice_expired(cfqd, 1);
-	__cfq_set_active_queue(cfqd, cfqq);
-}
-
-/*
- * should really be a ll_rw_blk.c helper
- */
-static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
-	request_queue_t *q = cfqd->queue;
+	/*
+	 * Put the new queue at the front of the current list,
+	 * so we know that it will be selected next.
+	 */
+	BUG_ON(!cfq_cfqq_on_rr(cfqq));
+	list_move(&cfqq->cfq_list, &cfqd->cur_rr);
 
-	if (!blk_queue_plugged(q))
-		q->request_fn(q);
-	else
-		__generic_unplug_device(q);
+	cfqq->slice_end = cfqq->slice_left + jiffies;
 }
 
 /*
- * Called when a new fs request (crq) is added (to cfqq). Check if there's
+ * Called when a new fs request (rq) is added (to cfqq). Check if there's
  * something we should do about it
  */
 static void
-cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-		 struct cfq_rq *crq)
+cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+		struct request *rq)
 {
-	struct cfq_io_context *cic = crq->io_context;
+	struct cfq_io_context *cic = RQ_CIC(rq);
+
+	if (rq_is_meta(rq))
+		cfqq->meta_pending++;
+
+	/*
+	 * check if this request is a better next-serve candidate
+	 */
+	cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
+	BUG_ON(!cfqq->next_rq);
 
 	/*
	 * we never wait for an async request and we don't allow preemption
	 * of an async request. so just return early
	 */
-	if (!cfq_crq_is_sync(crq)) {
+	if (!rq_is_sync(rq)) {
 		/*
		 * sync process issued an async request, if it's waiting
		 * then expire it and kick rq handling.
@@ -1778,17 +1606,17 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		if (cic == cfqd->active_cic &&
 		    del_timer(&cfqd->idle_slice_timer)) {
 			cfq_slice_expired(cfqd, 0);
-			cfq_start_queueing(cfqd, cfqq);
+			blk_start_queueing(cfqd->queue);
 		}
 		return;
 	}
 
 	cfq_update_io_thinktime(cfqd, cic);
-	cfq_update_io_seektime(cfqd, cic, crq);
+	cfq_update_io_seektime(cfqd, cic, rq);
 	cfq_update_idle_window(cfqd, cfqq, cic);
 
 	cic->last_queue = jiffies;
-	cic->last_request_pos = crq->request->sector + crq->request->nr_sectors;
+	cic->last_request_pos = rq->sector + rq->nr_sectors;
 
 	if (cfqq == cfqd->active_queue) {
 		/*
@@ -1799,9 +1627,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		if (cfq_cfqq_wait_request(cfqq)) {
 			cfq_mark_cfqq_must_dispatch(cfqq);
 			del_timer(&cfqd->idle_slice_timer);
-			cfq_start_queueing(cfqd, cfqq);
+			blk_start_queueing(cfqd->queue);
 		}
-	} else if (cfq_should_preempt(cfqd, cfqq, crq)) {
+	} else if (cfq_should_preempt(cfqd, cfqq, rq)) {
 		/*
		 * not the active queue - expire current slice if it is
		 * idle and has expired its mean thinktime or this new queue
@@ -1809,34 +1637,32 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 */
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
 		cfq_preempt_queue(cfqd, cfqq);
 		cfq_mark_cfqq_must_dispatch(cfqq);
 		cfq_mark_cfqq_must_dispatch(cfqq);
-		cfq_start_queueing(cfqd, cfqq);
+		blk_start_queueing(cfqd->queue);
 	}
 	}
 }
 }
 
 
 static void cfq_insert_request(request_queue_t *q, struct request *rq)
 static void cfq_insert_request(request_queue_t *q, struct request *rq)
 {
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_rq *crq = RQ_DATA(rq);
-	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
 
 	cfq_init_prio_data(cfqq);
 	cfq_init_prio_data(cfqq);
 
 
-	cfq_add_crq_rb(crq);
+	cfq_add_rq_rb(rq);
 
 
-	list_add_tail(&rq->queuelist, &cfqq->fifo);
+	if (!cfq_cfqq_on_rr(cfqq))
+		cfq_add_cfqq_rr(cfqd, cfqq);
 
 
-	if (rq_mergeable(rq))
-		cfq_add_crq_hash(cfqd, crq);
+	list_add_tail(&rq->queuelist, &cfqq->fifo);
 
 
-	cfq_crq_enqueued(cfqd, cfqq, crq);
+	cfq_rq_enqueued(cfqd, cfqq, rq);
 }
 }
 
 
 static void cfq_completed_request(request_queue_t *q, struct request *rq)
 static void cfq_completed_request(request_queue_t *q, struct request *rq)
 {
 {
-	struct cfq_rq *crq = RQ_DATA(rq);
-	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 	struct cfq_data *cfqd = cfqq->cfqd;
 	struct cfq_data *cfqd = cfqq->cfqd;
-	const int sync = cfq_crq_is_sync(crq);
+	const int sync = rq_is_sync(rq);
 	unsigned long now;
 	unsigned long now;
 
 
 	now = jiffies;
 	now = jiffies;
@@ -1849,15 +1675,11 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
 	if (!cfq_class_idle(cfqq))
 	if (!cfq_class_idle(cfqq))
 		cfqd->last_end_request = now;
 		cfqd->last_end_request = now;
 
 
-	if (!cfq_cfqq_dispatched(cfqq)) {
-		if (cfq_cfqq_on_rr(cfqq)) {
-			cfqq->service_last = now;
-			cfq_resort_rr_list(cfqq, 0);
-		}
-	}
+	if (!cfq_cfqq_dispatched(cfqq) && cfq_cfqq_on_rr(cfqq))
+		cfq_resort_rr_list(cfqq, 0);
 
 
 	if (sync)
 	if (sync)
-		crq->io_context->last_end_request = now;
+		RQ_CIC(rq)->last_end_request = now;
 
 
 	/*
 	/*
 	 * If this is the active queue, check if it needs to be expired,
 	 * If this is the active queue, check if it needs to be expired,
@@ -1873,30 +1695,6 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
 	}
 	}
 }
 }
 
 
-static struct request *
-cfq_former_request(request_queue_t *q, struct request *rq)
-{
-	struct cfq_rq *crq = RQ_DATA(rq);
-	struct rb_node *rbprev = rb_prev(&crq->rb_node);
-
-	if (rbprev)
-		return rb_entry_crq(rbprev)->request;
-
-	return NULL;
-}
-
-static struct request *
-cfq_latter_request(request_queue_t *q, struct request *rq)
-{
-	struct cfq_rq *crq = RQ_DATA(rq);
-	struct rb_node *rbnext = rb_next(&crq->rb_node);
-
-	if (rbnext)
-		return rb_entry_crq(rbnext)->request;
-
-	return NULL;
-}
-
 /*
 /*
  * we temporarily boost lower priority queues if they are holding fs exclusive
  * we temporarily boost lower priority queues if they are holding fs exclusive
  * resources. they are boosted to normal prio (CLASS_BE/4)
  * resources. they are boosted to normal prio (CLASS_BE/4)
@@ -1933,9 +1731,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 		cfq_resort_rr_list(cfqq, 0);
 		cfq_resort_rr_list(cfqq, 0);
 }
 }
 
 
-static inline int
-__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-		struct task_struct *task, int rw)
+static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
 {
 	if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
 	if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
 	    !cfq_cfqq_must_alloc_slice(cfqq)) {
 	    !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -1946,7 +1742,7 @@ __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	return ELV_MQUEUE_MAY;
 	return ELV_MQUEUE_MAY;
 }
 }
 
 
-static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
+static int cfq_may_queue(request_queue_t *q, int rw)
 {
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
 	struct task_struct *tsk = current;
@@ -1963,48 +1759,30 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
 		cfq_init_prio_data(cfqq);
 		cfq_init_prio_data(cfqq);
 		cfq_prio_boost(cfqq);
 		cfq_prio_boost(cfqq);
 
 
-		return __cfq_may_queue(cfqd, cfqq, tsk, rw);
+		return __cfq_may_queue(cfqq);
 	}
 	}
 
 
 	return ELV_MQUEUE_MAY;
 	return ELV_MQUEUE_MAY;
 }
 }
 
 
-static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-
-	if (unlikely(cfqd->rq_starved)) {
-		struct request_list *rl = &q->rq;
-
-		smp_mb();
-		if (waitqueue_active(&rl->wait[READ]))
-			wake_up(&rl->wait[READ]);
-		if (waitqueue_active(&rl->wait[WRITE]))
-			wake_up(&rl->wait[WRITE]);
-	}
-}
-
 /*
 /*
  * queue lock held here
  * queue lock held here
  */
  */
 static void cfq_put_request(request_queue_t *q, struct request *rq)
 static void cfq_put_request(request_queue_t *q, struct request *rq)
 {
 {
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_rq *crq = RQ_DATA(rq);
+	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
 
-	if (crq) {
-		struct cfq_queue *cfqq = crq->cfq_queue;
+	if (cfqq) {
 		const int rw = rq_data_dir(rq);
 		const int rw = rq_data_dir(rq);
 
 
 		BUG_ON(!cfqq->allocated[rw]);
 		BUG_ON(!cfqq->allocated[rw]);
 		cfqq->allocated[rw]--;
 		cfqq->allocated[rw]--;
 
 
-		put_io_context(crq->io_context->ioc);
+		put_io_context(RQ_CIC(rq)->ioc);
 
 
-		mempool_free(crq, cfqd->crq_pool);
 		rq->elevator_private = NULL;
 		rq->elevator_private = NULL;
+		rq->elevator_private2 = NULL;
 
 
-		cfq_check_waiters(q, cfqq);
 		cfq_put_queue(cfqq);
 		cfq_put_queue(cfqq);
 	}
 	}
 }
 }
@@ -2013,8 +1791,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
  * Allocate cfq data structures associated with this request.
  * Allocate cfq data structures associated with this request.
  */
  */
 static int
 static int
-cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		gfp_t gfp_mask)
+cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
 {
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
 	struct task_struct *tsk = current;
@@ -2022,7 +1799,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 	const int rw = rq_data_dir(rq);
 	const int rw = rq_data_dir(rq);
 	pid_t key = cfq_queue_pid(tsk, rw);
 	pid_t key = cfq_queue_pid(tsk, rw);
 	struct cfq_queue *cfqq;
 	struct cfq_queue *cfqq;
-	struct cfq_rq *crq;
 	unsigned long flags;
 	unsigned long flags;
 	int is_sync = key != CFQ_KEY_ASYNC;
 	int is_sync = key != CFQ_KEY_ASYNC;
 
 
@@ -2046,42 +1822,18 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 
 
 	cfqq->allocated[rw]++;
 	cfqq->allocated[rw]++;
 	cfq_clear_cfqq_must_alloc(cfqq);
 	cfq_clear_cfqq_must_alloc(cfqq);
-	cfqd->rq_starved = 0;
 	atomic_inc(&cfqq->ref);
 	atomic_inc(&cfqq->ref);
-	spin_unlock_irqrestore(q->queue_lock, flags);
 
 
-	crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
-	if (crq) {
-		RB_CLEAR_NODE(&crq->rb_node);
-		crq->rb_key = 0;
-		crq->request = rq;
-		INIT_HLIST_NODE(&crq->hash);
-		crq->cfq_queue = cfqq;
-		crq->io_context = cic;
-
-		if (is_sync)
-			cfq_mark_crq_is_sync(crq);
-		else
-			cfq_clear_crq_is_sync(crq);
+	spin_unlock_irqrestore(q->queue_lock, flags);
 
 
-		rq->elevator_private = crq;
-		return 0;
-	}
+	rq->elevator_private = cic;
+	rq->elevator_private2 = cfqq;
+	return 0;
 
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	cfqq->allocated[rw]--;
-	if (!(cfqq->allocated[0] + cfqq->allocated[1]))
-		cfq_mark_cfqq_must_alloc(cfqq);
-	cfq_put_queue(cfqq);
 queue_fail:
 queue_fail:
 	if (cic)
 	if (cic)
 		put_io_context(cic->ioc);
 		put_io_context(cic->ioc);
-	/*
-	 * mark us rq allocation starved. we need to kickstart the process
-	 * ourselves if there are no pending requests that can do it for us.
-	 * that would be an extremely rare OOM situation
-	 */
-	cfqd->rq_starved = 1;
+
 	cfq_schedule_dispatch(cfqd);
 	cfq_schedule_dispatch(cfqd);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 	return 1;
 	return 1;
@@ -2090,27 +1842,10 @@ queue_fail:
 static void cfq_kick_queue(void *data)
 static void cfq_kick_queue(void *data)
 {
 {
 	request_queue_t *q = data;
 	request_queue_t *q = data;
-	struct cfq_data *cfqd = q->elevator->elevator_data;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	spin_lock_irqsave(q->queue_lock, flags);
-
-	if (cfqd->rq_starved) {
-		struct request_list *rl = &q->rq;
-
-		/*
-		 * we aren't guaranteed to get a request after this, but we
-		 * have to be opportunistic
-		 */
-		smp_mb();
-		if (waitqueue_active(&rl->wait[READ]))
-			wake_up(&rl->wait[READ]);
-		if (waitqueue_active(&rl->wait[WRITE]))
-			wake_up(&rl->wait[WRITE]);
-	}
-
-	blk_remove_plug(q);
-	q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 }
 
 
@@ -2193,7 +1928,6 @@ static void cfq_exit_queue(elevator_t *e)
 
 
 	cfq_shutdown_timer_wq(cfqd);
 	cfq_shutdown_timer_wq(cfqd);
 
 
-	spin_lock(&cfq_exit_lock);
 	spin_lock_irq(q->queue_lock);
 	spin_lock_irq(q->queue_lock);
 
 
 	if (cfqd->active_queue)
 	if (cfqd->active_queue)
@@ -2203,25 +1937,14 @@ static void cfq_exit_queue(elevator_t *e)
 		struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
 		struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
 							struct cfq_io_context,
 							struct cfq_io_context,
 							queue_list);
 							queue_list);
-		if (cic->cfqq[ASYNC]) {
-			cfq_put_queue(cic->cfqq[ASYNC]);
-			cic->cfqq[ASYNC] = NULL;
-		}
-		if (cic->cfqq[SYNC]) {
-			cfq_put_queue(cic->cfqq[SYNC]);
-			cic->cfqq[SYNC] = NULL;
-		}
-		cic->key = NULL;
-		list_del_init(&cic->queue_list);
+
+		__cfq_exit_single_io_context(cfqd, cic);
 	}
 
 	spin_unlock_irq(q->queue_lock);
-	spin_unlock(&cfq_exit_lock);
 
 	cfq_shutdown_timer_wq(cfqd);
 
-	mempool_destroy(cfqd->crq_pool);
-	kfree(cfqd->crq_hash);
 	kfree(cfqd->cfq_hash);
 	kfree(cfqd);
 }
@@ -2231,7 +1954,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	struct cfq_data *cfqd;
 	int i;
 
-	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
+	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
 	if (!cfqd)
 		return NULL;
 
@@ -2243,23 +1966,12 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&cfqd->busy_rr);
 	INIT_LIST_HEAD(&cfqd->cur_rr);
 	INIT_LIST_HEAD(&cfqd->idle_rr);
-	INIT_LIST_HEAD(&cfqd->empty_list);
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
-	cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
-	if (!cfqd->crq_hash)
-		goto out_crqhash;
-
-	cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
+	cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node);
 	if (!cfqd->cfq_hash)
-		goto out_cfqhash;
-
-	cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool);
-	if (!cfqd->crq_pool)
-		goto out_crqpool;
+		goto out_free;
 
-	for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
 	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
 		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
 
@@ -2275,7 +1987,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 
 	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
 
-	cfqd->cfq_queued = cfq_queued;
 	cfqd->cfq_quantum = cfq_quantum;
 	cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
 	cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
@@ -2287,19 +1998,13 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 
 	return cfqd;
-out_crqpool:
-	kfree(cfqd->cfq_hash);
-out_cfqhash:
-	kfree(cfqd->crq_hash);
-out_crqhash:
+out_free:
 	kfree(cfqd);
 	return NULL;
 }
 
 static void cfq_slab_kill(void)
 {
-	if (crq_pool)
-		kmem_cache_destroy(crq_pool);
 	if (cfq_pool)
 		kmem_cache_destroy(cfq_pool);
 	if (cfq_ioc_pool)
@@ -2308,11 +2013,6 @@ static void cfq_slab_kill(void)
 
 static int __init cfq_slab_setup(void)
 {
-	crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
-					NULL, NULL);
-	if (!crq_pool)
-		goto fail;
-
 	cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
 					NULL, NULL);
 	if (!cfq_pool)
@@ -2358,7 +2058,6 @@ static ssize_t __FUNC(elevator_t *e, char *page)			\
 	return cfq_var_show(__data, (page));				\
 }
 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
-SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
 SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
@@ -2386,7 +2085,6 @@ static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
 	return ret;							\
 }
 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
-STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
@@ -2402,7 +2100,6 @@ STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX,
 
 static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(quantum),
-	CFQ_ATTR(queued),
 	CFQ_ATTR(fifo_expire_sync),
 	CFQ_ATTR(fifo_expire_async),
 	CFQ_ATTR(back_seek_max),
@@ -2425,14 +2122,14 @@ static struct elevator_type iosched_cfq = {
 		.elevator_deactivate_req_fn =	cfq_deactivate_request,
 		.elevator_queue_empty_fn =	cfq_queue_empty,
 		.elevator_completed_req_fn =	cfq_completed_request,
-		.elevator_former_req_fn =	cfq_former_request,
-		.elevator_latter_req_fn =	cfq_latter_request,
+		.elevator_former_req_fn =	elv_rb_former_request,
+		.elevator_latter_req_fn =	elv_rb_latter_request,
 		.elevator_set_req_fn =		cfq_set_request,
 		.elevator_put_req_fn =		cfq_put_request,
 		.elevator_may_queue_fn =	cfq_may_queue,
 		.elevator_init_fn =		cfq_init_queue,
 		.elevator_exit_fn =		cfq_exit_queue,
-		.trim =				cfq_trim,
+		.trim =				cfq_free_io_context,
 	},
 	.elevator_attrs =	cfq_attrs,
 	.elevator_name =	"cfq",
@@ -2468,7 +2165,7 @@ static void __exit cfq_exit(void)
 	ioc_gone = &all_gone;
 	/* ioc_gone's update must be visible before reading ioc_count */
 	smp_wmb();
-	if (atomic_read(&ioc_count))
+	if (elv_ioc_count_read(ioc_count))
 		wait_for_completion(ioc_gone);
 	synchronize_rcu();
 	cfq_slab_kill();

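The cfq hunks above replace the scheduler's per-request wrapper (struct cfq_rq, with its slab cache and mempool) by storing the io context and the cfq queue directly in the request's two elevator-private slots. A minimal user-space sketch of that pattern; the struct layouts and function name here are illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>

/* Hypothetical stand-ins for the kernel structures involved. */
struct cfq_io_context { int ioprio; };
struct cfq_queue     { int allocated[2]; };

/* Modeled on struct request after this series: two opaque slots that an
 * I/O scheduler can use instead of allocating a wrapper per request. */
struct request {
	int rw;			/* 0 = read, 1 = write */
	void *elevator_private;	/* cfq: the cfq_io_context */
	void *elevator_private2;/* cfq: the cfq_queue */
};

/* set_request in the new style: no mempool allocation and no failure
 * path for a wrapper object -- the request carries the state itself. */
static int cfq_set_request_model(struct request *rq,
				 struct cfq_io_context *cic,
				 struct cfq_queue *cfqq)
{
	cfqq->allocated[rq->rw]++;
	rq->elevator_private = cic;
	rq->elevator_private2 = cfqq;
	return 0;
}

int main(void)
{
	struct cfq_io_context cic = { 0 };
	struct cfq_queue cfqq = { { 0, 0 } };
	struct request rq = { 0, NULL, NULL };

	cfq_set_request_model(&rq, &cic, &cfqq);
	printf("allocated[0]=%d\n", cfqq.allocated[0]);
	return 0;
}

Dropping the wrapper is also what makes the old rq_starved recovery logic unnecessary: there is no second allocation that can fail after the request itself has been obtained.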
+ 74 - 390
block/deadline-iosched.c

@@ -1,7 +1,7 @@
 /*
  *  Deadline i/o scheduler.
  *
- *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
+ *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
  */
 #include <linux/kernel.h>
 #include <linux/fs.h>
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/hash.h>
 #include <linux/rbtree.h>
 
 /*
@@ -24,13 +23,6 @@ static const int writes_starved = 2;    /* max times reads can starve a write */
 static const int fifo_batch = 16;       /* # of sequential requests treated as one
 				     by the above parameters. For throughput. */
 
-static const int deadline_hash_shift = 5;
-#define DL_HASH_BLOCK(sec)	((sec) >> 3)
-#define DL_HASH_FN(sec)		(hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
-#define DL_HASH_ENTRIES		(1 << deadline_hash_shift)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-#define ON_HASH(drq)		(!hlist_unhashed(&(drq)->hash))
-
 struct deadline_data {
 	/*
 	 * run time data
@@ -45,8 +37,7 @@ struct deadline_data {
 	/*
 	 * next in sort order. read, write or both are NULL
 	 */
-	struct deadline_rq *next_drq[2];
-	struct hlist_head *hash;	/* request hash */
+	struct request *next_rq[2];
 	unsigned int batching;		/* number of sequential requests made */
 	sector_t last_sector;		/* head position */
 	unsigned int starved;		/* times reads have starved writes */
@@ -58,240 +49,69 @@ struct deadline_data {
 	int fifo_batch;
 	int writes_starved;
 	int front_merges;
-
-	mempool_t *drq_pool;
 };
 
-/*
- * pre-request data.
- */
-struct deadline_rq {
-	/*
-	 * rbtree index, key is the starting offset
-	 */
-	struct rb_node rb_node;
-	sector_t rb_key;
-
-	struct request *request;
-
-	/*
-	 * request hash, key is the ending offset (for back merge lookup)
-	 */
-	struct hlist_node hash;
-
-	/*
-	 * expire fifo
-	 */
-	struct list_head fifo;
-	unsigned long expires;
-};
-
-static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);
-
-static kmem_cache_t *drq_pool;
-
-#define RQ_DATA(rq)	((struct deadline_rq *) (rq)->elevator_private)
+static void deadline_move_request(struct deadline_data *, struct request *);
 
-/*
- * the back merge hash support functions
- */
-static inline void __deadline_del_drq_hash(struct deadline_rq *drq)
-{
-	hlist_del_init(&drq->hash);
-}
-
-static inline void deadline_del_drq_hash(struct deadline_rq *drq)
-{
-	if (ON_HASH(drq))
-		__deadline_del_drq_hash(drq);
-}
-
-static inline void
-deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
-	struct request *rq = drq->request;
-
-	BUG_ON(ON_HASH(drq));
-
-	hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void
-deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
-	struct request *rq = drq->request;
-	struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
-
-	if (ON_HASH(drq) && &drq->hash != head->first) {
-		hlist_del(&drq->hash);
-		hlist_add_head(&drq->hash, head);
-	}
-}
-
-static struct request *
-deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
-{
-	struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
-	struct hlist_node *entry, *next;
-	struct deadline_rq *drq;
-
-	hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) {
-		struct request *__rq = drq->request;
-
-		BUG_ON(!ON_HASH(drq));
-
-		if (!rq_mergeable(__rq)) {
-			__deadline_del_drq_hash(drq);
-			continue;
-		}
-
-		if (rq_hash_key(__rq) == offset)
-			return __rq;
-	}
-
-	return NULL;
-}
-
-/*
- * rb tree support functions
- */
-#define rb_entry_drq(node)	rb_entry((node), struct deadline_rq, rb_node)
-#define DRQ_RB_ROOT(dd, drq)	(&(dd)->sort_list[rq_data_dir((drq)->request)])
-#define rq_rb_key(rq)		(rq)->sector
-
-static struct deadline_rq *
-__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
-{
-	struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
-	struct rb_node *parent = NULL;
-	struct deadline_rq *__drq;
-
-	while (*p) {
-		parent = *p;
-		__drq = rb_entry_drq(parent);
-
-		if (drq->rb_key < __drq->rb_key)
-			p = &(*p)->rb_left;
-		else if (drq->rb_key > __drq->rb_key)
-			p = &(*p)->rb_right;
-		else
-			return __drq;
-	}
-
-	rb_link_node(&drq->rb_node, parent, p);
-	return NULL;
-}
+#define RQ_RB_ROOT(dd, rq)	(&(dd)->sort_list[rq_data_dir((rq))])
 
 static void
-deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
 {
-	struct deadline_rq *__alias;
-
-	drq->rb_key = rq_rb_key(drq->request);
+	struct rb_root *root = RQ_RB_ROOT(dd, rq);
+	struct request *__alias;
 
 retry:
-	__alias = __deadline_add_drq_rb(dd, drq);
-	if (!__alias) {
-		rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
-		return;
+	__alias = elv_rb_add(root, rq);
+	if (unlikely(__alias)) {
+		deadline_move_request(dd, __alias);
+		goto retry;
 	}
-
-	deadline_move_request(dd, __alias);
-	goto retry;
 }
 
 static inline void
-deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
 {
-	const int data_dir = rq_data_dir(drq->request);
+	const int data_dir = rq_data_dir(rq);
 
-	if (dd->next_drq[data_dir] == drq) {
-		struct rb_node *rbnext = rb_next(&drq->rb_node);
+	if (dd->next_rq[data_dir] == rq) {
+		struct rb_node *rbnext = rb_next(&rq->rb_node);
 
-		dd->next_drq[data_dir] = NULL;
+		dd->next_rq[data_dir] = NULL;
 		if (rbnext)
-			dd->next_drq[data_dir] = rb_entry_drq(rbnext);
-	}
-
-	BUG_ON(!RB_EMPTY_NODE(&drq->rb_node));
-	rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
-	RB_CLEAR_NODE(&drq->rb_node);
-}
-
-static struct request *
-deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
-{
-	struct rb_node *n = dd->sort_list[data_dir].rb_node;
-	struct deadline_rq *drq;
-
-	while (n) {
-		drq = rb_entry_drq(n);
-
-		if (sector < drq->rb_key)
-			n = n->rb_left;
-		else if (sector > drq->rb_key)
-			n = n->rb_right;
-		else
-			return drq->request;
+			dd->next_rq[data_dir] = rb_entry_rq(rbnext);
 	}
 
-	return NULL;
+	elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
 }
 
 /*
- * deadline_find_first_drq finds the first (lowest sector numbered) request
- * for the specified data_dir. Used to sweep back to the start of the disk
- * (1-way elevator) after we process the last (highest sector) request.
- */
-static struct deadline_rq *
-deadline_find_first_drq(struct deadline_data *dd, int data_dir)
-{
-	struct rb_node *n = dd->sort_list[data_dir].rb_node;
-
-	for (;;) {
-		if (n->rb_left == NULL)
-			return rb_entry_drq(n);
-		
-		n = n->rb_left;
-	}
-}
-
-/*
- * add drq to rbtree and fifo
+ * add rq to rbtree and fifo
  */
 static void
 deadline_add_request(struct request_queue *q, struct request *rq)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
-	struct deadline_rq *drq = RQ_DATA(rq);
+	const int data_dir = rq_data_dir(rq);
 
-	const int data_dir = rq_data_dir(drq->request);
+	deadline_add_rq_rb(dd, rq);
 
-	deadline_add_drq_rb(dd, drq);
 	/*
 	 * set expire time (only used for reads) and add to fifo list
 	 */
-	drq->expires = jiffies + dd->fifo_expire[data_dir];
-	list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
-
-	if (rq_mergeable(rq))
-		deadline_add_drq_hash(dd, drq);
+	rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
+	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
 }
 
 /*
- * remove rq from rbtree, fifo, and hash
+ * remove rq from rbtree and fifo.
  */
 static void deadline_remove_request(request_queue_t *q, struct request *rq)
 {
-	struct deadline_rq *drq = RQ_DATA(rq);
 	struct deadline_data *dd = q->elevator->elevator_data;
 
-	list_del_init(&drq->fifo);
-	deadline_del_drq_rb(dd, drq);
-	deadline_del_drq_hash(drq);
+	rq_fifo_clear(rq);
+	deadline_del_rq_rb(dd, rq);
 }
 
 static int
@@ -301,28 +121,15 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	/*
-	 * see if the merge hash can satisfy a back merge
-	 */
-	__rq = deadline_find_drq_hash(dd, bio->bi_sector);
-	if (__rq) {
-		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_BACK_MERGE;
-			goto out;
-		}
-	}
-
 	/*
 	 * check for front merge
 	 */
 	if (dd->front_merges) {
-		sector_t rb_key = bio->bi_sector + bio_sectors(bio);
+		sector_t sector = bio->bi_sector + bio_sectors(bio);
 
-		__rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio));
+		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
 		if (__rq) {
-			BUG_ON(rb_key != rq_rb_key(__rq));
+			BUG_ON(sector != __rq->sector);
 
 			if (elv_rq_merge_ok(__rq, bio)) {
 				ret = ELEVATOR_FRONT_MERGE;
@@ -333,29 +140,21 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	if (ret)
-		deadline_hot_drq_hash(dd, RQ_DATA(__rq));
 	*req = __rq;
 	return ret;
 }
 
-static void deadline_merged_request(request_queue_t *q, struct request *req)
+static void deadline_merged_request(request_queue_t *q, struct request *req,
+				    int type)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
-	struct deadline_rq *drq = RQ_DATA(req);
-
-	/*
-	 * hash always needs to be repositioned, key is end sector
-	 */
-	deadline_del_drq_hash(drq);
-	deadline_add_drq_hash(dd, drq);
 
 	/*
 	 * if the merge was a front merge, we need to reposition request
 	 */
-	if (rq_rb_key(req) != drq->rb_key) {
-		deadline_del_drq_rb(dd, drq);
-		deadline_add_drq_rb(dd, drq);
+	if (type == ELEVATOR_FRONT_MERGE) {
+		elv_rb_del(RQ_RB_ROOT(dd, req), req);
+		deadline_add_rq_rb(dd, req);
 	}
 }
 
@@ -363,33 +162,14 @@ static void
 deadline_merged_requests(request_queue_t *q, struct request *req,
 			 struct request *next)
 {
-	struct deadline_data *dd = q->elevator->elevator_data;
-	struct deadline_rq *drq = RQ_DATA(req);
-	struct deadline_rq *dnext = RQ_DATA(next);
-
-	BUG_ON(!drq);
-	BUG_ON(!dnext);
-
 	/*
-	 * reposition drq (this is the merged request) in hash, and in rbtree
-	 * in case of a front merge
+	 * if next expires before rq, assign its expire time to rq
+	 * and move into next position (next will be deleted) in fifo
 	 */
-	deadline_del_drq_hash(drq);
-	deadline_add_drq_hash(dd, drq);
-
-	if (rq_rb_key(req) != drq->rb_key) {
-		deadline_del_drq_rb(dd, drq);
-		deadline_add_drq_rb(dd, drq);
-	}
-
-	/*
-	 * if dnext expires before drq, assign its expire time to drq
-	 * and move into dnext position (dnext will be deleted) in fifo
-	 */
-	if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
-		if (time_before(dnext->expires, drq->expires)) {
-			list_move(&drq->fifo, &dnext->fifo);
-			drq->expires = dnext->expires;
+	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
+		if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
+			list_move(&req->queuelist, &next->queuelist);
+			rq_set_fifo_time(req, rq_fifo_time(next));
 		}
 	}
 
@@ -403,52 +183,50 @@ deadline_merged_requests(request_queue_t *q, struct request *req,
  * move request from sort list to dispatch queue.
  */
 static inline void
-deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
 {
-	request_queue_t *q = drq->request->q;
+	request_queue_t *q = rq->q;
 
-	deadline_remove_request(q, drq->request);
-	elv_dispatch_add_tail(q, drq->request);
+	deadline_remove_request(q, rq);
+	elv_dispatch_add_tail(q, rq);
 }
 
 /*
  * move an entry to dispatch queue
  */
 static void
-deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_move_request(struct deadline_data *dd, struct request *rq)
 {
-	const int data_dir = rq_data_dir(drq->request);
-	struct rb_node *rbnext = rb_next(&drq->rb_node);
+	const int data_dir = rq_data_dir(rq);
+	struct rb_node *rbnext = rb_next(&rq->rb_node);
 
-	dd->next_drq[READ] = NULL;
-	dd->next_drq[WRITE] = NULL;
+	dd->next_rq[READ] = NULL;
+	dd->next_rq[WRITE] = NULL;
 
 	if (rbnext)
-		dd->next_drq[data_dir] = rb_entry_drq(rbnext);
+		dd->next_rq[data_dir] = rb_entry_rq(rbnext);
 	
-	dd->last_sector = drq->request->sector + drq->request->nr_sectors;
+	dd->last_sector = rq->sector + rq->nr_sectors;
 
 	/*
 	 * take it off the sort and fifo list, move
 	 * to dispatch queue
 	 */
-	deadline_move_to_dispatch(dd, drq);
+	deadline_move_to_dispatch(dd, rq);
 }
 
-#define list_entry_fifo(ptr)	list_entry((ptr), struct deadline_rq, fifo)
-
 /*
  * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
  */
 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
 {
-	struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next);
+	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
 
 	/*
-	 * drq is expired!
+	 * rq is expired!
 	 */
-	if (time_after(jiffies, drq->expires))
+	if (time_after(jiffies, rq_fifo_time(rq)))
 		return 1;
 
 	return 0;
@@ -463,21 +241,21 @@ static int deadline_dispatch_requests(request_queue_t *q, int force)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const int reads = !list_empty(&dd->fifo_list[READ]);
 	const int writes = !list_empty(&dd->fifo_list[WRITE]);
-	struct deadline_rq *drq;
+	struct request *rq;
 	int data_dir;
 
 	/*
 	 * batches are currently reads XOR writes
 	 */
-	if (dd->next_drq[WRITE])
-		drq = dd->next_drq[WRITE];
+	if (dd->next_rq[WRITE])
+		rq = dd->next_rq[WRITE];
 	else
-		drq = dd->next_drq[READ];
+		rq = dd->next_rq[READ];
 
-	if (drq) {
+	if (rq) {
 		/* we have a "next request" */
 		
-		if (dd->last_sector != drq->request->sector)
+		if (dd->last_sector != rq->sector)
 			/* end the batch on a non sequential request */
 			dd->batching += dd->fifo_batch;
 		
@@ -526,30 +304,33 @@ dispatch_find_request:
 	if (deadline_check_fifo(dd, data_dir)) {
 		/* An expired request exists - satisfy it */
 		dd->batching = 0;
-		drq = list_entry_fifo(dd->fifo_list[data_dir].next);
+		rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
 		
-	} else if (dd->next_drq[data_dir]) {
+	} else if (dd->next_rq[data_dir]) {
 		/*
 		 * The last req was the same dir and we have a next request in
 		 * sort order. No expired requests so continue on from here.
 		 */
-		drq = dd->next_drq[data_dir];
+		rq = dd->next_rq[data_dir];
 	} else {
+		struct rb_node *node;
 		/*
 		 * The last req was the other direction or we have run out of
 		 * higher-sectored requests. Go back to the lowest sectored
 		 * request (1 way elevator) and start a new batch.
 		 */
 		dd->batching = 0;
-		drq = deadline_find_first_drq(dd, data_dir);
+		node = rb_first(&dd->sort_list[data_dir]);
+		if (node)
+			rq = rb_entry_rq(node);
 	}
 
 dispatch_request:
 	/*
-	 * drq is the selected appropriate request.
+	 * rq is the selected appropriate request.
 	 */
 	dd->batching++;
-	deadline_move_request(dd, drq);
+	deadline_move_request(dd, rq);
 
 	return 1;
 }
@@ -562,30 +343,6 @@ static int deadline_queue_empty(request_queue_t *q)
 		&& list_empty(&dd->fifo_list[READ]);
 }
 
-static struct request *
-deadline_former_request(request_queue_t *q, struct request *rq)
-{
-	struct deadline_rq *drq = RQ_DATA(rq);
-	struct rb_node *rbprev = rb_prev(&drq->rb_node);
-
-	if (rbprev)
-		return rb_entry_drq(rbprev)->request;
-
-	return NULL;
-}
-
-static struct request *
-deadline_latter_request(request_queue_t *q, struct request *rq)
-{
-	struct deadline_rq *drq = RQ_DATA(rq);
-	struct rb_node *rbnext = rb_next(&drq->rb_node);
-
-	if (rbnext)
-		return rb_entry_drq(rbnext)->request;
-
-	return NULL;
-}
-
 static void deadline_exit_queue(elevator_t *e)
 {
 	struct deadline_data *dd = e->elevator_data;
@@ -593,46 +350,21 @@ static void deadline_exit_queue(elevator_t *e)
 	BUG_ON(!list_empty(&dd->fifo_list[READ]));
 	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
 
-	mempool_destroy(dd->drq_pool);
-	kfree(dd->hash);
 	kfree(dd);
 }
 
 /*
- * initialize elevator private data (deadline_data), and alloc a drq for
- * each request on the free lists
+ * initialize elevator private data (deadline_data).
  */
 static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct deadline_data *dd;
-	int i;
-
-	if (!drq_pool)
-		return NULL;
 
 	dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
 	if (!dd)
 		return NULL;
 	memset(dd, 0, sizeof(*dd));
 
-	dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES,
-				GFP_KERNEL, q->node);
-	if (!dd->hash) {
-		kfree(dd);
-		return NULL;
-	}
-
-	dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-					mempool_free_slab, drq_pool, q->node);
-	if (!dd->drq_pool) {
-		kfree(dd->hash);
-		kfree(dd);
-		return NULL;
-	}
-
-	for (i = 0; i < DL_HASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&dd->hash[i]);
-
 	INIT_LIST_HEAD(&dd->fifo_list[READ]);
 	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
 	dd->sort_list[READ] = RB_ROOT;
@@ -645,39 +377,6 @@ static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
 	return dd;
 }
 
-static void deadline_put_request(request_queue_t *q, struct request *rq)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-	struct deadline_rq *drq = RQ_DATA(rq);
-
-	mempool_free(drq, dd->drq_pool);
-	rq->elevator_private = NULL;
-}
-
-static int
-deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		     gfp_t gfp_mask)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-	struct deadline_rq *drq;
-
-	drq = mempool_alloc(dd->drq_pool, gfp_mask);
-	if (drq) {
-		memset(drq, 0, sizeof(*drq));
-		RB_CLEAR_NODE(&drq->rb_node);
-		drq->request = rq;
-
-		INIT_HLIST_NODE(&drq->hash);
-
-		INIT_LIST_HEAD(&drq->fifo);
-
-		rq->elevator_private = drq;
-		return 0;
-	}
-
-	return 1;
-}
-
 /*
  * sysfs parts below
  */
@@ -757,10 +456,8 @@ static struct elevator_type iosched_deadline = {
 		.elevator_dispatch_fn =		deadline_dispatch_requests,
 		.elevator_add_req_fn =		deadline_add_request,
 		.elevator_queue_empty_fn =	deadline_queue_empty,
-		.elevator_former_req_fn =	deadline_former_request,
-		.elevator_latter_req_fn =	deadline_latter_request,
-		.elevator_set_req_fn =		deadline_set_request,
-		.elevator_put_req_fn = 		deadline_put_request,
+		.elevator_former_req_fn =	elv_rb_former_request,
+		.elevator_latter_req_fn =	elv_rb_latter_request,
 		.elevator_init_fn =		deadline_init_queue,
 		.elevator_exit_fn =		deadline_exit_queue,
 	},
@@ -772,24 +469,11 @@ static struct elevator_type iosched_deadline = {
 
 static int __init deadline_init(void)
 {
-	int ret;
-
-	drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
-				     0, 0, NULL, NULL);
-
-	if (!drq_pool)
-		return -ENOMEM;
-
-	ret = elv_register(&iosched_deadline);
-	if (ret)
-		kmem_cache_destroy(drq_pool);
-
-	return ret;
+	return elv_register(&iosched_deadline);
 }
 
 static void __exit deadline_exit(void)
 {
-	kmem_cache_destroy(drq_pool);
 	elv_unregister(&iosched_deadline);
 }
 

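With struct deadline_rq gone, the fifo expiry timestamp travels on the request itself: rq_set_fifo_time() at insert, rq_fifo_time() in deadline_check_fifo(). A rough user-space sketch of that expiry test under the kernel's wraparound-safe time comparison; request_model, jiffies_model and the fifo_expire value are inventions to keep the example self-contained:

#include <stdio.h>

/* Wraparound-safe comparison, same idea as the kernel's time_after(). */
#define time_after(a, b) ((long)((b) - (a)) < 0)

/* Stand-in for a request whose fifo deadline is stored inline. */
struct request_model { unsigned long fifo_time; };

static unsigned long jiffies_model;	/* illustrative tick counter */

/* Modeled on deadline_check_fifo(): 1 if the fifo head has expired. */
static int check_fifo_model(const struct request_model *head)
{
	return time_after(jiffies_model, head->fifo_time) ? 1 : 0;
}

int main(void)
{
	struct request_model rq;
	unsigned long fifo_expire = 500;	/* like dd->fifo_expire[READ] */

	rq.fifo_time = jiffies_model + fifo_expire;	/* set at enqueue */
	printf("expired now: %d\n", check_fifo_model(&rq));	/* 0 */
	jiffies_model += fifo_expire + 1;			/* time passes */
	printf("expired later: %d\n", check_fifo_model(&rq));	/* 1 */
	return 0;
}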
+ 263 - 52
block/elevator.c

@@ -3,7 +3,7 @@
  *
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  *
- * 30042000 Jens Axboe <axboe@suse.de> :
+ * 30042000 Jens Axboe <axboe@kernel.dk> :
  *
  * Split the elevator a bit so that it is possible to choose a different
  * one or even write a new "plug in". There are three pieces:
@@ -33,12 +33,23 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
+#include <linux/hash.h>
 
 #include <asm/uaccess.h>
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
+/*
+ * Merge hash stuff.
+ */
+static const int elv_hash_shift = 6;
+#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
+#define ELV_HASH_FN(sec)	(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
+#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
+#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
+#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
+
 /*
  * can we safely merge with this request?
  */
@@ -56,8 +67,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 	/*
 	 * same device and no special stuff set, merge is ok
 	 */
-	if (rq->rq_disk == bio->bi_bdev->bd_disk &&
-	    !rq->waiting && !rq->special)
+	if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special)
 		return 1;
 
 	return 0;
@@ -151,27 +161,44 @@ __setup("elevator=", elevator_setup);
 
 static struct kobj_type elv_ktype;
 
-static elevator_t *elevator_alloc(struct elevator_type *e)
-{
-	elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
-	if (eq) {
-		memset(eq, 0, sizeof(*eq));
-		eq->ops = &e->ops;
-		eq->elevator_type = e;
-		kobject_init(&eq->kobj);
-		snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
-		eq->kobj.ktype = &elv_ktype;
-		mutex_init(&eq->sysfs_lock);
-	} else {
-		elevator_put(e);
-	}
+static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
+{
+	elevator_t *eq;
+	int i;
+
+	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
+	if (unlikely(!eq))
+		goto err;
+
+	memset(eq, 0, sizeof(*eq));
+	eq->ops = &e->ops;
+	eq->elevator_type = e;
+	kobject_init(&eq->kobj);
+	snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
+	eq->kobj.ktype = &elv_ktype;
+	mutex_init(&eq->sysfs_lock);
+
+	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
+					GFP_KERNEL, q->node);
+	if (!eq->hash)
+		goto err;
+
+	for (i = 0; i < ELV_HASH_ENTRIES; i++)
+		INIT_HLIST_HEAD(&eq->hash[i]);
+
 	return eq;
+err:
+	kfree(eq);
+	elevator_put(e);
+	return NULL;
 }
 
 static void elevator_release(struct kobject *kobj)
 {
 	elevator_t *e = container_of(kobj, elevator_t, kobj);
+
 	elevator_put(e->elevator_type);
+	kfree(e->hash);
 	kfree(e);
 }
 
@@ -198,7 +225,7 @@ int elevator_init(request_queue_t *q, char *name)
 		e = elevator_get("noop");
 	}
 
-	eq = elevator_alloc(e);
+	eq = elevator_alloc(q, e);
 	if (!eq)
 		return -ENOMEM;
 
@@ -212,6 +239,8 @@ int elevator_init(request_queue_t *q, char *name)
 	return ret;
 }
 
+EXPORT_SYMBOL(elevator_init);
+
 void elevator_exit(elevator_t *e)
 {
 	mutex_lock(&e->sysfs_lock);
@@ -223,10 +252,118 @@ void elevator_exit(elevator_t *e)
 	kobject_put(&e->kobj);
 }
 
+EXPORT_SYMBOL(elevator_exit);
+
+static inline void __elv_rqhash_del(struct request *rq)
+{
+	hlist_del_init(&rq->hash);
+}
+
+static void elv_rqhash_del(request_queue_t *q, struct request *rq)
+{
+	if (ELV_ON_HASH(rq))
+		__elv_rqhash_del(rq);
+}
+
+static void elv_rqhash_add(request_queue_t *q, struct request *rq)
+{
+	elevator_t *e = q->elevator;
+
+	BUG_ON(ELV_ON_HASH(rq));
+	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
+}
+
+static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
+{
+	__elv_rqhash_del(rq);
+	elv_rqhash_add(q, rq);
+}
+
+static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
+{
+	elevator_t *e = q->elevator;
+	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
+	struct hlist_node *entry, *next;
+	struct request *rq;
+
+	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+		BUG_ON(!ELV_ON_HASH(rq));
+
+		if (unlikely(!rq_mergeable(rq))) {
+			__elv_rqhash_del(rq);
+			continue;
+		}
+
+		if (rq_hash_key(rq) == offset)
+			return rq;
+	}
+
+	return NULL;
+}
+
+/*
+ * RB-tree support functions for inserting/lookup/removal of requests
+ * in a sorted RB tree.
+ */
+struct request *elv_rb_add(struct rb_root *root, struct request *rq)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct request *__rq;
+
+	while (*p) {
+		parent = *p;
+		__rq = rb_entry(parent, struct request, rb_node);
+
+		if (rq->sector < __rq->sector)
+			p = &(*p)->rb_left;
+		else if (rq->sector > __rq->sector)
+			p = &(*p)->rb_right;
+		else
+			return __rq;
+	}
+
+	rb_link_node(&rq->rb_node, parent, p);
+	rb_insert_color(&rq->rb_node, root);
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_add);
+
+void elv_rb_del(struct rb_root *root, struct request *rq)
+{
+	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
+	rb_erase(&rq->rb_node, root);
+	RB_CLEAR_NODE(&rq->rb_node);
+}
+
+EXPORT_SYMBOL(elv_rb_del);
+
+struct request *elv_rb_find(struct rb_root *root, sector_t sector)
+{
+	struct rb_node *n = root->rb_node;
+	struct request *rq;
+
+	while (n) {
+		rq = rb_entry(n, struct request, rb_node);
+
+		if (sector < rq->sector)
+			n = n->rb_left;
+		else if (sector > rq->sector)
+			n = n->rb_right;
+		else
+			return rq;
+	}
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_find);
+
 /*
  * Insert rq into dispatch queue of q.  Queue lock must be held on
- * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
- * appended to the dispatch queue.  To be used by specific elevators.
+ * entry.  rq is sort inserted into the dispatch queue. To be used by
+ * specific elevators.
  */
 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 {
@@ -235,6 +372,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
+
+	elv_rqhash_del(q, rq);
+
 	q->nr_sorted--;
 
 	boundary = q->end_sector;
@@ -242,7 +382,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
-		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+		if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
 			break;
 		if (rq->sector >= boundary) {
 			if (pos->sector < boundary)
@@ -258,11 +398,38 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 	list_add(&rq->queuelist, entry);
 }
 
+EXPORT_SYMBOL(elv_dispatch_sort);
+
+/*
+ * Insert rq into dispatch queue of q.  Queue lock must be held on
+ * entry.  rq is added to the back of the dispatch queue. To be used by
+ * specific elevators.
+ */
+void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
+{
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+
+	elv_rqhash_del(q, rq);
+
+	q->nr_sorted--;
+
+	q->end_sector = rq_end_sector(rq);
+	q->boundary_rq = rq;
+	list_add_tail(&rq->queuelist, &q->queue_head);
+}
+
+EXPORT_SYMBOL(elv_dispatch_add_tail);
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
+	struct request *__rq;
 	int ret;
 
+	/*
+	 * First try one-hit cache.
+	 */
 	if (q->last_merge) {
 	if (q->last_merge) {
 		ret = elv_try_merge(q->last_merge, bio);
 		if (ret != ELEVATOR_NO_MERGE) {
 		}
 		}
 	}
 
+	 * See if our hash lookup can find a potential backmerge.
+	 */
+	__rq = elv_rqhash_find(q, bio->bi_sector);
+	if (__rq && elv_rq_merge_ok(__rq, bio)) {
+		*req = __rq;
+		return ELEVATOR_BACK_MERGE;
+	}
+
 	if (e->ops->elevator_merge_fn)
 		return e->ops->elevator_merge_fn(q, req, bio);
 
 	return ELEVATOR_NO_MERGE;
 }
 
-void elv_merged_request(request_queue_t *q, struct request *rq)
+void elv_merged_request(request_queue_t *q, struct request *rq, int type)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_merged_fn)
-		e->ops->elevator_merged_fn(q, rq);
+		e->ops->elevator_merged_fn(q, rq, type);
+
+	if (type == ELEVATOR_BACK_MERGE)
+		elv_rqhash_reposition(q, rq);
 
 	q->last_merge = rq;
 }
@@ -294,8 +473,11 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
 
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
-	q->nr_sorted--;
 
+	elv_rqhash_reposition(q, rq);
+	elv_rqhash_del(q, next);
+
+	q->nr_sorted--;
 	q->last_merge = rq;
 }
 
@@ -313,7 +495,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 			e->ops->elevator_deactivate_req_fn(q, rq);
 	}
 
-	rq->flags &= ~REQ_STARTED;
+	rq->cmd_flags &= ~REQ_STARTED;
 
 	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
@@ -344,13 +526,13 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 
 	switch (where) {
 	case ELEVATOR_INSERT_FRONT:
-		rq->flags |= REQ_SOFTBARRIER;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 
 		list_add(&rq->queuelist, &q->queue_head);
 		break;
 
 	case ELEVATOR_INSERT_BACK:
-		rq->flags |= REQ_SOFTBARRIER;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 		elv_drain_elevator(q);
 		list_add_tail(&rq->queuelist, &q->queue_head);
 		/*
@@ -369,10 +551,14 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 
 	case ELEVATOR_INSERT_SORT:
 		BUG_ON(!blk_fs_request(rq));
-		rq->flags |= REQ_SORTED;
+		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
-		if (q->last_merge == NULL && rq_mergeable(rq))
-			q->last_merge = rq;
+		if (rq_mergeable(rq)) {
+			elv_rqhash_add(q, rq);
+			if (!q->last_merge)
+				q->last_merge = rq;
+		}
+
 		/*
 		 * Some ioscheds (cfq) run q->request_fn directly, so
 		 * rq cannot be accessed after calling
@@ -387,7 +573,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 		 * insertion; otherwise, requests should be requeued
 		 * in ordseq order.
 		 */
-		rq->flags |= REQ_SOFTBARRIER;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 
 		if (q->ordseq == 0) {
 			list_add(&rq->queuelist, &q->queue_head);
@@ -429,9 +615,9 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		       int plug)
 {
 	if (q->ordcolor)
-		rq->flags |= REQ_ORDERED_COLOR;
+		rq->cmd_flags |= REQ_ORDERED_COLOR;
 
-	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
 		/*
 		 * toggle ordered color
 		 */
@@ -452,7 +638,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = rq;
 		}
-	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
+	} else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
 	if (plug)
@@ -461,6 +647,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 	elv_insert(q, rq, where);
 }
 
+EXPORT_SYMBOL(__elv_add_request);
+
 void elv_add_request(request_queue_t *q, struct request *rq, int where,
 		     int plug)
 {
@@ -471,6 +659,8 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+EXPORT_SYMBOL(elv_add_request);
+
 static inline struct request *__elv_next_request(request_queue_t *q)
 {
 	struct request *rq;
@@ -493,7 +683,7 @@ struct request *elv_next_request(request_queue_t *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		if (!(rq->flags & REQ_STARTED)) {
+		if (!(rq->cmd_flags & REQ_STARTED)) {
 			elevator_t *e = q->elevator;
 
 			/*
@@ -510,7 +700,7 @@ struct request *elv_next_request(request_queue_t *q)
 			 * it, a request that has been delayed should
 			 * not be passed by new incoming requests
 			 */
-			rq->flags |= REQ_STARTED;
+			rq->cmd_flags |= REQ_STARTED;
 			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
 		}
 
@@ -519,7 +709,7 @@ struct request *elv_next_request(request_queue_t *q)
 			q->boundary_rq = NULL;
 		}
 
-		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
+		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
 			break;
 
 		ret = q->prep_rq_fn(q, rq);
@@ -541,7 +731,7 @@ struct request *elv_next_request(request_queue_t *q)
 				nr_bytes = rq->data_len;
 
 			blkdev_dequeue_request(rq);
-			rq->flags |= REQ_QUIET;
+			rq->cmd_flags |= REQ_QUIET;
 			end_that_request_chunk(rq, 0, nr_bytes);
 			end_that_request_last(rq, 0);
 		} else {
@@ -554,9 +744,12 @@ struct request *elv_next_request(request_queue_t *q)
 	return rq;
 }
 
+EXPORT_SYMBOL(elv_next_request);
+
 void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
 	BUG_ON(list_empty(&rq->queuelist));
+	BUG_ON(ELV_ON_HASH(rq));
 
 	list_del_init(&rq->queuelist);
 
@@ -569,6 +762,8 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq)
 		q->in_flight++;
 }
 
+EXPORT_SYMBOL(elv_dequeue_request);
+
 int elv_queue_empty(request_queue_t *q)
 {
 	elevator_t *e = q->elevator;
@@ -582,6 +777,8 @@ int elv_queue_empty(request_queue_t *q)
 	return 1;
 }
 
+EXPORT_SYMBOL(elv_queue_empty);
+
 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
@@ -600,13 +797,12 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
 	return NULL;
 }
 
-int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		    gfp_t gfp_mask)
+int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_set_req_fn)
-		return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
+		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
 
 	rq->elevator_private = NULL;
 	return 0;
@@ -620,12 +816,12 @@ void elv_put_request(request_queue_t *q, struct request *rq)
 		e->ops->elevator_put_req_fn(q, rq);
 }
 
-int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
+int elv_may_queue(request_queue_t *q, int rw)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_may_queue_fn)
-		return e->ops->elevator_may_queue_fn(q, rw, bio);
+		return e->ops->elevator_may_queue_fn(q, rw);
 
 	return ELV_MQUEUE_MAY;
 }
@@ -792,7 +988,7 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 	/*
 	 * Allocate new elevator
 	 */
-	e = elevator_alloc(new_e);
+	e = elevator_alloc(q, new_e);
 	if (!e)
 		return 0;
 
@@ -908,11 +1104,26 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
 	return len;
 }
 
-EXPORT_SYMBOL(elv_dispatch_sort);
-EXPORT_SYMBOL(elv_add_request);
-EXPORT_SYMBOL(__elv_add_request);
-EXPORT_SYMBOL(elv_next_request);
-EXPORT_SYMBOL(elv_dequeue_request);
-EXPORT_SYMBOL(elv_queue_empty);
-EXPORT_SYMBOL(elevator_exit);
-EXPORT_SYMBOL(elevator_init);
+struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
+{
+	struct rb_node *rbprev = rb_prev(&rq->rb_node);
+
+	if (rbprev)
+		return rb_entry_rq(rbprev);
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_former_request);
+
+struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
+{
+	struct rb_node *rbnext = rb_next(&rq->rb_node);
+
+	if (rbnext)
+		return rb_entry_rq(rbnext);
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_latter_request);

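The elevator.c side hoists the back-merge hash out of the individual schedulers: every sorted request is hashed on its end sector, so elv_merge() can find a back-merge candidate for a bio in O(1) before falling back to the scheduler's own merge hook. A simplified user-space model of that lookup, with singly linked buckets standing in for hlist and invented names throughout:

#include <stdio.h>

#define HASH_ENTRIES 64
#define HASH_FN(sec) (((sec) >> 3) % HASH_ENTRIES)	/* toy hash */

struct req_model {
	unsigned long sector, nr_sectors;
	struct req_model *hash_next;
};

/* Key is the END of the request, as in rq_hash_key() above. */
#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)

static struct req_model *buckets[HASH_ENTRIES];

static void rqhash_add(struct req_model *rq)
{
	struct req_model **head = &buckets[HASH_FN(rq_hash_key(rq))];

	rq->hash_next = *head;
	*head = rq;
}

/* Like elv_rqhash_find(): look up by the sector a new bio starts at. */
static struct req_model *rqhash_find(unsigned long offset)
{
	struct req_model *rq = buckets[HASH_FN(offset)];

	for (; rq; rq = rq->hash_next)
		if (rq_hash_key(rq) == offset)
			return rq;
	return NULL;
}

int main(void)
{
	struct req_model rq = { 100, 8, NULL };	/* sectors 100..107 */

	rqhash_add(&rq);
	/* a bio starting at sector 108 is a back-merge candidate for rq */
	printf("found: %s\n", rqhash_find(108) ? "yes" : "no");
	return 0;
}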
+ 103 - 133
block/ll_rw_blk.c

@@ -39,6 +39,7 @@ static void blk_unplug_timeout(unsigned long data);
 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
 static void init_request_from_bio(struct request *req, struct bio *bio);
 static int __make_request(request_queue_t *q, struct bio *bio);
+static struct io_context *current_io_context(gfp_t gfp_flags, int node);
 
 /*
  * For the allocated request tables
@@ -277,19 +278,19 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 
 EXPORT_SYMBOL(blk_queue_make_request);
 
-static inline void rq_init(request_queue_t *q, struct request *rq)
+static void rq_init(request_queue_t *q, struct request *rq)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->donelist);
 
 	rq->errors = 0;
-	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
+	INIT_HLIST_NODE(&rq->hash);
+	RB_CLEAR_NODE(&rq->rb_node);
 	rq->ioprio = 0;
 	rq->buffer = NULL;
 	rq->ref_count = 1;
 	rq->q = q;
-	rq->waiting = NULL;
 	rq->special = NULL;
 	rq->data_len = 0;
 	rq->data = NULL;
@@ -382,8 +383,8 @@ unsigned blk_ordered_req_seq(struct request *rq)
 	if (rq == &q->post_flush_rq)
 		return QUEUE_ORDSEQ_POSTFLUSH;
 
-	if ((rq->flags & REQ_ORDERED_COLOR) ==
-	    (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
+	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
+	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
 		return QUEUE_ORDSEQ_DRAIN;
 	else
 		return QUEUE_ORDSEQ_DONE;
@@ -446,11 +447,11 @@ static void queue_flush(request_queue_t *q, unsigned which)
 		end_io = post_flush_end_io;
 	}
 
+	rq->cmd_flags = REQ_HARDBARRIER;
 	rq_init(q, rq);
-	rq->flags = REQ_HARDBARRIER;
 	rq->elevator_private = NULL;
+	rq->elevator_private2 = NULL;
 	rq->rq_disk = q->bar_rq.rq_disk;
-	rq->rl = NULL;
 	rq->end_io = end_io;
 	q->prepare_flush_fn(q, rq);
 
@@ -471,11 +472,13 @@ static inline struct request *start_ordered(request_queue_t *q,
 	blkdev_dequeue_request(rq);
 	q->orig_bar_rq = rq;
 	rq = &q->bar_rq;
+	rq->cmd_flags = 0;
 	rq_init(q, rq);
-	rq->flags = bio_data_dir(q->orig_bar_rq->bio);
-	rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+		rq->cmd_flags |= REQ_RW;
+	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
 	rq->elevator_private = NULL;
-	rq->rl = NULL;
+	rq->elevator_private2 = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
 	rq->end_io = bar_end_io;
 
@@ -587,8 +590,8 @@ static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
 	return 0;
 }
 
-static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
-				    unsigned int nbytes, int error)
+static int ordered_bio_endio(struct request *rq, struct bio *bio,
+			     unsigned int nbytes, int error)
 {
 	request_queue_t *q = rq->q;
 	bio_end_io_t *endio;
@@ -1124,7 +1127,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq)
 	}
 
 	list_del_init(&rq->queuelist);
-	rq->flags &= ~REQ_QUEUED;
+	rq->cmd_flags &= ~REQ_QUEUED;
 	rq->tag = -1;
 
 	if (unlikely(bqt->tag_index[tag] == NULL))
@@ -1160,7 +1163,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
 	struct blk_queue_tag *bqt = q->queue_tags;
 	int tag;
 
-	if (unlikely((rq->flags & REQ_QUEUED))) {
+	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
 		printk(KERN_ERR 
 		       "%s: request %p for device [%s] already tagged %d",
 		       __FUNCTION__, rq,
@@ -1168,13 +1171,18 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
 		BUG();
 	}
 
-	tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
-	if (tag >= bqt->max_depth)
-		return 1;
+	/*
+	 * Protect against shared tag maps, as we may not have exclusive
+	 * access to the tag map.
+	 */
+	do {
+		tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
+		if (tag >= bqt->max_depth)
+			return 1;
 
-	__set_bit(tag, bqt->tag_map);
+	} while (test_and_set_bit(tag, bqt->tag_map));
 
-	rq->flags |= REQ_QUEUED;
+	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
 	blkdev_dequeue_request(rq);
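The blk_queue_start_tag() hunk just above closes a race on shared tag maps: between find_first_zero_bit() and claiming the bit, another queue using the same map can take the tag, so the claim becomes an atomic test_and_set_bit() in a retry loop. A stand-alone sketch of the same pattern using C11 atomics; start_tag_model and the single-word map are inventions for the example:

#include <stdatomic.h>
#include <stdio.h>

#define MAX_DEPTH 64

static _Atomic unsigned long tag_map;	/* one word stands in for the bitmap */

static int start_tag_model(void)
{
	unsigned long map;
	int tag;

	do {
		map = atomic_load(&tag_map);
		/* scan for a free bit, like find_first_zero_bit() */
		for (tag = 0; tag < MAX_DEPTH; tag++)
			if (!(map & (1UL << tag)))
				break;
		if (tag >= MAX_DEPTH)
			return -1;	/* map exhausted */
		/* atomic claim; if a racer set the bit first, rescan */
	} while (atomic_fetch_or(&tag_map, 1UL << tag) & (1UL << tag));

	return tag;
}

int main(void)
{
	printf("tag=%d\n", start_tag_model());	/* 0 */
	printf("tag=%d\n", start_tag_model());	/* 1 */
	return 0;
}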
@@ -1210,65 +1218,31 @@ void blk_queue_invalidate_tags(request_queue_t *q)
 			printk(KERN_ERR
 			       "%s: bad tag found on list\n", __FUNCTION__);
 			list_del_init(&rq->queuelist);
-			rq->flags &= ~REQ_QUEUED;
+			rq->cmd_flags &= ~REQ_QUEUED;
 		} else
 			blk_queue_end_tag(q, rq);
 
-		rq->flags &= ~REQ_STARTED;
+		rq->cmd_flags &= ~REQ_STARTED;
 		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
 	}
 }
 
 EXPORT_SYMBOL(blk_queue_invalidate_tags);
 
-static const char * const rq_flags[] = {
-	"REQ_RW",
-	"REQ_FAILFAST",
-	"REQ_SORTED",
-	"REQ_SOFTBARRIER",
-	"REQ_HARDBARRIER",
-	"REQ_FUA",
-	"REQ_CMD",
-	"REQ_NOMERGE",
-	"REQ_STARTED",
-	"REQ_DONTPREP",
-	"REQ_QUEUED",
-	"REQ_ELVPRIV",
-	"REQ_PC",
-	"REQ_BLOCK_PC",
-	"REQ_SENSE",
-	"REQ_FAILED",
-	"REQ_QUIET",
-	"REQ_SPECIAL",
-	"REQ_DRIVE_CMD",
-	"REQ_DRIVE_TASK",
-	"REQ_DRIVE_TASKFILE",
-	"REQ_PREEMPT",
-	"REQ_PM_SUSPEND",
-	"REQ_PM_RESUME",
-	"REQ_PM_SHUTDOWN",
-	"REQ_ORDERED_COLOR",
-};
-
 void blk_dump_rq_flags(struct request *rq, char *msg)
 {
 	int bit;
 
-	printk("%s: dev %s: flags = ", msg,
-		rq->rq_disk ? rq->rq_disk->disk_name : "?");
-	bit = 0;
-	do {
-		if (rq->flags & (1 << bit))
-			printk("%s ", rq_flags[bit]);
-		bit++;
-	} while (bit < __REQ_NR_BITS);
+	printk("%s: dev %s: type=%x, flags=%x\n", msg,
+		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
+		rq->cmd_flags);
 
 
 	printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
 	printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
 						       rq->nr_sectors,
 						       rq->nr_sectors,
 						       rq->current_nr_sectors);
 						       rq->current_nr_sectors);
 	printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
 	printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
 
 
-	if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
+	if (blk_pc_request(rq)) {
 		printk("cdb: ");
 		printk("cdb: ");
 		for (bit = 0; bit < sizeof(rq->cmd); bit++)
 		for (bit = 0; bit < sizeof(rq->cmd); bit++)
 			printk("%02x ", rq->cmd[bit]);
 			printk("%02x ", rq->cmd[bit]);
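
The symbolic-name table above has to go because a single bit index in the old rq->flags no longer identifies one name: the word is split into rq->cmd_type (an enum) and rq->cmd_flags (modifier bits), so blk_dump_rq_flags() now prints both fields raw in hex. A hedged sketch of decoding a few bits by name again, limited to flags this diff itself uses (REQ_NOMERGE, REQ_QUIET, REQ_FAILED):

static void dump_known_flags(struct request *rq)
{
	if (rq->cmd_flags & REQ_NOMERGE)
		printk("NOMERGE ");
	if (rq->cmd_flags & REQ_QUIET)
		printk("QUIET ");
	if (rq->cmd_flags & REQ_FAILED)
		printk("FAILED ");
	printk("\n");
}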
@@ -1441,7 +1415,7 @@ static inline int ll_new_mergeable(request_queue_t *q,
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
 	if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1464,7 +1438,7 @@ static inline int ll_new_hw_segment(request_queue_t *q,
 
 	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
 	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1491,7 +1465,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
 		max_sectors = q->max_sectors;
 
 	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1530,7 +1504,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req,
 
 
 	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -2029,14 +2003,13 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(request_queue_t *q, struct request *rq)
 {
-	if (rq->flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
 }
 
-static inline struct request *
-blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
-		  int priv, gfp_t gfp_mask)
+static struct request *
+blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -2044,17 +2017,17 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
 		return NULL;
 
 	/*
-	 * first three bits are identical in rq->flags and bio->bi_rw,
+	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
 	 * see bio.h and blkdev.h
 	 */
-	rq->flags = rw;
+	rq->cmd_flags = rw | REQ_ALLOCED;
 
 	if (priv) {
-		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
 			mempool_free(rq, q->rq.rq_pool);
 			return NULL;
 		}
-		rq->flags |= REQ_ELVPRIV;
+		rq->cmd_flags |= REQ_ELVPRIV;
 	}
 
 	return rq;
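
REQ_ALLOCED is the other half of dropping rq->rl: a request that came out of q->rq.rq_pool is branded at allocation time, so the put path can recognize pool-backed requests without a request_list back-pointer. A sketch of the pairing this sets up (the real test appears in the __blk_put_request() hunk further down; names as in this diff):

static void put_request_sketch(request_queue_t *q, struct request *req)
{
	if (req->cmd_flags & REQ_ALLOCED) {
		/* mempool-backed: free it and credit the request list */
		blk_free_request(q, req);
	}
	/* else: caller-owned storage (e.g. on-stack requests built by
	 * drivers such as paride/pd.c below); nothing to free here */
}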
@@ -2141,13 +2114,13 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 	struct io_context *ioc = NULL;
 	int may_queue, priv;
 
-	may_queue = elv_may_queue(q, rw, bio);
+	may_queue = elv_may_queue(q, rw);
 	if (may_queue == ELV_MQUEUE_NO)
 		goto rq_starved;
 
 	if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[rw]+1 >= q->nr_requests) {
-			ioc = current_io_context(GFP_ATOMIC);
+			ioc = current_io_context(GFP_ATOMIC, q->node);
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
@@ -2189,7 +2162,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
+	rq = blk_alloc_request(q, rw, priv, gfp_mask);
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -2225,7 +2198,6 @@ rq_starved:
 		ioc->nr_batch_requests--;
 
 	rq_init(q, rq);
-	rq->rl = rl;
 
 	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
 out:
@@ -2268,7 +2240,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
 			 * up to a big batch of them for a small period time.
 			 * See ioc_batching, ioc_set_batching
 			 */
-			ioc = current_io_context(GFP_NOIO);
+			ioc = current_io_context(GFP_NOIO, q->node);
 			ioc_set_batching(q, ioc);
 
 			spin_lock_irq(q->queue_lock);
@@ -2299,6 +2271,25 @@ struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_get_request);
 
+/**
+ * blk_start_queueing - initiate dispatch of requests to device
+ * @q:		request queue to kick into gear
+ *
+ * This is basically a helper to remove the need to know whether a queue
+ * is plugged or not if someone just wants to initiate dispatch of requests
+ * for this queue.
+ *
+ * The queue lock must be held with interrupts disabled.
+ */
+void blk_start_queueing(request_queue_t *q)
+{
+	if (!blk_queue_plugged(q))
+		q->request_fn(q);
+	else
+		__generic_unplug_device(q);
+}
+EXPORT_SYMBOL(blk_start_queueing);
+
 /**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
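
blk_start_queueing() folds the plugged/unplugged distinction into one call: a caller holding the queue lock with interrupts off can kick dispatch without caring which state the queue is in. A sketch of the calling pattern (the blk_insert_request() hunk just below is converted to exactly this shape):

static void queue_and_kick(request_queue_t *q, struct request *rq)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
	blk_start_queueing(q);	/* runs request_fn now, or unplugs first */
	spin_unlock_irqrestore(q->queue_lock, flags);
}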
@@ -2351,7 +2342,8 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
 	 * must not attempt merges on this) and that it acts as a soft
 	 * barrier
 	 */
-	rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
+	rq->cmd_flags |= REQ_SOFTBARRIER;
 
 	rq->special = data;
 
@@ -2365,11 +2357,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
 
 	drive_stat_acct(rq, rq->nr_sectors, 1);
 	__elv_add_request(q, rq, where, 0);
-
-	if (blk_queue_plugged(q))
-		__generic_unplug_device(q);
-	else
-		q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -2558,7 +2546,7 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
 	rq->rq_disk = bd_disk;
-	rq->flags |= REQ_NOMERGE;
+	rq->cmd_flags |= REQ_NOMERGE;
 	rq->end_io = done;
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
@@ -2598,10 +2586,9 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
 		rq->sense_len = 0;
 	}
 
-	rq->waiting = &wait;
+	rq->end_io_data = &wait;
 	blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
 	wait_for_completion(&wait);
-	rq->waiting = NULL;
 
 	if (rq->errors)
 		err = -EIO;
@@ -2710,8 +2697,6 @@ EXPORT_SYMBOL_GPL(disk_round_stats);
  */
 void __blk_put_request(request_queue_t *q, struct request *req)
 {
-	struct request_list *rl = req->rl;
-
 	if (unlikely(!q))
 		return;
 	if (unlikely(--req->ref_count))
@@ -2719,18 +2704,16 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 
 	elv_completed_request(q, req);
 
-	req->rq_status = RQ_INACTIVE;
-	req->rl = NULL;
-
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
 	 */
-	if (rl) {
+	if (req->cmd_flags & REQ_ALLOCED) {
 		int rw = rq_data_dir(req);
-		int priv = req->flags & REQ_ELVPRIV;
+		int priv = req->cmd_flags & REQ_ELVPRIV;
 
 		BUG_ON(!list_empty(&req->queuelist));
+		BUG_ON(!hlist_unhashed(&req->hash));
 
 		blk_free_request(q, req);
 		freed_request(q, rw, priv);
@@ -2764,9 +2747,9 @@ EXPORT_SYMBOL(blk_put_request);
  */
 void blk_end_sync_rq(struct request *rq, int error)
 {
-	struct completion *waiting = rq->waiting;
+	struct completion *waiting = rq->end_io_data;
 
-	rq->waiting = NULL;
+	rq->end_io_data = NULL;
 	__blk_put_request(rq->q, rq);
 
 	/*
@@ -2829,7 +2812,7 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 
 	if (rq_data_dir(req) != rq_data_dir(next)
 	    || req->rq_disk != next->rq_disk
-	    || next->waiting || next->special)
+	    || next->special)
 		return 0;
 
 	/*
@@ -2890,22 +2873,24 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
 
 static void init_request_from_bio(struct request *req, struct bio *bio)
 {
-	req->flags |= REQ_CMD;
+	req->cmd_type = REQ_TYPE_FS;
 
 	/*
 	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
 	 */
 	if (bio_rw_ahead(bio) || bio_failfast(bio))
-		req->flags |= REQ_FAILFAST;
+		req->cmd_flags |= REQ_FAILFAST;
 
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
 	if (unlikely(bio_barrier(bio)))
-		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	if (bio_sync(bio))
-		req->flags |= REQ_RW_SYNC;
+		req->cmd_flags |= REQ_RW_SYNC;
+	if (bio_rw_meta(bio))
+		req->cmd_flags |= REQ_RW_META;
 
 	req->errors = 0;
 	req->hard_sector = req->sector = bio->bi_sector;
@@ -2914,7 +2899,6 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
 	req->nr_phys_segments = bio_phys_segments(req->q, bio);
 	req->nr_hw_segments = bio_hw_segments(req->q, bio);
 	req->buffer = bio_data(bio);	/* see ->buffer comment above */
-	req->waiting = NULL;
 	req->bio = req->biotail = bio;
 	req->ioprio = bio_prio(bio);
 	req->rq_disk = bio->bi_bdev->bd_disk;
@@ -2924,17 +2908,11 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
 static int __make_request(request_queue_t *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
-	unsigned short prio;
-	sector_t sector;
+	int el_ret, nr_sectors, barrier, err;
+	const unsigned short prio = bio_prio(bio);
+	const int sync = bio_sync(bio);
 
-	sector = bio->bi_sector;
 	nr_sectors = bio_sectors(bio);
-	cur_nr_sectors = bio_cur_sectors(bio);
-	prio = bio_prio(bio);
-
-	rw = bio_data_dir(bio);
-	sync = bio_sync(bio);
 
 	/*
 	 * low level driver can indicate that it wants pages above a
@@ -2943,8 +2921,6 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 	 */
 	blk_queue_bounce(q, &bio);
 
-	spin_lock_prefetch(q->queue_lock);
-
 	barrier = bio_barrier(bio);
 	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
 		err = -EOPNOTSUPP;
@@ -2972,7 +2948,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_back_merge(q, req))
-				elv_merged_request(q, req);
+				elv_merged_request(q, req, el_ret);
 			goto out;
 
 		case ELEVATOR_FRONT_MERGE:
@@ -2992,14 +2968,14 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 			 * not touch req->buffer either...
 			 */
 			req->buffer = bio_data(bio);
-			req->current_nr_sectors = cur_nr_sectors;
-			req->hard_cur_sectors = cur_nr_sectors;
-			req->sector = req->hard_sector = sector;
+			req->current_nr_sectors = bio_cur_sectors(bio);
+			req->hard_cur_sectors = req->current_nr_sectors;
+			req->sector = req->hard_sector = bio->bi_sector;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_front_merge(q, req))
-				elv_merged_request(q, req);
+				elv_merged_request(q, req, el_ret);
 			goto out;
 
 		/* ELV_NO_MERGE: elevator says don't/can't merge. */
@@ -3012,7 +2988,7 @@ get_rq:
 	 * Grab a free request. This is might sleep but can not fail.
 	 * Returns with the queue unlocked.
 	 */
-	req = get_request_wait(q, rw, bio);
+	req = get_request_wait(q, bio_data_dir(bio), bio);
 
 	/*
 	 * After dropping the lock and possibly sleeping here, our request
@@ -3306,7 +3282,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
 		req->errors = 0;
 
 	if (!uptodate) {
-		if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
+		if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
 			printk("end_request: I/O error, dev %s, sector %llu\n",
 				req->rq_disk ? req->rq_disk->disk_name : "?",
 				(unsigned long long)req->sector);
@@ -3569,8 +3545,8 @@ EXPORT_SYMBOL(end_request);
 
 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
 {
-	/* first two bits are identical in rq->flags and bio->bi_rw */
-	rq->flags |= (bio->bi_rw & 3);
+	/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
+	rq->cmd_flags |= (bio->bi_rw & 3);
 
 	rq->nr_phys_segments = bio_phys_segments(q, bio);
 	rq->nr_hw_segments = bio_hw_segments(q, bio);
@@ -3658,25 +3634,22 @@ EXPORT_SYMBOL(put_io_context);
 /* Called by the exitting task */
 void exit_io_context(void)
 {
-	unsigned long flags;
 	struct io_context *ioc;
 	struct cfq_io_context *cic;
 
-	local_irq_save(flags);
 	task_lock(current);
 	ioc = current->io_context;
 	current->io_context = NULL;
-	ioc->task = NULL;
 	task_unlock(current);
-	local_irq_restore(flags);
 
+	ioc->task = NULL;
 	if (ioc->aic && ioc->aic->exit)
 		ioc->aic->exit(ioc->aic);
 	if (ioc->cic_root.rb_node != NULL) {
 		cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
 		cic->exit(ioc);
 	}
- 
+
 	put_io_context(ioc);
 }
 
@@ -3688,7 +3661,7 @@ void exit_io_context(void)
  * but since the current task itself holds a reference, the context can be
  * used in general code, so long as it stays within `current` context.
  */
-struct io_context *current_io_context(gfp_t gfp_flags)
+static struct io_context *current_io_context(gfp_t gfp_flags, int node)
 {
 	struct task_struct *tsk = current;
 	struct io_context *ret;
@@ -3697,11 +3670,11 @@ struct io_context *current_io_context(gfp_t gfp_flags)
 	if (likely(ret))
 		return ret;
 
-	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
+	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
 		ret->task = current;
-		ret->set_ioprio = NULL;
+		ret->ioprio_changed = 0;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
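
current_io_context() (and the exported get_io_context() below) now take a NUMA node so the io_context is allocated on the queue's home node via kmem_cache_alloc_node(); the callers above pass q->node, which is set when the queue is created node-aware. A sketch of the intended call shape, assuming a request_queue_t with a valid ->node as in the ll_rw_blk.c hunks:

static struct io_context *ioc_near_queue(request_queue_t *q)
{
	/*
	 * allocate on q->node so the context lives next to the queue's
	 * other per-device data; GFP_NOIO because this can run while
	 * servicing I/O
	 */
	return get_io_context(GFP_NOIO, q->node);
}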
@@ -3721,10 +3694,10 @@ EXPORT_SYMBOL(current_io_context);
  *
  * This is always called in the context of the task which submitted the I/O.
  */
-struct io_context *get_io_context(gfp_t gfp_flags)
+struct io_context *get_io_context(gfp_t gfp_flags, int node)
 {
 	struct io_context *ret;
-	ret = current_io_context(gfp_flags);
+	ret = current_io_context(gfp_flags, node);
 	if (likely(ret))
 		atomic_inc(&ret->refcount);
 	return ret;
@@ -3837,9 +3810,6 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
 	ssize_t ret = queue_var_store(&ra_kb, page, count);
 
 	spin_lock_irq(q->queue_lock);
-	if (ra_kb > (q->max_sectors >> 1))
-		ra_kb = (q->max_sectors >> 1);
-
 	q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
 	spin_unlock_irq(q->queue_lock);
 

+ 1 - 1
block/noop-iosched.c

@@ -69,7 +69,7 @@ static void *noop_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct noop_data *nd;
 
-	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+	nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
 	if (!nd)
 		return NULL;
 	INIT_LIST_HEAD(&nd->queue);

+ 3 - 3
block/scsi_ioctl.c

@@ -294,7 +294,7 @@ static int sg_io(struct file *file, request_queue_t *q,
 	rq->sense = sense;
 	rq->sense_len = 0;
 
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	bio = rq->bio;
 
 	/*
@@ -470,7 +470,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
 	memset(sense, 0, sizeof(sense));
 	rq->sense = sense;
 	rq->sense_len = 0;
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	blk_execute_rq(q, disk, rq, 0);
 
@@ -502,7 +502,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c
 	int err;
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->data = NULL;
 	rq->data_len = 0;
 	rq->timeout = BLK_DEFAULT_TIMEOUT;

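Across all of these conversions the old multiplexed rq->flags word is split in two: rq->cmd_type holds exactly one request type (what used to be the REQ_CMD, REQ_BLOCK_PC, REQ_SENSE and REQ_SPECIAL bits), while rq->cmd_flags keeps genuine modifiers such as REQ_QUIET or REQ_HARDBARRIER. That is why `rq->flags |= REQ_BLOCK_PC` becomes an assignment rather than an OR. A simplified sketch of the shape (paraphrased from this tree's include/linux/blkdev.h, not a verbatim copy):

enum rq_cmd_type_bits {
	REQ_TYPE_FS = 1,	/* filesystem read/write */
	REQ_TYPE_BLOCK_PC,	/* SCSI command, e.g. via SG_IO */
	REQ_TYPE_SENSE,		/* sense retrieval */
	REQ_TYPE_SPECIAL,	/* driver-private */
	/* ... */
};

struct request_shape {
	enum rq_cmd_type_bits cmd_type;	/* exactly one type */
	unsigned int cmd_flags;		/* REQ_QUIET | REQ_NOMERGE | ... */
};
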
+ 1 - 1
drivers/block/DAC960.c

@@ -3331,7 +3331,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
 		Command->DmaDirection = PCI_DMA_TODEVICE;
 		Command->CommandType = DAC960_WriteCommand;
 	}
-	Command->Completion = Request->waiting;
+	Command->Completion = Request->end_io_data;
 	Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
 	Command->BlockNumber = Request->sector;
 	Command->BlockCount = Request->nr_sectors;

+ 4 - 0
drivers/block/Kconfig

@@ -2,6 +2,8 @@
 # Block device driver configuration
 #
 
+if BLOCK
+
 menu "Block devices"
 
 config BLK_DEV_FD
@@ -468,3 +470,5 @@ config ATA_OVER_ETH
 	devices like the Coraid EtherDrive (R) Storage Blade.
 
 endmenu
+
+endif

+ 0 - 1
drivers/block/cciss.c

@@ -1229,7 +1229,6 @@ static inline void complete_buffers(struct bio *bio, int status)
 		int nr_sectors = bio_sectors(bio);
 
 		bio->bi_next = NULL;
-		blk_finished_io(len);
 		bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
 		bio = xbh;
 	}

+ 0 - 1
drivers/block/cpqarray.c

@@ -989,7 +989,6 @@ static inline void complete_buffers(struct bio *bio, int ok)
 		xbh = bio->bi_next;
 		bio->bi_next = NULL;
 
-		blk_finished_io(nr_sectors);
 		bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO);
 
 		bio = xbh;

+ 2 - 2
drivers/block/floppy.c

@@ -2991,8 +2991,8 @@ static void do_fd_request(request_queue_t * q)
 	if (usage_count == 0) {
 		printk("warning: usage count=0, current_req=%p exiting\n",
 		       current_req);
-		printk("sect=%ld flags=%lx\n", (long)current_req->sector,
-		       current_req->flags);
+		printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
+		       current_req->cmd_type, current_req->cmd_flags);
 		return;
 	}
 	if (test_bit(0, &fdc_busy)) {

+ 160 - 0
drivers/block/loop.c

@@ -66,6 +66,7 @@
 #include <linux/swap.h>
 #include <linux/slab.h>
 #include <linux/loop.h>
+#include <linux/compat.h>
 #include <linux/suspend.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>		/* for invalidate_bdev() */
@@ -1165,6 +1166,162 @@ static int lo_ioctl(struct inode * inode, struct file * file,
 	return err;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_loop_info {
+	compat_int_t	lo_number;      /* ioctl r/o */
+	compat_dev_t	lo_device;      /* ioctl r/o */
+	compat_ulong_t	lo_inode;       /* ioctl r/o */
+	compat_dev_t	lo_rdevice;     /* ioctl r/o */
+	compat_int_t	lo_offset;
+	compat_int_t	lo_encrypt_type;
+	compat_int_t	lo_encrypt_key_size;    /* ioctl w/o */
+	compat_int_t	lo_flags;       /* ioctl r/o */
+	char		lo_name[LO_NAME_SIZE];
+	unsigned char	lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
+	compat_ulong_t	lo_init[2];
+	char		reserved[4];
+};
+
+/*
+ * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_from_compat(const struct compat_loop_info *arg,
+			struct loop_info64 *info64)
+{
+	struct compat_loop_info info;
+
+	if (copy_from_user(&info, arg, sizeof(info)))
+		return -EFAULT;
+
+	memset(info64, 0, sizeof(*info64));
+	info64->lo_number = info.lo_number;
+	info64->lo_device = info.lo_device;
+	info64->lo_inode = info.lo_inode;
+	info64->lo_rdevice = info.lo_rdevice;
+	info64->lo_offset = info.lo_offset;
+	info64->lo_sizelimit = 0;
+	info64->lo_encrypt_type = info.lo_encrypt_type;
+	info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
+	info64->lo_flags = info.lo_flags;
+	info64->lo_init[0] = info.lo_init[0];
+	info64->lo_init[1] = info.lo_init[1];
+	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+		memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
+	else
+		memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
+	memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
+	return 0;
+}
+
+/*
+ * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_to_compat(const struct loop_info64 *info64,
+		      struct compat_loop_info __user *arg)
+{
+	struct compat_loop_info info;
+
+	memset(&info, 0, sizeof(info));
+	info.lo_number = info64->lo_number;
+	info.lo_device = info64->lo_device;
+	info.lo_inode = info64->lo_inode;
+	info.lo_rdevice = info64->lo_rdevice;
+	info.lo_offset = info64->lo_offset;
+	info.lo_encrypt_type = info64->lo_encrypt_type;
+	info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
+	info.lo_flags = info64->lo_flags;
+	info.lo_init[0] = info64->lo_init[0];
+	info.lo_init[1] = info64->lo_init[1];
+	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+		memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
+	else
+		memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
+	memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
+
+	/* error in case values were truncated */
+	if (info.lo_device != info64->lo_device ||
+	    info.lo_rdevice != info64->lo_rdevice ||
+	    info.lo_inode != info64->lo_inode ||
+	    info.lo_offset != info64->lo_offset ||
+	    info.lo_init[0] != info64->lo_init[0] ||
+	    info.lo_init[1] != info64->lo_init[1])
+		return -EOVERFLOW;
+
+	if (copy_to_user(arg, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+static int
+loop_set_status_compat(struct loop_device *lo,
+		       const struct compat_loop_info __user *arg)
+{
+	struct loop_info64 info64;
+	int ret;
+
+	ret = loop_info64_from_compat(arg, &info64);
+	if (ret < 0)
+		return ret;
+	return loop_set_status(lo, &info64);
+}
+
+static int
+loop_get_status_compat(struct loop_device *lo,
+		       struct compat_loop_info __user *arg)
+{
+	struct loop_info64 info64;
+	int err = 0;
+
+	if (!arg)
+		err = -EINVAL;
+	if (!err)
+		err = loop_get_status(lo, &info64);
+	if (!err)
+		err = loop_info64_to_compat(&info64, arg);
+	return err;
+}
+
+static long lo_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
+	int err;
+
+	lock_kernel();
+	switch(cmd) {
+	case LOOP_SET_STATUS:
+		mutex_lock(&lo->lo_ctl_mutex);
+		err = loop_set_status_compat(
+			lo, (const struct compat_loop_info __user *) arg);
+		mutex_unlock(&lo->lo_ctl_mutex);
+		break;
+	case LOOP_GET_STATUS:
+		mutex_lock(&lo->lo_ctl_mutex);
+		err = loop_get_status_compat(
+			lo, (struct compat_loop_info __user *) arg);
+		mutex_unlock(&lo->lo_ctl_mutex);
+		break;
+	case LOOP_CLR_FD:
+	case LOOP_GET_STATUS64:
+	case LOOP_SET_STATUS64:
+		arg = (unsigned long) compat_ptr(arg);
+	case LOOP_SET_FD:
+	case LOOP_CHANGE_FD:
+		err = lo_ioctl(inode, file, cmd, arg);
+		break;
+	default:
+		err = -ENOIOCTLCMD;
+		break;
+	}
+	unlock_kernel();
+	return err;
+}
+#endif
+
 static int lo_open(struct inode *inode, struct file *file)
 {
 	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
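
With .compat_ioctl wired up (see the lo_fops hunk below), a 32-bit program on a 64-bit kernel can drive the loop device directly: LOOP_SET_STATUS/LOOP_GET_STATUS translate the 32-bit loop_info layout, and the remaining commands either fix up the pointer with compat_ptr() or fall through unchanged. A userspace illustration (not part of the patch; assumes /dev/loop0 exists and is configured):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

int main(void)
{
	struct loop_info li;
	int fd = open("/dev/loop0", O_RDONLY);

	if (fd < 0)
		return 1;
	/* on a 64-bit kernel this now reaches lo_compat_ioctl() */
	return ioctl(fd, LOOP_GET_STATUS, &li) ? 1 : 0;
}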
@@ -1192,6 +1349,9 @@ static struct block_device_operations lo_fops = {
 	.open =		lo_open,
 	.release =	lo_release,
 	.ioctl =	lo_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =	lo_compat_ioctl,
+#endif
 };
 
 /*

+ 4 - 4
drivers/block/nbd.c

@@ -407,10 +407,10 @@ static void do_nbd_request(request_queue_t * q)
 		struct nbd_device *lo;
 
 		blkdev_dequeue_request(req);
-		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n",
-				req->rq_disk->disk_name, req, req->flags);
+		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
+				req->rq_disk->disk_name, req, req->cmd_type);
 
-		if (!(req->flags & REQ_CMD))
+		if (!blk_fs_request(req))
			goto error_out;
 
 		lo = req->rq_disk->private_data;
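
The open-coded flag tests seen here and in the drivers below give way to predicates over the new cmd_type field; the helpers used throughout these hunks are one-line comparisons (shape as in this tree's blkdev.h, paraphrased):

#define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
#define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
#define blk_special_request(rq)	((rq)->cmd_type == REQ_TYPE_SPECIAL)
#define blk_sense_request(rq)	((rq)->cmd_type == REQ_TYPE_SENSE)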
@@ -489,7 +489,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
 	switch (cmd) {
 	case NBD_DISCONNECT:
 	        printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
-		sreq.flags = REQ_SPECIAL;
+		sreq.cmd_type = REQ_TYPE_SPECIAL;
 		nbd_cmd(&sreq) = NBD_CMD_DISC;
 		/*
 		 * Set these to sane values in case server implementation

+ 2 - 4
drivers/block/paride/pd.c

@@ -437,7 +437,7 @@ static char *pd_buf;		/* buffer for request in progress */
 
 static enum action do_pd_io_start(void)
 {
-	if (pd_req->flags & REQ_SPECIAL) {
+	if (blk_special_request(pd_req)) {
 		phase = pd_special;
 		return pd_special();
 	}
@@ -719,14 +719,12 @@ static int pd_special_command(struct pd_unit *disk,
 
 	memset(&rq, 0, sizeof(rq));
 	rq.errors = 0;
-	rq.rq_status = RQ_ACTIVE;
 	rq.rq_disk = disk->gd;
 	rq.ref_count = 1;
-	rq.waiting = &wait;
+	rq.end_io_data = &wait;
 	rq.end_io = blk_end_sync_rq;
 	blk_insert_request(disk->gd->queue, &rq, 0, func);
 	wait_for_completion(&wait);
-	rq.waiting = NULL;
 	if (rq.errors)
 		err = -EIO;
 	blk_put_request(&rq);

+ 4 - 4
drivers/block/pktcdvd.c

@@ -365,17 +365,17 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	rq->sense = sense;
 	memset(sense, 0, sizeof(sense));
 	rq->sense_len = 0;
-	rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->cmd_flags |= REQ_HARDBARRIER;
 	if (cgc->quiet)
-		rq->flags |= REQ_QUIET;
+		rq->cmd_flags |= REQ_QUIET;
 	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
 	if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
 		memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
 	rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
 
 	rq->ref_count++;
-	rq->flags |= REQ_NOMERGE;
-	rq->waiting = &wait;
+	rq->end_io_data = &wait;
 	rq->end_io = blk_end_sync_rq;
 	elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
 	generic_unplug_device(q);

+ 2 - 2
drivers/block/swim3.c

@@ -319,8 +319,8 @@ static void start_request(struct floppy_state *fs)
 		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
 		       req->rq_disk->disk_name, req->cmd,
 		       (long)req->sector, req->nr_sectors, req->buffer);
-		printk("           rq_status=%d errors=%d current_nr_sectors=%ld\n",
-		       req->rq_status, req->errors, req->current_nr_sectors);
+		printk("           errors=%d current_nr_sectors=%ld\n",
+		       req->errors, req->current_nr_sectors);
 #endif
 
 		if (req->sector < 0 || req->sector >= fs->total_secs) {

+ 2 - 2
drivers/block/swim_iop.c

@@ -529,8 +529,8 @@ static void start_request(struct floppy_state *fs)
 		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
 		       CURRENT->rq_disk->disk_name, CURRENT->cmd,
 		       CURRENT->sector, CURRENT->nr_sectors, CURRENT->buffer);
-		printk("           rq_status=%d errors=%d current_nr_sectors=%ld\n",
-		       CURRENT->rq_status, CURRENT->errors, CURRENT->current_nr_sectors);
+		printk("           errors=%d current_nr_sectors=%ld\n",
+		      CURRENT->errors, CURRENT->current_nr_sectors);
 #endif
 
 		if (CURRENT->sector < 0 || CURRENT->sector >= fs->total_secs) {

+ 1 - 1
drivers/block/xd.c

@@ -313,7 +313,7 @@ static void do_xd_request (request_queue_t * q)
 		int res = 0;
 		int retry;
 
-		if (!(req->flags & REQ_CMD)) {
+		if (!blk_fs_request(req)) {
 			end_request(req, 0);
 			continue;
 		}

+ 1 - 1
drivers/cdrom/Kconfig

@@ -3,7 +3,7 @@
 #
 #
 
 menu "Old CD-ROM drivers (not SCSI, not IDE)"
-	depends on ISA
+	depends on ISA && BLOCK
 
 config CD_NO_IDESCSI
 	bool "Support non-SCSI/IDE/ATAPI CDROM drives"

+ 1 - 1
drivers/cdrom/cdrom.c

@@ -2129,7 +2129,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 		rq->cmd[9] = 0xf8;
 
 		rq->cmd_len = 12;
-		rq->flags |= REQ_BLOCK_PC;
+		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 

+ 3 - 1
drivers/cdrom/cdu31a.c

@@ -1338,8 +1338,10 @@ static void do_cdu31a_request(request_queue_t * q)
 		}
 
 		/* WTF??? */
-		if (!(req->flags & REQ_CMD))
+		if (!blk_fs_request(req)) {
+			end_request(req, 0);
 			continue;
+		}
 		if (rq_data_dir(req) == WRITE) {
 			end_request(req, 0);
 			continue;

+ 1 - 0
drivers/char/Kconfig

@@ -1006,6 +1006,7 @@ config GPIO_VR41XX
 
 config RAW_DRIVER
 	tristate "RAW driver (/dev/raw/rawN) (OBSOLETE)"
+	depends on BLOCK
 	help
 	  The raw driver permits block devices to be bound to /dev/raw/rawN.
 	  Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O.

+ 4 - 0
drivers/char/random.c

@@ -655,6 +655,7 @@ void add_interrupt_randomness(int irq)
 	add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
 }
 
+#ifdef CONFIG_BLOCK
 void add_disk_randomness(struct gendisk *disk)
 {
 	if (!disk || !disk->random)
@@ -667,6 +668,7 @@ void add_disk_randomness(struct gendisk *disk)
 }
 
 EXPORT_SYMBOL(add_disk_randomness);
+#endif
 
 #define EXTRACT_SIZE 10
 
@@ -918,6 +920,7 @@ void rand_initialize_irq(int irq)
 	}
 }
 
+#ifdef CONFIG_BLOCK
 void rand_initialize_disk(struct gendisk *disk)
 {
 	struct timer_rand_state *state;
@@ -932,6 +935,7 @@ void rand_initialize_disk(struct gendisk *disk)
 		disk->random = state;
 	}
 }
+#endif
 
 static ssize_t
 random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)

+ 0 - 1
drivers/fc4/fc.c

@@ -974,7 +974,6 @@ int fcp_scsi_dev_reset(Scsi_Cmnd *SCpnt)
 	 */
 
 	fc->rst_pkt->device->host->eh_action = &sem;
-	fc->rst_pkt->request->rq_status = RQ_SCSI_BUSY;
 
 	fc->rst_pkt->done = fcp_scsi_reset_done;
 

+ 4 - 0
drivers/ide/Kconfig

@@ -4,6 +4,8 @@
 # Andre Hedrick <andre@linux-ide.org>
 #
 
+if BLOCK
+
 menu "ATA/ATAPI/MFM/RLL support"
 
 config IDE
@@ -1082,3 +1084,5 @@ config BLK_DEV_HD
 endif
 
 endmenu
+
+endif

+ 35 - 34
drivers/ide/ide-cd.c

@@ -372,7 +372,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq,
 {
 	int log = 0;
 
-	if (!sense || !rq || (rq->flags & REQ_QUIET))
+	if (!sense || !rq || (rq->cmd_flags & REQ_QUIET))
 		return 0;
 
 	switch (sense->sense_key) {
@@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq)
 	struct cdrom_info *cd = drive->driver_data;
 
 	ide_init_drive_cmd(rq);
-	rq->flags = REQ_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->rq_disk = cd->disk;
 }
 
@@ -617,7 +617,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 	rq->cmd[0] = GPCMD_REQUEST_SENSE;
 	rq->cmd[4] = rq->data_len = 18;
 
-	rq->flags = REQ_SENSE;
+	rq->cmd_type = REQ_TYPE_SENSE;
 
 	/* NOTE! Save the failed command in "rq->buffer" */
 	rq->buffer = (void *) failed_command;
@@ -630,10 +630,10 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate)
 	struct request *rq = HWGROUP(drive)->rq;
 	int nsectors = rq->hard_cur_sectors;
 
-	if ((rq->flags & REQ_SENSE) && uptodate) {
+	if (blk_sense_request(rq) && uptodate) {
 		/*
-		 * For REQ_SENSE, "rq->buffer" points to the original failed
-		 * request
+		 * For REQ_TYPE_SENSE, "rq->buffer" points to the original
+		 * failed request
 		 */
 		struct request *failed = (struct request *) rq->buffer;
 		struct cdrom_info *info = drive->driver_data;
@@ -706,17 +706,17 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 		return 1;
 	}
 
-	if (rq->flags & REQ_SENSE) {
+	if (blk_sense_request(rq)) {
 		/* We got an error trying to get sense info
 		   from the drive (probably while trying
 		   to recover from a former error).  Just give up. */
 
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 		cdrom_end_request(drive, 0);
 		ide_error(drive, "request sense failure", stat);
 		return 1;
 
-	} else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) {
+	} else if (blk_pc_request(rq)) {
 		/* All other functions, except for READ. */
 		unsigned long flags;
 
@@ -724,7 +724,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 		 * if we have an error, pass back CHECK_CONDITION as the
 		 * scsi status byte
 		 */
-		if ((rq->flags & REQ_BLOCK_PC) && !rq->errors)
+		if (!rq->errors)
 			rq->errors = SAM_STAT_CHECK_CONDITION;
 
 		/* Check for tray open. */
@@ -735,12 +735,12 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 			cdrom_saw_media_change (drive);
 			/*printk("%s: media changed\n",drive->name);*/
 			return 0;
-		} else if (!(rq->flags & REQ_QUIET)) {
+		} else if (!(rq->cmd_flags & REQ_QUIET)) {
 			/* Otherwise, print an error. */
 			ide_dump_status(drive, "packet command error", stat);
 		}
 
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 
 		/*
 		 * instead of playing games with moving completions around,
@@ -881,7 +881,7 @@ static int cdrom_timer_expiry(ide_drive_t *drive)
 			wait = ATAPI_WAIT_PC;
 			break;
 		default:
-			if (!(rq->flags & REQ_QUIET))
+			if (!(rq->cmd_flags & REQ_QUIET))
 				printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]);
 			wait = 0;
 			break;
@@ -1124,7 +1124,7 @@ static ide_startstop_t cdrom_read_intr (ide_drive_t *drive)
 		if (rq->current_nr_sectors > 0) {
 			printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n",
 				drive->name, rq->current_nr_sectors);
-			rq->flags |= REQ_FAILED;
+			rq->cmd_flags |= REQ_FAILED;
 			cdrom_end_request(drive, 0);
 		} else
 			cdrom_end_request(drive, 1);
@@ -1456,7 +1456,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
 			printk ("%s: cdrom_pc_intr: data underrun %d\n",
 				drive->name, pc->buflen);
 			*/
-			rq->flags |= REQ_FAILED;
+			rq->cmd_flags |= REQ_FAILED;
 			cdrom_end_request(drive, 0);
 		}
 		return ide_stopped;
@@ -1509,7 +1509,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
 		rq->data += thislen;
 		rq->data_len -= thislen;
 
-		if (rq->flags & REQ_SENSE)
+		if (blk_sense_request(rq))
 			rq->sense_len += thislen;
 	} else {
 confused:
@@ -1517,7 +1517,7 @@ confused:
 			"appears confused (ireason = 0x%02x). "
 			"appears confused (ireason = 0x%02x). "
 			"Trying to recover by ending request.\n",
 			"Trying to recover by ending request.\n",
 			drive->name, ireason);
 			drive->name, ireason);
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 		cdrom_end_request(drive, 0);
 		cdrom_end_request(drive, 0);
 		return ide_stopped;
 		return ide_stopped;
 	}
 	}
@@ -1546,7 +1546,7 @@ static ide_startstop_t cdrom_do_packet_command (ide_drive_t *drive)
 	struct cdrom_info *info = drive->driver_data;
 
 	info->dma = 0;
-	rq->flags &= ~REQ_FAILED;
+	rq->cmd_flags &= ~REQ_FAILED;
 	len = rq->data_len;
 
 	/* Start sending the command to the drive. */
@@ -1558,7 +1558,7 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 {
 	struct request_sense sense;
 	int retries = 10;
-	unsigned int flags = rq->flags;
+	unsigned int flags = rq->cmd_flags;
 
 	if (rq->sense == NULL)
 		rq->sense = &sense;
@@ -1567,14 +1567,14 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 	do {
 		int error;
 		unsigned long time = jiffies;
-		rq->flags = flags;
+		rq->cmd_flags = flags;
 
 		error = ide_do_drive_cmd(drive, rq, ide_wait);
 		time = jiffies - time;
 
 		/* FIXME: we should probably abort/retry or something
 		 * in case of failure */
-		if (rq->flags & REQ_FAILED) {
+		if (rq->cmd_flags & REQ_FAILED) {
 			/* The request failed.  Retry if it was due to a unit
 			   attention status
 			   (usually means media was changed). */
@@ -1596,10 +1596,10 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 		}
 
 		/* End of retry loop. */
-	} while ((rq->flags & REQ_FAILED) && retries >= 0);
+	} while ((rq->cmd_flags & REQ_FAILED) && retries >= 0);
 
 	/* Return an error if the command failed. */
-	return (rq->flags & REQ_FAILED) ? -EIO : 0;
+	return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0;
 }
 
 /*
@@ -1963,7 +1963,7 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
 	struct cdrom_info *info = drive->driver_data;
 
-	rq->flags |= REQ_QUIET;
+	rq->cmd_flags |= REQ_QUIET;
 
 	info->dma = 0;
 
@@ -2023,11 +2023,11 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block)
 		}
 		info->last_block = block;
 		return action;
-	} else if (rq->flags & (REQ_PC | REQ_SENSE)) {
+	} else if (rq->cmd_type == REQ_TYPE_SENSE) {
 		return cdrom_do_packet_command(drive);
-	} else if (rq->flags & REQ_BLOCK_PC) {
+	} else if (blk_pc_request(rq)) {
 		return cdrom_do_block_pc(drive, rq);
-	} else if (rq->flags & REQ_SPECIAL) {
+	} else if (blk_special_request(rq)) {
 		/*
 		 * right now this can only be a reset...
 		 */
@@ -2105,7 +2105,7 @@ static int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
 
 	req.sense = sense;
 	req.cmd[0] = GPCMD_TEST_UNIT_READY;
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 
 #if ! STANDARD_ATAPI
         /* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to
@@ -2207,7 +2207,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 	req.cmd[0] = GPCMD_READ_CDVD_CAPACITY;
 	req.data = (char *)&capbuf;
 	req.data_len = sizeof(capbuf);
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 
 	stat = cdrom_queue_packet_command(drive, &req);
 	if (stat == 0) {
@@ -2230,7 +2230,7 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
 	req.sense = sense;
 	req.data =  buf;
 	req.data_len = buflen;
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 	req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
 	req.cmd[6] = trackno;
 	req.cmd[7] = (buflen >> 8);
@@ -2531,7 +2531,7 @@ static int ide_cdrom_packet(struct cdrom_device_info *cdi,
 	req.timeout = cgc->timeout;
 
 	if (cgc->quiet)
-		req.flags |= REQ_QUIET;
+		req.cmd_flags |= REQ_QUIET;
 
 	req.sense = cgc->sense;
 	cgc->stat = cdrom_queue_packet_command(drive, &req);
@@ -2629,7 +2629,8 @@ int ide_cdrom_reset (struct cdrom_device_info *cdi)
 	int ret;
 
 	cdrom_prepare_request(drive, &req);
-	req.flags = REQ_SPECIAL | REQ_QUIET;
+	req.cmd_type = REQ_TYPE_SPECIAL;
+	req.cmd_flags = REQ_QUIET;
 	ret = ide_do_drive_cmd(drive, &req, ide_wait);
 
 	/*
@@ -3116,9 +3117,9 @@ static int ide_cdrom_prep_pc(struct request *rq)
 
 static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq)
 {
-	if (rq->flags & REQ_CMD)
+	if (blk_fs_request(rq))
 		return ide_cdrom_prep_fs(q, rq);
-	else if (rq->flags & REQ_BLOCK_PC)
+	else if (blk_pc_request(rq))
 		return ide_cdrom_prep_pc(rq);
 
 	return 0;

+ 3 - 2
drivers/ide/ide-disk.c

@@ -699,7 +699,8 @@ static void idedisk_prepare_flush(request_queue_t *q, struct request *rq)
 		rq->cmd[0] = WIN_FLUSH_CACHE;
 
 
-	rq->flags |= REQ_DRIVE_TASK;
+	rq->cmd_type = REQ_TYPE_ATA_TASK;
+	rq->cmd_flags |= REQ_SOFTBARRIER;
 	rq->buffer = rq->cmd;
 }
 
@@ -740,7 +741,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
 	if (drive->special.b.set_multmode)
 		return -EBUSY;
 	ide_init_drive_cmd (&rq);
-	rq.flags = REQ_DRIVE_CMD;
+	rq.cmd_type = REQ_TYPE_ATA_CMD;
 	drive->mult_req = arg;
 	drive->special.b.set_multmode = 1;
 	(void) ide_do_drive_cmd (drive, &rq, ide_wait);

+ 1 - 1
drivers/ide/ide-dma.c

@@ -205,7 +205,7 @@ int ide_build_sglist(ide_drive_t *drive, struct request *rq)
 	ide_hwif_t *hwif = HWIF(drive);
 	struct scatterlist *sg = hwif->sg_table;
 
-	BUG_ON((rq->flags & REQ_DRIVE_TASKFILE) && rq->nr_sectors > 256);
+	BUG_ON((rq->cmd_type == REQ_TYPE_ATA_TASKFILE) && rq->nr_sectors > 256);
 
 	ide_map_sg(drive, rq);
 

+ 8 - 9
drivers/ide/ide-floppy.c

@@ -588,7 +588,7 @@ static int idefloppy_do_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 	/* Why does this happen? */
 	if (!rq)
 		return 0;
-	if (!(rq->flags & REQ_SPECIAL)) { //if (!IDEFLOPPY_RQ_CMD (rq->cmd)) {
+	if (!blk_special_request(rq)) {
 		/* our real local end request function */
 		ide_end_request(drive, uptodate, nsecs);
 		return 0;
@@ -689,7 +689,7 @@ static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struc
 
 	ide_init_drive_cmd(rq);
 	rq->buffer = (char *) pc;
-	rq->flags = REQ_SPECIAL;	//rq->cmd = IDEFLOPPY_PC_RQ;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->rq_disk = floppy->disk;
 	(void) ide_do_drive_cmd(drive, rq, ide_preempt);
 }
@@ -1250,7 +1250,7 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t
 	pc->callback = &idefloppy_rw_callback;
 	pc->rq = rq;
 	pc->b_count = cmd == READ ? 0 : rq->bio->bi_size;
-	if (rq->flags & REQ_RW)
+	if (rq->cmd_flags & REQ_RW)
 		set_bit(PC_WRITING, &pc->flags);
 	pc->buffer = NULL;
 	pc->request_transfer = pc->buffer_size = blocks * floppy->block_size;
@@ -1281,8 +1281,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
 	idefloppy_pc_t *pc;
 	unsigned long block = (unsigned long)block_s;
 
-	debug_log(KERN_INFO "rq_status: %d, dev: %s, flags: %lx, errors: %d\n",
-			rq->rq_status,
+	debug_log(KERN_INFO "dev: %s, flags: %lx, errors: %d\n",
 			rq->rq_disk ? rq->rq_disk->disk_name : "?",
 			rq->flags, rq->errors);
 	debug_log(KERN_INFO "sector: %ld, nr_sectors: %ld, "
@@ -1303,7 +1302,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
 		idefloppy_do_end_request(drive, 0, 0);
 		idefloppy_do_end_request(drive, 0, 0);
 		return ide_stopped;
 		return ide_stopped;
 	}
 	}
-	if (rq->flags & REQ_CMD) {
+	if (blk_fs_request(rq)) {
 		if (((long)rq->sector % floppy->bs_factor) ||
 		if (((long)rq->sector % floppy->bs_factor) ||
 		    (rq->nr_sectors % floppy->bs_factor)) {
 		    (rq->nr_sectors % floppy->bs_factor)) {
 			printk("%s: unsupported r/w request size\n",
 			printk("%s: unsupported r/w request size\n",
@@ -1313,9 +1312,9 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
 		}
 		}
 		pc = idefloppy_next_pc_storage(drive);
 		pc = idefloppy_next_pc_storage(drive);
 		idefloppy_create_rw_cmd(floppy, pc, rq, block);
 		idefloppy_create_rw_cmd(floppy, pc, rq, block);
-	} else if (rq->flags & REQ_SPECIAL) {
+	} else if (blk_special_request(rq)) {
 		pc = (idefloppy_pc_t *) rq->buffer;
 		pc = (idefloppy_pc_t *) rq->buffer;
-	} else if (rq->flags & REQ_BLOCK_PC) {
+	} else if (blk_pc_request(rq)) {
 		pc = idefloppy_next_pc_storage(drive);
 		pc = idefloppy_next_pc_storage(drive);
 		if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
 		if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
 			idefloppy_do_end_request(drive, 0, 0);
 			idefloppy_do_end_request(drive, 0, 0);
@@ -1343,7 +1342,7 @@ static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc)
 
 
 	ide_init_drive_cmd (&rq);
 	ide_init_drive_cmd (&rq);
 	rq.buffer = (char *) pc;
 	rq.buffer = (char *) pc;
-	rq.flags = REQ_SPECIAL;		//	rq.cmd = IDEFLOPPY_PC_RQ;
+	rq.cmd_type = REQ_TYPE_SPECIAL;
 	rq.rq_disk = floppy->disk;
 	rq.rq_disk = floppy->disk;
 
 
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
 	return ide_do_drive_cmd(drive, &rq, ide_wait);

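The open-coded bit tests against REQ_CMD, REQ_SPECIAL and friends give way to block-layer predicates such as blk_fs_request() and blk_special_request(). As a sketch, and assuming the 2.6.19-era definitions in <linux/blkdev.h>, these helpers are thin wrappers over the new fields:

	#define blk_fs_request(rq)      ((rq)->cmd_type == REQ_TYPE_FS)
	#define blk_pc_request(rq)      ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
	#define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL)
	#define blk_sense_request(rq)   ((rq)->cmd_type == REQ_TYPE_SENSE)
	#define blk_rq_started(rq)      ((rq)->cmd_flags & REQ_STARTED)

Note the asymmetry: the first four test the type field, the last one a state flag in cmd_flags.
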
+ 24 - 26
drivers/ide/ide-io.c

@@ -59,7 +59,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
 {
 	int ret = 1;
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 	/*
 	 * if failfast is set on a request, override number of sectors and
@@ -141,7 +141,7 @@ enum {
 
 static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 stat, u8 error)
 {
-	struct request_pm_state *pm = rq->end_io_data;
+	struct request_pm_state *pm = rq->data;
 
 	if (drive->media != ide_disk)
 		return;
@@ -164,7 +164,7 @@ static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 s
 
 static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->end_io_data;
+	struct request_pm_state *pm = rq->data;
 	ide_task_t *args = rq->special;
 
 	memset(args, 0, sizeof(*args));
@@ -244,7 +244,7 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
 
 	spin_lock_irqsave(&ide_lock, flags);
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 	/*
 	 * if failfast is set on a request, override number of sectors and
@@ -366,7 +366,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 	rq = HWGROUP(drive)->rq;
 	spin_unlock_irqrestore(&ide_lock, flags);
 
-	if (rq->flags & REQ_DRIVE_CMD) {
+	if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
 		u8 *args = (u8 *) rq->buffer;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -376,7 +376,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 			args[1] = err;
 			args[2] = hwif->INB(IDE_NSECTOR_REG);
 		}
-	} else if (rq->flags & REQ_DRIVE_TASK) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		u8 *args = (u8 *) rq->buffer;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -390,7 +390,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 			args[5] = hwif->INB(IDE_HCYL_REG);
 			args[6] = hwif->INB(IDE_SELECT_REG);
 		}
-	} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *args = (ide_task_t *) rq->special;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -421,7 +421,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 			}
 		}
 	} else if (blk_pm_request(rq)) {
-		struct request_pm_state *pm = rq->end_io_data;
+		struct request_pm_state *pm = rq->data;
 #ifdef DEBUG_PM
 		printk("%s: complete_power_step(step: %d, stat: %x, err: %x)\n",
 			drive->name, rq->pm->pm_step, stat, err);
@@ -587,7 +587,7 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+	if (!blk_fs_request(rq)) {
 		rq->errors = 1;
 		ide_end_drive_cmd(drive, stat, err);
 		return ide_stopped;
@@ -638,7 +638,7 @@ ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg)
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+	if (!blk_fs_request(rq)) {
 		rq->errors = 1;
 		ide_end_drive_cmd(drive, BUSY_STAT, 0);
 		return ide_stopped;
@@ -808,7 +808,7 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq)
 	if (hwif->sg_mapped)	/* needed by ide-scsi */
 		return;
 
-	if ((rq->flags & REQ_DRIVE_TASKFILE) == 0) {
+	if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
 		hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 	} else {
 		sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
@@ -844,7 +844,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 		struct request *rq)
 {
 	ide_hwif_t *hwif = HWIF(drive);
-	if (rq->flags & REQ_DRIVE_TASKFILE) {
+	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
  		ide_task_t *args = rq->special;
 
 		if (!args)
@@ -866,7 +866,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 		if (args->tf_out_flags.all != 0) 
 			return flagged_taskfile(drive, args);
 		return do_rw_taskfile(drive, args);
-	} else if (rq->flags & REQ_DRIVE_TASK) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		u8 *args = rq->buffer;
 		u8 sel;
 
@@ -892,7 +892,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
  		hwif->OUTB(sel, IDE_SELECT_REG);
  		ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
  		return ide_started;
- 	} else if (rq->flags & REQ_DRIVE_CMD) {
+ 	} else if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
  		u8 *args = rq->buffer;
 
 		if (!args)
@@ -933,7 +933,7 @@ done:
 
 static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->end_io_data;
+	struct request_pm_state *pm = rq->data;
 
 	if (blk_pm_suspend_request(rq) &&
 	    pm->pm_step == ide_pm_state_start_suspend)
@@ -980,7 +980,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 	ide_startstop_t startstop;
 	sector_t block;
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 #ifdef DEBUG
 	printk("%s: start_request: current=0x%08lx\n",
@@ -1013,12 +1013,12 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 	if (!drive->special.all) {
 		ide_driver_t *drv;
 
-		if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK))
-			return execute_drive_cmd(drive, rq);
-		else if (rq->flags & REQ_DRIVE_TASKFILE)
+		if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+		    rq->cmd_type == REQ_TYPE_ATA_TASK ||
+		    rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
-			struct request_pm_state *pm = rq->end_io_data;
+			struct request_pm_state *pm = rq->data;
#ifdef DEBUG_PM
 			printk("%s: start_power_step(step: %d)\n",
 				drive->name, rq->pm->pm_step);
@@ -1264,7 +1264,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 		 * We count how many times we loop here to make sure we service
 		 * all drives in the hwgroup without looping for ever
 		 */
-		if (drive->blocked && !blk_pm_request(rq) && !(rq->flags & REQ_PREEMPT)) {
+		if (drive->blocked && !blk_pm_request(rq) && !(rq->cmd_flags & REQ_PREEMPT)) {
 			drive = drive->next ? drive->next : hwgroup->drive;
 			if (loops++ < 4 && !blk_queue_plugged(drive->queue))
 				goto again;
@@ -1670,7 +1670,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs)
 void ide_init_drive_cmd (struct request *rq)
 {
 	memset(rq, 0, sizeof(*rq));
-	rq->flags = REQ_DRIVE_CMD;
+	rq->cmd_type = REQ_TYPE_ATA_CMD;
 	rq->ref_count = 1;
 }
 
@@ -1710,7 +1710,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 	int must_wait = (action == ide_wait || action == ide_head_wait);
 
 	rq->errors = 0;
-	rq->rq_status = RQ_ACTIVE;
 
 	/*
 	 * we need to hold an extra reference to request for safe inspection
@@ -1718,7 +1717,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 	 */
 	if (must_wait) {
 		rq->ref_count++;
-		rq->waiting = &wait;
+		rq->end_io_data = &wait;
 		rq->end_io = blk_end_sync_rq;
 	}
 
@@ -1727,7 +1726,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 		hwgroup->rq = NULL;
 	if (action == ide_preempt || action == ide_head_wait) {
 		where = ELEVATOR_INSERT_FRONT;
-		rq->flags |= REQ_PREEMPT;
+		rq->cmd_flags |= REQ_PREEMPT;
 	}
 	__elv_add_request(drive->queue, rq, where, 0);
 	ide_do_request(hwgroup, IDE_NO_IRQ);
@@ -1736,7 +1735,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 	err = 0;
 	if (must_wait) {
 		wait_for_completion(&wait);
-		rq->waiting = NULL;
 		if (rq->errors)
 			err = -EIO;
 

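With the rq->waiting pointer gone from struct request, synchronous submission reuses the generic blk_end_sync_rq() completion: the waiter is parked in rq->end_io_data. Reduced to its essentials, the wait path in ide_do_drive_cmd() now looks roughly like this (a sketch, locking and error handling omitted):

	DECLARE_COMPLETION(wait);

	rq->end_io_data = &wait;	/* blk_end_sync_rq() completes this */
	rq->end_io = blk_end_sync_rq;

	__elv_add_request(drive->queue, rq, where, 0);
	ide_do_request(hwgroup, IDE_NO_IRQ);

	wait_for_completion(&wait);	/* woken from rq->end_io */
	if (rq->errors)
		err = -EIO;

This is also why the PM code had to stop keeping its state block in end_io_data, as the ide.c hunks below show.
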
+ 3 - 2
drivers/ide/ide-lib.c

@@ -456,13 +456,14 @@ static void ide_dump_opcode(ide_drive_t *drive)
 	spin_unlock(&ide_lock);
 	if (!rq)
 		return;
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
+	if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+	    rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		char *args = rq->buffer;
 		if (args) {
 			opcode = args[0];
 			found = 1;
 		}
-	} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *args = rq->special;
 		if (args) {
 			task_struct_t *tf = (task_struct_t *) args->tfRegister;

+ 7 - 7
drivers/ide/ide-tape.c

@@ -1776,7 +1776,7 @@ static void idetape_create_request_sense_cmd (idetape_pc_t *pc)
 static void idetape_init_rq(struct request *rq, u8 cmd)
 {
 	memset(rq, 0, sizeof(*rq));
-	rq->flags = REQ_SPECIAL;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[0] = cmd;
 }
 
@@ -2423,8 +2423,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 #if IDETAPE_DEBUG_LOG
 #if 0
 	if (tape->debug_level >= 5)
-		printk(KERN_INFO "ide-tape: rq_status: %d, "
-			"dev: %s, cmd: %ld, errors: %d\n", rq->rq_status,
+		printk(KERN_INFO "ide-tape:  %d, "
+			"dev: %s, cmd: %ld, errors: %d\n",
 			 rq->rq_disk->disk_name, rq->cmd[0], rq->errors);
 #endif
 	if (tape->debug_level >= 2)
@@ -2433,12 +2433,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			rq->sector, rq->nr_sectors, rq->current_nr_sectors);
 #endif /* IDETAPE_DEBUG_LOG */
 
-	if ((rq->flags & REQ_SPECIAL) == 0) {
+	if (!blk_special_request(rq)) {
 		/*
 		 * We do not support buffer cache originated requests.
 		 */
 		printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
-			"request queue (%ld)\n", drive->name, rq->flags);
+			"request queue (%d)\n", drive->name, rq->cmd_type);
 		ide_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
@@ -2768,12 +2768,12 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq)
 	idetape_tape_t *tape = drive->driver_data;
 
 #if IDETAPE_DEBUG_BUGS
-	if (rq == NULL || (rq->flags & REQ_SPECIAL) == 0) {
+	if (rq == NULL || !blk_special_request(rq)) {
 		printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n");
 		return;
 	}
 #endif /* IDETAPE_DEBUG_BUGS */
-	rq->waiting = &wait;
+	rq->end_io_data = &wait;
 	rq->end_io = blk_end_sync_rq;
 	spin_unlock_irq(&tape->spinlock);
 	wait_for_completion(&wait);

+ 4 - 4
drivers/ide/ide-taskfile.c

@@ -363,7 +363,7 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq,
 
 static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
 {
-	if (rq->flags & REQ_DRIVE_TASKFILE) {
+	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *task = rq->special;
 
 		if (task->tf_out_flags.all) {
@@ -474,7 +474,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
 	struct request rq;
 
 	memset(&rq, 0, sizeof(rq));
-	rq.flags = REQ_DRIVE_TASKFILE;
+	rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
 	rq.buffer = buf;
 
 	/*
@@ -499,7 +499,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
 		rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors;
 
 		if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
-			rq.flags |= REQ_RW;
+			rq.cmd_flags |= REQ_RW;
 	}
 
 	rq.special = args;
@@ -737,7 +737,7 @@ static int ide_wait_cmd_task(ide_drive_t *drive, u8 *buf)
 	struct request rq;
 
 	ide_init_drive_cmd(&rq);
-	rq.flags = REQ_DRIVE_TASK;
+	rq.cmd_type = REQ_TYPE_ATA_TASK;
 	rq.buffer = buf;
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
 }

+ 4 - 4
drivers/ide/ide.c

@@ -1217,9 +1217,9 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	memset(&rq, 0, sizeof(rq));
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
-	rq.flags = REQ_PM_SUSPEND;
+	rq.cmd_type = REQ_TYPE_PM_SUSPEND;
 	rq.special = &args;
-	rq.end_io_data = &rqpm;
+	rq.data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_suspend;
 	if (mesg.event == PM_EVENT_PRETHAW)
 		mesg.event = PM_EVENT_FREEZE;
@@ -1238,9 +1238,9 @@ static int generic_ide_resume(struct device *dev)
 	memset(&rq, 0, sizeof(rq));
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
-	rq.flags = REQ_PM_RESUME;
+	rq.cmd_type = REQ_TYPE_PM_RESUME;
 	rq.special = &args;
-	rq.end_io_data = &rqpm;
+	rq.data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_resume;
 	rqpm.pm_state = PM_EVENT_ON;
 

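Power-management requests illustrate the other half of that move: they are now typed REQ_TYPE_PM_SUSPEND / REQ_TYPE_PM_RESUME, and their request_pm_state hangs off the new rq.data pointer, leaving rq.end_io_data free for the synchronous-wait plumbing shown earlier. In outline (a condensed paraphrase of the suspend hunk above):

	struct request rq;
	struct request_pm_state rqpm;

	memset(&rq, 0, sizeof(rq));
	memset(&rqpm, 0, sizeof(rqpm));
	rq.cmd_type = REQ_TYPE_PM_SUSPEND;	/* was: rq.flags = REQ_PM_SUSPEND */
	rq.data = &rqpm;			/* was: rq.end_io_data = &rqpm */
	rqpm.pm_step = ide_pm_state_start_suspend;
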
+ 1 - 1
drivers/ide/legacy/hd.c

@@ -626,7 +626,7 @@ repeat:
 		req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ",
 		cyl, head, sec, nsect, req->buffer);
 #endif
-	if (req->flags & REQ_CMD) {
+	if (blk_fs_request(req)) {
 		switch (rq_data_dir(req)) {
 		case READ:
 			hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr);

+ 3 - 0
drivers/md/Kconfig

@@ -2,6 +2,8 @@
 # Block device driver configuration
 #
 
+if BLOCK
+
 menu "Multi-device support (RAID and LVM)"
 
 config MD
@@ -251,3 +253,4 @@ config DM_MULTIPATH_EMC
 
 endmenu
 
+endif

+ 2 - 1
drivers/md/dm-emc.c

@@ -126,7 +126,8 @@ static struct request *get_failover_req(struct emc_handler *h,
 	memset(&rq->cmd, 0, BLK_MAX_CDB);
 
 	rq->timeout = EMC_FAILOVER_TIMEOUT;
-	rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE);
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
 
 	return rq;
 }

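dm-emc's failover request shows the canonical shape of a SCSI passthrough under the new scheme: set the type once, then OR in behaviour flags. Generically, and assuming the era's blk_get_request() signature, the pattern is:

	struct request *rq = blk_get_request(q, WRITE, GFP_NOIO);

	memset(&rq->cmd, 0, BLK_MAX_CDB);	/* caller fills in the CDB */
	rq->timeout = EMC_FAILOVER_TIMEOUT;
	rq->cmd_type = REQ_TYPE_BLOCK_PC;	/* packet-command passthrough */
	rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;

Previously REQ_BLOCK_PC sat in the same bitmask as the modifier flags, so nothing stopped a request from carrying two "types" at once.
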
+ 1 - 1
drivers/message/i2o/Kconfig

@@ -88,7 +88,7 @@ config I2O_BUS
 
 config I2O_BLOCK
 	tristate "I2O Block OSM"
-	depends on I2O
+	depends on I2O && BLOCK
 	---help---
 	  Include support for the I2O Block OSM. The Block OSM presents disk
 	  and other structured block devices to the operating system. If you

+ 4 - 3
drivers/message/i2o/i2o_block.c

@@ -390,9 +390,9 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
 	}
 
 	/* request is already processed by us, so return */
-	if (req->flags & REQ_SPECIAL) {
+	if (blk_special_request(req)) {
 		osm_debug("REQ_SPECIAL already set!\n");
-		req->flags |= REQ_DONTPREP;
+		req->cmd_flags |= REQ_DONTPREP;
 		return BLKPREP_OK;
 	}
 
@@ -411,7 +411,8 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
 		ireq = req->special;
 
 	/* do not come back here */
-	req->flags |= REQ_DONTPREP | REQ_SPECIAL;
+	req->cmd_type = REQ_TYPE_SPECIAL;
+	req->cmd_flags |= REQ_DONTPREP;
 
 	return BLKPREP_OK;
 };

+ 1 - 1
drivers/mmc/Kconfig

@@ -21,7 +21,7 @@ config MMC_DEBUG
 
 config MMC_BLOCK
 	tristate "MMC block device driver"
-	depends on MMC
+	depends on MMC && BLOCK
 	default y
 	help
 	  Say Y here to enable the MMC block device driver support.

+ 2 - 1
drivers/mmc/Makefile

@@ -24,7 +24,8 @@ obj-$(CONFIG_MMC_AU1X)		+= au1xmmc.o
 obj-$(CONFIG_MMC_OMAP)		+= omap.o
 obj-$(CONFIG_MMC_AT91RM9200)	+= at91_mci.o
 
-mmc_core-y := mmc.o mmc_queue.o mmc_sysfs.o
+mmc_core-y := mmc.o mmc_sysfs.o
+mmc_core-$(CONFIG_BLOCK) += mmc_queue.o
 
 ifeq ($(CONFIG_MMC_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG

+ 3 - 3
drivers/mmc/mmc_queue.c

@@ -28,7 +28,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 	struct mmc_queue *mq = q->queuedata;
 	int ret = BLKPREP_KILL;
 
-	if (req->flags & REQ_SPECIAL) {
+	if (blk_special_request(req)) {
 		/*
 		 * Special commands already have the command
 		 * blocks already setup in req->special.
@@ -36,7 +36,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 		BUG_ON(!req->special);
 
 		ret = BLKPREP_OK;
-	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+	} else if (blk_fs_request(req) || blk_pc_request(req)) {
 		/*
 		 * Block I/O requests need translating according
 		 * to the protocol.
@@ -50,7 +50,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 	}
 
 	if (ret == BLKPREP_OK)
-		req->flags |= REQ_DONTPREP;
+		req->cmd_flags |= REQ_DONTPREP;
 
 	return ret;
 }

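The mmc prep function above is the three-way dispatch in miniature: driver-private requests (REQ_TYPE_SPECIAL) come pre-built, filesystem and packet-command requests need translation, anything else is killed; whatever passes is stamped REQ_DONTPREP so it is prepared only once. Schematically (a sketch, not a driver from this merge):

	static int prep_request(struct request_queue *q, struct request *req)
	{
		int ret = BLKPREP_KILL;

		if (blk_special_request(req))
			ret = BLKPREP_OK;	/* command block already in req->special */
		else if (blk_fs_request(req) || blk_pc_request(req))
			ret = BLKPREP_OK;	/* translate to the wire protocol first */

		if (ret == BLKPREP_OK)
			req->cmd_flags |= REQ_DONTPREP;	/* never prep twice */
		return ret;
	}

The same split, with more states, reappears in scsi_prep_fn() further down.
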
+ 6 - 6
drivers/mtd/Kconfig

@@ -166,7 +166,7 @@ config MTD_CHAR
 
 config MTD_BLOCK
 	tristate "Caching block device access to MTD devices"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  Although most flash chips have an erase size too large to be useful
 	  as block devices, it is possible to use MTD devices which are based
@@ -188,7 +188,7 @@ config MTD_BLOCK
 
 config MTD_BLOCK_RO
 	tristate "Readonly block device access to MTD devices"
-	depends on MTD_BLOCK!=y && MTD
+	depends on MTD_BLOCK!=y && MTD && BLOCK
 	help
 	  This allows you to mount read-only file systems (such as cramfs)
 	  from an MTD device, without the overhead (and danger) of the caching
@@ -199,7 +199,7 @@ config MTD_BLOCK_RO
 
 config FTL
 	tristate "FTL (Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the original Flash Translation Layer which
 	  is part of the PCMCIA specification. It uses a kind of pseudo-
@@ -215,7 +215,7 @@ config FTL
 
 config NFTL
 	tristate "NFTL (NAND Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the NAND Flash Translation Layer which is
 	  used on M-Systems' DiskOnChip devices. It uses a kind of pseudo-
@@ -238,7 +238,7 @@ config NFTL_RW
 
 config INFTL
 	tristate "INFTL (Inverse NAND Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the Inverse NAND Flash Translation
 	  Layer which is used on M-Systems' newer DiskOnChip devices. It
@@ -255,7 +255,7 @@ config INFTL
 
 config RFD_FTL
        tristate "Resident Flash Disk (Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the flash translation layer known
 	  as the Resident Flash Disk (RFD), as used by the Embedded BIOS

+ 1 - 1
drivers/mtd/devices/Kconfig

@@ -136,7 +136,7 @@ config MTDRAM_ABS_POS
 
 config MTD_BLOCK2MTD
 	tristate "MTD using block device"
-	depends on MTD
+	depends on MTD && BLOCK
 	help
 	  This driver allows a block device to appear as an MTD. It would
 	  generally be used in the following cases:

+ 1 - 1
drivers/mtd/mtd_blkdevs.c

@@ -46,7 +46,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	nsect = req->current_nr_sectors;
 	buf = req->buffer;
 
-	if (!(req->flags & REQ_CMD))
+	if (!blk_fs_request(req))
 		return 0;
 
 	if (block + nsect > get_capacity(req->rq_disk))

+ 1 - 1
drivers/s390/block/Kconfig

@@ -1,4 +1,4 @@
-if S390
+if S390 && BLOCK
 
 comment "S/390 block device drivers"
 	depends on S390

+ 1 - 1
drivers/s390/block/dasd_diag.c

@@ -529,7 +529,7 @@ dasd_diag_build_cp(struct dasd_device * device, struct request *req)
 	}
 	cqr->retries = DIAG_MAX_RETRIES;
 	cqr->buildclk = get_clock();
-	if (req->flags & REQ_FAILFAST)
+	if (req->cmd_flags & REQ_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 	cqr->device = device;
 	cqr->expires = DIAG_TIMEOUT;

+ 1 - 1
drivers/s390/block/dasd_eckd.c

@@ -1266,7 +1266,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req)
 			recid++;
 		}
 	}
-	if (req->flags & REQ_FAILFAST)
+	if (req->cmd_flags & REQ_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 	cqr->device = device;
 	cqr->expires = 5 * 60 * HZ;	/* 5 minutes */

+ 1 - 1
drivers/s390/block/dasd_fba.c

@@ -344,7 +344,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req)
 			recid++;
 		}
 	}
-	if (req->flags & REQ_FAILFAST)
+	if (req->cmd_flags & REQ_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 	cqr->device = device;
 	cqr->expires = 5 * 60 * HZ;	/* 5 minutes */

+ 2 - 0
drivers/scsi/Kconfig

@@ -3,11 +3,13 @@ menu "SCSI device support"
 config RAID_ATTRS
 	tristate "RAID Transport Class"
 	default n
+	depends on BLOCK
 	---help---
 	  Provides RAID
 
 config SCSI
 	tristate "SCSI device support"
+	depends on BLOCK
 	---help---
 	  If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or
 	  any other SCSI device under Linux, say Y and make sure that you know

+ 2 - 2
drivers/scsi/aic7xxx_old.c

@@ -2862,7 +2862,7 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb)
       aic_dev->r_total++;
       ptr = aic_dev->r_bins;
     }
-    if(cmd->device->simple_tags && cmd->request->flags & REQ_HARDBARRIER)
+    if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER)
     {
       aic_dev->barrier_total++;
       if(scb->tag_action == MSG_ORDERED_Q_TAG)
@@ -10158,7 +10158,7 @@ aic7xxx_buildscb(struct aic7xxx_host *p, Scsi_Cmnd *cmd,
     /* We always force TEST_UNIT_READY to untagged */
     if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags)
     {
-      if (req->flags & REQ_HARDBARRIER)
+      if (req->cmd_flags & REQ_HARDBARRIER)
       {
 	if(sdptr->ordered_tags)
 	{

+ 8 - 8
drivers/scsi/ide-scsi.c

@@ -344,7 +344,7 @@ static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_co
 	pc->buffer = buf;
 	pc->c[0] = REQUEST_SENSE;
 	pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE;
-	rq->flags = REQ_SENSE;
+	rq->cmd_type = REQ_TYPE_SENSE;
 	pc->timeout = jiffies + WAIT_READY;
 	/* NOTE! Save the failed packet command in "rq->buffer" */
 	rq->buffer = (void *) failed_command->special;
@@ -398,12 +398,12 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
 	int errors = rq->errors;
 	unsigned long flags;
 
-	if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) {
+	if (!blk_special_request(rq) && !blk_sense_request(rq)) {
 		ide_end_request(drive, uptodate, nrsecs);
 		return 0;
 	}
 	ide_end_drive_cmd (drive, 0, 0);
-	if (rq->flags & REQ_SENSE) {
+	if (blk_sense_request(rq)) {
 		idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer;
 		if (log) {
 			printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number);
@@ -708,11 +708,11 @@ static ide_startstop_t idescsi_issue_pc (ide_drive_t *drive, idescsi_pc_t *pc)
 static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
 {
 #if IDESCSI_DEBUG_LOG
-	printk (KERN_INFO "rq_status: %d, dev: %s, cmd: %x, errors: %d\n",rq->rq_status, rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
+	printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
 	printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
 #endif /* IDESCSI_DEBUG_LOG */
 
-	if (rq->flags & (REQ_SPECIAL|REQ_SENSE)) {
+	if (blk_sense_request(rq) || blk_special_request(rq)) {
 		return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special);
 	}
 	blk_dump_rq_flags(rq, "ide-scsi: unsup command");
@@ -938,7 +938,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
 
 	ide_init_drive_cmd (rq);
 	rq->special = (char *) pc;
-	rq->flags = REQ_SPECIAL;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	spin_unlock_irq(host->host_lock);
 	rq->rq_disk = scsi->disk;
 	(void) ide_do_drive_cmd (drive, rq, ide_end);
@@ -992,7 +992,7 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 		 */
 		printk (KERN_ERR "ide-scsi: cmd aborted!\n");
 
-		if (scsi->pc->rq->flags & REQ_SENSE)
+		if (blk_sense_request(scsi->pc->rq))
 			kfree(scsi->pc->buffer);
 		kfree(scsi->pc->rq);
 		kfree(scsi->pc);
@@ -1042,7 +1042,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 	/* kill current request */
 	blkdev_dequeue_request(req);
 	end_that_request_last(req, 0);
-	if (req->flags & REQ_SENSE)
+	if (blk_sense_request(req))
 		kfree(scsi->pc->buffer);
 	kfree(scsi->pc);
 	scsi->pc = NULL;

+ 3 - 3
drivers/scsi/pluto.c

@@ -67,7 +67,6 @@ static void __init pluto_detect_done(Scsi_Cmnd *SCpnt)
 
 static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt)
 {
-	SCpnt->request->rq_status = RQ_SCSI_DONE;
 	PLND(("Detect done %08lx\n", (long)SCpnt))
 	if (atomic_dec_and_test (&fcss))
 		up(&fc_sem);
@@ -166,7 +165,7 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
 		
 		SCpnt->cmd_len = COMMAND_SIZE(INQUIRY);
 	
-		SCpnt->request->rq_status = RQ_SCSI_BUSY;
+		SCpnt->request->cmd_flags &= ~REQ_STARTED;
 		
 		SCpnt->done = pluto_detect_done;
 		SCpnt->request_bufflen = 256;
@@ -178,7 +177,8 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
 	for (retry = 0; retry < 5; retry++) {
 		for (i = 0; i < fcscount; i++) {
 			if (!fcs[i].fc) break;
-			if (fcs[i].cmd.request->rq_status != RQ_SCSI_DONE) {
+			if (!(fcs[i].cmd.request->cmd_flags & REQ_STARTED)) {
+				fcs[i].cmd.request->cmd_flags |= REQ_STARTED;
 				disable_irq(fcs[i].fc->irq);
 				PLND(("queuecommand %d %d\n", retry, i))
 				fcp_scsi_queuecommand (&(fcs[i].cmd), 

+ 2 - 11
drivers/scsi/scsi.c

@@ -592,12 +592,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 	return rtn;
 }
 
-
-/*
- * Per-CPU I/O completion queue.
- */
-static DEFINE_PER_CPU(struct list_head, scsi_done_q);
-
 /**
  * scsi_req_abort_cmd -- Request command recovery for the specified command
  * cmd: pointer to the SCSI command of interest
@@ -1065,7 +1059,7 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery)
 
 	spin_lock_irqsave(&sdev->list_lock, flags);
 	list_for_each_entry(scmd, &sdev->cmd_list, list) {
-		if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) {
+		if (scmd->request) {
 			/*
 			 * If we are unable to remove the timer, it means
 			 * that the command has already timed out or
@@ -1102,7 +1096,7 @@ MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");
 
 static int __init init_scsi(void)
 {
-	int error, i;
+	int error;
 
 	error = scsi_init_queue();
 	if (error)
@@ -1123,9 +1117,6 @@ static int __init init_scsi(void)
 	if (error)
 		goto cleanup_sysctl;
 
-	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(&per_cpu(scsi_done_q, i));
-
 	scsi_netlink_init();
 
 	printk(KERN_NOTICE "SCSI subsystem initialized\n");

+ 19 - 18
drivers/scsi/scsi_lib.c

@@ -82,7 +82,7 @@ static void scsi_unprep_request(struct request *req)
 {
 	struct scsi_cmnd *cmd = req->special;
 
-	req->flags &= ~REQ_DONTPREP;
+	req->cmd_flags &= ~REQ_DONTPREP;
 	req->special = NULL;
 
 	scsi_put_command(cmd);
@@ -196,7 +196,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	req->sense_len = 0;
 	req->retries = retries;
 	req->timeout = timeout;
-	req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
 
 	/*
 	 * head injection *required* here otherwise quiesce won't work
@@ -397,7 +398,8 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
 	req = blk_get_request(sdev->request_queue, write, gfp);
 	if (!req)
 		goto free_sense;
-	req->flags |= REQ_BLOCK_PC | REQ_QUIET;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_QUIET;
 
 	if (use_sg)
 		err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
@@ -933,7 +935,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 					break;
 				}
 			}
-			if (!(req->flags & REQ_QUIET)) {
+			if (!(req->cmd_flags & REQ_QUIET)) {
 				scmd_printk(KERN_INFO, cmd,
 					    "Device not ready: ");
 				scsi_print_sense_hdr("", &sshdr);
@@ -941,7 +943,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			scsi_end_request(cmd, 0, this_count, 1);
 			return;
 		case VOLUME_OVERFLOW:
-			if (!(req->flags & REQ_QUIET)) {
+			if (!(req->cmd_flags & REQ_QUIET)) {
 				scmd_printk(KERN_INFO, cmd,
 					    "Volume overflow, CDB: ");
 				__scsi_print_command(cmd->cmnd);
@@ -963,7 +965,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		return;
 	}
 	if (result) {
-		if (!(req->flags & REQ_QUIET)) {
+		if (!(req->cmd_flags & REQ_QUIET)) {
 			scmd_printk(KERN_INFO, cmd,
 				    "SCSI error: return code = 0x%08x\n",
 				    result);
@@ -995,7 +997,7 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
 	/*
 	 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
 	 */
-	if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
+	if (blk_pc_request(req) && !req->bio) {
 		cmd->request_bufflen = req->data_len;
 		cmd->request_buffer = req->data;
 		req->buffer = req->data;
@@ -1139,13 +1141,12 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 	 * these two cases differently.  We differentiate by looking
 	 * at request->cmd, as this tells us the real story.
 	 */
-	if (req->flags & REQ_SPECIAL && req->special) {
+	if (blk_special_request(req) && req->special)
 		cmd = req->special;
-	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
-
-		if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
-			if(specials_only == SDEV_QUIESCE ||
-					specials_only == SDEV_BLOCK)
+	else if (blk_pc_request(req) || blk_fs_request(req)) {
+		if (unlikely(specials_only) && !(req->cmd_flags & REQ_PREEMPT)){
+			if (specials_only == SDEV_QUIESCE ||
+			    specials_only == SDEV_BLOCK)
 				goto defer;
 			
 			sdev_printk(KERN_ERR, sdev,
@@ -1153,7 +1154,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 			goto kill;
 		}
 			
-			
 		/*
 		 * Now try and find a command block that we can use.
 		 */
@@ -1184,7 +1184,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 	 * lock.  We hope REQ_STARTED prevents anything untoward from
 	 * happening now.
 	 */
-	if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+	if (blk_fs_request(req) || blk_pc_request(req)) {
 		int ret;
 
 		/*
@@ -1216,7 +1216,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 		/*
 		 * Initialize the actual SCSI command for this request.
 		 */
-		if (req->flags & REQ_BLOCK_PC) {
+		if (blk_pc_request(req)) {
 			scsi_setup_blk_pc_cmnd(cmd);
 		} else if (req->rq_disk) {
 			struct scsi_driver *drv;
@@ -1233,7 +1233,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 	/*
 	 * The request is now prepped, no need to come back here
 	 */
-	req->flags |= REQ_DONTPREP;
+	req->cmd_flags |= REQ_DONTPREP;
 	return BLKPREP_OK;
 
  defer:
@@ -1454,8 +1454,9 @@ static void scsi_request_fn(struct request_queue *q)
 		if (unlikely(cmd == NULL)) {
 			printk(KERN_CRIT "impossible request in %s.\n"
 					 "please mail a stack trace to "
-					 "linux-scsi@vger.kernel.org",
+					 "linux-scsi@vger.kernel.org\n",
 					 __FUNCTION__);
+			blk_dump_rq_flags(req, "foo");
 			BUG();
 		}
 		spin_lock(shost->host_lock);

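One semantic change in scsi_lib.c deserves a callout: scsi_execute() used to tag its internally generated commands REQ_SPECIAL, and scsi_prep_fn() used that bit to let them through a quiesced or blocked device. REQ_SPECIAL now strictly means "driver-private request type", so the bypass privilege moves to the new REQ_PREEMPT modifier flag, and the gatekeeping in scsi_prep_fn() reduces to roughly:

	/* sketch: only PREEMPT-marked requests pass a quiesced device */
	if (unlikely(specials_only) && !(req->cmd_flags & REQ_PREEMPT)) {
		if (specials_only == SDEV_QUIESCE || specials_only == SDEV_BLOCK)
			goto defer;	/* retry once the device wakes up */
		goto kill;		/* device is going away */
	}
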
+ 2 - 3
drivers/scsi/sd.c

@@ -443,8 +443,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
 		SCpnt->cmnd[0] = READ_6;
 		SCpnt->sc_data_direction = DMA_FROM_DEVICE;
 	} else {
-		printk(KERN_ERR "sd: Unknown command %lx\n", rq->flags);
-/* overkill 	panic("Unknown sd command %lx\n", rq->flags); */
+		printk(KERN_ERR "sd: Unknown command %x\n", rq->cmd_flags);
 		return 0;
 	}
 
@@ -840,7 +839,7 @@ static int sd_issue_flush(struct device *dev, sector_t *error_sector)
 static void sd_prepare_flush(request_queue_t *q, struct request *rq)
 {
 	memset(rq->cmd, 0, sizeof(rq->cmd));
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->timeout = SD_TIMEOUT;
 	rq->cmd[0] = SYNCHRONIZE_CACHE;
 	rq->cmd_len = 10;

+ 1 - 1
drivers/scsi/sun3_NCR5380.c

@@ -2017,7 +2017,7 @@ static void NCR5380_information_transfer (struct Scsi_Host *instance)
 		if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done
 						  != cmd))
 		{
-			if(cmd->request->flags & REQ_CMD) {
+			if(blk_fs_request(cmd->request)) {
 				sun3scsi_dma_setup(d, count,
 						   rq_data_dir(cmd->request));
 				sun3_dma_setup_done = cmd;

+ 1 - 1
drivers/scsi/sun3_scsi.c

@@ -524,7 +524,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
 				    int write_flag)
 {
-	if(cmd->request->flags & REQ_CMD)
+	if(blk_fs_request(cmd->request))
  		return wanted;
 	else
 		return 0;

+ 1 - 1
drivers/scsi/sun3_scsi_vme.c

@@ -458,7 +458,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
 				    int write_flag)
 {
-	if(cmd->request->flags & REQ_CMD)
+	if(blk_fs_request(cmd->request))
  		return wanted;
 	else
 		return 0;

+ 2 - 3
drivers/usb/storage/Kconfig

@@ -8,8 +8,7 @@ comment "may also be needed; see USB_STORAGE Help for more information"
 
 config USB_STORAGE
 	tristate "USB Mass Storage support"
-	depends on USB
-	select SCSI
+	depends on USB && SCSI
 	---help---
 	  Say Y here if you want to connect USB mass storage devices to your
 	  computer's USB port. This is the driver you need for USB
@@ -18,7 +17,7 @@ config USB_STORAGE
 	  similar devices. This driver may also be used for some cameras
 	  and card readers.
 
-	  This option 'selects' (turns on, enables) 'SCSI', but you
+	  This option depends on 'SCSI' support being enabled, but you
 	  probably also need 'SCSI device support: SCSI disk support'
 	  (BLK_DEV_SD) for most USB storage devices.
 

+ 24 - 7
fs/Kconfig

@@ -4,6 +4,8 @@
 
 menu "File systems"
 
+if BLOCK
+
 config EXT2_FS
 	tristate "Second extended fs support"
 	help
@@ -399,6 +401,8 @@ config ROMFS_FS
 	  If you don't know whether you need it, then you don't need it:
 	  answer N.
 
+endif
+
 config INOTIFY
 	bool "Inotify file change notification support"
 	default y
@@ -530,6 +534,7 @@ config FUSE_FS
 	  If you want to develop a userspace FS, or if you want to use
 	  a filesystem based on FUSE, answer Y or M.
 
+if BLOCK
 menu "CD-ROM/DVD Filesystems"
 
 config ISO9660_FS
@@ -597,7 +602,9 @@ config UDF_NLS
 	depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y)
 
 endmenu
+endif
 
+if BLOCK
 menu "DOS/FAT/NT Filesystems"
 
 config FAT_FS
@@ -782,6 +789,7 @@ config NTFS_RW
 	  It is perfectly safe to say N here.
 
 endmenu
+endif
 
 menu "Pseudo filesystems"
 
@@ -939,7 +947,7 @@ menu "Miscellaneous filesystems"
 
 config ADFS_FS
 	tristate "ADFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  The Acorn Disc Filing System is the standard file system of the
 	  RiscOS operating system which runs on Acorn's ARM-based Risc PC
@@ -967,7 +975,7 @@ config ADFS_FS_RW
 
 config AFFS_FS
 	tristate "Amiga FFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  The Fast File System (FFS) is the common file system used on hard
 	  disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20).  Say Y
@@ -989,7 +997,7 @@ config AFFS_FS
 
 config HFS_FS
 	tristate "Apple Macintosh file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	select NLS
 	help
 	  If you say Y here, you will be able to mount Macintosh-formatted
@@ -1002,6 +1010,7 @@ config HFS_FS
 
 config HFSPLUS_FS
 	tristate "Apple Extended HFS file system support"
+	depends on BLOCK
 	select NLS
 	select NLS_UTF8
 	help
@@ -1015,7 +1024,7 @@ config HFSPLUS_FS
 
 config BEFS_FS
 	tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	select NLS
 	help
 	  The BeOS File System (BeFS) is the native file system of Be, Inc's
@@ -1042,7 +1051,7 @@ config BEFS_DEBUG
 
 config BFS_FS
 	tristate "BFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  Boot File System (BFS) is a file system used under SCO UnixWare to
 	  allow the bootloader access to the kernel image and other important
@@ -1064,7 +1073,7 @@ config BFS_FS
 
 config EFS_FS
 	tristate "EFS file system support (read only) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  EFS is an older file system used for non-ISO9660 CD-ROMs and hard
 	  disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
@@ -1079,7 +1088,7 @@ config EFS_FS
 
 config JFFS_FS
 	tristate "Journalling Flash File System (JFFS) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	help
 	  JFFS is the Journaling Flash File System developed by Axis
 	  Communications in Sweden, aimed at providing a crash/powerdown-safe
@@ -1264,6 +1273,7 @@ endchoice
 
 config CRAMFS
 	tristate "Compressed ROM file system support (cramfs)"
+	depends on BLOCK
 	select ZLIB_INFLATE
 	help
 	  Saying Y here includes support for CramFs (Compressed ROM File
@@ -1283,6 +1293,7 @@ config CRAMFS
 
 config VXFS_FS
 	tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
+	depends on BLOCK
 	help
 	  FreeVxFS is a file system driver that support the VERITAS VxFS(TM)
 	  file system format.  VERITAS VxFS(TM) is the standard file system
@@ -1300,6 +1311,7 @@ config VXFS_FS
 
 config HPFS_FS
 	tristate "OS/2 HPFS file system support"
+	depends on BLOCK
 	help
 	  OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
 	  is the file system used for organizing files on OS/2 hard disk
@@ -1316,6 +1328,7 @@ config HPFS_FS
 
 config QNX4FS_FS
 	tristate "QNX4 file system support (read only)"
+	depends on BLOCK
 	help
 	  This is the file system used by the real-time operating systems
 	  QNX 4 and QNX 6 (the latter is also called QNX RTP).
@@ -1343,6 +1356,7 @@ config QNX4FS_RW
 
 config SYSV_FS
 	tristate "System V/Xenix/V7/Coherent file system support"
+	depends on BLOCK
 	help
 	  SCO, Xenix and Coherent are commercial Unix systems for Intel
 	  machines, and Version 7 was used on the DEC PDP-11. Saying Y
@@ -1381,6 +1395,7 @@ config SYSV_FS
 
 config UFS_FS
 	tristate "UFS file system support (read only)"
+	depends on BLOCK
 	help
 	  BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
 	  OpenBSD and NeXTstep) use a file system called UFS. Some System V
@@ -1959,11 +1974,13 @@ config GENERIC_ACL
 
 endmenu
 
+if BLOCK
 menu "Partition Types"
 
 source "fs/partitions/Kconfig"
 
 endmenu
+endif
 
 source "fs/nls/Kconfig"
 

+ 10 - 4
fs/Makefile

@@ -5,12 +5,18 @@
 # Rewritten to use lists instead of if-statements.
 # 
 
-obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
-		block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
+obj-y :=	open.o read_write.o file_table.o super.o \
+		char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
-		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o drop_caches.o splice.o sync.o
+		seq_file.o xattr.o libfs.o fs-writeback.o \
+		pnode.o drop_caches.o splice.o sync.o
+
+ifeq ($(CONFIG_BLOCK),y)
+obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+else
+obj-y +=	no-block.o
+endif
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
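
With CONFIG_BLOCK=n the build pulls in the new fs/no-block.o stub instead of the blockdev machinery. A minimal sketch of what that stub plausibly contains, based on the "Make it possible to disable the block layer" patch in this series (treat the exact body as an assumption):

	/* fs/no-block.c sketch: def_blk_fops when CONFIG_BLOCK=n (assumed) */
	#include <linux/fs.h>

	static int no_blkdev_open(struct inode *inode, struct file *filp)
	{
		/* no block layer, so block device nodes cannot be opened */
		return -ENODEV;
	}

	const struct file_operations def_blk_fops = {
		.open	= no_blkdev_open,
	};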

+ 0 - 2
fs/afs/file.c

@@ -16,7 +16,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/buffer_head.h>
 #include "volume.h"
 #include "vnode.h"
 #include <rxrpc/call.h>
@@ -37,7 +36,6 @@ struct inode_operations afs_file_inode_operations = {
 
 const struct address_space_operations afs_fs_aops = {
 	.readpage	= afs_file_readpage,
-	.sync_page	= block_sync_page,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
 	.releasepage	= afs_file_releasepage,
 	.invalidatepage	= afs_file_invalidatepage,

+ 0 - 1
fs/binfmt_elf.c

@@ -46,7 +46,6 @@
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
 static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
-extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
 
 #ifndef elf_addr_t
 #define elf_addr_t unsigned long

+ 2 - 2
fs/bio.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1142,7 +1142,7 @@ static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale)
 		struct biovec_slab *bp = bvec_slabs + i;
 		mempool_t **bvp = bs->bvec_pools + i;
 
-		if (i >= scale)
+		if (pool_entries > 1 && i >= scale)
 			pool_entries >>= 1;
 
 		*bvp = mempool_create_slab_pool(pool_entries, bp->slab);
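
This is the "fix creating zero sized bio mempools in low memory system" change from the series: on a small machine the caller can pass pool_entries == 1, and once i reaches scale the unconditional halving drives it to 0, so mempool_create_slab_pool() would be asked for a zero-entry pool. A standalone sketch of the arithmetic (plain userspace C; the inputs are hypothetical low-memory values):

	#include <stdio.h>

	int main(void)
	{
		int pool_entries = 1, scale = 2, i;	/* hypothetical inputs */

		for (i = 0; i < 6; i++) {
			if (pool_entries > 1 && i >= scale)	/* the fixed test */
				pool_entries >>= 1;
			/* without the pool_entries > 1 guard, 1 >> 1 == 0 from
			 * i == 2 on: a zero-sized mempool for every later slab */
			printf("pool %d: %d entries\n", i, pool_entries);
		}
		return 0;
	}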

+ 23 - 0
fs/block_dev.c

@@ -17,11 +17,13 @@
 #include <linux/module.h>
 #include <linux/blkpg.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/mount.h>
 #include <linux/uio.h>
 #include <linux/namei.h>
 #include <asm/uaccess.h>
+#include "internal.h"
 
 struct bdev_inode {
 	struct block_device bdev;
@@ -1313,3 +1315,24 @@ void close_bdev_excl(struct block_device *bdev)
 }
 
 EXPORT_SYMBOL(close_bdev_excl);
+
+int __invalidate_device(struct block_device *bdev)
+{
+	struct super_block *sb = get_super(bdev);
+	int res = 0;
+
+	if (sb) {
+		/*
+		 * no need to lock the super, get_super holds the
+		 * read mutex so the filesystem cannot go away
+		 * under us (->put_super runs with the write lock
+		 * hold).
+		 */
+		shrink_dcache_sb(sb);
+		res = invalidate_inodes(sb);
+		drop_super(sb);
+	}
+	invalidate_bdev(bdev, 0);
+	return res;
+}
+EXPORT_SYMBOL(__invalidate_device);
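
__invalidate_device() moves here from fs/inode.c (removed further down) so the generic inode code no longer depends on block devices. A hedged usage sketch — a hypothetical removable-media driver, not code from this patch:

	#include <linux/fs.h>

	/* hypothetical: drop cached state when the medium disappears */
	static void example_media_gone(struct block_device *bdev)
	{
		/* shrinks dentries/inodes of any fs mounted on bdev, then
		 * invalidates the device's buffer cache; a non-zero return
		 * means busy inodes were left behind */
		if (__invalidate_device(bdev))
			printk(KERN_WARNING "example: busy inodes left\n");
	}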

+ 0 - 174
fs/buffer.c

@@ -159,31 +159,6 @@ int sync_blockdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(sync_blockdev);
 
-static void __fsync_super(struct super_block *sb)
-{
-	sync_inodes_sb(sb, 0);
-	DQUOT_SYNC(sb);
-	lock_super(sb);
-	if (sb->s_dirt && sb->s_op->write_super)
-		sb->s_op->write_super(sb);
-	unlock_super(sb);
-	if (sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, 1);
-	sync_blockdev(sb->s_bdev);
-	sync_inodes_sb(sb, 1);
-}
-
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
-{
-	__fsync_super(sb);
-	return sync_blockdev(sb->s_bdev);
-}
-
 /*
  * Write out and wait upon all dirty data associated with this
  * device.   Filesystem data as well as the underlying block
@@ -259,118 +234,6 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
 }
 EXPORT_SYMBOL(thaw_bdev);
 
-/*
- * sync everything.  Start out by waking pdflush, because that writes back
- * all queues in parallel.
- */
-static void do_sync(unsigned long wait)
-{
-	wakeup_pdflush(0);
-	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
-	DQUOT_SYNC(NULL);
-	sync_supers();		/* Write the superblocks */
-	sync_filesystems(0);	/* Start syncing the filesystems */
-	sync_filesystems(wait);	/* Waitingly sync the filesystems */
-	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */
-	if (!wait)
-		printk("Emergency Sync complete\n");
-	if (unlikely(laptop_mode))
-		laptop_sync_completion();
-}
-
-asmlinkage long sys_sync(void)
-{
-	do_sync(1);
-	return 0;
-}
-
-void emergency_sync(void)
-{
-	pdflush_operation(do_sync, 0);
-}
-
-/*
- * Generic function to fsync a file.
- *
- * filp may be NULL if called via the msync of a vma.
- */
- 
-int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
-{
-	struct inode * inode = dentry->d_inode;
-	struct super_block * sb;
-	int ret, err;
-
-	/* sync the inode to buffers */
-	ret = write_inode_now(inode, 0);
-
-	/* sync the superblock to buffers */
-	sb = inode->i_sb;
-	lock_super(sb);
-	if (sb->s_op->write_super)
-		sb->s_op->write_super(sb);
-	unlock_super(sb);
-
-	/* .. finally sync the buffers to disk */
-	err = sync_blockdev(sb->s_bdev);
-	if (!ret)
-		ret = err;
-	return ret;
-}
-
-long do_fsync(struct file *file, int datasync)
-{
-	int ret;
-	int err;
-	struct address_space *mapping = file->f_mapping;
-
-	if (!file->f_op || !file->f_op->fsync) {
-		/* Why?  We can still call filemap_fdatawrite */
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = filemap_fdatawrite(mapping);
-
-	/*
-	 * We need to protect against concurrent writers, which could cause
-	 * livelocks in fsync_buffers_list().
-	 */
-	mutex_lock(&mapping->host->i_mutex);
-	err = file->f_op->fsync(file, file->f_dentry, datasync);
-	if (!ret)
-		ret = err;
-	mutex_unlock(&mapping->host->i_mutex);
-	err = filemap_fdatawait(mapping);
-	if (!ret)
-		ret = err;
-out:
-	return ret;
-}
-
-static long __do_fsync(unsigned int fd, int datasync)
-{
-	struct file *file;
-	int ret = -EBADF;
-
-	file = fget(fd);
-	if (file) {
-		ret = do_fsync(file, datasync);
-		fput(file);
-	}
-	return ret;
-}
-
-asmlinkage long sys_fsync(unsigned int fd)
-{
-	return __do_fsync(fd, 0);
-}
-
-asmlinkage long sys_fdatasync(unsigned int fd)
-{
-	return __do_fsync(fd, 1);
-}
-
 /*
  * Various filesystems appear to want __find_get_block to be non-blocking.
  * But it's the page lock which protects the buffers.  To get around this,
@@ -1550,35 +1413,6 @@ static void discard_buffer(struct buffer_head * bh)
 	unlock_buffer(bh);
 }
 
-/**
- * try_to_release_page() - release old fs-specific metadata on a page
- *
- * @page: the page which the kernel is trying to free
- * @gfp_mask: memory allocation flags (and I/O mode)
- *
- * The address_space is to try to release any data against the page
- * (presumably at page->private).  If the release was successful, return `1'.
- * Otherwise return zero.
- *
- * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
- *
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
- */
-int try_to_release_page(struct page *page, gfp_t gfp_mask)
-{
-	struct address_space * const mapping = page->mapping;
-
-	BUG_ON(!PageLocked(page));
-	if (PageWriteback(page))
-		return 0;
-	
-	if (mapping && mapping->a_ops->releasepage)
-		return mapping->a_ops->releasepage(page, gfp_mask);
-	return try_to_free_buffers(page);
-}
-EXPORT_SYMBOL(try_to_release_page);
-
 /**
  * block_invalidatepage - invalidate part of all of a buffer-backed page
  *
@@ -1630,14 +1464,6 @@ out:
 }
 EXPORT_SYMBOL(block_invalidatepage);
 
-void do_invalidatepage(struct page *page, unsigned long offset)
-{
-	void (*invalidatepage)(struct page *, unsigned long);
-	invalidatepage = page->mapping->a_ops->invalidatepage ? :
-		block_invalidatepage;
-	(*invalidatepage)(page, offset);
-}
-
 /*
  * We attach and possibly dirty the buffers atomically wrt
  * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers

+ 1 - 0
fs/char_dev.c

@@ -24,6 +24,7 @@
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
+#include "internal.h"
 
 /*
  * capabilities for /dev/mem, /dev/kmem and similar directly mappable character

+ 0 - 1
fs/cifs/file.c

@@ -25,7 +25,6 @@
 #include <linux/backing-dev.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
-#include <linux/mpage.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/smp_lock.h>

+ 0 - 1
fs/cifs/inode.c

@@ -19,7 +19,6 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
-#include <linux/buffer_head.h>
 #include <linux/stat.h>
 #include <linux/pagemap.h>
 #include <asm/div64.h>

+ 3 - 4
fs/cifs/ioctl.c

@@ -22,7 +22,6 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ext2_fs.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsproto.h"
@@ -74,7 +73,7 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
 			}
 			break;
 #ifdef CONFIG_CIFS_POSIX
-		case EXT2_IOC_GETFLAGS:
+		case FS_IOC_GETFLAGS:
 			if(CIFS_UNIX_EXTATTR_CAP & caps) {
 				if (pSMBFile == NULL)
 					break;
@@ -82,12 +81,12 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
 					&ExtAttrBits, &ExtAttrMask);
 				if(rc == 0)
 					rc = put_user(ExtAttrBits &
-						EXT2_FL_USER_VISIBLE,
+						FS_FL_USER_VISIBLE,
 						(int __user *)arg);
 			}
 			break;
 
-		case EXT2_IOC_SETFLAGS:
+		case FS_IOC_SETFLAGS:
 			if(CIFS_UNIX_EXTATTR_CAP & caps) {
 				if(get_user(ExtAttrBits,(int __user *)arg)) {
 					rc = -EFAULT;

+ 3 - 7
fs/compat.c

@@ -52,11 +52,12 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/ioctls.h>
-
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+#include "internal.h"
 
 int compat_log = 1;
 
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
 int compat_printk(const char *fmt, ...)
 {
 	va_list ap;
@@ -313,9 +314,6 @@ out:
 #define IOCTL_HASHSIZE 256
 static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE];
 
-extern struct ioctl_trans ioctl_start[];
-extern int ioctl_table_size;
-
 static inline unsigned long ioctl32_hash(unsigned long cmd)
 {
 	return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE;
@@ -838,8 +836,6 @@ static int do_nfs4_super_data_conv(void *raw_data)
 	return 0;
 }
 
-extern int copy_mount_options (const void __user *, unsigned long *);
-
 #define SMBFS_NAME      "smbfs"
 #define NCPFS_NAME      "ncpfs"
 #define NFS4_NAME	"nfs4"

+ 18 - 190
fs/compat_ioctl.c

@@ -40,15 +40,11 @@
 #include <linux/if_pppox.h>
 #include <linux/mtio.h>
 #include <linux/cdrom.h>
-#include <linux/loop.h>
 #include <linux/auto_fs.h>
 #include <linux/auto_fs4.h>
 #include <linux/tty.h>
 #include <linux/vt_kern.h>
 #include <linux/fb.h>
-#include <linux/ext2_fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
 #include <linux/videodev.h>
 #include <linux/netdevice.h>
 #include <linux/raw.h>
@@ -60,7 +56,6 @@
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <linux/serial.h>
-#include <linux/reiserfs_fs.h>
 #include <linux/if_tun.h>
 #include <linux/ctype.h>
 #include <linux/ioctl32.h>
@@ -113,7 +108,6 @@
 #include <linux/nbd.h>
 #include <linux/random.h>
 #include <linux/filter.h>
-#include <linux/msdos_fs.h>
 #include <linux/pktcdvd.h>
 
 #include <linux/hiddev.h>
@@ -124,21 +118,6 @@
 #include <linux/dvb/video.h>
 #include <linux/lp.h>
 
-/* Aiee. Someone does not find a difference between int and long */
-#define EXT2_IOC32_GETFLAGS               _IOR('f', 1, int)
-#define EXT2_IOC32_SETFLAGS               _IOW('f', 2, int)
-#define EXT3_IOC32_GETVERSION             _IOR('f', 3, int)
-#define EXT3_IOC32_SETVERSION             _IOW('f', 4, int)
-#define EXT3_IOC32_GETRSVSZ               _IOR('f', 5, int)
-#define EXT3_IOC32_SETRSVSZ               _IOW('f', 6, int)
-#define EXT3_IOC32_GROUP_EXTEND           _IOW('f', 7, unsigned int)
-#ifdef CONFIG_JBD_DEBUG
-#define EXT3_IOC32_WAIT_FOR_READONLY      _IOR('f', 99, int)
-#endif
-
-#define EXT2_IOC32_GETVERSION             _IOR('v', 1, int)
-#define EXT2_IOC32_SETVERSION             _IOW('v', 2, int)
-
 static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
 			      unsigned long arg, struct file *f)
 {
@@ -176,34 +155,6 @@ static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-	/* These are just misnamed, they actually get/put from/to user an int */
-	switch (cmd) {
-	case EXT2_IOC32_GETFLAGS: cmd = EXT2_IOC_GETFLAGS; break;
-	case EXT2_IOC32_SETFLAGS: cmd = EXT2_IOC_SETFLAGS; break;
-	case EXT2_IOC32_GETVERSION: cmd = EXT2_IOC_GETVERSION; break;
-	case EXT2_IOC32_SETVERSION: cmd = EXT2_IOC_SETVERSION; break;
-	}
-	return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
-}
-
-static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-	/* These are just misnamed, they actually get/put from/to user an int */
-	switch (cmd) {
-	case EXT3_IOC32_GETVERSION: cmd = EXT3_IOC_GETVERSION; break;
-	case EXT3_IOC32_SETVERSION: cmd = EXT3_IOC_SETVERSION; break;
-	case EXT3_IOC32_GETRSVSZ: cmd = EXT3_IOC_GETRSVSZ; break;
-	case EXT3_IOC32_SETRSVSZ: cmd = EXT3_IOC_SETRSVSZ; break;
-	case EXT3_IOC32_GROUP_EXTEND: cmd = EXT3_IOC_GROUP_EXTEND; break;
-#ifdef CONFIG_JBD_DEBUG
-	case EXT3_IOC32_WAIT_FOR_READONLY: cmd = EXT3_IOC_WAIT_FOR_READONLY; break;
-#endif
-	}
-	return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
-}
-
 struct compat_video_event {
 	int32_t		type;
 	compat_time_t	timestamp;
@@ -694,6 +645,7 @@ out:
 }
 #endif
 
+#ifdef CONFIG_BLOCK
 struct hd_geometry32 {
 	unsigned char heads;
 	unsigned char sectors;
@@ -918,6 +870,7 @@ static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 	}
 	return err;
 }
+#endif /* CONFIG_BLOCK */
 
 struct sock_fprog32 {
 	unsigned short	len;
@@ -1041,6 +994,7 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 }
 
 
+#ifdef CONFIG_BLOCK
 struct mtget32 {
 	compat_long_t	mt_type;
 	compat_long_t	mt_resid;
@@ -1213,73 +1167,7 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 
 	return err;
 }
-
-struct loop_info32 {
-	compat_int_t	lo_number;      /* ioctl r/o */
-	compat_dev_t	lo_device;      /* ioctl r/o */
-	compat_ulong_t	lo_inode;       /* ioctl r/o */
-	compat_dev_t	lo_rdevice;     /* ioctl r/o */
-	compat_int_t	lo_offset;
-	compat_int_t	lo_encrypt_type;
-	compat_int_t	lo_encrypt_key_size;    /* ioctl w/o */
-	compat_int_t	lo_flags;       /* ioctl r/o */
-	char		lo_name[LO_NAME_SIZE];
-	unsigned char	lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
-	compat_ulong_t	lo_init[2];
-	char		reserved[4];
-};
-
-static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-	mm_segment_t old_fs = get_fs();
-	struct loop_info l;
-	struct loop_info32 __user *ul;
-	int err = -EINVAL;
-
-	ul = compat_ptr(arg);
-	switch(cmd) {
-	case LOOP_SET_STATUS:
-		err = get_user(l.lo_number, &ul->lo_number);
-		err |= __get_user(l.lo_device, &ul->lo_device);
-		err |= __get_user(l.lo_inode, &ul->lo_inode);
-		err |= __get_user(l.lo_rdevice, &ul->lo_rdevice);
-		err |= __copy_from_user(&l.lo_offset, &ul->lo_offset,
-		        8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
-		if (err) {
-			err = -EFAULT;
-		} else {
-			set_fs (KERNEL_DS);
-			err = sys_ioctl (fd, cmd, (unsigned long)&l);
-			set_fs (old_fs);
-		}
-		break;
-	case LOOP_GET_STATUS:
-		set_fs (KERNEL_DS);
-		err = sys_ioctl (fd, cmd, (unsigned long)&l);
-		set_fs (old_fs);
-		if (!err) {
-			err = put_user(l.lo_number, &ul->lo_number);
-			err |= __put_user(l.lo_device, &ul->lo_device);
-			err |= __put_user(l.lo_inode, &ul->lo_inode);
-			err |= __put_user(l.lo_rdevice, &ul->lo_rdevice);
-			err |= __copy_to_user(&ul->lo_offset, &l.lo_offset,
-				(unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
-			if (err)
-				err = -EFAULT;
-		}
-		break;
-	default: {
-		static int count;
-		if (++count <= 20)
-			printk("%s: Unknown loop ioctl cmd, fd(%d) "
-			       "cmd(%08x) arg(%08lx)\n",
-			       __FUNCTION__, fd, cmd, arg);
-	}
-	}
-	return err;
-}
-
-extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg);
+#endif /* CONFIG_BLOCK */
 
 #ifdef CONFIG_VT
 
@@ -1607,6 +1495,7 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return -EINVAL;
 }
 
+#ifdef CONFIG_BLOCK
 static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	/* The mkswap binary hard codes it to Intel value :-((( */
@@ -1641,12 +1530,14 @@ static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 
 	return sys_ioctl(fd, cmd, (unsigned long)a);
 }
+#endif
 
 static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
 }
 
+#ifdef CONFIG_BLOCK
 /* Fix sizeof(sizeof()) breakage */
 #define BLKBSZGET_32   _IOR(0x12,112,int)
 #define BLKBSZSET_32   _IOW(0x12,113,int)
@@ -1667,6 +1558,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
 {
        return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg));
 }
+#endif
 
 /* Bluetooth ioctls */
 #define HCIUARTSETPROTO	_IOW('U', 200, int)
@@ -1687,6 +1579,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
 #define HIDPGETCONNLIST	_IOR('H', 210, int)
 #define HIDPGETCONNINFO	_IOR('H', 211, int)
 
+#ifdef CONFIG_BLOCK
 struct floppy_struct32 {
 	compat_uint_t	size;
 	compat_uint_t	sect;
@@ -2011,6 +1904,7 @@ out:
 	kfree(karg);
 	return err;
 }
+#endif
 
 struct mtd_oob_buf32 {
 	u_int32_t start;
@@ -2052,61 +1946,7 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return err;
 }	
 
-#define	VFAT_IOCTL_READDIR_BOTH32	_IOR('r', 1, struct compat_dirent[2])
-#define	VFAT_IOCTL_READDIR_SHORT32	_IOR('r', 2, struct compat_dirent[2])
-
-static long
-put_dirent32 (struct dirent *d, struct compat_dirent __user *d32)
-{
-        if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent)))
-                return -EFAULT;
-
-        __put_user(d->d_ino, &d32->d_ino);
-        __put_user(d->d_off, &d32->d_off);
-        __put_user(d->d_reclen, &d32->d_reclen);
-        if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
-		return -EFAULT;
-
-        return 0;
-}
-
-static int vfat_ioctl32(unsigned fd, unsigned cmd, unsigned long arg)
-{
-	struct compat_dirent __user *p = compat_ptr(arg);
-	int ret;
-	mm_segment_t oldfs = get_fs();
-	struct dirent d[2];
-
-	switch(cmd)
-	{
-        	case VFAT_IOCTL_READDIR_BOTH32:
-                	cmd = VFAT_IOCTL_READDIR_BOTH;
-                	break;
-        	case VFAT_IOCTL_READDIR_SHORT32:
-                	cmd = VFAT_IOCTL_READDIR_SHORT;
-                	break;
-	}
-
-	set_fs(KERNEL_DS);
-	ret = sys_ioctl(fd,cmd,(unsigned long)&d);
-	set_fs(oldfs);
-	if (ret >= 0) {
-		ret |= put_dirent32(&d[0], p);
-		ret |= put_dirent32(&d[1], p + 1);
-	}
-	return ret;
-}
-
-#define REISERFS_IOC_UNPACK32               _IOW(0xCD,1,int)
-
-static int reiserfs_ioctl32(unsigned fd, unsigned cmd, unsigned long ptr)
-{
-        if (cmd == REISERFS_IOC_UNPACK32)
-                cmd = REISERFS_IOC_UNPACK;
-
-        return sys_ioctl(fd,cmd,ptr);
-}
-
+#ifdef CONFIG_BLOCK
 struct raw32_config_request
 {
         compat_int_t    raw_minor;
@@ -2171,6 +2011,7 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
         }
         return ret;
 }
+#endif /* CONFIG_BLOCK */
 
 struct serial_struct32 {
         compat_int_t    type;
@@ -2777,6 +2618,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
 HANDLE_IOCTL(SIOCRTMSG, ret_einval)
 HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
 #endif
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
 HANDLE_IOCTL(BLKRAGET, w_long)
 HANDLE_IOCTL(BLKGETSIZE, w_long)
@@ -2802,16 +2644,17 @@ HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
 HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
 HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
 HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
+#endif
 HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
 HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
 HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
 HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
-HANDLE_IOCTL(LOOP_SET_STATUS, loop_status)
-HANDLE_IOCTL(LOOP_GET_STATUS, loop_status)
+#endif
 #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
 HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
 #ifdef CONFIG_VT
@@ -2821,19 +2664,6 @@ HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl)
 HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl)
 HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
 #endif
-HANDLE_IOCTL(EXT2_IOC32_GETFLAGS, do_ext2_ioctl)
-HANDLE_IOCTL(EXT2_IOC32_SETFLAGS, do_ext2_ioctl)
-HANDLE_IOCTL(EXT2_IOC32_GETVERSION, do_ext2_ioctl)
-HANDLE_IOCTL(EXT2_IOC32_SETVERSION, do_ext2_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GETVERSION, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_SETVERSION, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GETRSVSZ, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_SETRSVSZ, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GROUP_EXTEND, do_ext3_ioctl)
-COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD)
-#ifdef CONFIG_JBD_DEBUG
-HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl)
-#endif
/* One SMB ioctl needs translations. */
 #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
 HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
@@ -2863,16 +2693,14 @@ HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl)
 HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
 HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
 /* block stuff */
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget)
 HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset)
 HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
-/* vfat */
-HANDLE_IOCTL(VFAT_IOCTL_READDIR_BOTH32, vfat_ioctl32)
-HANDLE_IOCTL(VFAT_IOCTL_READDIR_SHORT32, vfat_ioctl32)
-HANDLE_IOCTL(REISERFS_IOC_UNPACK32, reiserfs_ioctl32)
 /* Raw devices */
 HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
 HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
+#endif
 /* Serial */
 HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
 HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
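
The filesystem-specific entries removed above do not disappear; they come back as per-driver ->compat_ioctl methods (see the ext2, ext3 and FAT hunks below). The common shape, sketched with illustrative FOO_* placeholder names:

	#ifdef CONFIG_COMPAT
	/* illustrative skeleton only; FOO_* names are placeholders */
	static long foo_compat_ioctl(struct file *file, unsigned int cmd,
				     unsigned long arg)
	{
		switch (cmd) {
		case FOO_IOC32_GETTHING:	/* 32-bit layout variant */
			cmd = FOO_IOC_GETTHING;	/* remap to the native cmd */
			break;
		default:
			return -ENOIOCTLCMD;	/* not ours: common code rejects it */
		}
		return foo_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
	}
	#endif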

+ 1 - 3
fs/dcache.c

@@ -32,6 +32,7 @@
 #include <linux/seqlock.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
+#include "internal.h"
 
 
 int sysctl_vfs_cache_pressure __read_mostly = 100;
@@ -1877,9 +1878,6 @@ kmem_cache_t *filp_cachep __read_mostly;
 
 EXPORT_SYMBOL(d_genocide);
 
-extern void bdev_cache_init(void);
-extern void chrdev_init(void);
-
 void __init vfs_caches_init_early(void)
 {
 	dcache_init_early();

+ 3 - 0
fs/ext2/dir.c

@@ -661,5 +661,8 @@ const struct file_operations ext2_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= ext2_readdir,
 	.ioctl		= ext2_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext2_compat_ioctl,
+#endif
 	.fsync		= ext2_sync_file,
 };

+ 1 - 0
fs/ext2/ext2.h

@@ -137,6 +137,7 @@ extern void ext2_set_inode_flags(struct inode *inode);
 /* ioctl.c */
 extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
 		       unsigned long);
+extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long);
 
 /* namei.c */
 struct dentry *ext2_get_parent(struct dentry *child);

+ 6 - 0
fs/ext2/file.c

@@ -46,6 +46,9 @@ const struct file_operations ext2_file_operations = {
 	.aio_read	= generic_file_aio_read,
 	.aio_write	= generic_file_aio_write,
 	.ioctl		= ext2_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext2_compat_ioctl,
+#endif
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
@@ -63,6 +66,9 @@ const struct file_operations ext2_xip_file_operations = {
 	.read		= xip_file_read,
 	.write		= xip_file_write,
 	.ioctl		= ext2_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext2_compat_ioctl,
+#endif
 	.mmap		= xip_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,

+ 32 - 0
fs/ext2/ioctl.c

@@ -11,6 +11,8 @@
 #include <linux/capability.h>
 #include <linux/time.h>
 #include <linux/sched.h>
+#include <linux/compat.h>
+#include <linux/smp_lock.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 
@@ -80,3 +82,33 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		return -ENOTTY;
 	}
 }
+
+#ifdef CONFIG_COMPAT
+long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	int ret;
+
+	/* These are just misnamed, they actually get/put from/to user an int */
+	switch (cmd) {
+	case EXT2_IOC32_GETFLAGS:
+		cmd = EXT2_IOC_GETFLAGS;
+		break;
+	case EXT2_IOC32_SETFLAGS:
+		cmd = EXT2_IOC_SETFLAGS;
+		break;
+	case EXT2_IOC32_GETVERSION:
+		cmd = EXT2_IOC_GETVERSION;
+		break;
+	case EXT2_IOC32_SETVERSION:
+		cmd = EXT2_IOC_SETVERSION;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+	lock_kernel();
+	ret = ext2_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+	unlock_kernel();
+	return ret;
+}
+#endif
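
The EXT2_IOC32_* command numbers this handler remaps were deleted from fs/compat_ioctl.c above, so they presumably move into the ext2 headers alongside the native commands; the values below simply restate the removed definitions:

	/* as removed from fs/compat_ioctl.c; assumed to now live in
	 * include/linux/ext2_fs.h next to EXT2_IOC_GETFLAGS and friends */
	#define EXT2_IOC32_GETFLAGS		_IOR('f', 1, int)
	#define EXT2_IOC32_SETFLAGS		_IOW('f', 2, int)
	#define EXT2_IOC32_GETVERSION		_IOR('v', 1, int)
	#define EXT2_IOC32_SETVERSION		_IOW('v', 2, int)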

+ 3 - 0
fs/ext3/dir.c

@@ -44,6 +44,9 @@ const struct file_operations ext3_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
 	.ioctl		= ext3_ioctl,		/* BKL held */
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext3_compat_ioctl,
+#endif
 	.fsync		= ext3_sync_file,	/* BKL held */
 #ifdef CONFIG_EXT3_INDEX
 	.release	= ext3_release_dir,

+ 3 - 0
fs/ext3/file.c

@@ -114,6 +114,9 @@ const struct file_operations ext3_file_operations = {
 	.readv		= generic_file_readv,
 	.writev		= generic_file_writev,
 	.ioctl		= ext3_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext3_compat_ioctl,
+#endif
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext3_release_file,

+ 3 - 2
fs/ext3/inode.c

@@ -36,6 +36,7 @@
 #include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/uio.h>
+#include <linux/bio.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -1073,7 +1074,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
 		return bh;
 	if (buffer_uptodate(bh))
 		return bh;
-	ll_rw_block(READ, 1, &bh);
+	ll_rw_block(READ_META, 1, &bh);
 	wait_on_buffer(bh);
 	if (buffer_uptodate(bh))
 		return bh;
@@ -2540,7 +2541,7 @@ make_io:
 		 */
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
-		submit_bh(READ, bh);
+		submit_bh(READ_META, bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			ext3_error(inode->i_sb, "ext3_get_inode_loc",
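
READ_META is new in this series: it tags metadata reads so blktrace can distinguish them from data I/O while still behaving as a plain READ at the request level. Its definition is, to the best of my knowledge, a READ with the BIO metadata bit set — treat the exact form as an assumption:

	/* from include/linux/fs.h in this series (quoted from memory) */
	#define READ_META	(READ | (1 << BIO_RW_META))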

+ 54 - 1
fs/ext3/ioctl.c

@@ -13,9 +13,10 @@
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
 #include <linux/time.h>
+#include <linux/compat.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
-
 int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -252,3 +253,55 @@ flags_err:
 		return -ENOTTY;
 	}
 }
+
+#ifdef CONFIG_COMPAT
+long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	int ret;
+
+	/* These are just misnamed, they actually get/put from/to user an int */
+	switch (cmd) {
+	case EXT3_IOC32_GETFLAGS:
+		cmd = EXT3_IOC_GETFLAGS;
+		break;
+	case EXT3_IOC32_SETFLAGS:
+		cmd = EXT3_IOC_SETFLAGS;
+		break;
+	case EXT3_IOC32_GETVERSION:
+		cmd = EXT3_IOC_GETVERSION;
+		break;
+	case EXT3_IOC32_SETVERSION:
+		cmd = EXT3_IOC_SETVERSION;
+		break;
+	case EXT3_IOC32_GROUP_EXTEND:
+		cmd = EXT3_IOC_GROUP_EXTEND;
+		break;
+	case EXT3_IOC32_GETVERSION_OLD:
+		cmd = EXT3_IOC_GETVERSION_OLD;
+		break;
+	case EXT3_IOC32_SETVERSION_OLD:
+		cmd = EXT3_IOC_SETVERSION_OLD;
+		break;
+#ifdef CONFIG_JBD_DEBUG
+	case EXT3_IOC32_WAIT_FOR_READONLY:
+		cmd = EXT3_IOC_WAIT_FOR_READONLY;
+		break;
+#endif
+	case EXT3_IOC32_GETRSVSZ:
+		cmd = EXT3_IOC_GETRSVSZ;
+		break;
+	case EXT3_IOC32_SETRSVSZ:
+		cmd = EXT3_IOC_SETRSVSZ;
+		break;
+	case EXT3_IOC_GROUP_ADD:
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+	lock_kernel();
+	ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+	unlock_kernel();
+	return ret;
+}
+#endif

+ 2 - 1
fs/ext3/namei.c

@@ -35,6 +35,7 @@
 #include <linux/string.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/bio.h>
 #include <linux/smp_lock.h>
 
 #include "namei.h"
@@ -870,7 +871,7 @@ restart:
 				bh = ext3_getblk(NULL, dir, b++, 0, &err);
 				bh_use[ra_max] = bh;
 				if (bh)
-					ll_rw_block(READ, 1, &bh);
+					ll_rw_block(READ_META, 1, &bh);
 			}
 		}
 		if ((bh = bh_use[ra_ptr++]) == NULL)

+ 56 - 0
fs/fat/dir.c

@@ -20,6 +20,7 @@
 #include <linux/dirent.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/compat.h>
 #include <asm/uaccess.h>
 
 static inline loff_t fat_make_i_pos(struct super_block *sb,
@@ -741,10 +742,65 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+#define	VFAT_IOCTL_READDIR_BOTH32	_IOR('r', 1, struct compat_dirent[2])
+#define	VFAT_IOCTL_READDIR_SHORT32	_IOR('r', 2, struct compat_dirent[2])
+
+static long fat_compat_put_dirent32(struct dirent *d,
+				    struct compat_dirent __user *d32)
+{
+        if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent)))
+                return -EFAULT;
+
+        __put_user(d->d_ino, &d32->d_ino);
+        __put_user(d->d_off, &d32->d_off);
+        __put_user(d->d_reclen, &d32->d_reclen);
+        if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
+		return -EFAULT;
+
+        return 0;
+}
+
+static long fat_compat_dir_ioctl(struct file *file, unsigned cmd,
+				 unsigned long arg)
+{
+	struct compat_dirent __user *p = compat_ptr(arg);
+	int ret;
+	mm_segment_t oldfs = get_fs();
+	struct dirent d[2];
+
+	switch (cmd) {
+	case VFAT_IOCTL_READDIR_BOTH32:
+		cmd = VFAT_IOCTL_READDIR_BOTH;
+		break;
+	case VFAT_IOCTL_READDIR_SHORT32:
+		cmd = VFAT_IOCTL_READDIR_SHORT;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	set_fs(KERNEL_DS);
+	lock_kernel();
+	ret = fat_dir_ioctl(file->f_dentry->d_inode, file,
+			    cmd, (unsigned long) &d);
+	unlock_kernel();
+	set_fs(oldfs);
+	if (ret >= 0) {
+		ret |= fat_compat_put_dirent32(&d[0], p);
+		ret |= fat_compat_put_dirent32(&d[1], p + 1);
+	}
+	return ret;
+}
+#endif /* CONFIG_COMPAT */
+
 const struct file_operations fat_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= fat_readdir,
 	.ioctl		= fat_dir_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= fat_compat_dir_ioctl,
+#endif
 	.fsync		= file_fsync,
 };
 

+ 4 - 5
fs/fs-writeback.c

@@ -22,8 +22,7 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
-
-extern struct super_block *blockdev_superblock;
+#include "internal.h"
 
 /**
  *	__mark_inode_dirty -	internal function
@@ -320,7 +319,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 
 		if (!bdi_cap_writeback_dirty(bdi)) {
 			list_move(&inode->i_list, &sb->s_dirty);
-			if (sb == blockdev_superblock) {
+			if (sb_is_blkdev_sb(sb)) {
 				/*
 				 * Dirty memory-backed blockdev: the ramdisk
 				 * driver does this.  Skip just this inode
@@ -337,14 +336,14 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 
 		if (wbc->nonblocking && bdi_write_congested(bdi)) {
 			wbc->encountered_congestion = 1;
-			if (sb != blockdev_superblock)
+			if (!sb_is_blkdev_sb(sb))
 				break;		/* Skip a congested fs */
 			list_move(&inode->i_list, &sb->s_dirty);
 			continue;		/* Skip a congested blockdev */
 		}
 
 		if (wbc->bdi && bdi != wbc->bdi) {
-			if (sb != blockdev_superblock)
+			if (!sb_is_blkdev_sb(sb))
 				break;		/* fs has the wrong queue */
 			list_move(&inode->i_list, &sb->s_dirty);
 			continue;		/* blockdev has wrong queue */

+ 2 - 6
fs/hfsplus/hfsplus_fs.h

@@ -246,12 +246,8 @@ struct hfsplus_readdir_data {
 
 /* ext2 ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) to support
  * chattr/lsattr */
-#define HFSPLUS_IOC_EXT2_GETFLAGS	_IOR('f', 1, long)
-#define HFSPLUS_IOC_EXT2_SETFLAGS	_IOW('f', 2, long)
-
-#define EXT2_FLAG_IMMUTABLE		0x00000010 /* Immutable file */
-#define EXT2_FLAG_APPEND		0x00000020 /* writes to file may only append */
-#define EXT2_FLAG_NODUMP		0x00000040 /* do not dump file */
+#define HFSPLUS_IOC_EXT2_GETFLAGS	FS_IOC_GETFLAGS
+#define HFSPLUS_IOC_EXT2_SETFLAGS	FS_IOC_SETFLAGS
 
 
 /*
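
HFS+ (and JFS below) now reuse the filesystem-neutral flag names instead of private EXT2_FLAG_* copies. For reference, the shared definitions introduced by "Move common FS-specific ioctls to linux/fs.h" look roughly like this — quoted from memory, so treat the exact values as assumptions (they mirror the historical ext2 flag bits):

	/* include/linux/fs.h (this series): inode flags usable by any fs */
	#define FS_IOC_GETFLAGS		_IOR('f', 1, long)
	#define FS_IOC_SETFLAGS		_IOW('f', 2, long)

	#define FS_IMMUTABLE_FL		0x00000010 /* Immutable file */
	#define FS_APPEND_FL		0x00000020 /* writes may only append */
	#define FS_NODUMP_FL		0x00000040 /* do not dump file */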

+ 8 - 9
fs/hfsplus/ioctl.c

@@ -28,11 +28,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 	case HFSPLUS_IOC_EXT2_GETFLAGS:
 		flags = 0;
 		if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE)
-			flags |= EXT2_FLAG_IMMUTABLE; /* EXT2_IMMUTABLE_FL */
+			flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */
 		if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND)
-			flags |= EXT2_FLAG_APPEND; /* EXT2_APPEND_FL */
+			flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */
 		if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP)
-			flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */
+			flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
 		return put_user(flags, (int __user *)arg);
 	case HFSPLUS_IOC_EXT2_SETFLAGS: {
 		if (IS_RDONLY(inode))
@@ -44,32 +44,31 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		if (get_user(flags, (int __user *)arg))
 			return -EFAULT;
 
-		if (flags & (EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND) ||
+		if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
 		    HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
 			if (!capable(CAP_LINUX_IMMUTABLE))
 				return -EPERM;
 		}
 
 		/* don't silently ignore unsupported ext2 flags */
-		if (flags & ~(EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND|
-			      EXT2_FLAG_NODUMP))
+		if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL))
 			return -EOPNOTSUPP;
 
-		if (flags & EXT2_FLAG_IMMUTABLE) { /* EXT2_IMMUTABLE_FL */
+		if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
 			inode->i_flags |= S_IMMUTABLE;
 			HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
 		} else {
 			inode->i_flags &= ~S_IMMUTABLE;
 			HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE;
 		}
-		if (flags & EXT2_FLAG_APPEND) { /* EXT2_APPEND_FL */
+		if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */
 			inode->i_flags |= S_APPEND;
 			HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND;
 		} else {
 			inode->i_flags &= ~S_APPEND;
 			HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND;
 		}
-		if (flags & EXT2_FLAG_NODUMP) /* EXT2_NODUMP_FL */
+		if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */
 			HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP;
 		else
 			HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP;

+ 0 - 21
fs/inode.c

@@ -362,27 +362,6 @@ int invalidate_inodes(struct super_block * sb)
 }
 
 EXPORT_SYMBOL(invalidate_inodes);
- 
-int __invalidate_device(struct block_device *bdev)
-{
-	struct super_block *sb = get_super(bdev);
-	int res = 0;
-
-	if (sb) {
-		/*
-		 * no need to lock the super, get_super holds the
-		 * read mutex so the filesystem cannot go away
-		 * under us (->put_super runs with the write lock
-		 * hold).
-		 */
-		shrink_dcache_sb(sb);
-		res = invalidate_inodes(sb);
-		drop_super(sb);
-	}
-	invalidate_bdev(bdev, 0);
-	return res;
-}
-EXPORT_SYMBOL(__invalidate_device);
 
 static int can_unuse(struct inode *inode)
 {

+ 55 - 0
fs/internal.h

@@ -0,0 +1,55 @@
+/* fs/ internal definitions
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/ioctl32.h>
+
+struct super_block;
+
+/*
+ * block_dev.c
+ */
+#ifdef CONFIG_BLOCK
+extern struct super_block *blockdev_superblock;
+extern void __init bdev_cache_init(void);
+
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+	return sb == blockdev_superblock;
+}
+
+#else
+static inline void bdev_cache_init(void)
+{
+}
+
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+	return 0;
+}
+#endif
+
+/*
+ * char_dev.c
+ */
+extern void __init chrdev_init(void);
+
+/*
+ * compat_ioctl.c
+ */
+#ifdef CONFIG_COMPAT
+extern struct ioctl_trans ioctl_start[];
+extern int ioctl_table_size;
+#endif
+
+/*
+ * namespace.c
+ */
+extern int copy_mount_options(const void __user *, unsigned long *);

+ 12 - 7
fs/ioprio.c

@@ -1,7 +1,7 @@
 /*
  * fs/ioprio.c
  *
- * Copyright (C) 2004 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk>
  *
  * Helper functions for setting/querying io priorities of processes. The
  * system calls closely mimmick getpriority/setpriority, see the man page for
@@ -47,8 +47,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 	/* see wmb() in current_io_context() */
 	smp_read_barrier_depends();
 
-	if (ioc && ioc->set_ioprio)
-		ioc->set_ioprio(ioc, ioprio);
+	if (ioc)
+		ioc->ioprio_changed = 1;
 
 	task_unlock(task);
 	return 0;
@@ -81,7 +81,12 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 	}
 
 	ret = -ESRCH;
-	read_lock_irq(&tasklist_lock);
+	/*
+	 * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
+	 * so we can't use rcu_read_lock(). See re-copy of ->ioprio
+	 * in copy_process().
+	 */
+	read_lock(&tasklist_lock);
 	switch (which) {
 		case IOPRIO_WHO_PROCESS:
 			if (!who)
@@ -124,7 +129,7 @@ free_uid:
 			ret = -EINVAL;
 	}
 
-	read_unlock_irq(&tasklist_lock);
+	read_unlock(&tasklist_lock);
 	return ret;
 }
 
@@ -170,7 +175,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 	int ret = -ESRCH;
 	int tmpio;
 
-	read_lock_irq(&tasklist_lock);
+	read_lock(&tasklist_lock);
 	switch (which) {
 		case IOPRIO_WHO_PROCESS:
 			if (!who)
@@ -221,7 +226,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 			ret = -EINVAL;
 	}
 
-	read_unlock_irq(&tasklist_lock);
+	read_unlock(&tasklist_lock);
 	return ret;
 }
 
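
Instead of calling back into the I/O scheduler through ioc->set_ioprio, the setter now just raises a flag, and the scheduler picks up the new priority lazily the next time it touches this io_context. A simplified sketch of the consumer side, modelled on CFQ's behaviour in this series (not a verbatim excerpt):

	/* sketch: run in the elevator when it next services this io_context */
	static void example_note_ioprio(struct io_context *ioc)
	{
		if (unlikely(ioc->ioprio_changed)) {
			/* re-read the task's ioprio and rebuild per-queue
			 * priority data before dispatching the request */
			ioc->ioprio_changed = 0;
		}
	}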

+ 7 - 8
fs/jfs/ioctl.c

@@ -6,7 +6,6 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ext2_fs.h>
 #include <linux/ctype.h>
 #include <linux/capability.h>
 #include <linux/time.h>
@@ -22,13 +21,13 @@ static struct {
 	long jfs_flag;
 	long ext2_flag;
 } jfs_map[] = {
-	{JFS_NOATIME_FL, EXT2_NOATIME_FL},
-	{JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL},
-	{JFS_SYNC_FL, EXT2_SYNC_FL},
-	{JFS_SECRM_FL, EXT2_SECRM_FL},
-	{JFS_UNRM_FL, EXT2_UNRM_FL},
-	{JFS_APPEND_FL, EXT2_APPEND_FL},
-	{JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL},
+	{JFS_NOATIME_FL,	FS_NOATIME_FL},
+	{JFS_DIRSYNC_FL,	FS_DIRSYNC_FL},
+	{JFS_SYNC_FL,		FS_SYNC_FL},
+	{JFS_SECRM_FL,		FS_SECRM_FL},
+	{JFS_UNRM_FL,		FS_UNRM_FL},
+	{JFS_APPEND_FL,		FS_APPEND_FL},
+	{JFS_IMMUTABLE_FL,	FS_IMMUTABLE_FL},
 	{0, 0},
 };
 

+ 2 - 0
fs/mpage.c

@@ -693,6 +693,8 @@ out:
  * the call was made get new I/O started against them.  If wbc->sync_mode is
  * WB_SYNC_ALL then we were called for data integrity and we must wait for
  * existing IO to complete.
+ *
+ * If you fix this you should check generic_writepages() also!
  */
 int
 mpage_writepages(struct address_space *mapping,

Too many files changed in this diff, so some files are not shown