
Merge branch 'for-3.16/drivers' of git://git.kernel.dk/linux-block into next

Pull block driver changes from Jens Axboe:
 "Now that the core bits are in, here's the pull request for the driver
  related changes for 3.16.  Nothing out of the ordinary here, mostly
   business as usual.  There are a few pulls of for-3.16/core into this
   branch, which were done when blk-mq was modified after the
   mtip32xx conversion went in.

  The pull request contains:

   - skd and cciss converted to use pci_enable_msix_exact().  From
     Alexander Gordeev.

   - A few mtip32xx fixes from Asai @ Micron.

   - The conversion of mtip32xx from make_request_fn to blk-mq, and a
     later small fix for that conversion on quiescing for non-queued IO.
     From me.

   - A fix for bsg to use an exported function to check whether this
     driver is request based or not.  This needed updating for blk-mq,
     which is request based but does not have a request_fn hook.  From me.

   - Small floppy bug fix from Jiri.

   - A series of cleanups for the cdrom uniform layer from Joe Perches.
     Gets rid of various old ugly macros, making the code conform more
     to the modern coding style.

   - A series of patches for drbd from the drbd crew (Lars Ellenberg and
     Philipp Reisner).

   - A use-after-free fix for null_blk from Ming Lei.

   - Also from Ming Lei is a performance patch for virtio-blk, which can
     net us a 3x win on kvm platforms where world notification is
     expensive (the lock-free notify pattern is sketched just after this
     list).

   - Ming Lei also fixed a stall issue in virtio-blk, due to a race
     between queue start/stop and resource limits.

   - A small batch of fixes for xen-blk{back,front} from Olaf Hering and
     Valentin Priescu"
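
The virtio-blk win comes from moving the host notification out from under
the virtqueue spinlock.  A minimal sketch of the pattern, not the verbatim
patch (the lock name follows the 3.16 driver's vq_lock):

	spin_lock_irqsave(&vblk->vq_lock, flags);
	/* ... add the request to the virtqueue ... */
	if (virtqueue_kick_prepare(vblk->vq)) {
		/* the notify triggers a vmexit ("world switch"), which is
		 * expensive; don't make other CPUs spin on vq_lock meanwhile */
		spin_unlock_irqrestore(&vblk->vq_lock, flags);
		virtqueue_notify(vblk->vq);
	} else
		spin_unlock_irqrestore(&vblk->vq_lock, flags);

virtqueue_kick_prepare() and virtqueue_notify() are the two halves of
virtqueue_kick(), which is what makes this split possible.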

* 'for-3.16/drivers' of git://git.kernel.dk/linux-block: (54 commits)
  block: virtio_blk: don't hold spin lock during world switch
  xen-blkback: defer freeing blkif to avoid blocking xenwatch
  xen blkif.h: fix comment typo in discard-alignment
  xen/blkback: disable discard feature if requested by toolstack
  xen-blkfront: remove type check from blkfront_setup_discard
  floppy: do not corrupt bio.bi_flags when reading block 0
  mtip32xx: move error handling to service thread
  virtio_blk: fix race between start and stop queue
  mtip32xx: stop block hardware queues before quiescing IO
  mtip32xx: blk_mq_init_queue() returns an ERR_PTR
  mtip32xx: convert to use blk-mq
  cdrom: Remove unnecessary prototype for cdrom_get_disc_info
  cdrom: Remove unnecessary prototype for cdrom_mrw_exit
  cdrom: Remove cdrom_count_tracks prototype
  cdrom: Remove cdrom_get_next_writeable prototype
  cdrom: Remove cdrom_get_last_written prototype
  cdrom: Move mmc_ioctls above cdrom_ioctl to remove unnecessary prototype
  cdrom: Remove unnecessary sanitize_format prototype
  cdrom: Remove unnecessary check_for_audio_disc prototype
  cdrom: Remove prototype for open_for_data
  ...
Linus Torvalds, 11 years ago
parent
commit
80081ec309

+ 1 - 1
block/bsg.c

@@ -1008,7 +1008,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
 	/*
 	 * we need a proper transport to send commands, not a stacked device
 	 */
-	if (!q->request_fn)
+	if (!queue_is_rq_based(q))
 		return 0;
 
 	bcd = &q->bsg_dev;
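
For reference, the helper bsg now calls covers both the legacy
->request_fn path and blk-mq queues; in this kernel generation it is
defined in include/linux/blkdev.h roughly as:

	static inline bool queue_is_rq_based(struct request_queue *q)
	{
		/* request-based: an old-style ->request_fn queue, or a
		 * blk-mq queue (->mq_ops set, no ->request_fn) */
		return q->request_fn || q->mq_ops;
	}

so bsg no longer rejects blk-mq devices such as the converted mtip32xx.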

+ 1 - 5
drivers/block/cciss.c

@@ -4080,7 +4080,7 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 		goto default_int_mode;
 
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
-		err = pci_enable_msix(h->pdev, cciss_msix_entries, 4);
+		err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4);
 		if (!err) {
 			h->intr[0] = cciss_msix_entries[0].vector;
 			h->intr[1] = cciss_msix_entries[1].vector;
@@ -4088,10 +4088,6 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 			h->intr[3] = cciss_msix_entries[3].vector;
 			h->msix_vector = 1;
 			return;
-		}
-		if (err > 0) {
-			dev_warn(&h->pdev->dev,
-				"only %d MSI-X vectors available\n", err);
 		} else {
 			dev_warn(&h->pdev->dev,
 				"MSI-X init failed %d\n", err);

+ 10 - 13
drivers/block/drbd/drbd_actlog.c

@@ -29,7 +29,6 @@
 #include <linux/drbd_limits.h>
 #include <linux/dynamic_debug.h>
 #include "drbd_int.h"
-#include "drbd_wrappers.h"
 
 
 enum al_transaction_types {
@@ -204,7 +203,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
 
 	BUG_ON(!bdev->md_bdev);
 
-	drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
+	dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
 	     current->comm, current->pid, __func__,
 	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
 	     (void*)_RET_IP_ );
@@ -276,7 +275,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval
 	return _al_get(device, first, true);
 }
 
-static
 bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
 {
 	/* for bios crossing activity log extent boundaries,
@@ -846,7 +844,7 @@ void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
 	int wake_up = 0;
 	unsigned long flags;
 
-	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
 		drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
@@ -920,7 +918,7 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
 	if (size == 0)
 		return 0;
 
-	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
 		drbd_err(device, "sector: %llus, size: %d\n",
 			(unsigned long long)sector, size);
 		return 0;
@@ -1023,8 +1021,7 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
 	unsigned int enr = BM_SECT_TO_EXT(sector);
 	struct bm_extent *bm_ext;
 	int i, sig;
-	int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
-			 200 times -> 20 seconds. */
+	bool sa;
 
 retry:
 	sig = wait_event_interruptible(device->al_wait,
@@ -1035,12 +1032,15 @@ retry:
 	if (test_bit(BME_LOCKED, &bm_ext->flags))
 		return 0;
 
+	/* step aside only while we are above c-min-rate; unless disabled. */
+	sa = drbd_rs_c_min_rate_throttle(device);
+
 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
 		sig = wait_event_interruptible(device->al_wait,
 					       !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
-					       test_bit(BME_PRIORITY, &bm_ext->flags));
+					       (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));
 
-		if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
+		if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
 			spin_lock_irq(&device->al_lock);
 			if (lc_put(device->resync, &bm_ext->lce) == 0) {
 				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
@@ -1052,9 +1052,6 @@ retry:
 				return -EINTR;
 			if (schedule_timeout_interruptible(HZ/10))
 				return -EINTR;
-			if (sa && --sa == 0)
-				drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec."
-					 "Resync stalled?\n");
 			goto retry;
 		}
 	}
@@ -1288,7 +1285,7 @@ void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
 	sector_t esector, nr_sectors;
 	int wake_up = 0;
 
-	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
 		drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
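
The resync step-aside policy changes here: the old code let resync yield
to prioritized application IO at most 200 times (200 x HZ/10, about 20
seconds), then grabbed the extent anyway.  The new code asks once per
attempt whether resync is currently fast enough to afford stepping
aside.  A hypothetical sketch of what the new predicate computes (the
real drbd_rs_c_min_rate_throttle() lives in drbd_receiver.c):

	bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
	{
		unsigned int c_min_rate;

		rcu_read_lock();
		c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
		rcu_read_unlock();

		/* c-min-rate 0 disables the feature: never step aside */
		if (c_min_rate == 0)
			return false;

		/* step aside while recent resync throughput exceeds the
		 * configured minimum rate (comparison elided here) */
		return resync_rate_above(device, c_min_rate); /* hypothetical helper */
	}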

+ 85 - 7
drivers/block/drbd/drbd_int.h

@@ -382,6 +382,12 @@ enum {
 	__EE_CALL_AL_COMPLETE_IO,
 	__EE_MAY_SET_IN_SYNC,
 
+	/* is this a TRIM aka REQ_DISCARD? */
+	__EE_IS_TRIM,
+	/* our lower level cannot handle trim,
+	 * and we want to fall back to zeroout instead */
+	__EE_IS_TRIM_USE_ZEROOUT,
+
 	/* In case a barrier failed,
 	 * we need to resubmit without the barrier flag. */
 	__EE_RESUBMITTED,
@@ -405,7 +411,9 @@ enum {
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
-#define	EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
+#define EE_IS_TRIM             (1<<__EE_IS_TRIM)
+#define EE_IS_TRIM_USE_ZEROOUT (1<<__EE_IS_TRIM_USE_ZEROOUT)
+#define EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
 #define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
 #define EE_RESTART_REQUESTS	(1<<__EE_RESTART_REQUESTS)
@@ -579,6 +587,7 @@ struct drbd_resource {
 	struct list_head resources;
 	struct res_opts res_opts;
 	struct mutex conf_update;	/* mutex for ready-copy-update of net_conf and disk_conf */
+	struct mutex adm_mutex;		/* mutex to serialize administrative requests */
 	spinlock_t req_lock;
 
 	unsigned susp:1;		/* IO suspended by user */
@@ -609,6 +618,7 @@ struct drbd_connection {
 	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
 	struct drbd_socket meta;	/* ping/ack (metadata) packets */
 	int agreed_pro_version;		/* actually used protocol version */
+	u32 agreed_features;
 	unsigned long last_received;	/* in jiffies, either socket */
 	unsigned int ko_count;
 
@@ -814,6 +824,28 @@ struct drbd_device {
 	struct submit_worker submit;
 };
 
+struct drbd_config_context {
+	/* assigned from drbd_genlmsghdr */
+	unsigned int minor;
+	/* assigned from request attributes, if present */
+	unsigned int volume;
+#define VOLUME_UNSPECIFIED		(-1U)
+	/* pointer into the request skb,
+	 * limited lifetime! */
+	char *resource_name;
+	struct nlattr *my_addr;
+	struct nlattr *peer_addr;
+
+	/* reply buffer */
+	struct sk_buff *reply_skb;
+	/* pointer into reply buffer */
+	struct drbd_genlmsghdr *reply_dh;
+	/* resolved from attributes, if possible */
+	struct drbd_device *device;
+	struct drbd_resource *resource;
+	struct drbd_connection *connection;
+};
+
 static inline struct drbd_device *minor_to_device(unsigned int minor)
 {
 	return (struct drbd_device *)idr_find(&drbd_devices, minor);
@@ -821,7 +853,7 @@ static inline struct drbd_device *minor_to_device(unsigned int minor)
 
 static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device)
 {
-	return list_first_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
+	return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
 }
 
 #define for_each_resource(resource, _resources) \
@@ -1139,6 +1171,12 @@ struct bm_extent {
 #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
 #define DRBD_MAX_BIO_SIZE_P95    (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */
 
+/* For now, don't allow more than one activity log extent worth of data
+ * to be discarded in one go. We may need to rework drbd_al_begin_io()
+ * to allow for even larger discard ranges */
+#define DRBD_MAX_DISCARD_SIZE	AL_EXTENT_SIZE
+#define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9)
+
 extern int  drbd_bm_init(struct drbd_device *device);
 extern int  drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
 extern void drbd_bm_cleanup(struct drbd_device *device);
@@ -1229,9 +1267,9 @@ extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 extern rwlock_t global_state_lock;
 
 extern int conn_lowest_minor(struct drbd_connection *connection);
-enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr);
+extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
 extern void drbd_destroy_device(struct kref *kref);
-extern void drbd_delete_device(struct drbd_device *mdev);
+extern void drbd_delete_device(struct drbd_device *device);
 
 extern struct drbd_resource *drbd_create_resource(const char *name);
 extern void drbd_free_resource(struct drbd_resource *resource);
@@ -1257,7 +1295,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 
 
 /* drbd_nl.c */
-extern int drbd_msg_put_info(const char *info);
+extern int drbd_msg_put_info(struct sk_buff *skb, const char *info);
 extern void drbd_suspend_io(struct drbd_device *device);
 extern void drbd_resume_io(struct drbd_device *device);
 extern char *ppsize(char *buf, unsigned long long size);
@@ -1283,6 +1321,10 @@ extern void conn_try_outdate_peer_async(struct drbd_connection *connection);
 extern int drbd_khelper(struct drbd_device *device, char *cmd);
 
 /* drbd_worker.c */
+/* bi_end_io handlers */
+extern void drbd_md_io_complete(struct bio *bio, int error);
+extern void drbd_peer_request_endio(struct bio *bio, int error);
+extern void drbd_request_endio(struct bio *bio, int error);
 extern int drbd_worker(struct drbd_thread *thi);
 enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor);
 void drbd_resync_after_changed(struct drbd_device *device);
@@ -1332,16 +1374,20 @@ extern int w_start_resync(struct drbd_work *, int);
 extern void resync_timer_fn(unsigned long data);
 extern void start_resync_timer_fn(unsigned long data);
 
+extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
+
 /* drbd_receiver.c */
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_asender(struct drbd_thread *thi);
-extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
 extern int drbd_submit_peer_request(struct drbd_device *,
 				    struct drbd_peer_request *, const unsigned,
 				    const int);
 extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
 extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
 						     sector_t, unsigned int,
+						     bool,
 						     gfp_t) __must_hold(local);
 extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
 				 int);
@@ -1401,6 +1447,37 @@ static inline void drbd_tcp_quickack(struct socket *sock)
 			(char*)&val, sizeof(val));
 }
 
+/* sets the number of 512 byte sectors of our virtual device */
+static inline void drbd_set_my_capacity(struct drbd_device *device,
+					sector_t size)
+{
+	/* set_capacity(device->this_bdev->bd_disk, size); */
+	set_capacity(device->vdisk, size);
+	device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
+}
+
+/*
+ * used to submit our private bio
+ */
+static inline void drbd_generic_make_request(struct drbd_device *device,
+					     int fault_type, struct bio *bio)
+{
+	__release(local);
+	if (!bio->bi_bdev) {
+		printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
+				"bio->bi_bdev == NULL\n",
+		       device_to_minor(device));
+		dump_stack();
+		bio_endio(bio, -ENODEV);
+		return;
+	}
+
+	if (drbd_insert_fault(device, fault_type))
+		bio_endio(bio, -EIO);
+	else
+		generic_make_request(bio);
+}
+
 void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo);
 
 /* drbd_proc.c */
@@ -1410,6 +1487,7 @@ extern const char *drbd_conn_str(enum drbd_conns s);
 extern const char *drbd_role_str(enum drbd_role s);
 
 /* drbd_actlog.c */
+extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
 extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
 extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
 extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
@@ -2144,7 +2222,7 @@ static inline void drbd_md_flush(struct drbd_device *device)
 
 static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
 {
-	return list_first_entry(&resource->connections,
+	return list_first_entry_or_null(&resource->connections,
 				struct drbd_connection, connections);
 }
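
The switch from list_first_entry() to list_first_entry_or_null() in
first_peer_device() and first_connection() matters during device
creation and teardown: on an empty list, list_first_entry() computes a
bogus pointer from the list head itself, while the _or_null variant
(from include/linux/list.h) makes the emptiness explicit:

	#define list_first_entry_or_null(ptr, type, member) \
		(!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)

Callers can then check for NULL instead of dereferencing garbage.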
 
 

+ 21 - 7
drivers/block/drbd/drbd_main.c

@@ -1607,8 +1607,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, unsigned long b
 		return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
 }
 
-/* Used to send write requests
- * R_PRIMARY -> Peer	(P_DATA)
+/* Used to send write or TRIM aka REQ_DISCARD requests
+ * R_PRIMARY -> Peer	(P_DATA, P_TRIM)
  */
 int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
 {
@@ -1640,6 +1640,16 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
 			dp_flags |= DP_SEND_WRITE_ACK;
 	}
 	p->dp_flags = cpu_to_be32(dp_flags);
+
+	if (dp_flags & DP_DISCARD) {
+		struct p_trim *t = (struct p_trim*)p;
+		t->size = cpu_to_be32(req->i.size);
+		err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
+		goto out;
+	}
+
+	/* our digest is still only over the payload.
+	 * TRIM does not carry any payload. */
 	if (dgs)
 		drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1);
 	err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
@@ -1675,6 +1685,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
 		     ... Be noisy about digest too large ...
 		} */
 	}
+out:
 	mutex_unlock(&sock->mutex);  /* locked by drbd_prepare_command() */
 
 	return err;
@@ -2570,6 +2581,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
 	INIT_LIST_HEAD(&resource->connections);
 	list_add_tail_rcu(&resource->resources, &drbd_resources);
 	mutex_init(&resource->conf_update);
+	mutex_init(&resource->adm_mutex);
 	spin_lock_init(&resource->req_lock);
 	return resource;
 
@@ -2687,14 +2699,16 @@ static int init_submitter(struct drbd_device *device)
 	return 0;
 }
 
-enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr)
+enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
 {
+	struct drbd_resource *resource = adm_ctx->resource;
 	struct drbd_connection *connection;
 	struct drbd_device *device;
 	struct drbd_peer_device *peer_device, *tmp_peer_device;
 	struct gendisk *disk;
 	struct request_queue *q;
 	int id;
+	int vnr = adm_ctx->volume;
 	enum drbd_ret_code err = ERR_NOMEM;
 
 	device = minor_to_device(minor);
@@ -2763,7 +2777,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
 	if (id < 0) {
 		if (id == -ENOSPC) {
 			err = ERR_MINOR_EXISTS;
-			drbd_msg_put_info("requested minor exists already");
+			drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
 		}
 		goto out_no_minor_idr;
 	}
@@ -2773,7 +2787,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
 	if (id < 0) {
 		if (id == -ENOSPC) {
 			err = ERR_MINOR_EXISTS;
-			drbd_msg_put_info("requested minor exists already");
+			drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
 		}
 		goto out_idr_remove_minor;
 	}
@@ -2794,7 +2808,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
 		if (id < 0) {
 			if (id == -ENOSPC) {
 				err = ERR_INVALID_REQUEST;
-				drbd_msg_put_info("requested volume exists already");
+				drbd_msg_put_info(adm_ctx->reply_skb, "requested volume exists already");
 			}
 			goto out_idr_remove_from_resource;
 		}
@@ -2803,7 +2817,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
 
 	if (init_submitter(device)) {
 		err = ERR_NOMEM;
-		drbd_msg_put_info("unable to create submit workqueue");
+		drbd_msg_put_info(adm_ctx->reply_skb, "unable to create submit workqueue");
 		goto out_idr_remove_vol;
 	}
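
P_TRIM reuses the P_DATA packet header and carries only a 32-bit size
instead of a data payload, which is why the code above can cast the
prepared p_data to p_trim and send sizeof(*t) bytes with no payload.
The wire layout, approximately as declared in drbd_protocol.h in this
series:

	struct p_trim {
		struct p_data p_data;	/* sector, block_id, seq_num, dp_flags */
		u32 size;		/* == bio->bi_size of the discard */
	} __packed;

Embedding p_data as the first member is what makes the cast valid.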
 
 

+ 295 - 190
drivers/block/drbd/drbd_nl.c

@@ -34,7 +34,6 @@
 #include "drbd_int.h"
 #include "drbd_protocol.h"
 #include "drbd_req.h"
-#include "drbd_wrappers.h"
 #include <asm/unaligned.h>
 #include <linux/drbd_limits.h>
 #include <linux/kthread.h>
@@ -82,32 +81,6 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
 /* used blkdev_get_by_path, to claim our meta data device(s) */
 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
 
-/* Configuration is strictly serialized, because generic netlink message
- * processing is strictly serialized by the genl_lock().
- * Which means we can use one static global drbd_config_context struct.
- */
-static struct drbd_config_context {
-	/* assigned from drbd_genlmsghdr */
-	unsigned int minor;
-	/* assigned from request attributes, if present */
-	unsigned int volume;
-#define VOLUME_UNSPECIFIED		(-1U)
-	/* pointer into the request skb,
-	 * limited lifetime! */
-	char *resource_name;
-	struct nlattr *my_addr;
-	struct nlattr *peer_addr;
-
-	/* reply buffer */
-	struct sk_buff *reply_skb;
-	/* pointer into reply buffer */
-	struct drbd_genlmsghdr *reply_dh;
-	/* resolved from attributes, if possible */
-	struct drbd_device *device;
-	struct drbd_resource *resource;
-	struct drbd_connection *connection;
-} adm_ctx;
-
 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
 {
 	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
@@ -117,9 +90,8 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
 
 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
  * reason it could fail was no space in skb, and there are 4k available. */
-int drbd_msg_put_info(const char *info)
+int drbd_msg_put_info(struct sk_buff *skb, const char *info)
 {
-	struct sk_buff *skb = adm_ctx.reply_skb;
 	struct nlattr *nla;
 	int err = -EMSGSIZE;
 
@@ -143,42 +115,46 @@ int drbd_msg_put_info(const char *info)
  * and per-family private info->pointers.
  * But we need to stay compatible with older kernels.
  * If it returns successfully, adm_ctx members are valid.
+ *
+ * At this point, we still rely on the global genl_lock().
+ * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
+ * to add additional synchronization against object destruction/modification.
  */
 #define DRBD_ADM_NEED_MINOR	1
 #define DRBD_ADM_NEED_RESOURCE	2
 #define DRBD_ADM_NEED_CONNECTION 4
-static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
-		unsigned flags)
+static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
+	struct sk_buff *skb, struct genl_info *info, unsigned flags)
 {
 	struct drbd_genlmsghdr *d_in = info->userhdr;
 	const u8 cmd = info->genlhdr->cmd;
 	int err;
 
-	memset(&adm_ctx, 0, sizeof(adm_ctx));
+	memset(adm_ctx, 0, sizeof(*adm_ctx));
 
 	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
 	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
 	       return -EPERM;
 
-	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!adm_ctx.reply_skb) {
+	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!adm_ctx->reply_skb) {
 		err = -ENOMEM;
 		goto fail;
 	}
 
-	adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
+	adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
 					info, &drbd_genl_family, 0, cmd);
 	/* put of a few bytes into a fresh skb of >= 4k will always succeed.
 	 * but anyways */
-	if (!adm_ctx.reply_dh) {
+	if (!adm_ctx->reply_dh) {
 		err = -ENOMEM;
 		goto fail;
 	}
 
-	adm_ctx.reply_dh->minor = d_in->minor;
-	adm_ctx.reply_dh->ret_code = NO_ERROR;
+	adm_ctx->reply_dh->minor = d_in->minor;
+	adm_ctx->reply_dh->ret_code = NO_ERROR;
 
-	adm_ctx.volume = VOLUME_UNSPECIFIED;
+	adm_ctx->volume = VOLUME_UNSPECIFIED;
 	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
 		struct nlattr *nla;
 		/* parse and validate only */
@@ -188,111 +164,131 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 
 		/* It was present, and valid,
 		 * copy it over to the reply skb. */
-		err = nla_put_nohdr(adm_ctx.reply_skb,
+		err = nla_put_nohdr(adm_ctx->reply_skb,
 				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
 				info->attrs[DRBD_NLA_CFG_CONTEXT]);
 		if (err)
 			goto fail;
 
-		/* and assign stuff to the global adm_ctx */
+		/* and assign stuff to the adm_ctx */
 		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
 		if (nla)
-			adm_ctx.volume = nla_get_u32(nla);
+			adm_ctx->volume = nla_get_u32(nla);
 		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
 		if (nla)
-			adm_ctx.resource_name = nla_data(nla);
-		adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
-		adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
-		if ((adm_ctx.my_addr &&
-		     nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.connection->my_addr)) ||
-		    (adm_ctx.peer_addr &&
-		     nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.connection->peer_addr))) {
+			adm_ctx->resource_name = nla_data(nla);
+		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
+		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
+		if ((adm_ctx->my_addr &&
+		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
+		    (adm_ctx->peer_addr &&
+		     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
 			err = -EINVAL;
 			goto fail;
 		}
 	}
 
-	adm_ctx.minor = d_in->minor;
-	adm_ctx.device = minor_to_device(d_in->minor);
-	if (adm_ctx.resource_name) {
-		adm_ctx.resource = drbd_find_resource(adm_ctx.resource_name);
+	adm_ctx->minor = d_in->minor;
+	adm_ctx->device = minor_to_device(d_in->minor);
+
+	/* We are protected by the global genl_lock().
+	 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
+	 * so make sure this object stays around. */
+	if (adm_ctx->device)
+		kref_get(&adm_ctx->device->kref);
+
+	if (adm_ctx->resource_name) {
+		adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
 	}
 
-	if (!adm_ctx.device && (flags & DRBD_ADM_NEED_MINOR)) {
-		drbd_msg_put_info("unknown minor");
+	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
+		drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
 		return ERR_MINOR_INVALID;
 	}
-	if (!adm_ctx.resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
-		drbd_msg_put_info("unknown resource");
-		if (adm_ctx.resource_name)
+	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
+		drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
+		if (adm_ctx->resource_name)
 			return ERR_RES_NOT_KNOWN;
 		return ERR_INVALID_REQUEST;
 	}
 
 	if (flags & DRBD_ADM_NEED_CONNECTION) {
-		if (adm_ctx.resource) {
-			drbd_msg_put_info("no resource name expected");
+		if (adm_ctx->resource) {
+			drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
 			return ERR_INVALID_REQUEST;
 		}
-		if (adm_ctx.device) {
-			drbd_msg_put_info("no minor number expected");
+		if (adm_ctx->device) {
+			drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
 			return ERR_INVALID_REQUEST;
 		}
-		if (adm_ctx.my_addr && adm_ctx.peer_addr)
-			adm_ctx.connection = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
-							  nla_len(adm_ctx.my_addr),
-							  nla_data(adm_ctx.peer_addr),
-							  nla_len(adm_ctx.peer_addr));
-		if (!adm_ctx.connection) {
-			drbd_msg_put_info("unknown connection");
+		if (adm_ctx->my_addr && adm_ctx->peer_addr)
+			adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
+							  nla_len(adm_ctx->my_addr),
+							  nla_data(adm_ctx->peer_addr),
+							  nla_len(adm_ctx->peer_addr));
+		if (!adm_ctx->connection) {
+			drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
 			return ERR_INVALID_REQUEST;
 		}
 	}
 
 	/* some more paranoia, if the request was over-determined */
-	if (adm_ctx.device && adm_ctx.resource &&
-	    adm_ctx.device->resource != adm_ctx.resource) {
+	if (adm_ctx->device && adm_ctx->resource &&
+	    adm_ctx->device->resource != adm_ctx->resource) {
 		pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
-				adm_ctx.minor, adm_ctx.resource->name,
-				adm_ctx.device->resource->name);
-		drbd_msg_put_info("minor exists in different resource");
+				adm_ctx->minor, adm_ctx->resource->name,
+				adm_ctx->device->resource->name);
+		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
 		return ERR_INVALID_REQUEST;
 	}
-	if (adm_ctx.device &&
-	    adm_ctx.volume != VOLUME_UNSPECIFIED &&
-	    adm_ctx.volume != adm_ctx.device->vnr) {
+	if (adm_ctx->device &&
+	    adm_ctx->volume != VOLUME_UNSPECIFIED &&
+	    adm_ctx->volume != adm_ctx->device->vnr) {
 		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
-				adm_ctx.minor, adm_ctx.volume,
-				adm_ctx.device->vnr,
-				adm_ctx.device->resource->name);
-		drbd_msg_put_info("minor exists as different volume");
+				adm_ctx->minor, adm_ctx->volume,
+				adm_ctx->device->vnr,
+				adm_ctx->device->resource->name);
+		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
 		return ERR_INVALID_REQUEST;
 	}
 
+	/* still, provide adm_ctx->resource always, if possible. */
+	if (!adm_ctx->resource) {
+		adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
+			: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
+		if (adm_ctx->resource)
+			kref_get(&adm_ctx->resource->kref);
+	}
+
 	return NO_ERROR;
 
 fail:
-	nlmsg_free(adm_ctx.reply_skb);
-	adm_ctx.reply_skb = NULL;
+	nlmsg_free(adm_ctx->reply_skb);
+	adm_ctx->reply_skb = NULL;
 	return err;
 }
 
-static int drbd_adm_finish(struct genl_info *info, int retcode)
+static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
+	struct genl_info *info, int retcode)
 {
-	if (adm_ctx.connection) {
-		kref_put(&adm_ctx.connection->kref, drbd_destroy_connection);
-		adm_ctx.connection = NULL;
+	if (adm_ctx->device) {
+		kref_put(&adm_ctx->device->kref, drbd_destroy_device);
+		adm_ctx->device = NULL;
 	}
-	if (adm_ctx.resource) {
-		kref_put(&adm_ctx.resource->kref, drbd_destroy_resource);
-		adm_ctx.resource = NULL;
+	if (adm_ctx->connection) {
+		kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
+		adm_ctx->connection = NULL;
+	}
+	if (adm_ctx->resource) {
+		kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
+		adm_ctx->resource = NULL;
 	}
 
-	if (!adm_ctx.reply_skb)
+	if (!adm_ctx->reply_skb)
 		return -ENOMEM;
 
-	adm_ctx.reply_dh->ret_code = retcode;
-	drbd_adm_send_reply(adm_ctx.reply_skb, info);
+	adm_ctx->reply_dh->ret_code = retcode;
+	drbd_adm_send_reply(adm_ctx->reply_skb, info);
 	return 0;
 }
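
With the context moved from a static global onto the caller's stack,
every adm handler now follows the same bracket: drbd_adm_prepare()
resolves and pins the objects (note the kref_get() calls above),
drbd_adm_finish() drops the references and sends the reply.  Condensed
to its skeleton (drbd_adm_example is a hypothetical handler name):

	int drbd_adm_example(struct sk_buff *skb, struct genl_info *info)
	{
		struct drbd_config_context adm_ctx;	/* on stack, not global */
		enum drbd_ret_code retcode;

		retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
		if (!adm_ctx.reply_skb)
			return retcode;
		if (retcode != NO_ERROR)
			goto out;
		/* ... operate on adm_ctx.device / adm_ctx.resource ... */
	out:
		drbd_adm_finish(&adm_ctx, info, retcode);	/* puts krefs, replies */
		return 0;
	}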
 
 
@@ -426,6 +422,14 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connec
 	}
 	rcu_read_unlock();
 
+	if (fp == FP_NOT_AVAIL) {
+		/* IO Suspending works on the whole resource.
+		   Do it only for one device. */
+		vnr = 0;
+		peer_device = idr_get_next(&connection->peer_devices, &vnr);
+		drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
+	}
+
 	return fp;
 }
 
@@ -438,12 +442,13 @@ bool conn_try_outdate_peer(struct drbd_connection *connection)
 	char *ex_to_string;
 	int r;
 
+	spin_lock_irq(&connection->resource->req_lock);
 	if (connection->cstate >= C_WF_REPORT_PARAMS) {
 		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
+		spin_unlock_irq(&connection->resource->req_lock);
 		return false;
 	}
 
-	spin_lock_irq(&connection->resource->req_lock);
 	connect_cnt = connection->connect_cnt;
 	spin_unlock_irq(&connection->resource->req_lock);
 
@@ -654,11 +659,11 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
 			put_ldev(device);
 		}
 	} else {
-		mutex_lock(&device->resource->conf_update);
+		/* Called from drbd_adm_set_role only.
+		 * We are still holding the conf_update mutex. */
 		nc = first_peer_device(device)->connection->net_conf;
 		if (nc)
 			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
-		mutex_unlock(&device->resource->conf_update);
 
 		set_disk_ro(device->vdisk, false);
 		if (get_ldev(device)) {
@@ -700,11 +705,12 @@ static const char *from_attrs_err_to_txt(int err)
 
 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct set_role_parms parms;
 	int err;
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -715,17 +721,22 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 		err = set_role_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 			goto out;
 		}
 	}
+	genl_unlock();
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 
 	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
 		retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
 	else
 		retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
+
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	genl_lock();
 out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
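
This is the hunk the kref_get() in drbd_adm_prepare() exists for: a role
change can block for a long time (fencing helpers, state handshakes), so
the handler drops the global genetlink lock and relies on the new
per-resource adm_mutex for serialization instead:

	genl_unlock();				/* stop blocking all genl ops */
	mutex_lock(&adm_ctx.resource->adm_mutex);	/* serialize per resource */
	/* ... the potentially slow drbd_set_role() ... */
	mutex_unlock(&adm_ctx.resource->adm_mutex);
	genl_lock();				/* retake before drbd_adm_finish() */

Without the pinned references, the device could disappear while the
genl_lock is dropped.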
 
 
@@ -1104,15 +1115,18 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
 	struct request_queue * const q = device->rq_queue;
 	struct request_queue * const q = device->rq_queue;
 	unsigned int max_hw_sectors = max_bio_size >> 9;
 	unsigned int max_hw_sectors = max_bio_size >> 9;
 	unsigned int max_segments = 0;
 	unsigned int max_segments = 0;
+	struct request_queue *b = NULL;
 
 
 	if (get_ldev_if_state(device, D_ATTACHING)) {
 	if (get_ldev_if_state(device, D_ATTACHING)) {
-		struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
+		b = device->ldev->backing_bdev->bd_disk->queue;
 
 
 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
 		rcu_read_lock();
 		rcu_read_lock();
 		max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
 		max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
 		rcu_read_unlock();
 		rcu_read_unlock();
-		put_ldev(device);
+
+		blk_set_stacking_limits(&q->limits);
+		blk_queue_max_write_same_sectors(q, 0);
 	}
 	}
 
 
 	blk_queue_logical_block_size(q, 512);
 	blk_queue_logical_block_size(q, 512);
@@ -1121,8 +1135,25 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
 	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
 	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
 	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
 	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
 
 
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
+	if (b) {
+		struct drbd_connection *connection = first_peer_device(device)->connection;
+
+		if (blk_queue_discard(b) &&
+		    (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
+			/* For now, don't allow more than one activity log extent worth of data
+			 * to be discarded in one go. We may need to rework drbd_al_begin_io()
+			 * to allow for even larger discard ranges */
+			q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
+
+			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+			/* REALLY? Is stacking secdiscard "legal"? */
+			if (blk_queue_secdiscard(b))
+				queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
+		} else {
+			q->limits.max_discard_sectors = 0;
+			queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+			queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
+		}
 
 
 		blk_queue_stack_limits(q, b);
 		blk_queue_stack_limits(q, b);
 
 
@@ -1164,8 +1195,14 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
 			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
 			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
 		else
 		else
 			peer = DRBD_MAX_BIO_SIZE;
 			peer = DRBD_MAX_BIO_SIZE;
-	}
 
 
+		/* We may later detach and re-attach on a disconnected Primary.
+		 * Avoid this setting to jump back in that case.
+		 * We want to store what we know the peer DRBD can handle,
+		 * not what the peer IO backend can handle. */
+		if (peer > device->peer_max_bio_size)
+			device->peer_max_bio_size = peer;
+	}
 	new = min(local, peer);
 	new = min(local, peer);
 
 
 	if (device->state.role == R_PRIMARY && new < now)
 	if (device->state.role == R_PRIMARY && new < now)
@@ -1258,19 +1295,21 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
 
 
 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 {
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 	enum drbd_ret_code retcode;
 	struct drbd_device *device;
 	struct drbd_device *device;
 	struct disk_conf *new_disk_conf, *old_disk_conf;
 	struct disk_conf *new_disk_conf, *old_disk_conf;
 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
 	int err, fifo_size;
 	int err, fifo_size;
 
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 	if (!adm_ctx.reply_skb)
 		return retcode;
 		return retcode;
 	if (retcode != NO_ERROR)
 	if (retcode != NO_ERROR)
-		goto out;
+		goto finish;
 
 
 	device = adm_ctx.device;
 	device = adm_ctx.device;
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 
 
 	/* we also need a disk
 	/* we also need a disk
 	 * to change the options on */
 	 * to change the options on */
@@ -1294,7 +1333,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
 	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
 	if (err && err != -ENOMSG) {
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 		goto fail_unlock;
 		goto fail_unlock;
 	}
 	}
 
 
@@ -1385,12 +1424,15 @@ fail_unlock:
 success:
 success:
 	put_ldev(device);
 	put_ldev(device);
  out:
  out:
-	drbd_adm_finish(info, retcode);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
+ finish:
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 	return 0;
 }
 }
 
 
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 {
 {
+	struct drbd_config_context adm_ctx;
 	struct drbd_device *device;
 	struct drbd_device *device;
 	int err;
 	int err;
 	enum drbd_ret_code retcode;
 	enum drbd_ret_code retcode;
@@ -1406,13 +1448,14 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	enum drbd_state_rv rv;
 	enum drbd_state_rv rv;
 	struct net_conf *nc;
 	struct net_conf *nc;
 
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 	if (!adm_ctx.reply_skb)
 		return retcode;
 		return retcode;
 	if (retcode != NO_ERROR)
 	if (retcode != NO_ERROR)
 		goto finish;
 		goto finish;
 
 
 	device = adm_ctx.device;
 	device = adm_ctx.device;
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	conn_reconfig_start(first_peer_device(device)->connection);
 	conn_reconfig_start(first_peer_device(device)->connection);
 
 
 	/* if you want to reconfigure, please tear down first */
 	/* if you want to reconfigure, please tear down first */
@@ -1455,7 +1498,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	err = disk_conf_from_attrs(new_disk_conf, info);
 	err = disk_conf_from_attrs(new_disk_conf, info);
 	if (err) {
 	if (err) {
 		retcode = ERR_MANDATORY_TAG;
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 		goto fail;
 		goto fail;
 	}
 	}
 
 
@@ -1619,7 +1662,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	}
 	}
 
 
 	if (device->state.conn < C_CONNECTED &&
 	if (device->state.conn < C_CONNECTED &&
-	    device->state.role == R_PRIMARY &&
+	    device->state.role == R_PRIMARY && device->ed_uuid &&
 	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
 	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
 		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
 		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
 		    (unsigned long long)device->ed_uuid);
 		    (unsigned long long)device->ed_uuid);
@@ -1797,7 +1840,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
 	put_ldev(device);
 	put_ldev(device);
 	conn_reconfig_done(first_peer_device(device)->connection);
 	conn_reconfig_done(first_peer_device(device)->connection);
-	drbd_adm_finish(info, retcode);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 	return 0;
 
 
  force_diskless_dec:
  force_diskless_dec:
@@ -1819,9 +1863,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	kfree(new_disk_conf);
 	kfree(new_disk_conf);
 	lc_destroy(resync_lru);
 	lc_destroy(resync_lru);
 	kfree(new_plan);
 	kfree(new_plan);
-
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
  finish:
  finish:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -1860,11 +1904,12 @@ out:
  * Only then we have finally detached. */
  * Only then we have finally detached. */
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 	enum drbd_ret_code retcode;
 	struct detach_parms parms = { };
 	struct detach_parms parms = { };
 	int err;
 	int err;
 
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 	if (!adm_ctx.reply_skb)
 		return retcode;
 		return retcode;
 	if (retcode != NO_ERROR)
 	if (retcode != NO_ERROR)
@@ -1874,14 +1919,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 		err = detach_parms_from_attrs(&parms, info);
 		err = detach_parms_from_attrs(&parms, info);
 		if (err) {
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 			goto out;
 			goto out;
 		}
 		}
 	}
 	}
 
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	retcode = adm_detach(adm_ctx.device, parms.force_detach);
 	retcode = adm_detach(adm_ctx.device, parms.force_detach);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
 out:
 out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -2055,6 +2102,7 @@ static void free_crypto(struct crypto *crypto)
 
 
 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 {
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 	enum drbd_ret_code retcode;
 	struct drbd_connection *connection;
 	struct drbd_connection *connection;
 	struct net_conf *old_net_conf, *new_net_conf = NULL;
 	struct net_conf *old_net_conf, *new_net_conf = NULL;
@@ -2063,13 +2111,14 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 	int rsr; /* re-sync running */
 	int rsr; /* re-sync running */
 	struct crypto crypto = { };
 	struct crypto crypto = { };
 
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
 	if (!adm_ctx.reply_skb)
 	if (!adm_ctx.reply_skb)
 		return retcode;
 		return retcode;
 	if (retcode != NO_ERROR)
 	if (retcode != NO_ERROR)
-		goto out;
+		goto finish;
 
 
 	connection = adm_ctx.connection;
 	connection = adm_ctx.connection;
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 
 
 	new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
 	new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
 	if (!new_net_conf) {
 	if (!new_net_conf) {
@@ -2084,7 +2133,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 	old_net_conf = connection->net_conf;
 	old_net_conf = connection->net_conf;
 
 
 	if (!old_net_conf) {
 	if (!old_net_conf) {
-		drbd_msg_put_info("net conf missing, try connect");
+		drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
 		retcode = ERR_INVALID_REQUEST;
 		retcode = ERR_INVALID_REQUEST;
 		goto fail;
 		goto fail;
 	}
 	}
@@ -2096,7 +2145,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 	err = net_conf_from_attrs_for_change(new_net_conf, info);
 	err = net_conf_from_attrs_for_change(new_net_conf, info);
 	if (err && err != -ENOMSG) {
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 		goto fail;
 		goto fail;
 	}
 	}
 
 
@@ -2167,12 +2216,15 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
  done:
  done:
 	conn_reconfig_done(connection);
 	conn_reconfig_done(connection);
  out:
  out:
-	drbd_adm_finish(info, retcode);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
+ finish:
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 	return 0;
 }
 }
 
 
 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct drbd_peer_device *peer_device;
 	struct net_conf *old_net_conf, *new_net_conf = NULL;
 	struct crypto crypto = { };
@@ -2182,14 +2234,14 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	int i;
 	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
 
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
 	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
-		drbd_msg_put_info("connection endpoint(s) missing");
+		drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
 	}
@@ -2215,6 +2267,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	connection = first_connection(adm_ctx.resource);
 	conn_reconfig_start(connection);
 
@@ -2235,7 +2288,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	err = net_conf_from_attrs(new_net_conf, info);
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
@@ -2284,7 +2337,8 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
 
 	conn_reconfig_done(connection);
-	drbd_adm_finish(info, retcode);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 
 fail:
@@ -2292,8 +2346,9 @@ fail:
 	kfree(new_net_conf);
 
 	conn_reconfig_done(connection);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
 out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
@@ -2356,13 +2411,14 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
 
 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct disconnect_parms parms;
 	struct drbd_connection *connection;
 	enum drbd_state_rv rv;
 	enum drbd_ret_code retcode;
 	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -2374,18 +2430,20 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 		err = disconnect_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 			goto fail;
 		}
 	}
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	rv = conn_try_disconnect(connection, parms.force_disconnect);
 	if (rv < SS_SUCCESS)
 		retcode = rv;  /* FIXME: Type mismatch. */
 	else
 		retcode = NO_ERROR;
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
  fail:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
@@ -2407,6 +2465,7 @@ void resync_after_online_grow(struct drbd_device *device)
 
 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
 	struct resize_parms rs;
 	struct drbd_device *device;
@@ -2417,12 +2476,13 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	sector_t u_size;
 	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
-		goto fail;
+		goto finish;
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	device = adm_ctx.device;
 	if (!get_ldev(device)) {
 		retcode = ERR_NO_DISK;
@@ -2436,7 +2496,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 			goto fail_ldev;
 		}
 	}
@@ -2482,7 +2542,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 			goto fail_ldev;
 		}
 
-		if (device->state.conn != C_CONNECTED) {
+		if (device->state.conn != C_CONNECTED && !rs.resize_force) {
 			retcode = ERR_MD_LAYOUT_CONNECTED;
 			goto fail_ldev;
 		}
@@ -2528,7 +2588,9 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	}
 
  fail:
-	drbd_adm_finish(info, retcode);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
+ finish:
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 
  fail_ldev:
@@ -2538,11 +2600,12 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 
 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 	struct res_opts res_opts;
 	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -2555,33 +2618,37 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 	err = res_opts_from_attrs(&res_opts, info);
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	err = set_resource_options(adm_ctx.resource, &res_opts);
 	if (err) {
 		retcode = ERR_INVALID_REQUEST;
 		if (err == -ENOMEM)
 			retcode = ERR_NOMEM;
 	}
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
 
fail:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct drbd_device *device;
 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	device = adm_ctx.device;
 
 	/* If there is still bitmap IO pending, probably because of a previous
@@ -2605,26 +2672,29 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 	} else
 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
 	drbd_resume_io(device);
-
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
 		union drbd_state mask, union drbd_state val)
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	retcode = drbd_request_state(adm_ctx.device, mask, val);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
@@ -2639,15 +2709,17 @@ static int drbd_bmio_set_susp_al(struct drbd_device *device)
 
 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	int retcode; /* drbd_ret_code, drbd_state_rv */
 	struct drbd_device *device;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	device = adm_ctx.device;
 
 	/* If there is still bitmap IO pending, probably because of a previous
@@ -2674,40 +2746,45 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 	} else
 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
 	drbd_resume_io(device);
-
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
 		retcode = ERR_PAUSE_IS_SET;
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	union drbd_dev_state s;
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
 		s = adm_ctx.device->state;
 		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
@@ -2717,9 +2794,9 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
 			retcode = ERR_PAUSE_IS_CLEAR;
 		}
 	}
-
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
@@ -2730,15 +2807,17 @@ int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
 
 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct drbd_device *device;
 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	device = adm_ctx.device;
 	if (test_bit(NEW_CUR_UUID, &device->flags)) {
 		drbd_uuid_new_current(device);
@@ -2753,9 +2832,9 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
 			tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
 	}
 	drbd_resume_io(device);
-
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
@@ -2931,10 +3010,11 @@ nla_put_failure:
 
 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -2946,7 +3026,7 @@ int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
 		return err;
 	}
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
@@ -3133,11 +3213,12 @@ dump:
 
 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 	struct timeout_parms tp;
 	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -3154,17 +3235,18 @@ int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
 		return err;
 	}
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct drbd_device *device;
 	enum drbd_ret_code retcode;
 	struct start_ov_parms parms;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -3179,10 +3261,12 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 		int err = start_ov_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 			goto out;
 		}
 	}
+	mutex_lock(&adm_ctx.resource->adm_mutex);
+
 	/* w_make_ov_request expects position to be aligned */
 	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
 	device->ov_stop_sector = parms.ov_stop_sector;
@@ -3193,21 +3277,24 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
 	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
 	drbd_resume_io(device);
+
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
 
 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct drbd_device *device;
 	enum drbd_ret_code retcode;
 	int skip_initial_sync = 0;
 	int err;
 	struct new_c_uuid_parms args;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -3219,11 +3306,12 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
 		err = new_c_uuid_parms_from_attrs(&args, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
-			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 			goto out_nolock;
 		}
 	}
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
 
 	if (!get_ldev(device)) {
@@ -3268,22 +3356,24 @@ out_dec:
 	put_ldev(device);
out:
 	mutex_unlock(device->state_mutex);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out_nolock:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
 static enum drbd_ret_code
-drbd_check_resource_name(const char *name)
+drbd_check_resource_name(struct drbd_config_context *adm_ctx)
 {
+	const char *name = adm_ctx->resource_name;
 	if (!name || !name[0]) {
-		drbd_msg_put_info("resource name missing");
+		drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
 		return ERR_MANDATORY_TAG;
 	}
 	/* if we want to use these in sysfs/configfs/debugfs some day,
 	 * we must not allow slashes */
 	if (strchr(name, '/')) {
-		drbd_msg_put_info("invalid resource name");
+		drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
 		return ERR_INVALID_REQUEST;
 	}
 	return NO_ERROR;
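
The validation rules above are small enough to exercise standalone. A sketch of the same checks in plain C (check_resource_name here is a local model, not the kernel function):

/* A resource name must be non-empty and must not contain '/', since it may
 * one day be used as a sysfs/configfs/debugfs directory name. */
#include <stdio.h>
#include <string.h>

static const char *check_resource_name(const char *name)
{
	if (!name || !name[0])
		return "resource name missing";
	if (strchr(name, '/'))
		return "invalid resource name";
	return NULL;	/* NO_ERROR */
}

int main(void)
{
	const char *tests[] = { "r0", "", "a/b" };
	unsigned i;

	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
		const char *err = check_resource_name(tests[i]);
		printf("\"%s\" -> %s\n", tests[i], err ? err : "ok");
	}
	return 0;
}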
@@ -3291,11 +3381,12 @@ drbd_check_resource_name(const char *name)
 
 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 	struct res_opts res_opts;
 	int err;
 
-	retcode = drbd_adm_prepare(skb, info, 0);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -3305,48 +3396,50 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 	err = res_opts_from_attrs(&res_opts, info);
 	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
-		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 		goto out;
 	}
 
-	retcode = drbd_check_resource_name(adm_ctx.resource_name);
+	retcode = drbd_check_resource_name(&adm_ctx);
 	if (retcode != NO_ERROR)
 		goto out;
 
 	if (adm_ctx.resource) {
 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
 			retcode = ERR_INVALID_REQUEST;
-			drbd_msg_put_info("resource exists");
+			drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
 		}
 		/* else: still NO_ERROR */
 		goto out;
 	}
 
+	/* not yet safe for genl_family.parallel_ops */
 	if (!conn_create(adm_ctx.resource_name, &res_opts))
 		retcode = ERR_NOMEM;
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct drbd_genlmsghdr *dh = info->userhdr;
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
 
 	if (dh->minor > MINORMASK) {
-		drbd_msg_put_info("requested minor out of range");
+		drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
 	}
 	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
-		drbd_msg_put_info("requested volume id out of range");
+		drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
 	}
@@ -3360,9 +3453,11 @@ int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	retcode = drbd_create_device(adm_ctx.resource, dh->minor, adm_ctx.volume);
+	mutex_lock(&adm_ctx.resource->adm_mutex);
+	retcode = drbd_create_device(&adm_ctx, dh->minor);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
@@ -3383,35 +3478,40 @@ static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
 
 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
 
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 	retcode = adm_del_minor(adm_ctx.device);
+	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
-	drbd_adm_finish(info, retcode);
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct drbd_resource *resource;
 	struct drbd_connection *connection;
 	struct drbd_device *device;
 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
 	unsigned i;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
-		goto out;
+		goto finish;
 
 	resource = adm_ctx.resource;
+	mutex_lock(&resource->adm_mutex);
 	/* demote */
 	for_each_connection(connection, resource) {
 		struct drbd_peer_device *peer_device;
@@ -3419,14 +3519,14 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
 			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
 			if (retcode < SS_SUCCESS) {
-				drbd_msg_put_info("failed to demote");
+				drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
 				goto out;
 			}
 		}
 
 		retcode = conn_try_disconnect(connection, 0);
 		if (retcode < SS_SUCCESS) {
-			drbd_msg_put_info("failed to disconnect");
+			drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
 			goto out;
 		}
 	}
@@ -3435,7 +3535,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 	idr_for_each_entry(&resource->devices, device, i) {
 		retcode = adm_detach(device, 0);
 		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
-			drbd_msg_put_info("failed to detach");
+			drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
 			goto out;
 		}
 	}
@@ -3453,7 +3553,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 		retcode = adm_del_minor(device);
 		if (retcode != NO_ERROR) {
 			/* "can not happen" */
-			drbd_msg_put_info("failed to delete volume");
+			drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
 			goto out;
 		}
 	}
@@ -3462,25 +3562,28 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 	synchronize_rcu();
 	drbd_free_resource(resource);
 	retcode = NO_ERROR;
-
out:
-	drbd_adm_finish(info, retcode);
+	mutex_unlock(&resource->adm_mutex);
+finish:
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 
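drbd_adm_down() above is essentially an ordered teardown: demote every volume to secondary, disconnect every connection, detach the backing disks, delete the volumes, and only then free the resource, bailing out on the first step that fails. A compact sketch of that ordering (all functions here are illustrative stand-ins):

#include <stdio.h>

static int demote(void)		{ puts("demote to secondary"); return 0; }
static int disconnect(void)	{ puts("disconnect peers"); return 0; }
static int detach(void)		{ puts("detach backing disk"); return 0; }
static int del_minor(void)	{ puts("delete volume"); return 0; }

/* Mirror of the drbd_adm_down() sequence: stop on the first failure so
 * nothing is torn down underneath a still-active user. */
static int resource_down(void)
{
	if (demote())		return -1;	/* "failed to demote" */
	if (disconnect())	return -1;	/* "failed to disconnect" */
	if (detach())		return -1;	/* "failed to detach" */
	if (del_minor())	return -1;	/* "failed to delete volume" */
	puts("free resource");
	return 0;
}

int main(void)
{
	return resource_down() ? 1 : 0;
}
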
 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_config_context adm_ctx;
 	struct drbd_resource *resource;
 	struct drbd_connection *connection;
 	enum drbd_ret_code retcode;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
-		goto out;
+		goto finish;
 
 	resource = adm_ctx.resource;
+	mutex_lock(&resource->adm_mutex);
 	for_each_connection(connection, resource) {
 		if (connection->cstate > C_STANDALONE) {
 			retcode = ERR_NET_CONFIGURED;
@@ -3499,7 +3602,9 @@ int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
 	drbd_free_resource(resource);
 	retcode = NO_ERROR;
out:
-	drbd_adm_finish(info, retcode);
+	mutex_unlock(&resource->adm_mutex);
+finish:
+	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
 }
 

+ 0 - 1
drivers/block/drbd/drbd_nla.c

@@ -1,4 +1,3 @@
-#include "drbd_wrappers.h"
 #include <linux/kernel.h>
 #include <net/netlink.h>
 #include <linux/drbd_genl_api.h>

+ 1 - 1
drivers/block/drbd/drbd_proc.c

@@ -116,7 +116,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
 	/* ------------------------ ~18s average ------------------------ */
 	i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS;
 	dt = (jiffies - device->rs_mark_time[i]) / HZ;
-	if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
+	if (dt > 180)
 		stalled = 1;
 
 	if (!dt)
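
For context on the hunk above: dt is jiffies converted to seconds, so the new literal compares elapsed seconds against 180 (three minutes without the sampled resync mark moving) before declaring the resync stalled. A tiny sketch of that heuristic, with jiffies replaced by time() and the threshold assumed to mean seconds:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define STALL_THRESHOLD_SEC 180

/* Caller refreshes mark_time whenever resync makes progress. */
static bool resync_stalled(time_t mark_time, time_t now)
{
	return (now - mark_time) > STALL_THRESHOLD_SEC;
}

int main(void)
{
	time_t now = time(NULL);

	printf("fresh mark: %s\n", resync_stalled(now - 10, now) ? "stalled" : "ok");
	printf("old mark:   %s\n", resync_stalled(now - 200, now) ? "stalled" : "ok");
	return 0;
}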

+ 12 - 0
drivers/block/drbd/drbd_protocol.h

@@ -54,6 +54,11 @@ enum drbd_packet {
 	P_CONN_ST_CHG_REPLY   = 0x2b, /* meta sock: Connection side state req reply */
 	P_RETRY_WRITE	      = 0x2c, /* Protocol C: retry conflicting write request */
 	P_PROTOCOL_UPDATE     = 0x2d, /* data sock: is used in established connections */
+        /* 0x2e to 0x30 reserved, used in drbd 9 */
+
+	/* REQ_DISCARD. We used "discard" in different contexts before,
+	 * which is why I chose TRIM here, to disambiguate. */
+	P_TRIM                = 0x31,
 
 	P_MAY_IGNORE	      = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
 	P_MAX_OPT_CMD	      = 0x101,
@@ -119,6 +124,11 @@ struct p_data {
 	u32	    dp_flags;
 } __packed;
 
+struct p_trim {
+	struct p_data p_data;
+	u32	    size;	/* == bio->bi_size */
+} __packed;
+
 /*
  * commands which share a struct:
  *  p_block_ack:
@@ -150,6 +160,8 @@ struct p_block_req {
  *   ReportParams
  */
 
+#define FF_TRIM      1
+
 struct p_connection_features {
 	u32 protocol_min;
 	u32 feature_flags;
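
struct p_trim rides on the normal p_data header and adds only a byte count; like every DRBD wire field it travels in big-endian order. A self-contained sketch of filling and reading such a field in userspace, with htonl()/ntohl() standing in for cpu_to_be32()/be32_to_cpu() and the header abbreviated:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

struct wire_trim {
	uint64_t sector;	/* abbreviated p_data header field */
	uint32_t size;		/* == bio->bi_size, bytes to discard */
} __attribute__((packed));

int main(void)
{
	struct wire_trim pkt;

	pkt.sector = 0;			/* 64-bit fields need cpu_to_be64 in-kernel */
	pkt.size = htonl(1 << 20);	/* request a 1 MiB discard */

	/* receiver side: convert back before use */
	printf("trim size on receiver: %u bytes\n", ntohl(pkt.size));
	return 0;
}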

+ 138 - 58
drivers/block/drbd/drbd_receiver.c

@@ -46,9 +46,10 @@
 #include "drbd_int.h"
 #include "drbd_protocol.h"
 #include "drbd_req.h"
-
 #include "drbd_vli.h"
 
+#define PRO_FEATURES (FF_TRIM)
+
 struct packet_info {
 	enum drbd_packet cmd;
 	unsigned int size;
@@ -65,7 +66,7 @@ enum finish_epoch {
 static int drbd_do_features(struct drbd_connection *connection);
 static int drbd_do_auth(struct drbd_connection *connection);
 static int drbd_disconnected(struct drbd_peer_device *);
-
+static void conn_wait_active_ee_empty(struct drbd_connection *connection);
 static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
 static int e_end_block(struct drbd_work *, int);
 
@@ -234,9 +235,17 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
  * @retry:	whether to retry, if not enough pages are available right now
  *
  * Tries to allocate number pages, first from our own page pool, then from
- * the kernel, unless this allocation would exceed the max_buffers setting.
+ * the kernel.
  * Possibly retry until DRBD frees sufficient pages somewhere else.
  *
+ * If this allocation would exceed the max_buffers setting, we throttle
+ * allocation (schedule_timeout) to give the system some room to breathe.
+ *
+ * We do not use max-buffers as hard limit, because it could lead to
+ * congestion and further to a distributed deadlock during online-verify or
+ * (checksum based) resync, if the max-buffers, socket buffer sizes and
+ * resync-rate settings are mis-configured.
+ *
  * Returns a page chain linked via page->private.
  */
 struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
@@ -246,10 +255,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
 	struct page *page = NULL;
 	struct net_conf *nc;
 	DEFINE_WAIT(wait);
-	int mxb;
+	unsigned int mxb;
 
-	/* Yes, we may run up to @number over max_buffers. If we
-	 * follow it strictly, the admin will get it wrong anyways. */
 	rcu_read_lock();
 	nc = rcu_dereference(peer_device->connection->net_conf);
 	mxb = nc ? nc->max_buffers : 1000000;
@@ -277,7 +284,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
 			break;
 		}
 
-		schedule();
+		if (schedule_timeout(HZ/10) == 0)
+			mxb = UINT_MAX;
 	}
 	finish_wait(&drbd_pp_wait, &wait);
 
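The hunk above turns max_buffers from a hard allocation limit into a soft one: while over the limit the allocator naps briefly, and once a full wait period passes without relief it lifts the limit entirely (mxb = UINT_MAX) rather than risk the distributed deadlock described in the comment. A userspace model of that throttle (names and the lift-after-one-timeout policy are illustrative simplifications):

#include <limits.h>
#include <stdio.h>
#include <time.h>

static unsigned int pages_in_use;

static void alloc_pages_throttled(unsigned int number, unsigned int max_buffers)
{
	unsigned int mxb = max_buffers;

	while (pages_in_use + number > mxb) {
		struct timespec ts = { 0, 100 * 1000 * 1000 };	/* ~HZ/10 */

		nanosleep(&ts, NULL);	/* give the system room to breathe */
		mxb = UINT_MAX;		/* timed out: treat the limit as advisory */
	}
	pages_in_use += number;
}

int main(void)
{
	pages_in_use = 990;
	alloc_pages_throttled(100, 1000);	/* briefly over limit, then proceeds */
	printf("allocated, now %u pages in use\n", pages_in_use);
	return 0;
}
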
@@ -331,7 +339,7 @@ You must not have the req_lock:
 
 struct drbd_peer_request *
 drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
-		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
+		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
 {
 	struct drbd_device *device = peer_device->device;
 	struct drbd_peer_request *peer_req;
@@ -348,7 +356,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
 		return NULL;
 	}
 
-	if (data_size) {
+	if (has_payload && data_size) {
 		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
 		if (!page)
 			goto fail;
@@ -1026,24 +1034,27 @@ randomize:
 	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
 		return -1;
 
+	/* Prevent a race between resync-handshake and
+	 * being promoted to Primary.
+	 *
+	 * Grab and release the state mutex, so we know that any current
+	 * drbd_set_role() is finished, and any incoming drbd_set_role
+	 * will see the STATE_SENT flag, and wait for it to be cleared.
+	 */
+	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+		mutex_lock(peer_device->device->state_mutex);
+
 	set_bit(STATE_SENT, &connection->flags);
 
+	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+		mutex_unlock(peer_device->device->state_mutex);
+
 	rcu_read_lock();
 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 		struct drbd_device *device = peer_device->device;
 		kref_get(&device->kref);
 		rcu_read_unlock();
 
-		/* Prevent a race between resync-handshake and
-		 * being promoted to Primary.
-		 *
-		 * Grab and release the state mutex, so we know that any current
-		 * drbd_set_role() is finished, and any incoming drbd_set_role
-		 * will see the STATE_SENT flag, and wait for it to be cleared.
-		 */
-		mutex_lock(device->state_mutex);
-		mutex_unlock(device->state_mutex);
-
 		if (discard_my_data)
 			set_bit(DISCARD_MY_DATA, &device->flags);
 		else
@@ -1315,6 +1326,20 @@ int drbd_submit_peer_request(struct drbd_device *device,
 	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
 	int err = -ENOMEM;
 
+	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
+		/* wait for all pending IO completions, before we start
+		 * zeroing things out. */
+		conn_wait_active_ee_empty(first_peer_device(device)->connection);
+		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
+			sector, ds >> 9, GFP_NOIO))
+			peer_req->flags |= EE_WAS_ERROR;
+		drbd_endio_write_sec_final(peer_req);
+		return 0;
+	}
+
+	if (peer_req->flags & EE_IS_TRIM)
+		nr_pages = 0; /* discards don't have any payload. */
+
 	/* In most cases, we will only need one bio.  But in case the lower
 	 * level restrictions happen to be different at this offset on this
 	 * side than those of the sending peer, we may need to submit the
@@ -1326,7 +1351,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
next_bio:
 	bio = bio_alloc(GFP_NOIO, nr_pages);
 	if (!bio) {
-		drbd_err(device, "submit_ee: Allocation of a bio failed\n");
+		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
 		goto fail;
 	}
 	/* > peer_req->i.sector, unless this is the first bio */
@@ -1340,6 +1365,11 @@ next_bio:
 	bios = bio;
 	++n_bios;
 
+	if (rw & REQ_DISCARD) {
+		bio->bi_iter.bi_size = ds;
+		goto submit;
+	}
+
 	page_chain_for_each(page) {
 		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
 		if (!bio_add_page(bio, page, len, 0)) {
@@ -1360,8 +1390,9 @@ next_bio:
 		sector += len >> 9;
 		--nr_pages;
 	}
-	D_ASSERT(device, page == NULL);
 	D_ASSERT(device, ds == 0);
+submit:
+	D_ASSERT(device, page == NULL);
 
 	atomic_set(&peer_req->pending_bios, n_bios);
 	do {
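
The submit path above now has three cases: a trim backed by a discard-capable device (one payload-less bio), a trim that must fall back to writing zeroes synchronously, and the usual page-chain write. The fallback decision reduces to this sketch (blockdev and punch_range are illustrative; the kernel uses blk_queue_discard() and blkdev_issue_zeroout()):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct blockdev {
	bool supports_discard;
	unsigned char data[4096];
};

static void punch_range(struct blockdev *bd, unsigned int off, unsigned int len)
{
	if (bd->supports_discard) {
		/* payload-less request; the device unmaps the range itself */
		printf("discard: off=%u len=%u\n", off, len);
		return;
	}
	/* fallback: explicitly write zeroes over the range */
	memset(bd->data + off, 0, len);
	printf("zeroout: off=%u len=%u\n", off, len);
}

int main(void)
{
	struct blockdev bd = { .supports_discard = false };

	memset(bd.data, 0xff, sizeof(bd.data));
	punch_range(&bd, 512, 1024);
	return 0;
}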
@@ -1490,19 +1521,21 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
  * and from receive_Data */
 static struct drbd_peer_request *
 read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
-	      int data_size) __must_hold(local)
+	      struct packet_info *pi) __must_hold(local)
 {
 	struct drbd_device *device = peer_device->device;
 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
 	struct drbd_peer_request *peer_req;
 	struct page *page;
 	int dgs, ds, err;
+	int data_size = pi->size;
 	void *dig_in = peer_device->connection->int_dig_in;
 	void *dig_vv = peer_device->connection->int_dig_vv;
 	unsigned long *data;
+	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
 
 	dgs = 0;
-	if (peer_device->connection->peer_integrity_tfm) {
+	if (!trim && peer_device->connection->peer_integrity_tfm) {
 		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
 		/*
 		 * FIXME: Receive the incoming digest into the receive buffer
@@ -1514,9 +1547,15 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
 		data_size -= dgs;
 	}
 
+	if (trim) {
+		D_ASSERT(peer_device, data_size == 0);
+		data_size = be32_to_cpu(trim->size);
+	}
+
 	if (!expect(IS_ALIGNED(data_size, 512)))
 		return NULL;
-	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
+	/* prepare for larger trim requests. */
+	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
 		return NULL;
 
 	/* even though we trust out peer,
@@ -1532,11 +1571,11 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
 	 * which in turn might block on the other node at this very place.  */
-	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO);
+	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
 	if (!peer_req)
 		return NULL;
 
-	if (!data_size)
+	if (trim)
 		return peer_req;
 
 	ds = data_size;
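
The key trick in read_in_block() above: a P_TRIM packet arrives with no data payload, so the number of affected bytes comes from the trim header rather than from the received payload size. In sketch form (packet layout is illustrative, big-endian on the wire as in DRBD):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

enum cmd { CMD_DATA, CMD_TRIM };

struct pkt {
	enum cmd cmd;
	uint32_t payload_size;	/* bytes that followed the header */
	uint32_t trim_size_be;	/* only meaningful for CMD_TRIM */
};

static uint32_t request_bytes(const struct pkt *pi)
{
	if (pi->cmd == CMD_TRIM)
		return ntohl(pi->trim_size_be);	/* no payload; size from header */
	return pi->payload_size;		/* payload carries the data */
}

int main(void)
{
	struct pkt trim = { CMD_TRIM, 0, htonl(65536) };
	struct pkt data = { CMD_DATA, 4096, 0 };

	printf("trim affects %u bytes\n", request_bytes(&trim));
	printf("write affects %u bytes\n", request_bytes(&data));
	return 0;
}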
@@ -1676,12 +1715,12 @@ static int e_end_resync_block(struct drbd_work *w, int unused)
 }
 
 static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
-			    int data_size) __releases(local)
+			    struct packet_info *pi) __releases(local)
 {
 	struct drbd_device *device = peer_device->device;
 	struct drbd_peer_request *peer_req;
 
-	peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size);
+	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
 	if (!peer_req)
 		goto fail;
 
@@ -1697,7 +1736,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
 	list_add(&peer_req->w.list, &device->sync_ee);
 	spin_unlock_irq(&device->resource->req_lock);
 
-	atomic_add(data_size >> 9, &device->rs_sect_ev);
+	atomic_add(pi->size >> 9, &device->rs_sect_ev);
 	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
 		return 0;
 
@@ -1785,7 +1824,7 @@ static int receive_RSDataReply(struct drbd_connection *connection, struct packet
 		/* data is submitted to disk within recv_resync_read.
 		 * corresponding put_ldev done below on error,
 		 * or in drbd_peer_request_endio. */
-		err = recv_resync_read(peer_device, sector, pi->size);
+		err = recv_resync_read(peer_device, sector, pi);
 	} else {
 		if (__ratelimit(&drbd_ratelimit_state))
 			drbd_err(device, "Can not write resync data to local disk.\n");
@@ -2196,7 +2235,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 	 */
 
 	sector = be64_to_cpu(p->sector);
-	peer_req = read_in_block(peer_device, p->block_id, sector, pi->size);
+	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
 	if (!peer_req) {
 		put_ldev(device);
 		return -EIO;
@@ -2206,7 +2245,15 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 
 	dp_flags = be32_to_cpu(p->dp_flags);
 	rw |= wire_flags_to_bio(dp_flags);
-	if (peer_req->pages == NULL) {
+	if (pi->cmd == P_TRIM) {
+		struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
+		peer_req->flags |= EE_IS_TRIM;
+		if (!blk_queue_discard(q))
+			peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
+		D_ASSERT(peer_device, peer_req->i.size > 0);
+		D_ASSERT(peer_device, rw & REQ_DISCARD);
+		D_ASSERT(peer_device, peer_req->pages == NULL);
+	} else if (peer_req->pages == NULL) {
 		D_ASSERT(device, peer_req->i.size == 0);
 		D_ASSERT(device, dp_flags & DP_FLUSH);
 	}
@@ -2242,7 +2289,12 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 		update_peer_seq(peer_device, peer_seq);
 		spin_lock_irq(&device->resource->req_lock);
 	}
-	list_add(&peer_req->w.list, &device->active_ee);
+	/* if we use the zeroout fallback code, we process synchronously
+	 * and we wait for all pending requests, respectively wait for
+	 * active_ee to become empty in drbd_submit_peer_request();
+	 * better not add ourselves here. */
+	if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
+		list_add(&peer_req->w.list, &device->active_ee);
 	spin_unlock_irq(&device->resource->req_lock);
 
 	if (device->state.conn == C_SYNC_TARGET)
@@ -2313,39 +2365,45 @@ out_interrupted:
  * The current sync rate used here uses only the most recent two step marks,
  * to have a short time average so we can react faster.
  */
-int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
 {
-	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
-	unsigned long db, dt, dbdt;
 	struct lc_element *tmp;
-	int curr_events;
-	int throttle = 0;
-	unsigned int c_min_rate;
-
-	rcu_read_lock();
-	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
-	rcu_read_unlock();
+	bool throttle = true;
 
-	/* feature disabled? */
-	if (c_min_rate == 0)
-		return 0;
+	if (!drbd_rs_c_min_rate_throttle(device))
+		return false;
 
 	spin_lock_irq(&device->al_lock);
 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
 	if (tmp) {
 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
-		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
-			spin_unlock_irq(&device->al_lock);
-			return 0;
-		}
+		if (test_bit(BME_PRIORITY, &bm_ext->flags))
+			throttle = false;
 		/* Do not slow down if app IO is already waiting for this extent */
 	}
 	spin_unlock_irq(&device->al_lock);
 
+	return throttle;
+}
+
+bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
+{
+	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
+	unsigned long db, dt, dbdt;
+	unsigned int c_min_rate;
+	int curr_events;
+
+	rcu_read_lock();
+	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
+	rcu_read_unlock();
+
+	/* feature disabled? */
+	if (c_min_rate == 0)
+		return false;
+
 	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
 		      (int)part_stat_read(&disk->part0, sectors[1]) -
 			atomic_read(&device->rs_sect_ev);
-
 	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
 		unsigned long rs_left;
 		int i;
@@ -2368,12 +2426,11 @@ int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
 		dbdt = Bit2KB(db/dt);
 
 		if (dbdt > c_min_rate)
-			throttle = 1;
+			return true;
 	}
-	return throttle;
+	return false;
 }
 
-
 static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
 {
 	struct drbd_peer_device *peer_device;
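
The split above separates "is resync currently faster than c-min-rate?" (drbd_rs_c_min_rate_throttle) from "is application IO already waiting on this extent?" (drbd_rs_should_slow_down). The rate half boils down to sector deltas over time: 1 sector is 512 bytes, so KiB transferred is sectors / 2. A sketch of only that comparison, leaving out the application-IO detection the kernel also performs:

#include <stdbool.h>
#include <stdio.h>

static bool rate_above_floor(unsigned long sectors_synced,
			     unsigned long seconds,
			     unsigned int c_min_rate_kb)
{
	unsigned long dbdt;	/* observed resync rate in KiB/s */

	if (c_min_rate_kb == 0)	/* feature disabled */
		return false;
	if (seconds == 0)
		seconds = 1;	/* avoid division by zero */
	dbdt = (sectors_synced / 2) / seconds;
	return dbdt > c_min_rate_kb;
}

int main(void)
{
	/* 40960 sectors in 10 s = 2048 KiB/s, above a 250 KiB/s floor */
	printf("throttle: %d\n", rate_above_floor(40960, 10, 250));
	printf("disabled: %d\n", rate_above_floor(40960, 10, 0));
	return 0;
}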
@@ -2436,7 +2493,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
 	 * which in turn might block on the other node at this very place.  */
-	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO);
+	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
+			true /* has real payload */, GFP_NOIO);
 	if (!peer_req) {
 		put_ldev(device);
 		return -ENOMEM;
@@ -3648,6 +3706,13 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 		put_ldev(device);
 	}
 
+	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
+	drbd_reconsider_max_bio_size(device);
+	/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
+	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
+	   drbd_reconsider_max_bio_size(), we can be sure that after
+	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
+
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(device)) {
 		dd = drbd_determine_dev_size(device, ddsf, NULL);
@@ -3660,9 +3725,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 		drbd_set_my_capacity(device, p_size);
 	}
 
-	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
-	drbd_reconsider_max_bio_size(device);
-
 	if (get_ldev(device)) {
 		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
 			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
@@ -4423,6 +4485,7 @@ static struct data_cmd drbd_cmd_handler[] = {
 	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
 	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
 	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
 	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
+	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
 };
 
 static void drbdd(struct drbd_connection *connection)
@@ -4630,6 +4693,7 @@ static int drbd_send_features(struct drbd_connection *connection)
 	memset(p, 0, sizeof(*p));
 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
+	p->feature_flags = cpu_to_be32(PRO_FEATURES);
 	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
 }
 
@@ -4683,10 +4747,14 @@ static int drbd_do_features(struct drbd_connection *connection)
 		goto incompat;
 
 	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
+	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
 
 	drbd_info(connection, "Handshake successful: "
 	     "Agreed network protocol version %d\n", connection->agreed_pro_version);
 
+	drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
+		  connection->agreed_features & FF_TRIM ? " " : " not ");
+
 	return 1;
 
  incompat:
@@ -4778,6 +4846,12 @@ static int drbd_do_auth(struct drbd_connection *connection)
 		goto fail;
 	}
 
+	if (pi.size < CHALLENGE_LEN) {
+		drbd_err(connection, "AuthChallenge payload too small.\n");
+		rv = -1;
+		goto fail;
+	}
+
 	peers_ch = kmalloc(pi.size, GFP_NOIO);
 	if (peers_ch == NULL) {
 		drbd_err(connection, "kmalloc of peers_ch failed\n");
@@ -4791,6 +4865,12 @@ static int drbd_do_auth(struct drbd_connection *connection)
 		goto fail;
 	}
 
+	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
+		drbd_err(connection, "Peer presented the same challenge!\n");
+		rv = -1;
+		goto fail;
+	}
+
 	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
 	response = kmalloc(resp_size, GFP_NOIO);
 	if (response == NULL) {

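Editor's note: the two checks added to drbd_do_auth() above are defensive. The length check rejects an undersized AuthChallenge payload before it is hashed, and the memcmp() rejects a peer that simply echoes our own challenge back, which could otherwise pass authentication with a response we computed ourselves (a reflection attack). A minimal sketch of the idea, using the names from the hunk:

	/* Sketch only -- validate the peer's challenge before answering it. */
	if (pi.size < CHALLENGE_LEN)
		goto fail;				/* truncated challenge */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN))
		goto fail;				/* reflected challenge */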
+ 60 - 14
drivers/block/drbd/drbd_req.c

@@ -522,6 +522,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
 		break;
 
+	case DISCARD_COMPLETED_NOTSUPP:
+	case DISCARD_COMPLETED_WITH_ERROR:
+		/* I'd rather not detach from local disk just because it
+		 * failed a REQ_DISCARD. */
+		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
+		break;
+
 	case QUEUE_FOR_NET_READ:
 		/* READ or READA, and
 		 * no local disk,
@@ -1235,6 +1242,7 @@ void do_submit(struct work_struct *ws)
 		if (list_empty(&incoming))
 			break;
 
+skip_fast_path:
 		wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
 		/* Maybe more was queued, while we prepared the transaction?
 		 * Try to stuff them into this transaction as well.
@@ -1273,6 +1281,25 @@ void do_submit(struct work_struct *ws)
 			list_del_init(&req->tl_requests);
 			drbd_send_and_submit(device, req);
 		}
+
+		/* If all currently hot activity log extents are kept busy by
+		 * incoming requests, we still must not totally starve new
+		 * requests to cold extents. In that case, prepare one request
+		 * in blocking mode. */
+		list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
+			list_del_init(&req->tl_requests);
+			req->rq_state |= RQ_IN_ACT_LOG;
+			if (!drbd_al_begin_io_prepare(device, &req->i)) {
+				/* Corresponding extent was hot after all? */
+				drbd_send_and_submit(device, req);
+			} else {
+				/* Found a request to a cold extent.
+				 * Put on "pending" list,
+				 * and try to cumulate with more. */
+				list_add(&req->tl_requests, &pending);
+				goto skip_fast_path;
+			}
+		}
 	}
 }
 
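Editor's note: the block added to do_submit() above keeps requests to cold activity-log extents from starving when all hot extents stay busy. The non-blocking fast path runs first; only a request that actually misses the AL falls back to a blocking prepare, and whatever queued up meanwhile is batched with it. The decision point, as a sketch (names from the hunk):

	/* Sketch: one blocking AL prepare per pass so cold extents progress. */
	if (drbd_al_begin_io_prepare(device, &req->i)) {	/* cold extent */
		list_add(&req->tl_requests, &pending);
		goto skip_fast_path;		/* prepare it in blocking mode */
	}
	drbd_send_and_submit(device, req);	/* extent was hot after all */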
@@ -1326,23 +1353,35 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
 	return limit;
 }
 
-static struct drbd_request *find_oldest_request(struct drbd_connection *connection)
+static void find_oldest_requests(
+		struct drbd_connection *connection,
+		struct drbd_device *device,
+		struct drbd_request **oldest_req_waiting_for_peer,
+		struct drbd_request **oldest_req_waiting_for_disk)
 {
-	/* Walk the transfer log,
-	 * and find the oldest not yet completed request */
 	struct drbd_request *r;
+	*oldest_req_waiting_for_peer = NULL;
+	*oldest_req_waiting_for_disk = NULL;
 	list_for_each_entry(r, &connection->transfer_log, tl_requests) {
-		if (atomic_read(&r->completion_ref))
-			return r;
+		const unsigned s = r->rq_state;
+		if (!*oldest_req_waiting_for_peer
+		&& ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
+			*oldest_req_waiting_for_peer = r;
+
+		if (!*oldest_req_waiting_for_disk
+		&& (s & RQ_LOCAL_PENDING) && r->device == device)
+			*oldest_req_waiting_for_disk = r;
+
+		if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
+			break;
 	}
-	return NULL;
 }
 
 void request_timer_fn(unsigned long data)
 {
 	struct drbd_device *device = (struct drbd_device *) data;
 	struct drbd_connection *connection = first_peer_device(device)->connection;
-	struct drbd_request *req; /* oldest request */
+	struct drbd_request *req_disk, *req_peer; /* oldest request */
 	struct net_conf *nc;
 	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
 	unsigned long now;
@@ -1366,8 +1405,8 @@ void request_timer_fn(unsigned long data)
 	now = jiffies;
 
 	spin_lock_irq(&device->resource->req_lock);
-	req = find_oldest_request(connection);
-	if (!req) {
+	find_oldest_requests(connection, device, &req_peer, &req_disk);
+	if (req_peer == NULL && req_disk == NULL) {
 		spin_unlock_irq(&device->resource->req_lock);
 		mod_timer(&device->request_timer, now + et);
 		return;
@@ -1389,19 +1428,26 @@ void request_timer_fn(unsigned long data)
 	 * ~198 days with 250 HZ, we have a window where the timeout would need
 	 * to expire twice (worst case) to become effective. Good enough.
 	 */
-	if (ent && req->rq_state & RQ_NET_PENDING &&
-		 time_after(now, req->start_time + ent) &&
+	if (ent && req_peer &&
+		 time_after(now, req_peer->start_time + ent) &&
 		!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
 		drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
 		_drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
 	}
-	if (dt && req->rq_state & RQ_LOCAL_PENDING && req->device == device &&
-		 time_after(now, req->start_time + dt) &&
+	if (dt && req_disk &&
+		 time_after(now, req_disk->start_time + dt) &&
 		!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
 		drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
 		__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
 	}
-	nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
+
+	/* Reschedule timer for the nearest not already expired timeout.
+	 * Fallback to now + min(effective network timeout, disk timeout). */
+	ent = (ent && req_peer && time_before(now, req_peer->start_time + ent))
+		? req_peer->start_time + ent : now + et;
+	dt = (dt && req_disk && time_before(now, req_disk->start_time + dt))
+		? req_disk->start_time + dt : now + et;
+	nt = time_before(ent, dt) ? ent : dt;
 	spin_unlock_irq(&connection->resource->req_lock);
 	mod_timer(&device->request_timer, nt);
 }

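Editor's note: request_timer_fn() now tracks the oldest request waiting on the peer and the oldest waiting on the local disk separately, and re-arms the timer for the nearest deadline that has not yet expired instead of the old single start_time + et shot. The re-arm is effectively a min() of two independent deadlines, as a sketch (ent/dt/et as in the function):

	/* Sketch of the rearm arithmetic only. */
	ent = (ent && req_peer && time_before(now, req_peer->start_time + ent))
		? req_peer->start_time + ent : now + et;
	dt  = (dt && req_disk && time_before(now, req_disk->start_time + dt))
		? req_disk->start_time + dt : now + et;
	mod_timer(&device->request_timer, time_before(ent, dt) ? ent : dt);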
+ 4 - 2
drivers/block/drbd/drbd_req.h

@@ -30,7 +30,6 @@
 #include <linux/slab.h>
 #include <linux/drbd.h>
 #include "drbd_int.h"
-#include "drbd_wrappers.h"
 
 /* The request callbacks will be called in irq context by the IDE drivers,
    and in Softirqs/Tasklets/BH context by the SCSI drivers,
@@ -111,11 +110,14 @@ enum drbd_req_event {
 	BARRIER_ACKED, /* in protocol A and B */
 	DATA_RECEIVED, /* (remote read) */
 
+	COMPLETED_OK,
 	READ_COMPLETED_WITH_ERROR,
 	READ_AHEAD_COMPLETED_WITH_ERROR,
 	WRITE_COMPLETED_WITH_ERROR,
+	DISCARD_COMPLETED_NOTSUPP,
+	DISCARD_COMPLETED_WITH_ERROR,
+
 	ABORT_DISK_IO,
-	COMPLETED_OK,
 	RESEND,
 	FAIL_FROZEN_DISK_IO,
 	RESTART_FROZEN_DISK_IO,

+ 20 - 18
drivers/block/drbd/drbd_state.c

@@ -54,8 +54,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
 static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
 static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
-static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
-				       enum sanitize_state_warnings *warn);
+static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
+				       union drbd_state ns, enum sanitize_state_warnings *warn);
 
 static inline bool is_susp(union drbd_state s)
 {
@@ -287,7 +287,7 @@ _req_st_cond(struct drbd_device *device, union drbd_state mask,
 
 	spin_lock_irqsave(&device->resource->req_lock, flags);
 	os = drbd_read_state(device);
-	ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+	ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
 	rv = is_valid_transition(os, ns);
 	if (rv >= SS_SUCCESS)
 		rv = SS_UNKNOWN_ERROR;  /* cont waiting, otherwise fail. */
@@ -333,7 +333,7 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
 
 	spin_lock_irqsave(&device->resource->req_lock, flags);
 	os = drbd_read_state(device);
-	ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+	ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
 	rv = is_valid_transition(os, ns);
 	if (rv < SS_SUCCESS) {
 		spin_unlock_irqrestore(&device->resource->req_lock, flags);
@@ -740,8 +740,8 @@ static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_st
  * When we loose connection, we have to set the state of the peers disk (pdsk)
  * to D_UNKNOWN. This rule and many more along those lines are in this function.
  */
-static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
-				       enum sanitize_state_warnings *warn)
+static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
+				       union drbd_state ns, enum sanitize_state_warnings *warn)
 {
 	enum drbd_fencing_p fp;
 	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
@@ -882,11 +882,13 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st
 	}
 
 	if (fp == FP_STONITH &&
-	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
+	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
+	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
 		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
 
 	if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO &&
-	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
+	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
 		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
 
 	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
@@ -958,7 +960,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
 
 	os = drbd_read_state(device);
 
-	ns = sanitize_state(device, ns, &ssw);
+	ns = sanitize_state(device, os, ns, &ssw);
 	if (ns.i == os.i)
 		return SS_NOTHING_TO_DO;
 
@@ -1656,7 +1658,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma
 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 		struct drbd_device *device = peer_device->device;
 		os = drbd_read_state(device);
-		ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+		ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
 
 		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
 			ns.disk = os.disk;
@@ -1718,7 +1720,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
 		number_of_volumes++;
 		os = drbd_read_state(device);
 		ns = apply_mask_val(os, mask, val);
-		ns = sanitize_state(device, ns, NULL);
+		ns = sanitize_state(device, os, ns, NULL);
 
 		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
 			ns.disk = os.disk;
@@ -1763,19 +1765,19 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
 static enum drbd_state_rv
 _conn_rq_cond(struct drbd_connection *connection, union drbd_state mask, union drbd_state val)
 {
-	enum drbd_state_rv rv;
+	enum drbd_state_rv err, rv = SS_UNKNOWN_ERROR; /* continue waiting */;
 
 	if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &connection->flags))
-		return SS_CW_SUCCESS;
+		rv = SS_CW_SUCCESS;
 
 	if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &connection->flags))
-		return SS_CW_FAILED_BY_PEER;
+		rv = SS_CW_FAILED_BY_PEER;
 
-	rv = conn_is_valid_transition(connection, mask, val, 0);
-	if (rv == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
-		rv = SS_UNKNOWN_ERROR; /* continue waiting */
+	err = conn_is_valid_transition(connection, mask, val, 0);
+	if (err == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
+		return rv;
 
-	return rv;
+	return err;
 }
 
 enum drbd_state_rv

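Editor's note on the drbd_state.c hunks above: passing the old state into sanitize_state() turns the susp_fen/susp_nod rules into edge triggers, so IO is suspended once on entry into the degraded condition instead of being re-asserted by every later state change while the condition persists. The pattern, reduced to a sketch:

	/* Sketch: act on the transition into the bad state, not on the state. */
	bool bad_now    = ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED;
	bool bad_before = os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED;
	if (fp == FP_STONITH && bad_now && !bad_before)
		ns.susp_fen = 1;	/* suspend IO while the fence-peer handler runs */

Relatedly, _conn_rq_cond() now latches the CONN_WD_ST_CHG_OKAY/FAIL flags into rv but reports an invalid transition (err) in preference to them, rather than returning on the first flag it happens to see.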
+ 55 - 52
drivers/block/drbd/drbd_worker.c

@@ -118,7 +118,7 @@ static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __rele
 
 /* writes on behalf of the partner, or resync writes,
  * "submitted" by the receiver, final stage.  */
-static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
+void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
 {
 	unsigned long flags = 0;
 	struct drbd_peer_device *peer_device = peer_req->peer_device;
@@ -150,7 +150,9 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel
 
 	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
 
-	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
+	/* FIXME do we want to detach for failed REQ_DISCARD?
+	 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
+	if (peer_req->flags & EE_WAS_ERROR)
 		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
 
@@ -176,10 +178,12 @@ void drbd_peer_request_endio(struct bio *bio, int error)
 	struct drbd_device *device = peer_req->peer_device->device;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 	int is_write = bio_data_dir(bio) == WRITE;
+	int is_discard = !!(bio->bi_rw & REQ_DISCARD);
 
 	if (error && __ratelimit(&drbd_ratelimit_state))
 		drbd_warn(device, "%s: error=%d s=%llus\n",
-				is_write ? "write" : "read", error,
+				is_write ? (is_discard ? "discard" : "write")
+					: "read", error,
 				(unsigned long long)peer_req->i.sector);
 	if (!error && !uptodate) {
 		if (__ratelimit(&drbd_ratelimit_state))
@@ -263,7 +267,12 @@ void drbd_request_endio(struct bio *bio, int error)
 
 	/* to avoid recursion in __req_mod */
 	if (unlikely(error)) {
-		what = (bio_data_dir(bio) == WRITE)
+		if (bio->bi_rw & REQ_DISCARD)
+			what = (error == -EOPNOTSUPP)
+				? DISCARD_COMPLETED_NOTSUPP
+				: DISCARD_COMPLETED_WITH_ERROR;
+		else
+			what = (bio_data_dir(bio) == WRITE)
 			? WRITE_COMPLETED_WITH_ERROR
 			: (bio_rw(bio) == READ)
 			  ? READ_COMPLETED_WITH_ERROR
@@ -395,7 +404,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
 	/* GFP_TRY, because if there is no memory available right now, this may
 	 * be rescheduled for later. It is "only" background resync, after all. */
 	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
-				       size, GFP_TRY);
+				       size, true /* has real payload */, GFP_TRY);
 	if (!peer_req)
 		goto defer;
 
@@ -492,10 +501,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size)
 	return fb;
 }
 
-static int drbd_rs_controller(struct drbd_device *device)
+static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
 {
 	struct disk_conf *dc;
-	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
 	unsigned int want;     /* The number of sectors we want in the proxy */
 	int req_sect; /* Number of sectors to request in this turn */
 	int correction; /* Number of sectors more we need in the proxy*/
@@ -505,9 +513,6 @@ static int drbd_rs_controller(struct drbd_device *device)
 	int max_sect;
 	struct fifo_buffer *plan;
 
-	sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
-	device->rs_in_flight -= sect_in;
-
 	dc = rcu_dereference(device->ldev->disk_conf);
 	plan = rcu_dereference(device->rs_plan_s);
 
@@ -550,11 +555,16 @@ static int drbd_rs_controller(struct drbd_device *device)
 
 static int drbd_rs_number_requests(struct drbd_device *device)
 {
-	int number;
+	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
+	int number, mxb;
+
+	sect_in = atomic_xchg(&device->rs_sect_in, 0);
+	device->rs_in_flight -= sect_in;
 
 	rcu_read_lock();
+	mxb = drbd_get_max_buffers(device) / 2;
 	if (rcu_dereference(device->rs_plan_s)->size) {
-		number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
+		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
 		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
 	} else {
 		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
@@ -562,8 +572,14 @@ static int drbd_rs_number_requests(struct drbd_device *device)
 	}
 	rcu_read_unlock();
 
-	/* ignore the amount of pending requests, the resync controller should
-	 * throttle down to incoming reply rate soon enough anyways. */
+	/* Don't have more than "max-buffers"/2 in-flight.
+	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
+	 * potentially causing a distributed deadlock on congestion during
+	 * online-verify or (checksum-based) resync, if max-buffers,
+	 * socket buffer sizes and resync rate settings are mis-configured. */
+	if (mxb - device->rs_in_flight < number)
+		number = mxb - device->rs_in_flight;
+
 	return number;
 }
 
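Editor's note: the clamp added to drbd_rs_number_requests() implements the deadlock avoidance its comment describes: if resync requests in flight could exceed roughly half the max-buffers budget, a mis-tuned peer could block in drbd_alloc_pages() while we block waiting for its replies. The arithmetic is simply:

	/* Sketch: mxb = max-buffers / 2; never exceed the remaining budget. */
	if (mxb - device->rs_in_flight < number)
		number = mxb - device->rs_in_flight;	/* may become <= 0 */

which is also why the next hunk makes make_resync_request() requeue on number <= 0 rather than only on number == 0.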
@@ -597,7 +613,7 @@ static int make_resync_request(struct drbd_device *device, int cancel)
 
 	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
 	number = drbd_rs_number_requests(device);
-	if (number == 0)
+	if (number <= 0)
 		goto requeue;
 
 	for (i = 0; i < number; i++) {
@@ -647,7 +663,7 @@ next_sector:
 		 */
 		align = 1;
 		rollback_i = i;
-		for (;;) {
+		while (i < number) {
 			if (size + BM_BLOCK_SIZE > max_bio_size)
 				break;
 
@@ -1670,11 +1686,15 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 	}
 	clear_bit(B_RS_H_DONE, &device->flags);
 
-	write_lock_irq(&global_state_lock);
+	/* req_lock: serialize with drbd_send_and_submit() and others
+	 * global_state_lock: for stable sync-after dependencies */
+	spin_lock_irq(&device->resource->req_lock);
+	write_lock(&global_state_lock);
 	/* Did some connection breakage or IO error race with us? */
 	if (device->state.conn < C_CONNECTED
 	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
-		write_unlock_irq(&global_state_lock);
+		write_unlock(&global_state_lock);
+		spin_unlock_irq(&device->resource->req_lock);
 		mutex_unlock(device->state_mutex);
 		return;
 	}
@@ -1714,7 +1734,8 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 		}
 		_drbd_pause_after(device);
 	}
-	write_unlock_irq(&global_state_lock);
+	write_unlock(&global_state_lock);
+	spin_unlock_irq(&device->resource->req_lock);
 
 	if (r == SS_SUCCESS) {
 		/* reset rs_last_bcast when a resync or verify is started,
@@ -1778,34 +1799,6 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 	mutex_unlock(device->state_mutex);
 }
 
-/* If the resource already closed the current epoch, but we did not
- * (because we have not yet seen new requests), we should send the
- * corresponding barrier now.  Must be checked within the same spinlock
- * that is used to check for new requests. */
-static bool need_to_send_barrier(struct drbd_connection *connection)
-{
-	if (!connection->send.seen_any_write_yet)
-		return false;
-
-	/* Skip barriers that do not contain any writes.
-	 * This may happen during AHEAD mode. */
-	if (!connection->send.current_epoch_writes)
-		return false;
-
-	/* ->req_lock is held when requests are queued on
-	 * connection->sender_work, and put into ->transfer_log.
-	 * It is also held when ->current_tle_nr is increased.
-	 * So either there are already new requests queued,
-	 * and corresponding barriers will be send there.
-	 * Or nothing new is queued yet, so the difference will be 1.
-	 */
-	if (atomic_read(&connection->current_tle_nr) !=
-	    connection->send.current_epoch_nr + 1)
-		return false;
-
-	return true;
-}
-
 static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
 {
 	spin_lock_irq(&queue->q_lock);
@@ -1864,12 +1857,22 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
 			spin_unlock_irq(&connection->resource->req_lock);
 			break;
 		}
-		send_barrier = need_to_send_barrier(connection);
+
+		/* We found nothing new to do, no to-be-communicated request,
+		 * no other work item.  We may still need to close the last
+		 * epoch.  Next incoming request epoch will be connection ->
+		 * current transfer log epoch number.  If that is different
+		 * from the epoch of the last request we communicated, it is
+		 * safe to send the epoch separating barrier now.
+		 */
+		send_barrier =
+			atomic_read(&connection->current_tle_nr) !=
+			connection->send.current_epoch_nr;
 		spin_unlock_irq(&connection->resource->req_lock);
-		if (send_barrier) {
-			drbd_send_barrier(connection);
-			connection->send.current_epoch_nr++;
-		}
+
+		if (send_barrier)
+			maybe_send_barrier(connection,
+					connection->send.current_epoch_nr + 1);
 		schedule();
 		/* may be woken up for other things but new work, too,
 		 * e.g. if the current epoch got closed.
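Editor's note: the open-coded need_to_send_barrier() above is replaced by the epoch comparison in wait_for_work() plus a call to maybe_send_barrier(), which is left to decide whether the epoch actually still needs its separating barrier packet. The invariant tested under req_lock, as a sketch:

	/* Sketch: a barrier is outstanding iff requests advanced the
	 * transfer-log epoch past the last epoch we communicated. */
	send_barrier = atomic_read(&connection->current_tle_nr) !=
		       connection->send.current_epoch_nr;
	if (send_barrier)
		maybe_send_barrier(connection, connection->send.current_epoch_nr + 1);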

+ 0 - 54
drivers/block/drbd/drbd_wrappers.h

@@ -1,54 +0,0 @@
-#ifndef _DRBD_WRAPPERS_H
-#define _DRBD_WRAPPERS_H
-
-#include <linux/ctype.h>
-#include <linux/mm.h>
-#include "drbd_int.h"
-
-/* see get_sb_bdev and bd_claim */
-extern char *drbd_sec_holder;
-
-/* sets the number of 512 byte sectors of our virtual device */
-static inline void drbd_set_my_capacity(struct drbd_device *device,
-					sector_t size)
-{
-	/* set_capacity(device->this_bdev->bd_disk, size); */
-	set_capacity(device->vdisk, size);
-	device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
-}
-
-#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE)
-
-/* bi_end_io handlers */
-extern void drbd_md_io_complete(struct bio *bio, int error);
-extern void drbd_peer_request_endio(struct bio *bio, int error);
-extern void drbd_request_endio(struct bio *bio, int error);
-
-/*
- * used to submit our private bio
- */
-static inline void drbd_generic_make_request(struct drbd_device *device,
-					     int fault_type, struct bio *bio)
-{
-	__release(local);
-	if (!bio->bi_bdev) {
-		printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
-				"bio->bi_bdev == NULL\n",
-		       device_to_minor(device));
-		dump_stack();
-		bio_endio(bio, -ENODEV);
-		return;
-	}
-
-	if (drbd_insert_fault(device, fault_type))
-		bio_endio(bio, -EIO);
-	else
-		generic_make_request(bio);
-}
-
-#ifndef __CHECKER__
-# undef __cond_lock
-# define __cond_lock(x,c) (c)
-#endif
-
-#endif

+ 1 - 1
drivers/block/floppy.c

@@ -3812,7 +3812,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
 	bio.bi_iter.bi_size = size;
 	bio.bi_bdev = bdev;
 	bio.bi_iter.bi_sector = 0;
-	bio.bi_flags = (1 << BIO_QUIET);
+	bio.bi_flags |= (1 << BIO_QUIET);
 	bio.bi_private = &cbdata;
 	bio.bi_end_io = floppy_rb0_cb;
 

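Editor's note: the floppy change is a one-liner but fixes real corruption: assigning bi_flags outright discarded whatever flags were already set on the on-stack bio used to read block 0. OR-ing the bit in preserves them:

	bio.bi_flags |= (1 << BIO_QUIET);	/* set BIO_QUIET, keep existing flags */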
+ 435 - 652
drivers/block/mtip32xx/mtip32xx.c

@@ -31,6 +31,7 @@
 #include <linux/module.h>
 #include <linux/genhd.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/bio.h>
 #include <linux/dma-mapping.h>
 #include <linux/idr.h>
@@ -173,60 +174,34 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
 	return false; /* device present */
 }
 
-/*
- * Obtain an empty command slot.
- *
- * This function needs to be reentrant since it could be called
- * at the same time on multiple CPUs. The allocation of the
- * command slot must be atomic.
- *
- * @port Pointer to the port data structure.
- *
- * return value
- *	>= 0	Index of command slot obtained.
- *	-1	No command slots available.
- */
-static int get_slot(struct mtip_port *port)
+static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 {
-	int slot, i;
-	unsigned int num_command_slots = port->dd->slot_groups * 32;
+	struct request *rq;
 
-	/*
-	 * Try 10 times, because there is a small race here.
-	 *  that's ok, because it's still cheaper than a lock.
-	 *
-	 * Race: Since this section is not protected by lock, same bit
-	 * could be chosen by different process contexts running in
-	 * different processor. So instead of costly lock, we are going
-	 * with loop.
-	 */
-	for (i = 0; i < 10; i++) {
-		slot = find_next_zero_bit(port->allocated,
-					 num_command_slots, 1);
-		if ((slot < num_command_slots) &&
-		    (!test_and_set_bit(slot, port->allocated)))
-			return slot;
-	}
-	dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n");
+	rq = blk_mq_alloc_request(dd->queue, 0, __GFP_WAIT, true);
+	return blk_mq_rq_to_pdu(rq);
+}
 
 
-	return -1;
+static void mtip_put_int_command(struct driver_data *dd, struct mtip_cmd *cmd)
+{
+	blk_put_request(blk_mq_rq_from_pdu(cmd));
 }
 }
 
 
 /*
- * Release a command slot.
- *
- * @port Pointer to the port data structure.
- * @tag  Tag of command to release
- *
- * return value
- *	None
+ * Once we add support for one hctx per mtip group, this will change a bit
  */
  */
-static inline void release_slot(struct mtip_port *port, int tag)
+static struct request *mtip_rq_from_tag(struct driver_data *dd,
+					unsigned int tag)
+{
+	return blk_mq_tag_to_rq(dd->queue->queue_hw_ctx[0], tag);
+}
+
+static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
+					  unsigned int tag)
 {
-	smp_mb__before_clear_bit();
-	clear_bit(tag, port->allocated);
-	smp_mb__after_clear_bit();
+	struct request *rq = mtip_rq_from_tag(dd, tag);
+
+	return blk_mq_rq_to_pdu(rq);
 }
 
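Editor's note: from here on, the mtip32xx conversion hands tag management to blk-mq. The driver's private bitmap and its lock-free retry loop (get_slot()/release_slot() above) are gone: an internal command becomes a request drawn from the reserved tag space via blk_mq_alloc_request(), and a completed hardware tag is translated back to its command through the single hardware context, as the new helpers show. The round trip, as a sketch:

	/* Sketch: blk-mq owns the tags; the driver only translates them.
	 * queue_hw_ctx[0] is valid because the driver registers one hctx. */
	struct request *rq = blk_mq_tag_to_rq(dd->queue->queue_hw_ctx[0], tag);
	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);	/* per-request driver data */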
 /*
@@ -248,93 +223,28 @@ static inline void release_slot(struct mtip_port *port, int tag)
  *	None
  */
 static void mtip_async_complete(struct mtip_port *port,
-				int tag,
-				void *data,
-				int status)
+				int tag, struct mtip_cmd *cmd, int status)
 {
-	struct mtip_cmd *cmd;
-	struct driver_data *dd = data;
-	int unaligned, cb_status = status ? -EIO : 0;
-	void (*func)(void *, int);
+	struct driver_data *dd = port->dd;
+	struct request *rq;
 
 	if (unlikely(!dd) || unlikely(!port))
 		return;
 
-	cmd = &port->commands[tag];
-
 	if (unlikely(status == PORT_IRQ_TF_ERR)) {
 		dev_warn(&port->dd->pdev->dev,
 			"Command tag %d failed due to TFE\n", tag);
 	}
 
-	/* Clear the active flag */
-	atomic_set(&port->commands[tag].active, 0);
+	/* Unmap the DMA scatter list entries */
+	dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents, cmd->direction);
 
-	/* Upper layer callback */
-	func = cmd->async_callback;
-	if (likely(func && cmpxchg(&cmd->async_callback, func, 0) == func)) {
+	rq = mtip_rq_from_tag(dd, tag);
 
-		/* Unmap the DMA scatter list entries */
-		dma_unmap_sg(&dd->pdev->dev,
-			cmd->sg,
-			cmd->scatter_ents,
-			cmd->direction);
+	if (unlikely(cmd->unaligned))
+		up(&port->cmd_slot_unal);
 
-		func(cmd->async_data, cb_status);
-		unaligned = cmd->unaligned;
-
-		/* Clear the allocated bit for the command */
-		release_slot(port, tag);
-
-		if (unlikely(unaligned))
-			up(&port->cmd_slot_unal);
-		else
-			up(&port->cmd_slot);
-	}
-}
-
-/*
- * This function is called for clean the pending command in the
- * command slot during the surprise removal of device and return
- * error to the upper layer.
- *
- * @dd Pointer to the DRIVER_DATA structure.
- *
- * return value
- *	None
- */
-static void mtip_command_cleanup(struct driver_data *dd)
-{
-	int tag = 0;
-	struct mtip_cmd *cmd;
-	struct mtip_port *port = dd->port;
-	unsigned int num_cmd_slots = dd->slot_groups * 32;
-
-	if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
-		return;
-
-	if (!port)
-		return;
-
-	cmd = &port->commands[MTIP_TAG_INTERNAL];
-	if (atomic_read(&cmd->active))
-		if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) &
-					(1 << MTIP_TAG_INTERNAL))
-			if (cmd->comp_func)
-				cmd->comp_func(port, MTIP_TAG_INTERNAL,
-					 cmd->comp_data, -ENODEV);
-
-	while (1) {
-		tag = find_next_bit(port->allocated, num_cmd_slots, tag);
-		if (tag >= num_cmd_slots)
-			break;
-
-		cmd = &port->commands[tag];
-		if (atomic_read(&cmd->active))
-			mtip_async_complete(port, tag, dd, -ENODEV);
-	}
-
-	set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
+	blk_mq_end_io(rq, status ? -EIO : 0);
 }
 
 /*
@@ -388,8 +298,6 @@ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
 {
 	int group = tag >> 5;
 
-	atomic_set(&port->commands[tag].active, 1);
-
 	/* guard SACT and CI registers */
 	spin_lock(&port->cmd_issue_lock[group]);
 	writel((1 << MTIP_TAG_BIT(tag)),
@@ -397,10 +305,6 @@ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
 	writel((1 << MTIP_TAG_BIT(tag)),
 			port->cmd_issue[MTIP_TAG_INDEX(tag)]);
 	spin_unlock(&port->cmd_issue_lock[group]);
-
-	/* Set the command's timeout value.*/
-	port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
-					MTIP_NCQ_COMMAND_TIMEOUT_MS);
 }
 
 /*
@@ -648,131 +552,12 @@ static void print_tags(struct driver_data *dd,
 
 	memset(tagmap, 0, sizeof(tagmap));
 	for (group = SLOTBITS_IN_LONGS; group > 0; group--)
-		tagmap_len = sprintf(tagmap + tagmap_len, "%016lX ",
+		tagmap_len += sprintf(tagmap + tagmap_len, "%016lX ",
 						tagbits[group-1]);
 	dev_warn(&dd->pdev->dev,
 			"%d command(s) %s: tagmap [%s]", cnt, msg, tagmap);
 }
 
-/*
- * Called periodically to see if any read/write commands are
- * taking too long to complete.
- *
- * @data Pointer to the PORT data structure.
- *
- * return value
- *	None
- */
-static void mtip_timeout_function(unsigned long int data)
-{
-	struct mtip_port *port = (struct mtip_port *) data;
-	struct host_to_dev_fis *fis;
-	struct mtip_cmd *cmd;
-	int unaligned, tag, cmdto_cnt = 0;
-	unsigned int bit, group;
-	unsigned int num_command_slots;
-	unsigned long to, tagaccum[SLOTBITS_IN_LONGS];
-	void (*func)(void *, int);
-
-	if (unlikely(!port))
-		return;
-
-	if (unlikely(port->dd->sr))
-		return;
-
-	if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) {
-		mod_timer(&port->cmd_timer,
-			jiffies + msecs_to_jiffies(30000));
-		return;
-	}
-	/* clear the tag accumulator */
-	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
-	num_command_slots = port->dd->slot_groups * 32;
-
-	for (tag = 0; tag < num_command_slots; tag++) {
-		/*
-		 * Skip internal command slot as it has
-		 * its own timeout mechanism
-		 */
-		if (tag == MTIP_TAG_INTERNAL)
-			continue;
-
-		if (atomic_read(&port->commands[tag].active) &&
-		   (time_after(jiffies, port->commands[tag].comp_time))) {
-			group = tag >> 5;
-			bit = tag & 0x1F;
-
-			cmd = &port->commands[tag];
-			fis = (struct host_to_dev_fis *) cmd->command;
-
-			set_bit(tag, tagaccum);
-			cmdto_cnt++;
-			if (cmdto_cnt == 1)
-				set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
-
-			/*
-			 * Clear the completed bit. This should prevent
-			 *  any interrupt handlers from trying to retire
-			 *  the command.
-			 */
-			writel(1 << bit, port->completed[group]);
-
-			/* Clear the active flag for the command */
-			atomic_set(&port->commands[tag].active, 0);
-
-			func = cmd->async_callback;
-			if (func &&
-			    cmpxchg(&cmd->async_callback, func, 0) == func) {
-
-				/* Unmap the DMA scatter list entries */
-				dma_unmap_sg(&port->dd->pdev->dev,
-						cmd->sg,
-						cmd->scatter_ents,
-						cmd->direction);
-
-				func(cmd->async_data, -EIO);
-				unaligned = cmd->unaligned;
-
-				/* Clear the allocated bit for the command. */
-				release_slot(port, tag);
-
-				if (unaligned)
-					up(&port->cmd_slot_unal);
-				else
-					up(&port->cmd_slot);
-			}
-		}
-	}
-
-	if (cmdto_cnt) {
-		print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
-		if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
-			mtip_device_reset(port->dd);
-			wake_up_interruptible(&port->svc_wait);
-		}
-		clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
-	}
-
-	if (port->ic_pause_timer) {
-		to  = port->ic_pause_timer + msecs_to_jiffies(1000);
-		if (time_after(jiffies, to)) {
-			if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
-				port->ic_pause_timer = 0;
-				clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
-				clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
-				clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
-				wake_up_interruptible(&port->svc_wait);
-			}
-
-
-		}
-	}
-
-	/* Restart the timer */
-	mod_timer(&port->cmd_timer,
-		jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
-}
-
 /*
  * Internal command completion callback function.
  *
@@ -789,28 +574,19 @@ static void mtip_timeout_function(unsigned long int data)
  *	None
  */
 static void mtip_completion(struct mtip_port *port,
-			    int tag,
-			    void *data,
-			    int status)
+			    int tag, struct mtip_cmd *command, int status)
 {
-	struct mtip_cmd *command = &port->commands[tag];
-	struct completion *waiting = data;
+	struct completion *waiting = command->comp_data;
 	if (unlikely(status == PORT_IRQ_TF_ERR))
 		dev_warn(&port->dd->pdev->dev,
 			"Internal command %d completed with TFE\n", tag);
 
-	command->async_callback = NULL;
-	command->comp_func = NULL;
-
 	complete(waiting);
 }
 
 static void mtip_null_completion(struct mtip_port *port,
-			    int tag,
-			    void *data,
-			    int status)
+			    int tag, struct mtip_cmd *command, int status)
 {
-	return;
 }
 
 static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
@@ -842,19 +618,16 @@ static void mtip_handle_tfe(struct driver_data *dd)
 
 	port = dd->port;
 
-	/* Stop the timer to prevent command timeouts. */
-	del_timer(&port->cmd_timer);
 	set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
 
 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
 			test_bit(MTIP_TAG_INTERNAL, port->allocated)) {
-		cmd = &port->commands[MTIP_TAG_INTERNAL];
+		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
 
-		atomic_inc(&cmd->active); /* active > 1 indicates error */
 		if (cmd->comp_data && cmd->comp_func) {
 			cmd->comp_func(port, MTIP_TAG_INTERNAL,
-					cmd->comp_data, PORT_IRQ_TF_ERR);
+					cmd, PORT_IRQ_TF_ERR);
 		}
 		goto handle_tfe_exit;
 	}
@@ -866,6 +639,8 @@ static void mtip_handle_tfe(struct driver_data *dd)
 	for (group = 0; group < dd->slot_groups; group++) {
 		completed = readl(port->completed[group]);
 
+		dev_warn(&dd->pdev->dev, "g=%u, comp=%x\n", group, completed);
+
 		/* clear completed status register in the hardware.*/
 		writel(completed, port->completed[group]);
 
@@ -879,15 +654,11 @@ static void mtip_handle_tfe(struct driver_data *dd)
 			if (tag == MTIP_TAG_INTERNAL)
 				continue;
 
-			cmd = &port->commands[tag];
+			cmd = mtip_cmd_from_tag(dd, tag);
 			if (likely(cmd->comp_func)) {
 				set_bit(tag, tagaccum);
 				cmd_cnt++;
-				atomic_set(&cmd->active, 0);
-				cmd->comp_func(port,
-					 tag,
-					 cmd->comp_data,
-					 0);
+				cmd->comp_func(port, tag, cmd, 0);
 			} else {
 				dev_err(&port->dd->pdev->dev,
 					"Missing completion func for tag %d",
@@ -947,11 +718,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 		for (bit = 0; bit < 32; bit++) {
 			reissue = 1;
 			tag = (group << 5) + bit;
-			cmd = &port->commands[tag];
-
-			/* If the active bit is set re-issue the command */
-			if (atomic_read(&cmd->active) == 0)
-				continue;
+			cmd = mtip_cmd_from_tag(dd, tag);
 
 			fis = (struct host_to_dev_fis *)cmd->command;
 
@@ -970,11 +737,9 @@ static void mtip_handle_tfe(struct driver_data *dd)
 					tag,
 					fail_reason != NULL ?
 						fail_reason : "unknown");
-					atomic_set(&cmd->active, 0);
 					if (cmd->comp_func) {
 						cmd->comp_func(port, tag,
-							cmd->comp_data,
-							-ENODATA);
+							cmd, -ENODATA);
 					}
 					continue;
 				}
@@ -997,14 +762,9 @@ static void mtip_handle_tfe(struct driver_data *dd)
 			/* Retire a command that will not be reissued */
 			dev_warn(&port->dd->pdev->dev,
 				"retiring tag %d\n", tag);
-			atomic_set(&cmd->active, 0);
 
 			if (cmd->comp_func)
-				cmd->comp_func(
-					port,
-					tag,
-					cmd->comp_data,
-					PORT_IRQ_TF_ERR);
+				cmd->comp_func(port, tag, cmd, PORT_IRQ_TF_ERR);
 			else
 				dev_warn(&port->dd->pdev->dev,
 					"Bad completion for tag %d\n",
@@ -1017,9 +777,6 @@ handle_tfe_exit:
 	/* clear eh_active */
 	clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
 	wake_up_interruptible(&port->svc_wait);
-
-	mod_timer(&port->cmd_timer,
-		 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
 }
 
 /*
@@ -1048,15 +805,10 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
 			if (unlikely(tag == MTIP_TAG_INTERNAL))
 				continue;
 
-			command = &port->commands[tag];
-			/* make internal callback */
-			if (likely(command->comp_func)) {
-				command->comp_func(
-					port,
-					tag,
-					command->comp_data,
-					0);
-			} else {
+			command = mtip_cmd_from_tag(dd, tag);
+			if (likely(command->comp_func))
+				command->comp_func(port, tag, command, 0);
+			else {
 				dev_dbg(&dd->pdev->dev,
 					"Null completion for tag %d",
 					tag);
@@ -1081,16 +833,13 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
 static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
 {
 	struct mtip_port *port = dd->port;
-	struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
+	struct mtip_cmd *cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 
 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
 	    (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
 		& (1 << MTIP_TAG_INTERNAL))) {
 		if (cmd->comp_func) {
-			cmd->comp_func(port,
-				MTIP_TAG_INTERNAL,
-				cmd->comp_data,
-				0);
+			cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0);
 			return;
 		}
 	}
@@ -1103,8 +852,6 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
  */
 static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
 {
-	if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR)))
-		mtip_handle_tfe(dd);
 
 	if (unlikely(port_stat & PORT_IRQ_CONNECT)) {
 		dev_warn(&dd->pdev->dev,
@@ -1122,6 +869,12 @@ static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
 		dev_warn(&dd->pdev->dev,
 			"Port stat errors %x unhandled\n",
 			(port_stat & ~PORT_IRQ_HANDLED));
+		if (mtip_check_surprise_removal(dd->pdev))
+			return;
+	}
+	if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) {
+		set_bit(MTIP_PF_EH_ACTIVE_BIT, &dd->port->flags);
+		wake_up_interruptible(&dd->port->svc_wait);
 	}
 	}
 }
 
 
 
 
 static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
 {
 {
-	atomic_set(&port->commands[tag].active, 1);
 	writel(1 << MTIP_TAG_BIT(tag),
 	writel(1 << MTIP_TAG_BIT(tag),
 		port->cmd_issue[MTIP_TAG_INDEX(tag)]);
 		port->cmd_issue[MTIP_TAG_INDEX(tag)]);
 }
 }
@@ -1280,6 +1032,8 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 	unsigned int n;
 	unsigned int n;
 	unsigned int active = 1;
 	unsigned int active = 1;
 
 
+
 	to = jiffies + msecs_to_jiffies(timeout);
 	to = jiffies + msecs_to_jiffies(timeout);
 	do {
 	do {
 		if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
 		if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
@@ -1287,8 +1041,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 			msleep(20);
 			msleep(20);
 			continue; /* svc thd is actively issuing commands */
 			continue; /* svc thd is actively issuing commands */
 		}
 		}
+
+		msleep(100);
+		if (mtip_check_surprise_removal(port->dd->pdev))
+			goto err_fault;
 		if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
 		if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
-			return -EFAULT;
+			goto err_fault;
+
 		/*
 		 * Ignore s_active bit 0 of array element 0.
 		 * This bit will always be set
@@ -1299,11 +1058,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 
 		if (!active)
 			break;
-
-		msleep(20);
 	} while (time_before(jiffies, to));
 
+	blk_mq_start_stopped_hw_queues(port->dd->queue, true);
 	return active ? -EBUSY : 0;
+err_fault:
+	blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+	return -EFAULT;
 }
 
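Editor's note: mtip_quiesce_io() now brackets its drain with blk_mq_stop_hw_queues()/blk_mq_start_stopped_hw_queues(), so no new requests can be dispatched while the driver polls the s_active registers, and both the timeout path and the new err_fault (surprise removal / remove pending) path restart the queues. The bracket, as a sketch:

	blk_mq_stop_hw_queues(port->dd->queue);	/* block new dispatches */
	/* poll s_active until idle, timeout, or device removal */
	blk_mq_start_stopped_hw_queues(port->dd->queue, true);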
 /*
@@ -1335,10 +1096,9 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 {
 	struct mtip_cmd_sg *command_sg;
 	DECLARE_COMPLETION_ONSTACK(wait);
-	int rv = 0, ready2go = 1;
-	struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
-	unsigned long to;
+	struct mtip_cmd *int_cmd;
 	struct driver_data *dd = port->dd;
+	int rv = 0;
 
 	/* Make sure the buffer is 8 byte aligned. This is asic specific. */
 	if (buffer & 0x00000007) {
@@ -1346,19 +1106,8 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 		return -EFAULT;
 	}
 
-	to = jiffies + msecs_to_jiffies(timeout);
-	do {
-		ready2go = !test_and_set_bit(MTIP_TAG_INTERNAL,
-						port->allocated);
-		if (ready2go)
-			break;
-		mdelay(100);
-	} while (time_before(jiffies, to));
-	if (!ready2go) {
-		dev_warn(&dd->pdev->dev,
-			"Internal cmd active. new cmd [%02X]\n", fis->command);
-		return -EBUSY;
-	}
+	int_cmd = mtip_get_int_command(dd);
+
 	set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
 	port->ic_pause_timer = 0;
 
@@ -1368,10 +1117,11 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	if (atomic == GFP_KERNEL) {
 		if (fis->command != ATA_CMD_STANDBYNOW1) {
 			/* wait for io to complete if non atomic */
-			if (mtip_quiesce_io(port, 5000) < 0) {
+			if (mtip_quiesce_io(port,
+					MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) {
 				dev_warn(&dd->pdev->dev,
 					"Failed to quiesce IO\n");
-				release_slot(port, MTIP_TAG_INTERNAL);
+				mtip_put_int_command(dd, int_cmd);
 				clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
 				wake_up_interruptible(&port->svc_wait);
 				return -EBUSY;
@@ -1416,9 +1166,9 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 
 	if (atomic == GFP_KERNEL) {
 		/* Wait for the command to complete or timeout. */
-		if (wait_for_completion_interruptible_timeout(
+		if ((rv = wait_for_completion_interruptible_timeout(
 				&wait,
-				msecs_to_jiffies(timeout)) <= 0) {
+				msecs_to_jiffies(timeout))) <= 0) {
 			if (rv == -ERESTARTSYS) { /* interrupted */
 				dev_err(&dd->pdev->dev,
 					"Internal command [%02X] was interrupted after %lu ms\n",
@@ -1497,8 +1247,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	}
 exec_ic_exit:
 	/* Clear the allocated and active bits for the internal command. */
-	atomic_set(&int_cmd->active, 0);
-	release_slot(port, MTIP_TAG_INTERNAL);
+	mtip_put_int_command(dd, int_cmd);
 	if (rv >= 0 && mtip_pause_ncq(port, fis)) {
 	if (rv >= 0 && mtip_pause_ncq(port, fis)) {
 		/* NCQ paused */
 		/* NCQ paused */
 		return rv;
 		return rv;
@@ -1529,6 +1278,37 @@ static inline void ata_swap_string(u16 *buf, unsigned int len)
 		be16_to_cpus(&buf[i]);
 		be16_to_cpus(&buf[i]);
 }
 }
 
 
+static void mtip_set_timeout(struct driver_data *dd,
+					struct host_to_dev_fis *fis,
+					unsigned int *timeout, u8 erasemode)
+{
+	switch (fis->command) {
+	case ATA_CMD_DOWNLOAD_MICRO:
+		*timeout = 120000; /* 2 minutes */
+		break;
+	case ATA_CMD_SEC_ERASE_UNIT:
+	case 0xFC:
+		if (erasemode)
+			*timeout = ((*(dd->port->identify + 90) * 2) * 60000);
+		else
+			*timeout = ((*(dd->port->identify + 89) * 2) * 60000);
+		break;
+	case ATA_CMD_STANDBYNOW1:
+		*timeout = 120000;  /* 2 minutes */
+		break;
+	case 0xF7:
+	case 0xFA:
+		*timeout = 60000;  /* 60 seconds */
+		break;
+	case ATA_CMD_SMART:
+		*timeout = 15000;  /* 15 seconds */
+		break;
+	default:
+		*timeout = MTIP_IOCTL_CMD_TIMEOUT_MS;
+		break;
+	}
+}
+
 /*
  * Request the device identity information.
  *
@@ -1576,7 +1356,7 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
 				sizeof(u16) * ATA_ID_WORDS,
 				0,
 				GFP_KERNEL,
-				MTIP_INTERNAL_COMMAND_TIMEOUT_MS)
+				MTIP_INT_CMD_TIMEOUT_MS)
 				< 0) {
 		rv = -1;
 		goto out;
@@ -1644,6 +1424,7 @@ static int mtip_standby_immediate(struct mtip_port *port)
 	int rv;
 	struct host_to_dev_fis	fis;
 	unsigned long start;
+	unsigned int timeout;
 
 	/* Build the FIS. */
 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
@@ -1651,6 +1432,8 @@ static int mtip_standby_immediate(struct mtip_port *port)
 	fis.opts	= 1 << 7;
 	fis.command	= ATA_CMD_STANDBYNOW1;
 
+	mtip_set_timeout(port->dd, &fis, &timeout, 0);
+
 	start = jiffies;
 	rv = mtip_exec_internal_command(port,
 					&fis,
@@ -1659,7 +1442,7 @@ static int mtip_standby_immediate(struct mtip_port *port)
 					0,
 					0,
 					GFP_ATOMIC,
-					15000);
+					timeout);
 	dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n",
 			jiffies_to_msecs(jiffies - start));
 	if (rv)
@@ -1705,7 +1488,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
 					sectors * ATA_SECT_SIZE,
 					0,
 					GFP_ATOMIC,
-					MTIP_INTERNAL_COMMAND_TIMEOUT_MS);
+					MTIP_INT_CMD_TIMEOUT_MS);
 }
 
 /*
@@ -1998,6 +1781,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command)
 {
 	struct host_to_dev_fis	fis;
 	struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
+	unsigned int to;
 
 	/* Build the FIS. */
 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
@@ -2011,6 +1795,8 @@ static int exec_drive_task(struct mtip_port *port, u8 *command)
 	fis.cyl_hi	= command[5];
 	fis.device	= command[6] & ~0x10; /* Clear the dev bit*/
 
+	mtip_set_timeout(port->dd, &fis, &to, 0);
+
 	dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n",
 		__func__,
 		command[0],
@@ -2029,7 +1815,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command)
 				 0,
 				 0,
 				 GFP_KERNEL,
-				 MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) {
+				 to) < 0) {
 		return -1;
 	}
 
@@ -2069,6 +1855,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
 	u8 *buf = NULL;
 	dma_addr_t dma_addr = 0;
 	int rv = 0, xfer_sz = command[3];
+	unsigned int to;
 
 	if (xfer_sz) {
 		if (!user_buffer)
@@ -2100,6 +1887,8 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
 		fis.cyl_hi	= 0xC2;
 	}
 
+	mtip_set_timeout(port->dd, &fis, &to, 0);
+
 	if (xfer_sz)
 		reply = (port->rxfis + RX_FIS_PIO_SETUP);
 	else
@@ -2122,7 +1911,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
 				 (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0),
 				 0,
 				 GFP_KERNEL,
-				 MTIP_IOCTL_COMMAND_TIMEOUT_MS)
+				 to)
 				 < 0) {
 		rv = -EFAULT;
 		goto exit_drive_command;
@@ -2202,36 +1991,6 @@ static unsigned int implicit_sector(unsigned char command,
 	}
 	return rv;
 }
-static void mtip_set_timeout(struct driver_data *dd,
-					struct host_to_dev_fis *fis,
-					unsigned int *timeout, u8 erasemode)
-{
-	switch (fis->command) {
-	case ATA_CMD_DOWNLOAD_MICRO:
-		*timeout = 120000; /* 2 minutes */
-		break;
-	case ATA_CMD_SEC_ERASE_UNIT:
-	case 0xFC:
-		if (erasemode)
-			*timeout = ((*(dd->port->identify + 90) * 2) * 60000);
-		else
-			*timeout = ((*(dd->port->identify + 89) * 2) * 60000);
-		break;
-	case ATA_CMD_STANDBYNOW1:
-		*timeout = 120000;  /* 2 minutes */
-		break;
-	case 0xF7:
-	case 0xFA:
-		*timeout = 60000;  /* 60 seconds */
-		break;
-	case ATA_CMD_SMART:
-		*timeout = 15000;  /* 15 seconds */
-		break;
-	default:
-		*timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
-		break;
-	}
-}
 
 /*
  * Executes a taskfile
@@ -2606,22 +2365,21 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
  * return value
  *	None
  */
-static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
-			      int nsect, int nents, int tag, void *callback,
-			      void *data, int dir, int unaligned)
+static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
+			      struct mtip_cmd *command, int nents,
+			      struct blk_mq_hw_ctx *hctx)
 {
 	struct host_to_dev_fis	*fis;
 	struct mtip_port *port = dd->port;
-	struct mtip_cmd *command = &port->commands[tag];
-	int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-	u64 start = sector;
+	int dma_dir = rq_data_dir(rq) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	u64 start = blk_rq_pos(rq);
+	unsigned int nsect = blk_rq_sectors(rq);
 
 	/* Map the scatter list for DMA access */
 	nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
 
 	command->scatter_ents = nents;
 
-	command->unaligned = unaligned;
 	/*
 	 * The number of retries for this command before it is
 	 * reported as a failure to the upper layers.
@@ -2632,8 +2390,10 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
 	fis = command->command;
 	fis->type        = 0x27;
 	fis->opts        = 1 << 7;
-	fis->command     =
-		(dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE);
+	if (rq_data_dir(rq) == READ)
+		fis->command = ATA_CMD_FPDMA_READ;
+	else
+		fis->command = ATA_CMD_FPDMA_WRITE;
 	fis->lba_low     = start & 0xFF;
 	fis->lba_mid     = (start >> 8) & 0xFF;
 	fis->lba_hi      = (start >> 16) & 0xFF;
@@ -2643,14 +2403,14 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
 	fis->device	 = 1 << 6;
 	fis->features    = nsect & 0xFF;
 	fis->features_ex = (nsect >> 8) & 0xFF;
-	fis->sect_count  = ((tag << 3) | (tag >> 5));
+	fis->sect_count  = ((rq->tag << 3) | (rq->tag >> 5));
 	fis->sect_cnt_ex = 0;
 	fis->control     = 0;
 	fis->res2        = 0;
 	fis->res3        = 0;
 	fill_command_sg(dd, command, nents);
 
-	if (unaligned)
+	if (command->unaligned)
 		fis->device |= 1 << 7;
 
 	/* Populate the command header */
@@ -2667,82 +2427,18 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
 	command->comp_func = mtip_async_complete;
 	command->direction = dma_dir;
 
-	/*
-	 * Set the completion function and data for the command passed
-	 * from the upper layer.
-	 */
-	command->async_data = data;
-	command->async_callback = callback;
-
 	/*
 	 * To prevent this command from being issued
 	 * if an internal command is in progress or error handling is active.
 	 */
 	if (port->flags & MTIP_PF_PAUSE_IO) {
-		set_bit(tag, port->cmds_to_issue);
+		set_bit(rq->tag, port->cmds_to_issue);
 		set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
 		return;
 	}
 
 	/* Issue the command to the hardware */
-	mtip_issue_ncq_command(port, tag);
-
-	return;
-}
-
-/*
- * Release a command slot.
- *
- * @dd  Pointer to the driver data structure.
- * @tag Slot tag
- *
- * return value
- *      None
- */
-static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag,
-								int unaligned)
-{
-	struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal :
-							&dd->port->cmd_slot;
-	release_slot(dd->port, tag);
-	up(sem);
-}
-
-/*
- * Obtain a command slot and return its associated scatter list.
- *
- * @dd  Pointer to the driver data structure.
- * @tag Pointer to an int that will receive the allocated command
- *            slot tag.
- *
- * return value
- *	Pointer to the scatter list for the allocated command slot
- *	or NULL if no command slots are available.
- */
-static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
-						   int *tag, int unaligned)
-{
-	struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal :
-							&dd->port->cmd_slot;
-
-	/*
-	 * It is possible that, even with this semaphore, a thread
-	 * may think that no command slots are available. Therefore, we
-	 * need to make an attempt to get_slot().
-	 */
-	down(sem);
-	*tag = get_slot(dd->port);
-
-	if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
-		up(sem);
-		return NULL;
-	}
-	if (unlikely(*tag < 0)) {
-		up(sem);
-		return NULL;
-	}
-
-	return dd->port->commands[*tag].sg;
+	mtip_issue_ncq_command(port, rq->tag);
 }
 
 /*
@@ -3113,6 +2809,7 @@ static int mtip_free_orphan(struct driver_data *dd)
 		if (dd->queue) {
 			dd->queue->queuedata = NULL;
 			blk_cleanup_queue(dd->queue);
+			blk_mq_free_tag_set(&dd->tags);
 			dd->queue = NULL;
 		}
 	}
@@ -3270,6 +2967,11 @@ static int mtip_service_thread(void *data)
 	int ret;
 
 	while (1) {
+		if (kthread_should_stop() ||
+			test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
+			goto st_out;
+		clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
+
 		/*
 		 * the condition is to check neither an internal command is
 		 * is in progress nor error handling is active
@@ -3277,11 +2979,12 @@ static int mtip_service_thread(void *data)
 		wait_event_interruptible(port->svc_wait, (port->flags) &&
 			!(port->flags & MTIP_PF_PAUSE_IO));
 
-		if (kthread_should_stop())
-			goto st_out;
-
 		set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
 
+		if (kthread_should_stop() ||
+			test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
+			goto st_out;
+
 		/* If I am an orphan, start self cleanup */
 		if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags))
 			break;
@@ -3290,6 +2993,16 @@ static int mtip_service_thread(void *data)
 				&dd->dd_flag)))
 			goto st_out;
 
+restart_eh:
+		/* Demux bits: start with error handling */
+		if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) {
+			mtip_handle_tfe(dd);
+			clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
+		}
+
+		if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags))
+			goto restart_eh;
+
 		if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
 			slot = 1;
 			/* used to restrict the loop to one iteration */
@@ -3319,16 +3032,14 @@ static int mtip_service_thread(void *data)
 			}
 
 			clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
-		} else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
+		}
+
+		if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
 			if (mtip_ftl_rebuild_poll(dd) < 0)
 				set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
 							&dd->dd_flag);
 			clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
 		}
-		clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
-
-		if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
-			goto st_out;
 	}
 
 	/* wait for pci remove to exit */
@@ -3365,7 +3076,6 @@ st_out:
  */
 static void mtip_dma_free(struct driver_data *dd)
 {
-	int i;
 	struct mtip_port *port = dd->port;
 
 	if (port->block1)
@@ -3376,13 +3086,6 @@ static void mtip_dma_free(struct driver_data *dd)
 		dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
 				port->command_list, port->command_list_dma);
 	}
-
-	for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
-		if (port->commands[i].command)
-			dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
-				port->commands[i].command,
-				port->commands[i].command_dma);
-	}
 }
 
 /*
@@ -3396,8 +3099,6 @@ static void mtip_dma_free(struct driver_data *dd)
 static int mtip_dma_alloc(struct driver_data *dd)
 {
 	struct mtip_port *port = dd->port;
-	int i, rv = 0;
-	u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
 
 	/* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
 	port->block1 =
@@ -3430,41 +3131,63 @@ static int mtip_dma_alloc(struct driver_data *dd)
 	port->smart_buf     = port->block1 + AHCI_SMARTBUF_OFFSET;
 	port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET;
 
-	/* Setup per command SGL DMA region */
-
-	/* Point the command headers at the command tables */
-	for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
-		port->commands[i].command =
-			dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
-				&port->commands[i].command_dma, GFP_KERNEL);
-		if (!port->commands[i].command) {
-			rv = -ENOMEM;
-			mtip_dma_free(dd);
-			return rv;
-		}
-		memset(port->commands[i].command, 0, CMD_DMA_ALLOC_SZ);
-
-		port->commands[i].command_header = port->command_list +
-					(sizeof(struct mtip_cmd_hdr) * i);
-		port->commands[i].command_header_dma =
-					dd->port->command_list_dma +
-					(sizeof(struct mtip_cmd_hdr) * i);
+	return 0;
+}
 
-		if (host_cap_64)
-			port->commands[i].command_header->ctbau =
-				__force_bit2int cpu_to_le32(
-				(port->commands[i].command_dma >> 16) >> 16);
+static int mtip_hw_get_identify(struct driver_data *dd)
+{
+	struct smart_attr attr242;
+	unsigned char *buf;
+	int rv;
 
-		port->commands[i].command_header->ctba =
-				__force_bit2int cpu_to_le32(
-				port->commands[i].command_dma & 0xFFFFFFFF);
+	if (mtip_get_identify(dd->port, NULL) < 0)
+		return -EFAULT;
 
-		sg_init_table(port->commands[i].sg, MTIP_MAX_SG);
+	if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
+		MTIP_FTL_REBUILD_MAGIC) {
+		set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
+		return MTIP_FTL_REBUILD_MAGIC;
+	}
+	mtip_dump_identify(dd->port);
 
-		/* Mark command as currently inactive */
-		atomic_set(&dd->port->commands[i].active, 0);
+	/* check write protect, over temp and rebuild statuses */
+	rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
+				dd->port->log_buf,
+				dd->port->log_buf_dma, 1);
+	if (rv) {
+		dev_warn(&dd->pdev->dev,
+			"Error in READ LOG EXT (10h) command\n");
+		/* non-critical error, don't fail the load */
+	} else {
+		buf = (unsigned char *)dd->port->log_buf;
+		if (buf[259] & 0x1) {
+			dev_info(&dd->pdev->dev,
+				"Write protect bit is set.\n");
+			set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
+		}
+		if (buf[288] == 0xF7) {
+			dev_info(&dd->pdev->dev,
+				"Exceeded Tmax, drive in thermal shutdown.\n");
+			set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
+		}
+		if (buf[288] == 0xBF) {
+			dev_info(&dd->pdev->dev,
+				"Drive indicates rebuild has failed.\n");
+			/* TODO */
+		}
 	}
-	return 0;
+
+	/* get write protect progess */
+	memset(&attr242, 0, sizeof(struct smart_attr));
+	if (mtip_get_smart_attr(dd->port, 242, &attr242))
+		dev_warn(&dd->pdev->dev,
+				"Unable to check write protect progress\n");
+	else
+		dev_info(&dd->pdev->dev,
+				"Write protect progress: %u%% (%u blocks)\n",
+				attr242.cur, le32_to_cpu(attr242.data));
+
+	return rv;
 }
 
 /*
@@ -3481,8 +3204,6 @@ static int mtip_hw_init(struct driver_data *dd)
 	int rv;
 	unsigned int num_command_slots;
 	unsigned long timeout, timetaken;
-	unsigned char *buf;
-	struct smart_attr attr242;
 
 	dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
 
@@ -3513,8 +3234,6 @@ static int mtip_hw_init(struct driver_data *dd)
 	else
 		dd->unal_qdepth = 0;
 
-	/* Counting semaphore to track command slot usage */
-	sema_init(&dd->port->cmd_slot, num_command_slots - 1 - dd->unal_qdepth);
 	sema_init(&dd->port->cmd_slot_unal, dd->unal_qdepth);
 
 	/* Spinlock to prevent concurrent issue */
@@ -3599,73 +3318,16 @@ static int mtip_hw_init(struct driver_data *dd)
 	writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
 					dd->mmio + HOST_CTL);
 
-	init_timer(&dd->port->cmd_timer);
 	init_waitqueue_head(&dd->port->svc_wait);
 
-	dd->port->cmd_timer.data = (unsigned long int) dd->port;
-	dd->port->cmd_timer.function = mtip_timeout_function;
-	mod_timer(&dd->port->cmd_timer,
-		jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
-
-
 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
 		rv = -EFAULT;
 		goto out3;
 	}
 
-	if (mtip_get_identify(dd->port, NULL) < 0) {
-		rv = -EFAULT;
-		goto out3;
-	}
-	mtip_dump_identify(dd->port);
-
-	if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
-		MTIP_FTL_REBUILD_MAGIC) {
-		set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
-		return MTIP_FTL_REBUILD_MAGIC;
-	}
-
-	/* check write protect, over temp and rebuild statuses */
-	rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
-				dd->port->log_buf,
-				dd->port->log_buf_dma, 1);
-	if (rv) {
-		dev_warn(&dd->pdev->dev,
-			"Error in READ LOG EXT (10h) command\n");
-		/* non-critical error, don't fail the load */
-	} else {
-		buf = (unsigned char *)dd->port->log_buf;
-		if (buf[259] & 0x1) {
-			dev_info(&dd->pdev->dev,
-				"Write protect bit is set.\n");
-			set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
-		}
-		if (buf[288] == 0xF7) {
-			dev_info(&dd->pdev->dev,
-				"Exceeded Tmax, drive in thermal shutdown.\n");
-			set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
-		}
-		if (buf[288] == 0xBF) {
-			dev_info(&dd->pdev->dev,
-				"Drive is in security locked state.\n");
-			set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
-		}
-	}
-
-	/* get write protect progess */
-	memset(&attr242, 0, sizeof(struct smart_attr));
-	if (mtip_get_smart_attr(dd->port, 242, &attr242))
-		dev_warn(&dd->pdev->dev,
-				"Unable to check write protect progress\n");
-	else
-		dev_info(&dd->pdev->dev,
-				"Write protect progress: %u%% (%u blocks)\n",
-				attr242.cur, le32_to_cpu(attr242.data));
 	return rv;
 
 out3:
-	del_timer_sync(&dd->port->cmd_timer);
-
 	/* Disable interrupts on the HBA. */
 	writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
 			dd->mmio + HOST_CTL);
@@ -3685,6 +3347,22 @@ out1:
 	return rv;
 }
 
+static void mtip_standby_drive(struct driver_data *dd)
+{
+	if (dd->sr)
+		return;
+
+	/*
+	 * Send standby immediate (E0h) to the drive so that it
+	 * saves its state.
+	 */
+	if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
+	    !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
+		if (mtip_standby_immediate(dd->port))
+			dev_warn(&dd->pdev->dev,
+				"STANDBY IMMEDIATE failed\n");
+}
+
 /*
  * Called to deinitialize an interface.
  *
@@ -3700,12 +3378,6 @@ static int mtip_hw_exit(struct driver_data *dd)
 	 * saves its state.
 	 */
 	if (!dd->sr) {
-		if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
-		    !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
-			if (mtip_standby_immediate(dd->port))
-				dev_warn(&dd->pdev->dev,
-					"STANDBY IMMEDIATE failed\n");
-
 		/* de-initialize the port. */
 		mtip_deinit_port(dd->port);
 
@@ -3714,8 +3386,6 @@ static int mtip_hw_exit(struct driver_data *dd)
 				dd->mmio + HOST_CTL);
 	}
 
-	del_timer_sync(&dd->port->cmd_timer);
-
 	/* Release the IRQ. */
 	irq_set_affinity_hint(dd->pdev->irq, NULL);
 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
@@ -4032,100 +3702,138 @@ static const struct block_device_operations mtip_block_ops = {
  *
  * @queue Pointer to the request queue. Unused other than to obtain
  *              the driver data structure.
- * @bio   Pointer to the BIO.
+ * @rq    Pointer to the request.
  *
  */
-static void mtip_make_request(struct request_queue *queue, struct bio *bio)
+static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
-	struct driver_data *dd = queue->queuedata;
-	struct scatterlist *sg;
-	struct bio_vec bvec;
-	struct bvec_iter iter;
-	int nents = 0;
-	int tag = 0, unaligned = 0;
+	struct driver_data *dd = hctx->queue->queuedata;
+	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	unsigned int nents;
 
 	if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
 							&dd->dd_flag))) {
-			bio_endio(bio, -ENXIO);
-			return;
+			return -ENXIO;
 		}
 		if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) {
-			bio_endio(bio, -ENODATA);
-			return;
+			return -ENODATA;
 		}
 		if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT,
 							&dd->dd_flag) &&
-				bio_data_dir(bio))) {
-			bio_endio(bio, -ENODATA);
-			return;
-		}
-		if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) {
-			bio_endio(bio, -ENODATA);
-			return;
-		}
-		if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
-			bio_endio(bio, -ENXIO);
-			return;
+				rq_data_dir(rq))) {
+			return -ENODATA;
 		}
+		if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)))
+			return -ENODATA;
+		if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
+			return -ENXIO;
 	}
 
-	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
-		bio_endio(bio, mtip_send_trim(dd, bio->bi_iter.bi_sector,
-						bio_sectors(bio)));
-		return;
-	}
+	if (rq->cmd_flags & REQ_DISCARD) {
+		int err;
 
-	if (unlikely(!bio_has_data(bio))) {
-		blk_queue_flush(queue, 0);
-		bio_endio(bio, 0);
-		return;
+		err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
+		blk_mq_end_io(rq, err);
+		return 0;
 	}
 
-	if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 &&
-							dd->unal_qdepth) {
-		if (bio->bi_iter.bi_sector % 8 != 0)
-			/* Unaligned on 4k boundaries */
-			unaligned = 1;
-		else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */
-			unaligned = 1;
+	/* Create the scatter list for this request. */
+	nents = blk_rq_map_sg(hctx->queue, rq, cmd->sg);
+
+	/* Issue the read/write. */
+	mtip_hw_submit_io(dd, rq, cmd, nents, hctx);
+	return 0;
+}
+
+static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
+				  struct request *rq)
+{
+	struct driver_data *dd = hctx->queue->queuedata;
+	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+	if (!dd->unal_qdepth || rq_data_dir(rq) == READ)
+		return false;
+
+	/*
+	 * If unaligned depth must be limited on this controller, mark it
+	 * as unaligned if the IO isn't on a 4k boundary (start of length).
+	 */
+	if (blk_rq_sectors(rq) <= 64) {
+		if ((blk_rq_pos(rq) & 7) || (blk_rq_sectors(rq) & 7))
+			cmd->unaligned = 1;
 	}
 
-	sg = mtip_hw_get_scatterlist(dd, &tag, unaligned);
-	if (likely(sg != NULL)) {
-		blk_queue_bounce(queue, &bio);
+	if (cmd->unaligned && down_trylock(&dd->port->cmd_slot_unal))
+		return true;
 
-		if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) {
-			dev_warn(&dd->pdev->dev,
-				"Maximum number of SGL entries exceeded\n");
-			bio_io_error(bio);
-			mtip_hw_release_scatterlist(dd, tag, unaligned);
-			return;
-		}
+	return false;
+}
 
 
-		/* Create the scatter list for this bio. */
-		bio_for_each_segment(bvec, bio, iter) {
-			sg_set_page(&sg[nents],
-					bvec.bv_page,
-					bvec.bv_len,
-					bvec.bv_offset);
-			nents++;
-		}
+static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
+{
+	int ret;
 
-		/* Issue the read/write. */
-		mtip_hw_submit_io(dd,
-				bio->bi_iter.bi_sector,
-				bio_sectors(bio),
-				nents,
-				tag,
-				bio_endio,
-				bio,
-				bio_data_dir(bio),
-				unaligned);
-	} else
-		bio_io_error(bio);
+	if (mtip_check_unal_depth(hctx, rq))
+		return BLK_MQ_RQ_QUEUE_BUSY;
+
+	ret = mtip_submit_request(hctx, rq);
+	if (!ret)
+		return BLK_MQ_RQ_QUEUE_OK;
+
+	rq->errors = ret;
+	return BLK_MQ_RQ_QUEUE_ERROR;
+}
+
+static void mtip_free_cmd(void *data, struct request *rq,
+			  unsigned int hctx_idx, unsigned int request_idx)
+{
+	struct driver_data *dd = data;
+	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+	if (!cmd->command)
+		return;
+
+	dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
+				cmd->command, cmd->command_dma);
+}
+
+static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx,
+			 unsigned int request_idx, unsigned int numa_node)
+{
+	struct driver_data *dd = data;
+	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
+
+	cmd->command = dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
+			&cmd->command_dma, GFP_KERNEL);
+	if (!cmd->command)
+		return -ENOMEM;
+
+	memset(cmd->command, 0, CMD_DMA_ALLOC_SZ);
+
+	/* Point the command headers at the command tables. */
+	cmd->command_header = dd->port->command_list +
+				(sizeof(struct mtip_cmd_hdr) * request_idx);
+	cmd->command_header_dma = dd->port->command_list_dma +
+				(sizeof(struct mtip_cmd_hdr) * request_idx);
+
+	if (host_cap_64)
+		cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
+
+	cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
+
+	sg_init_table(cmd->sg, MTIP_MAX_SG);
+	return 0;
 }
 }
 
+	.queue_rq	= mtip_queue_rq,
+	.map_queue	= blk_mq_map_queue,
+	.init_request	= mtip_init_cmd,
+	.exit_request	= mtip_free_cmd,
+};
+
 /*
  * Block layer initialization function.
  *
@@ -4148,11 +3856,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 	if (dd->disk)
 		goto skip_create_disk; /* hw init done, before rebuild */
 
-	/* Initialize the protocol layer. */
-	wait_for_rebuild = mtip_hw_init(dd);
-	if (wait_for_rebuild < 0) {
-		dev_err(&dd->pdev->dev,
-			"Protocol layer initialization failed\n");
+	if (mtip_hw_init(dd)) {
 		rv = -EINVAL;
 		goto protocol_init_error;
 	}
@@ -4194,29 +3898,53 @@ static int mtip_block_initialize(struct driver_data *dd)
 
 	mtip_hw_debugfs_init(dd);
 
-	/*
-	 * if rebuild pending, start the service thread, and delay the block
-	 * queue creation and add_disk()
-	 */
-	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
-		goto start_service_thread;
-
 skip_create_disk:
-	/* Allocate the request queue. */
-	dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node);
-	if (dd->queue == NULL) {
+	memset(&dd->tags, 0, sizeof(dd->tags));
+	dd->tags.ops = &mtip_mq_ops;
+	dd->tags.nr_hw_queues = 1;
+	dd->tags.queue_depth = MTIP_MAX_COMMAND_SLOTS;
+	dd->tags.reserved_tags = 1;
+	dd->tags.cmd_size = sizeof(struct mtip_cmd);
+	dd->tags.numa_node = dd->numa_node;
+	dd->tags.flags = BLK_MQ_F_SHOULD_MERGE;
+	dd->tags.driver_data = dd;
+
+	rv = blk_mq_alloc_tag_set(&dd->tags);
+	if (rv) {
 		dev_err(&dd->pdev->dev,
 			"Unable to allocate request queue\n");
 		rv = -ENOMEM;
 		goto block_queue_alloc_init_error;
 	}
 
-	/* Attach our request function to the request queue. */
-	blk_queue_make_request(dd->queue, mtip_make_request);
+	/* Allocate the request queue. */
+	dd->queue = blk_mq_init_queue(&dd->tags);
+	if (IS_ERR(dd->queue)) {
+		dev_err(&dd->pdev->dev,
+			"Unable to allocate request queue\n");
+		rv = -ENOMEM;
+		goto block_queue_alloc_init_error;
+	}
 
 	dd->disk->queue		= dd->queue;
 	dd->queue->queuedata	= dd;
 
+	/* Initialize the protocol layer. */
+	wait_for_rebuild = mtip_hw_get_identify(dd);
+	if (wait_for_rebuild < 0) {
+		dev_err(&dd->pdev->dev,
+			"Protocol layer initialization failed\n");
+		rv = -EINVAL;
+		goto init_hw_cmds_error;
+	}
+
+	/*
+	 * if rebuild pending, start the service thread, and delay the block
+	 * queue creation and add_disk()
+	 */
+	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
+		goto start_service_thread;
+
 	/* Set device limits. */
 	set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
 	blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
@@ -4295,8 +4023,9 @@ kthread_run_error:
 	del_gendisk(dd->disk);
 
 read_capacity_error:
+init_hw_cmds_error:
 	blk_cleanup_queue(dd->queue);
-
+	blk_mq_free_tag_set(&dd->tags);
 block_queue_alloc_init_error:
 	mtip_hw_debugfs_exit(dd);
 disk_index_error:
@@ -4345,6 +4074,9 @@ static int mtip_block_remove(struct driver_data *dd)
 				kobject_put(kobj);
 			}
 		}
+
+		mtip_standby_drive(dd);
+
 		/*
 		 * Delete our gendisk structure. This also removes the device
 		 * from /dev
@@ -4357,6 +4089,7 @@ static int mtip_block_remove(struct driver_data *dd)
 			if (dd->disk->queue) {
 				del_gendisk(dd->disk);
 				blk_cleanup_queue(dd->queue);
+				blk_mq_free_tag_set(&dd->tags);
 				dd->queue = NULL;
 			} else
 				put_disk(dd->disk);
@@ -4391,6 +4124,8 @@ static int mtip_block_remove(struct driver_data *dd)
  */
 static int mtip_block_shutdown(struct driver_data *dd)
 {
+	mtip_hw_shutdown(dd);
+
 	/* Delete our gendisk structure, and cleanup the blk queue. */
 	if (dd->disk) {
 		dev_info(&dd->pdev->dev,
@@ -4399,6 +4134,7 @@ static int mtip_block_shutdown(struct driver_data *dd)
 		if (dd->disk->queue) {
 			del_gendisk(dd->disk);
 			blk_cleanup_queue(dd->queue);
+			blk_mq_free_tag_set(&dd->tags);
 		} else
 			put_disk(dd->disk);
 		dd->disk  = NULL;
@@ -4408,8 +4144,6 @@ static int mtip_block_shutdown(struct driver_data *dd)
 	spin_lock(&rssd_index_lock);
 	ida_remove(&rssd_index_ida, dd->index);
 	spin_unlock(&rssd_index_lock);
-
-	mtip_hw_shutdown(dd);
 	return 0;
 }
 
@@ -4479,6 +4213,57 @@ static DEFINE_HANDLER(5);
 static DEFINE_HANDLER(6);
 static DEFINE_HANDLER(7);
 
+static void mtip_disable_link_opts(struct driver_data *dd, struct pci_dev *pdev)
+{
+	int pos;
+	unsigned short pcie_dev_ctrl;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+	if (pos) {
+		pci_read_config_word(pdev,
+			pos + PCI_EXP_DEVCTL,
+			&pcie_dev_ctrl);
+		if (pcie_dev_ctrl & (1 << 11) ||
+		    pcie_dev_ctrl & (1 << 4)) {
+			dev_info(&dd->pdev->dev,
+				"Disabling ERO/No-Snoop on bridge device %04x:%04x\n",
+					pdev->vendor, pdev->device);
+			pcie_dev_ctrl &= ~(PCI_EXP_DEVCTL_NOSNOOP_EN |
+						PCI_EXP_DEVCTL_RELAX_EN);
+			pci_write_config_word(pdev,
+				pos + PCI_EXP_DEVCTL,
+				pcie_dev_ctrl);
+		}
+	}
+}
+
+static void mtip_fix_ero_nosnoop(struct driver_data *dd, struct pci_dev *pdev)
+{
+	/*
+	 * This workaround is specific to AMD/ATI chipset with a PCI upstream
+	 * device with device id 0x5aXX
+	 */
+	if (pdev->bus && pdev->bus->self) {
+		if (pdev->bus->self->vendor == PCI_VENDOR_ID_ATI &&
+		    ((pdev->bus->self->device & 0xff00) == 0x5a00)) {
+			mtip_disable_link_opts(dd, pdev->bus->self);
+		} else {
+			/* Check further up the topology */
+			struct pci_dev *parent_dev = pdev->bus->self;
+			if (parent_dev->bus &&
+				parent_dev->bus->parent &&
+				parent_dev->bus->parent->self &&
+				parent_dev->bus->parent->self->vendor ==
+					 PCI_VENDOR_ID_ATI &&
+				(parent_dev->bus->parent->self->device &
+					0xff00) == 0x5a00) {
+				mtip_disable_link_opts(dd,
+					parent_dev->bus->parent->self);
+			}
+		}
+	}
+}
+
 /*
  * Called for each supported PCI device detected.
  *
@@ -4630,6 +4415,8 @@ static int mtip_pci_probe(struct pci_dev *pdev,
 		goto msi_initialize_err;
 	}
 
+	mtip_fix_ero_nosnoop(dd, pdev);
+
 	/* Initialize the block layer. */
 	rv = mtip_block_initialize(dd);
 	if (rv < 0) {
@@ -4710,8 +4497,6 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 		dev_warn(&dd->pdev->dev,
 			"Completion workers still active!\n");
 	}
-	/* Cleanup the outstanding commands */
-	mtip_command_cleanup(dd);
 
 	/* Clean up the block layer. */
 	mtip_block_remove(dd);
@@ -4737,8 +4522,6 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 
 	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
 	pci_set_drvdata(pdev, NULL);
-	pci_dev_put(pdev);
-
 }
 
 /*
@@ -4935,13 +4718,13 @@ static int __init mtip_init(void)
  */
 static void __exit mtip_exit(void)
 {
-	debugfs_remove_recursive(dfs_parent);
-
 	/* Release the allocated major block device number. */
 	unregister_blkdev(mtip_major, MTIP_DRV_NAME);
 
 	/* Unregister the PCI driver. */
 	pci_unregister_driver(&mtip_pci_driver);
+
+	debugfs_remove_recursive(dfs_parent);
 }
 
 MODULE_AUTHOR("Micron Technology, Inc");

+ 8 - 24
drivers/block/mtip32xx/mtip32xx.h

@@ -40,9 +40,11 @@
 #define MTIP_MAX_RETRIES	2
 
 /* Various timeout values in ms */
-#define MTIP_NCQ_COMMAND_TIMEOUT_MS       5000
-#define MTIP_IOCTL_COMMAND_TIMEOUT_MS     5000
-#define MTIP_INTERNAL_COMMAND_TIMEOUT_MS  5000
+#define MTIP_NCQ_CMD_TIMEOUT_MS      15000
+#define MTIP_IOCTL_CMD_TIMEOUT_MS    5000
+#define MTIP_INT_CMD_TIMEOUT_MS      5000
+#define MTIP_QUIESCE_IO_TIMEOUT_MS   (MTIP_NCQ_CMD_TIMEOUT_MS * \
+				     (MTIP_MAX_RETRIES + 1))
 
 /* check for timeouts every 500ms */
 #define MTIP_TIMEOUT_CHECK_PERIOD	500
@@ -331,12 +333,8 @@ struct mtip_cmd {
 	 */
 	void (*comp_func)(struct mtip_port *port,
 				int tag,
-				void *data,
+				struct mtip_cmd *cmd,
 				int status);
-	/* Additional callback function that may be called by comp_func() */
-	void (*async_callback)(void *data, int status);
-
-	void *async_data; /* Addl. data passed to async_callback() */
 
 	int scatter_ents; /* Number of scatter list entries used */
 
@@ -347,10 +345,6 @@ struct mtip_cmd {
 	int retries; /* The number of retries left for this command. */
 
 	int direction; /* Data transfer direction */
-
-	unsigned long comp_time; /* command completion time, in jiffies */
-
-	atomic_t active; /* declares if this command sent to the drive. */
 };
 
 /* Structure used to describe a port. */
@@ -436,12 +430,6 @@ struct mtip_port {
 	 * or error handling is active
 	 */
 	unsigned long cmds_to_issue[SLOTBITS_IN_LONGS];
-	/*
-	 * Array of command slots. Structure includes pointers to the
-	 * command header and command table, and completion function and data
-	 * pointers.
-	 */
-	struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS];
 	/* Used by mtip_service_thread to wait for an event */
 	wait_queue_head_t svc_wait;
 	/*
@@ -452,13 +440,7 @@ struct mtip_port {
 	/*
 	 * Timer used to complete commands that have been active for too long.
 	 */
-	struct timer_list cmd_timer;
 	unsigned long ic_pause_timer;
-	/*
-	 * Semaphore used to block threads if there are no
-	 * command slots available.
-	 */
-	struct semaphore cmd_slot;
 
 	/* Semaphore to control queue depth of unaligned IOs */
 	struct semaphore cmd_slot_unal;
@@ -485,6 +467,8 @@ struct driver_data {
 
 	struct request_queue *queue; /* Our request queue. */
 
+	struct blk_mq_tag_set tags; /* blk_mq tags */
+
 	struct mtip_port *port; /* Pointer to the port data structure. */
 
 	unsigned product_type; /* magic value declaring the product type */

+ 1 - 1
drivers/block/null_blk.c

@@ -203,8 +203,8 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
 		entry = llist_reverse_order(entry);
 		do {
 			cmd = container_of(entry, struct nullb_cmd, ll_list);
-			end_cmd(cmd);
 			entry = entry->next;
+			end_cmd(cmd);
 		} while (entry);
 	}
 

+ 3 - 4
drivers/block/skd_main.c

@@ -3944,15 +3944,14 @@ static int skd_acquire_msix(struct skd_device *skdev)
 	for (i = 0; i < SKD_MAX_MSIX_COUNT; i++)
 		entries[i].entry = i;
 
-	rc = pci_enable_msix_range(pdev, entries,
-				   SKD_MIN_MSIX_COUNT, SKD_MAX_MSIX_COUNT);
-	if (rc < 0) {
+	rc = pci_enable_msix_exact(pdev, entries, SKD_MAX_MSIX_COUNT);
+	if (rc) {
 		pr_err("(%s): failed to enable MSI-X %d\n",
 		       skd_name(skdev), rc);
 		goto msix_out;
 	}
 
-	skdev->msix_count = rc;
+	skdev->msix_count = SKD_MAX_MSIX_COUNT;
 	skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) *
 				      skdev->msix_count, GFP_KERNEL);
 	if (!skdev->msix_entries) {

+ 6 - 3
drivers/block/virtio_blk.c

@@ -162,6 +162,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 	unsigned int num;
 	const bool last = (req->cmd_flags & REQ_END) != 0;
 	int err;
+	bool notify = false;
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
@@ -214,10 +215,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 		return BLK_MQ_RQ_QUEUE_ERROR;
 	}
 
-	if (last)
-		virtqueue_kick(vblk->vq);
-
+	if (last && virtqueue_kick_prepare(vblk->vq))
+		notify = true;
 	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+
+	if (notify)
+		virtqueue_notify(vblk->vq);
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 

+ 2 - 2
drivers/block/xen-blkback/common.h

@@ -314,7 +314,7 @@ struct xen_blkif {
 	unsigned long long			st_rd_sect;
 	unsigned long long			st_wr_sect;
 
-	wait_queue_head_t	waiting_to_free;
+	struct work_struct	free_work;
 	/* Thread shutdown wait queue. */
 	wait_queue_head_t	shutdown_wq;
 };
@@ -361,7 +361,7 @@ struct pending_req {
 #define xen_blkif_put(_b)				\
 	do {						\
 		if (atomic_dec_and_test(&(_b)->refcnt))	\
-			wake_up(&(_b)->waiting_to_free);\
+			schedule_work(&(_b)->free_work);\
 	} while (0)
 
 struct phys_req {

+ 40 - 13
drivers/block/xen-blkback/xenbus.c

@@ -35,12 +35,26 @@ static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
 			    unsigned int);
+static void xen_blkif_free(struct xen_blkif *blkif);
+static void xen_vbd_free(struct xen_vbd *vbd);
 
 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
 {
 	return be->dev;
 }
 
+/*
+ * The last request could free the device from softirq context and
+ * xen_blkif_free() can sleep.
+ */
+static void xen_blkif_deferred_free(struct work_struct *work)
+{
+	struct xen_blkif *blkif;
+
+	blkif = container_of(work, struct xen_blkif, free_work);
+	xen_blkif_free(blkif);
+}
+
 static int blkback_name(struct xen_blkif *blkif, char *buf)
 {
 	char *devpath, *devname;
@@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	init_completion(&blkif->drain_complete);
 	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
-	init_waitqueue_head(&blkif->waiting_to_free);
 	blkif->persistent_gnts.rb_node = NULL;
 	spin_lock_init(&blkif->free_pages_lock);
 	INIT_LIST_HEAD(&blkif->free_pages);
@@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
 
 	INIT_LIST_HEAD(&blkif->pending_free);
+	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
 
 	for (i = 0; i < XEN_BLKIF_REQS; i++) {
 		req = kzalloc(sizeof(*req), GFP_KERNEL);
@@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
 	return 0;
 }
 
-static void xen_blkif_disconnect(struct xen_blkif *blkif)
+static int xen_blkif_disconnect(struct xen_blkif *blkif)
 {
 	if (blkif->xenblkd) {
 		kthread_stop(blkif->xenblkd);
@@ -239,9 +253,12 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 		blkif->xenblkd = NULL;
 	}
 
-	atomic_dec(&blkif->refcnt);
-	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
-	atomic_inc(&blkif->refcnt);
+	/* The above kthread_stop() guarantees that at this point we
+	 * don't have any discard_io or other_io requests. So, checking
+	 * for inflight IO is enough.
+	 */
+	if (atomic_read(&blkif->inflight) > 0)
+		return -EBUSY;
 
 	if (blkif->irq) {
 		unbind_from_irqhandler(blkif->irq, blkif);
@@ -252,6 +269,8 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
 		blkif->blk_rings.common.sring = NULL;
 	}
+
+	return 0;
 }
 
 static void xen_blkif_free(struct xen_blkif *blkif)
@@ -259,8 +278,8 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 	struct pending_req *req, *n;
 	int i = 0, j;
 
-	if (!atomic_dec_and_test(&blkif->refcnt))
-		BUG();
+	xen_blkif_disconnect(blkif);
+	xen_vbd_free(&blkif->vbd);
 
 	/* Remove all persistent grants and the cache of ballooned pages. */
 	xen_blkbk_free_caches(blkif);
@@ -449,16 +468,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 		be->backend_watch.node = NULL;
 	}
 
+	dev_set_drvdata(&dev->dev, NULL);
+
 	if (be->blkif) {
 		xen_blkif_disconnect(be->blkif);
-		xen_vbd_free(&be->blkif->vbd);
-		xen_blkif_free(be->blkif);
-		be->blkif = NULL;
+		xen_blkif_put(be->blkif);
 	}
 	}
 
 	kfree(be->mode);
 	kfree(be);
 	return 0;
 	return 0;
 }
 
 	struct xenbus_device *dev = be->dev;
 	struct xenbus_device *dev = be->dev;
 	struct xen_blkif *blkif = be->blkif;
 	struct xen_blkif *blkif = be->blkif;
 	int err;
 	int err;
-	int state = 0;
+	int state = 0, discard_enable;
 	struct block_device *bdev = be->blkif->vbd.bdev;
 	struct block_device *bdev = be->blkif->vbd.bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
 
+			   &discard_enable);
+	if (err == 1 && !discard_enable)
+		return;
+
 	if (blk_queue_discard(q)) {
 	if (blk_queue_discard(q)) {
 		err = xenbus_printf(xbt, dev->nodename,
 			"discard-granularity", "%u",
 		 * Enforce precondition before potential leak point.
 		 * Enforce precondition before potential leak point.
 		 * xen_blkif_disconnect() is idempotent.
 		 * xen_blkif_disconnect() is idempotent.
 		 */
 		 */
-		xen_blkif_disconnect(be->blkif);
+		err = xen_blkif_disconnect(be->blkif);
+		if (err) {
+			xenbus_dev_fatal(dev, err, "pending I/O");
+			break;
+		}
 
 
 		err = connect_ring(be);
 		err = connect_ring(be);
 		if (err)
 		if (err)

+ 14 - 26
drivers/block/xen-blkfront.c

@@ -1635,36 +1635,24 @@ blkfront_closing(struct blkfront_info *info)
 static void blkfront_setup_discard(struct blkfront_info *info)
 {
 	int err;
-	char *type;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	unsigned int discard_secure;
 
-	type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
-	if (IS_ERR(type))
-		return;
-
-	info->feature_secdiscard = 0;
-	if (strncmp(type, "phy", 3) == 0) {
-		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			"discard-granularity", "%u", &discard_granularity,
-			"discard-alignment", "%u", &discard_alignment,
-			NULL);
-		if (!err) {
-			info->feature_discard = 1;
-			info->discard_granularity = discard_granularity;
-			info->discard_alignment = discard_alignment;
-		}
-		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			    "discard-secure", "%d", &discard_secure,
-			    NULL);
-		if (!err)
-			info->feature_secdiscard = discard_secure;
-
-	} else if (strncmp(type, "file", 4) == 0)
-		info->feature_discard = 1;
-
-	kfree(type);
+	info->feature_discard = 1;
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+		"discard-granularity", "%u", &discard_granularity,
+		"discard-alignment", "%u", &discard_alignment,
+		NULL);
+	if (!err) {
+		info->discard_granularity = discard_granularity;
+		info->discard_alignment = discard_alignment;
+	}
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+		    "discard-secure", "%d", &discard_secure,
+		    NULL);
+	if (!err)
+		info->feature_secdiscard = !!discard_secure;
 }
 
 static int blkfront_setup_indirect(struct blkfront_info *info)

+ 681 - 676
drivers/cdrom/cdrom.c

@@ -312,36 +312,24 @@ static const char *mrw_format_status[] = {
 
 static const char *mrw_address_space[] = { "DMA", "GAA" };
 
-#if (ERRLOGMASK!=CD_NOTHING)
-#define cdinfo(type, fmt, args...)			\
+#if (ERRLOGMASK != CD_NOTHING)
+#define cd_dbg(type, fmt, ...)				\
 do {							\
 do {							\
 	if ((ERRLOGMASK & type) || debug == 1)		\
 	if ((ERRLOGMASK & type) || debug == 1)		\
-		pr_info(fmt, ##args);			\
+		pr_debug(fmt, ##__VA_ARGS__);		\
 } while (0)
 } while (0)
 #else
 #else
-#define cdinfo(type, fmt, args...)			\
+#define cd_dbg(type, fmt, ...)				\
 do {							\
 do {							\
 	if (0 && (ERRLOGMASK & type) || debug == 1)	\
 	if (0 && (ERRLOGMASK & type) || debug == 1)	\
-		pr_info(fmt, ##args);			\
+		pr_debug(fmt, ##__VA_ARGS__);		\
 } while (0)
 } while (0)
 #endif
 #endif
 
 
-/* These are used to simplify getting data in from and back to user land */
-#define IOCTL_IN(arg, type, in)					\
-	if (copy_from_user(&(in), (type __user *) (arg), sizeof (in)))	\
-		return -EFAULT;
-
-#define IOCTL_OUT(arg, type, out) \
-	if (copy_to_user((type __user *) (arg), &(out), sizeof (out)))	\
-		return -EFAULT;
-
 /* The (cdo->capability & ~cdi->mask & CDC_XXX) construct was used in
    a lot of places. This macro makes the code more clear. */
 #define CDROM_CAN(type) (cdi->ops->capability & ~cdi->mask & (type))
 
-/* used in the audio ioctls */
-#define CHECKAUDIO if ((ret=check_for_audio_disc(cdi, cdo))) return ret
-
 /*
  * Another popular OS uses 7 seconds as the hard timeout for default
  * commands, so it is a good choice for us as well.
@@ -349,21 +337,6 @@ do {							\
 #define CDROM_DEF_TIMEOUT	(7 * HZ)
 
 /* Not-exported routines. */
-static int open_for_data(struct cdrom_device_info * cdi);
-static int check_for_audio_disc(struct cdrom_device_info * cdi,
-			 struct cdrom_device_ops * cdo);
-static void sanitize_format(union cdrom_addr *addr, 
-		u_char * curr, u_char requested);
-static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
-		     unsigned long arg);
-
-int cdrom_get_last_written(struct cdrom_device_info *, long *);
-static int cdrom_get_next_writable(struct cdrom_device_info *, long *);
-static void cdrom_count_tracks(struct cdrom_device_info *, tracktype*);
-
-static int cdrom_mrw_exit(struct cdrom_device_info *cdi);
-
-static int cdrom_get_disc_info(struct cdrom_device_info *cdi, disc_information *di);
 
 static void cdrom_sysctl_register(void);
 
@@ -382,113 +355,65 @@ static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
 	return -EIO;
 }
 
-/* This macro makes sure we don't have to check on cdrom_device_ops
- * existence in the run-time routines below. Change_capability is a
- * hack to have the capability flags defined const, while we can still
- * change it here without gcc complaining at every line.
- */
-#define ENSURE(call, bits) if (cdo->call == NULL) *change_capability &= ~(bits)
-
-int register_cdrom(struct cdrom_device_info *cdi)
-{
-	static char banner_printed;
-        struct cdrom_device_ops *cdo = cdi->ops;
-        int *change_capability = (int *)&cdo->capability; /* hack */
-
-	cdinfo(CD_OPEN, "entering register_cdrom\n"); 
-
-	if (cdo->open == NULL || cdo->release == NULL)
-		return -EINVAL;
-	if (!banner_printed) {
-		pr_info("Uniform CD-ROM driver " REVISION "\n");
-		banner_printed = 1;
-		cdrom_sysctl_register();
-	}
-
-	ENSURE(drive_status, CDC_DRIVE_STATUS );
-	if (cdo->check_events == NULL && cdo->media_changed == NULL)
-		*change_capability = ~(CDC_MEDIA_CHANGED | CDC_SELECT_DISC);
-	ENSURE(tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY);
-	ENSURE(lock_door, CDC_LOCK);
-	ENSURE(select_speed, CDC_SELECT_SPEED);
-	ENSURE(get_last_session, CDC_MULTI_SESSION);
-	ENSURE(get_mcn, CDC_MCN);
-	ENSURE(reset, CDC_RESET);
-	ENSURE(generic_packet, CDC_GENERIC_PACKET);
-	cdi->mc_flags = 0;
-	cdo->n_minors = 0;
-        cdi->options = CDO_USE_FFLAGS;
-	
-	if (autoclose==1 && CDROM_CAN(CDC_CLOSE_TRAY))
-		cdi->options |= (int) CDO_AUTO_CLOSE;
-	if (autoeject==1 && CDROM_CAN(CDC_OPEN_TRAY))
-		cdi->options |= (int) CDO_AUTO_EJECT;
-	if (lockdoor==1)
-		cdi->options |= (int) CDO_LOCK;
-	if (check_media_type==1)
-		cdi->options |= (int) CDO_CHECK_TYPE;
-
-	if (CDROM_CAN(CDC_MRW_W))
-		cdi->exit = cdrom_mrw_exit;
-
-	if (cdi->disk)
-		cdi->cdda_method = CDDA_BPC_FULL;
-	else
-		cdi->cdda_method = CDDA_OLD;
-
-	if (!cdo->generic_packet)
-		cdo->generic_packet = cdrom_dummy_generic_packet;
-
-	cdinfo(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name);
-	mutex_lock(&cdrom_mutex);
-	list_add(&cdi->list, &cdrom_list);
-	mutex_unlock(&cdrom_mutex);
-	return 0;
-}
-#undef ENSURE
-
-void unregister_cdrom(struct cdrom_device_info *cdi)
+static int cdrom_flush_cache(struct cdrom_device_info *cdi)
 {
-	cdinfo(CD_OPEN, "entering unregister_cdrom\n"); 
+	struct packet_command cgc;
 
-	mutex_lock(&cdrom_mutex);
-	list_del(&cdi->list);
-	mutex_unlock(&cdrom_mutex);
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.cmd[0] = GPCMD_FLUSH_CACHE;
 
-	if (cdi->exit)
-		cdi->exit(cdi);
+	cgc.timeout = 5 * 60 * HZ;
 
-	cdi->ops->n_minors--;
-	cdinfo(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name);
+	return cdi->ops->generic_packet(cdi, &cgc);
 }
 
-int cdrom_get_media_event(struct cdrom_device_info *cdi,
-			  struct media_event_desc *med)
+/* requires CD R/RW */
+static int cdrom_get_disc_info(struct cdrom_device_info *cdi,
+			       disc_information *di)
 {
+	struct cdrom_device_ops *cdo = cdi->ops;
 	struct packet_command cgc;
-	unsigned char buffer[8];
-	struct event_header *eh = (struct event_header *) buffer;
+	int ret, buflen;
 
-	init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
-	cgc.cmd[0] = GPCMD_GET_EVENT_STATUS_NOTIFICATION;
-	cgc.cmd[1] = 1;		/* IMMED */
-	cgc.cmd[4] = 1 << 4;	/* media event */
-	cgc.cmd[8] = sizeof(buffer);
+	/* set up command and get the disc info */
+	init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ);
+	cgc.cmd[0] = GPCMD_READ_DISC_INFO;
+	cgc.cmd[8] = cgc.buflen = 2;
 	cgc.quiet = 1;
 
-	if (cdi->ops->generic_packet(cdi, &cgc))
-		return 1;
+	ret = cdo->generic_packet(cdi, &cgc);
+	if (ret)
+		return ret;
 
-	if (be16_to_cpu(eh->data_len) < sizeof(*med))
-		return 1;
+	/* not all drives have the same disc_info length, so requeue
+	 * packet with the length the drive tells us it can supply
+	 */
+	buflen = be16_to_cpu(di->disc_information_length) +
+		sizeof(di->disc_information_length);
 
-	if (eh->nea || eh->notification_class != 0x4)
-		return 1;
+	if (buflen > sizeof(disc_information))
+		buflen = sizeof(disc_information);
 
-	memcpy(med, &buffer[sizeof(*eh)], sizeof(*med));
-	return 0;
+	cgc.cmd[8] = cgc.buflen = buflen;
+	ret = cdo->generic_packet(cdi, &cgc);
+	if (ret)
+		return ret;
+
+	/* return actual fill size */
+	return buflen;
 }
 
+/* This macro makes sure we don't have to check on cdrom_device_ops
+ * existence in the run-time routines below. Change_capability is a
+ * hack to have the capability flags defined const, while we can still
+ * change it here without gcc complaining at every line.
+ */
+#define ENSURE(call, bits)			\
+do {						\
+	if (cdo->call == NULL)			\
+		*change_capability &= ~(bits);	\
+} while (0)
+
 /*
  * the first prototypes used 0x2c as the page code for the mrw mode page,
  * subsequently this was changed to 0x03. probe the one used by this drive
@@ -605,18 +530,6 @@ static int cdrom_mrw_bgformat_susp(struct cdrom_device_info *cdi, int immed)
 	return cdi->ops->generic_packet(cdi, &cgc);
 }
 
-static int cdrom_flush_cache(struct cdrom_device_info *cdi)
-{
-	struct packet_command cgc;
-
-	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
-	cgc.cmd[0] = GPCMD_FLUSH_CACHE;
-
-	cgc.timeout = 5 * 60 * HZ;
-
-	return cdi->ops->generic_packet(cdi, &cgc);
-}
-
 static int cdrom_mrw_exit(struct cdrom_device_info *cdi)
 {
 	disc_information di;
@@ -650,17 +563,19 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
 	cgc.buffer = buffer;
 	cgc.buflen = sizeof(buffer);
 
-	if ((ret = cdrom_mode_sense(cdi, &cgc, cdi->mrw_mode_page, 0)))
+	ret = cdrom_mode_sense(cdi, &cgc, cdi->mrw_mode_page, 0);
+	if (ret)
 		return ret;
 
-	mph = (struct mode_page_header *) buffer;
+	mph = (struct mode_page_header *)buffer;
 	offset = be16_to_cpu(mph->desc_length);
 	size = be16_to_cpu(mph->mode_data_length) + 2;
 
 	buffer[offset + 3] = space;
 	cgc.buflen = size;
 
-	if ((ret = cdrom_mode_select(cdi, &cgc)))
+	ret = cdrom_mode_select(cdi, &cgc);
+	if (ret)
 		return ret;
 
 	pr_info("%s: mrw address space %s selected\n",
@@ -668,6 +583,106 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
 	return 0;
 }
 
+int register_cdrom(struct cdrom_device_info *cdi)
+{
+	static char banner_printed;
+	struct cdrom_device_ops *cdo = cdi->ops;
+	int *change_capability = (int *)&cdo->capability; /* hack */
+
+	cd_dbg(CD_OPEN, "entering register_cdrom\n");
+
+	if (cdo->open == NULL || cdo->release == NULL)
+		return -EINVAL;
+	if (!banner_printed) {
+		pr_info("Uniform CD-ROM driver " REVISION "\n");
+		banner_printed = 1;
+		cdrom_sysctl_register();
+	}
+
+	ENSURE(drive_status, CDC_DRIVE_STATUS);
+	if (cdo->check_events == NULL && cdo->media_changed == NULL)
+		*change_capability = ~(CDC_MEDIA_CHANGED | CDC_SELECT_DISC);
+	ENSURE(tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY);
+	ENSURE(lock_door, CDC_LOCK);
+	ENSURE(select_speed, CDC_SELECT_SPEED);
+	ENSURE(get_last_session, CDC_MULTI_SESSION);
+	ENSURE(get_mcn, CDC_MCN);
+	ENSURE(reset, CDC_RESET);
+	ENSURE(generic_packet, CDC_GENERIC_PACKET);
+	cdi->mc_flags = 0;
+	cdo->n_minors = 0;
+	cdi->options = CDO_USE_FFLAGS;
+
+	if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY))
+		cdi->options |= (int) CDO_AUTO_CLOSE;
+	if (autoeject == 1 && CDROM_CAN(CDC_OPEN_TRAY))
+		cdi->options |= (int) CDO_AUTO_EJECT;
+	if (lockdoor == 1)
+		cdi->options |= (int) CDO_LOCK;
+	if (check_media_type == 1)
+		cdi->options |= (int) CDO_CHECK_TYPE;
+
+	if (CDROM_CAN(CDC_MRW_W))
+		cdi->exit = cdrom_mrw_exit;
+
+	if (cdi->disk)
+		cdi->cdda_method = CDDA_BPC_FULL;
+	else
+		cdi->cdda_method = CDDA_OLD;
+
+	if (!cdo->generic_packet)
+		cdo->generic_packet = cdrom_dummy_generic_packet;
+
+	cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name);
+	mutex_lock(&cdrom_mutex);
+	list_add(&cdi->list, &cdrom_list);
+	mutex_unlock(&cdrom_mutex);
+	return 0;
+}
+#undef ENSURE
+
+void unregister_cdrom(struct cdrom_device_info *cdi)
+{
+	cd_dbg(CD_OPEN, "entering unregister_cdrom\n");
+
+	mutex_lock(&cdrom_mutex);
+	list_del(&cdi->list);
+	mutex_unlock(&cdrom_mutex);
+
+	if (cdi->exit)
+		cdi->exit(cdi);
+
+	cdi->ops->n_minors--;
+	cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name);
+}
+
+int cdrom_get_media_event(struct cdrom_device_info *cdi,
+			  struct media_event_desc *med)
+{
+	struct packet_command cgc;
+	unsigned char buffer[8];
+	struct event_header *eh = (struct event_header *)buffer;
+
+	init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
+	cgc.cmd[0] = GPCMD_GET_EVENT_STATUS_NOTIFICATION;
+	cgc.cmd[1] = 1;		/* IMMED */
+	cgc.cmd[4] = 1 << 4;	/* media event */
+	cgc.cmd[8] = sizeof(buffer);
+	cgc.quiet = 1;
+
+	if (cdi->ops->generic_packet(cdi, &cgc))
+		return 1;
+
+	if (be16_to_cpu(eh->data_len) < sizeof(*med))
+		return 1;
+
+	if (eh->nea || eh->notification_class != 0x4)
+		return 1;
+
+	memcpy(med, &buffer[sizeof(*eh)], sizeof(*med));
+	return 0;
+}
+
 static int cdrom_get_random_writable(struct cdrom_device_info *cdi,
 			      struct rwrt_feature_desc *rfd)
 {
@@ -839,7 +854,7 @@ static int cdrom_ram_open_write(struct cdrom_device_info *cdi)
 	else if (CDF_RWRT == be16_to_cpu(rfd.feature_code))
 		ret = !rfd.curr;
 
-	cdinfo(CD_OPEN, "can open for random write\n");
+	cd_dbg(CD_OPEN, "can open for random write\n");
 	return ret;
 }
 
@@ -928,12 +943,12 @@ static void cdrom_dvd_rw_close_write(struct cdrom_device_info *cdi)
 	struct packet_command cgc;
 
 	if (cdi->mmc3_profile != 0x1a) {
-		cdinfo(CD_CLOSE, "%s: No DVD+RW\n", cdi->name);
+		cd_dbg(CD_CLOSE, "%s: No DVD+RW\n", cdi->name);
 		return;
 	}
 
 	if (!cdi->media_written) {
-		cdinfo(CD_CLOSE, "%s: DVD+RW media clean\n", cdi->name);
+		cd_dbg(CD_CLOSE, "%s: DVD+RW media clean\n", cdi->name);
 		return;
 	}
 
@@ -969,82 +984,74 @@ static int cdrom_close_write(struct cdrom_device_info *cdi)
 #endif
 }
 
-/* We use the open-option O_NONBLOCK to indicate that the
- * purpose of opening is only for subsequent ioctl() calls; no device
- * integrity checks are performed.
- *
- * We hope that all cd-player programs will adopt this convention. It
- * is in their own interest: device control becomes a lot easier
- * this way.
- */
-int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t mode)
+/* badly broken, I know. Is due for a fixup anytime. */
+static void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype *tracks)
 {
-	int ret;
-
-	cdinfo(CD_OPEN, "entering cdrom_open\n"); 
-
-	/* open is event synchronization point, check events first */
-	check_disk_change(bdev);
-
-	/* if this was a O_NONBLOCK open and we should honor the flags,
-	 * do a quick open without drive/disc integrity checks. */
-	cdi->use_count++;
-	if ((mode & FMODE_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) {
-		ret = cdi->ops->open(cdi, 1);
-	} else {
-		ret = open_for_data(cdi);
-		if (ret)
-			goto err;
-		cdrom_mmc3_profile(cdi);
-		if (mode & FMODE_WRITE) {
-			ret = -EROFS;
-			if (cdrom_open_write(cdi))
-				goto err_release;
-			if (!CDROM_CAN(CDC_RAM))
-				goto err_release;
-			ret = 0;
-			cdi->media_written = 0;
-		}
+	struct cdrom_tochdr header;
+	struct cdrom_tocentry entry;
+	int ret, i;
+	tracks->data = 0;
+	tracks->audio = 0;
+	tracks->cdi = 0;
+	tracks->xa = 0;
+	tracks->error = 0;
+	cd_dbg(CD_COUNT_TRACKS, "entering cdrom_count_tracks\n");
+	/* Grab the TOC header so we can see how many tracks there are */
+	ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCHDR, &header);
+	if (ret) {
+		if (ret == -ENOMEDIUM)
+			tracks->error = CDS_NO_DISC;
+		else
+			tracks->error = CDS_NO_INFO;
+		return;
 	}
-
-	if (ret)
-		goto err;
-
-	cdinfo(CD_OPEN, "Use count for \"/dev/%s\" now %d\n",
-			cdi->name, cdi->use_count);
-	return 0;
-err_release:
-	if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) {
-		cdi->ops->lock_door(cdi, 0);
-		cdinfo(CD_OPEN, "door unlocked.\n");
+	/* check what type of tracks are on this disc */
+	entry.cdte_format = CDROM_MSF;
+	for (i = header.cdth_trk0; i <= header.cdth_trk1; i++) {
+		entry.cdte_track = i;
+		if (cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &entry)) {
+			tracks->error = CDS_NO_INFO;
+			return;
+		}
+		if (entry.cdte_ctrl & CDROM_DATA_TRACK) {
+			if (entry.cdte_format == 0x10)
+				tracks->cdi++;
+			else if (entry.cdte_format == 0x20)
+				tracks->xa++;
+			else
+				tracks->data++;
+		} else {
+			tracks->audio++;
+		}
+		cd_dbg(CD_COUNT_TRACKS, "track %d: format=%d, ctrl=%d\n",
+		       i, entry.cdte_format, entry.cdte_ctrl);
 	}
-	cdi->ops->release(cdi);
-err:
-	cdi->use_count--;
-	return ret;
+	cd_dbg(CD_COUNT_TRACKS, "disc has %d tracks: %d=audio %d=data %d=Cd-I %d=XA\n",
+	       header.cdth_trk1, tracks->audio, tracks->data,
+	       tracks->cdi, tracks->xa);
 }
 
 static
-int open_for_data(struct cdrom_device_info * cdi)
+int open_for_data(struct cdrom_device_info *cdi)
 {
 	int ret;
 	struct cdrom_device_ops *cdo = cdi->ops;
 	tracktype tracks;
-	cdinfo(CD_OPEN, "entering open_for_data\n");
+	cd_dbg(CD_OPEN, "entering open_for_data\n");
 	/* Check if the driver can report drive status.  If it can, we
 	/* Check if the driver can report drive status.  If it can, we
 	   can do clever things.  If it can't, well, we at least tried! */
 	   can do clever things.  If it can't, well, we at least tried! */
 	if (cdo->drive_status != NULL) {
 	if (cdo->drive_status != NULL) {
 		ret = cdo->drive_status(cdi, CDSL_CURRENT);
 		ret = cdo->drive_status(cdi, CDSL_CURRENT);
-		cdinfo(CD_OPEN, "drive_status=%d\n", ret); 
+		cd_dbg(CD_OPEN, "drive_status=%d\n", ret);
 		if (ret == CDS_TRAY_OPEN) {
 		if (ret == CDS_TRAY_OPEN) {
-			cdinfo(CD_OPEN, "the tray is open...\n"); 
+			cd_dbg(CD_OPEN, "the tray is open...\n");
 			/* can/may i close it? */
 			/* can/may i close it? */
 			if (CDROM_CAN(CDC_CLOSE_TRAY) &&
 			if (CDROM_CAN(CDC_CLOSE_TRAY) &&
 			    cdi->options & CDO_AUTO_CLOSE) {
 			    cdi->options & CDO_AUTO_CLOSE) {
-				cdinfo(CD_OPEN, "trying to close the tray.\n"); 
+				cd_dbg(CD_OPEN, "trying to close the tray\n");
 				ret=cdo->tray_move(cdi,0);
 				ret=cdo->tray_move(cdi,0);
 				if (ret) {
 				if (ret) {
-					cdinfo(CD_OPEN, "bummer. tried to close the tray but failed.\n"); 
+					cd_dbg(CD_OPEN, "bummer. tried to close the tray but failed.\n");
 					/* Ignore the error from the low
 					/* Ignore the error from the low
 					level driver.  We don't care why it
 					level driver.  We don't care why it
 					couldn't close the tray.  We only care 
 					couldn't close the tray.  We only care 
@@ -1054,19 +1061,19 @@ int open_for_data(struct cdrom_device_info * cdi)
 					goto clean_up_and_return;
 				}
 			} else {
-				cdinfo(CD_OPEN, "bummer. this drive can't close the tray.\n"); 
+				cd_dbg(CD_OPEN, "bummer. this drive can't close the tray.\n");
 				ret=-ENOMEDIUM;
 				goto clean_up_and_return;
 			}
 			/* Ok, the door should be closed now.. Check again */
 			ret = cdo->drive_status(cdi, CDSL_CURRENT);
 			if ((ret == CDS_NO_DISC) || (ret==CDS_TRAY_OPEN)) {
-				cdinfo(CD_OPEN, "bummer. the tray is still not closed.\n"); 
-				cdinfo(CD_OPEN, "tray might not contain a medium.\n");
+				cd_dbg(CD_OPEN, "bummer. the tray is still not closed.\n");
+				cd_dbg(CD_OPEN, "tray might not contain a medium\n");
 				ret=-ENOMEDIUM;
 				goto clean_up_and_return;
 			}
-			cdinfo(CD_OPEN, "the tray is now closed.\n"); 
+			cd_dbg(CD_OPEN, "the tray is now closed\n");
 		}
 		/* the door should be closed now, check for the disc */
 		ret = cdo->drive_status(cdi, CDSL_CURRENT);
@@ -1077,7 +1084,7 @@ int open_for_data(struct cdrom_device_info * cdi)
 	}
 	cdrom_count_tracks(cdi, &tracks);
 	if (tracks.error == CDS_NO_DISC) {
-		cdinfo(CD_OPEN, "bummer. no disc.\n");
+		cd_dbg(CD_OPEN, "bummer. no disc.\n");
 		ret=-ENOMEDIUM;
 		goto clean_up_and_return;
 	}
@@ -1087,34 +1094,34 @@ int open_for_data(struct cdrom_device_info * cdi)
 		if (cdi->options & CDO_CHECK_TYPE) {
 		    /* give people a warning shot, now that CDO_CHECK_TYPE
 		       is the default case! */
-		    cdinfo(CD_OPEN, "bummer. wrong media type.\n"); 
-		    cdinfo(CD_WARNING, "pid %d must open device O_NONBLOCK!\n",
-					(unsigned int)task_pid_nr(current));
+		    cd_dbg(CD_OPEN, "bummer. wrong media type.\n");
+		    cd_dbg(CD_WARNING, "pid %d must open device O_NONBLOCK!\n",
+			   (unsigned int)task_pid_nr(current));
 		    ret=-EMEDIUMTYPE;
 		    goto clean_up_and_return;
 		}
 		else {
-		    cdinfo(CD_OPEN, "wrong media type, but CDO_CHECK_TYPE not set.\n");
+		    cd_dbg(CD_OPEN, "wrong media type, but CDO_CHECK_TYPE not set\n");
 		}
 	}
 
-	cdinfo(CD_OPEN, "all seems well, opening the device.\n"); 
+	cd_dbg(CD_OPEN, "all seems well, opening the devicen");
 
 	/* all seems well, we can open the device */
 	ret = cdo->open(cdi, 0); /* open for data */
-	cdinfo(CD_OPEN, "opening the device gave me %d.\n", ret); 
+	cd_dbg(CD_OPEN, "opening the device gave me %d\n", ret);
 	/* After all this careful checking, we shouldn't have problems
 	   opening the device, but we don't want the device locked if 
 	   this somehow fails... */
 	if (ret) {
-		cdinfo(CD_OPEN, "open device failed.\n"); 
+		cd_dbg(CD_OPEN, "open device failed\n");
 		goto clean_up_and_return;
 	}
 	if (CDROM_CAN(CDC_LOCK) && (cdi->options & CDO_LOCK)) {
 			cdo->lock_door(cdi, 1);
-			cdinfo(CD_OPEN, "door locked.\n");
+			cd_dbg(CD_OPEN, "door locked\n");
 	}
-	cdinfo(CD_OPEN, "device opened successfully.\n"); 
+	cd_dbg(CD_OPEN, "device opened successfully\n");
 	return ret;
 
 	/* Something failed.  Try to unlock the drive, because some drivers
@@ -1123,11 +1130,67 @@ int open_for_data(struct cdrom_device_info * cdi)
 	This ensures that the drive gets unlocked after a mount fails.  This 
 	is a goto to avoid bloating the driver with redundant code. */ 
 clean_up_and_return:
-	cdinfo(CD_OPEN, "open failed.\n"); 
+	cd_dbg(CD_OPEN, "open failed\n");
 	if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) {
 			cdo->lock_door(cdi, 0);
-			cdinfo(CD_OPEN, "door unlocked.\n");
+			cd_dbg(CD_OPEN, "door unlocked\n");
+	}
+	return ret;
+}
+
+/* We use the open-option O_NONBLOCK to indicate that the
+ * purpose of opening is only for subsequent ioctl() calls; no device
+ * integrity checks are performed.
+ *
+ * We hope that all cd-player programs will adopt this convention. It
+ * is in their own interest: device control becomes a lot easier
+ * this way.
+ */
+int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
+	       fmode_t mode)
+{
+	int ret;
+
+	cd_dbg(CD_OPEN, "entering cdrom_open\n");
+
+	/* open is event synchronization point, check events first */
+	check_disk_change(bdev);
+
+	/* if this was a O_NONBLOCK open and we should honor the flags,
+	 * do a quick open without drive/disc integrity checks. */
+	cdi->use_count++;
+	if ((mode & FMODE_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) {
+		ret = cdi->ops->open(cdi, 1);
+	} else {
+		ret = open_for_data(cdi);
+		if (ret)
+			goto err;
+		cdrom_mmc3_profile(cdi);
+		if (mode & FMODE_WRITE) {
+			ret = -EROFS;
+			if (cdrom_open_write(cdi))
+				goto err_release;
+			if (!CDROM_CAN(CDC_RAM))
+				goto err_release;
+			ret = 0;
+			cdi->media_written = 0;
+		}
+	}
+
+	if (ret)
+		goto err;
+
+	cd_dbg(CD_OPEN, "Use count for \"/dev/%s\" now %d\n",
+	       cdi->name, cdi->use_count);
+	return 0;
+err_release:
+	if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) {
+		cdi->ops->lock_door(cdi, 0);
+		cd_dbg(CD_OPEN, "door unlocked\n");
 	}
+	cdi->ops->release(cdi);
+err:
+	cdi->use_count--;
 	return ret;
 }
 
@@ -1139,21 +1202,21 @@ static int check_for_audio_disc(struct cdrom_device_info * cdi,
 {
         int ret;
 	tracktype tracks;
-	cdinfo(CD_OPEN, "entering check_for_audio_disc\n");
+	cd_dbg(CD_OPEN, "entering check_for_audio_disc\n");
 	if (!(cdi->options & CDO_CHECK_TYPE))
 	if (!(cdi->options & CDO_CHECK_TYPE))
 		return 0;
 		return 0;
 	if (cdo->drive_status != NULL) {
 	if (cdo->drive_status != NULL) {
 		ret = cdo->drive_status(cdi, CDSL_CURRENT);
 		ret = cdo->drive_status(cdi, CDSL_CURRENT);
-		cdinfo(CD_OPEN, "drive_status=%d\n", ret); 
+		cd_dbg(CD_OPEN, "drive_status=%d\n", ret);
 		if (ret == CDS_TRAY_OPEN) {
 		if (ret == CDS_TRAY_OPEN) {
-			cdinfo(CD_OPEN, "the tray is open...\n"); 
+			cd_dbg(CD_OPEN, "the tray is open...\n");
 			/* can/may i close it? */
 			/* can/may i close it? */
 			if (CDROM_CAN(CDC_CLOSE_TRAY) &&
 			if (CDROM_CAN(CDC_CLOSE_TRAY) &&
 			    cdi->options & CDO_AUTO_CLOSE) {
 			    cdi->options & CDO_AUTO_CLOSE) {
-				cdinfo(CD_OPEN, "trying to close the tray.\n"); 
+				cd_dbg(CD_OPEN, "trying to close the tray\n");
 				ret=cdo->tray_move(cdi,0);
 				ret=cdo->tray_move(cdi,0);
 				if (ret) {
 				if (ret) {
-					cdinfo(CD_OPEN, "bummer. tried to close tray but failed.\n"); 
+					cd_dbg(CD_OPEN, "bummer. tried to close tray but failed.\n");
 					/* Ignore the error from the low
 					/* Ignore the error from the low
 					level driver.  We don't care why it
 					level driver.  We don't care why it
 					couldn't close the tray.  We only care 
 					couldn't close the tray.  We only care 
@@ -1162,20 +1225,20 @@ static int check_for_audio_disc(struct cdrom_device_info * cdi,
 					return -ENOMEDIUM;
 				}
 			} else {
-				cdinfo(CD_OPEN, "bummer. this driver can't close the tray.\n"); 
+				cd_dbg(CD_OPEN, "bummer. this driver can't close the tray.\n");
 				return -ENOMEDIUM;
 			}
 			/* Ok, the door should be closed now.. Check again */
 			ret = cdo->drive_status(cdi, CDSL_CURRENT);
 			if ((ret == CDS_NO_DISC) || (ret==CDS_TRAY_OPEN)) {
-				cdinfo(CD_OPEN, "bummer. the tray is still not closed.\n"); 
+				cd_dbg(CD_OPEN, "bummer. the tray is still not closed.\n");
 				return -ENOMEDIUM;
 			}	
 			if (ret!=CDS_DISC_OK) {
-				cdinfo(CD_OPEN, "bummer. disc isn't ready.\n"); 
+				cd_dbg(CD_OPEN, "bummer. disc isn't ready.\n");
 				return -EIO;
 			}	
-			cdinfo(CD_OPEN, "the tray is now closed.\n"); 
+			cd_dbg(CD_OPEN, "the tray is now closed\n");
 		}	
 	}
 	cdrom_count_tracks(cdi, &tracks);
@@ -1193,17 +1256,18 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
 	struct cdrom_device_ops *cdo = cdi->ops;
 	int opened_for_data;
 
-	cdinfo(CD_CLOSE, "entering cdrom_release\n");
+	cd_dbg(CD_CLOSE, "entering cdrom_release\n");
 
 	if (cdi->use_count > 0)
 		cdi->use_count--;
 
 	if (cdi->use_count == 0) {
-		cdinfo(CD_CLOSE, "Use count for \"/dev/%s\" now zero\n", cdi->name);
+		cd_dbg(CD_CLOSE, "Use count for \"/dev/%s\" now zero\n",
+		       cdi->name);
 		cdrom_dvd_rw_close_write(cdi);
 
 		if ((cdo->capability & CDC_LOCK) && !cdi->keeplocked) {
-			cdinfo(CD_CLOSE, "Unlocking door!\n");
+			cd_dbg(CD_CLOSE, "Unlocking door!\n");
 			cdo->lock_door(cdi, 0);
 		}
 	}
@@ -1262,7 +1326,7 @@ static int cdrom_slot_status(struct cdrom_device_info *cdi, int slot)
 	struct cdrom_changer_info *info;
 	int ret;
 
-	cdinfo(CD_CHANGER, "entering cdrom_slot_status()\n"); 
+	cd_dbg(CD_CHANGER, "entering cdrom_slot_status()\n");
 	if (cdi->sanyo_slot)
 		return CDS_NO_INFO;
 	
@@ -1292,7 +1356,7 @@ int cdrom_number_of_slots(struct cdrom_device_info *cdi)
 	int nslots = 1;
 	struct cdrom_changer_info *info;
 
-	cdinfo(CD_CHANGER, "entering cdrom_number_of_slots()\n"); 
+	cd_dbg(CD_CHANGER, "entering cdrom_number_of_slots()\n");
 	/* cdrom_read_mech_status requires a valid value for capacity: */
 	cdi->capacity = 0; 
 
@@ -1313,7 +1377,7 @@ static int cdrom_load_unload(struct cdrom_device_info *cdi, int slot)
 {
 	struct packet_command cgc;
 
-	cdinfo(CD_CHANGER, "entering cdrom_load_unload()\n"); 
+	cd_dbg(CD_CHANGER, "entering cdrom_load_unload()\n");
 	if (cdi->sanyo_slot && slot < 0)
 		return 0;
 
@@ -1342,7 +1406,7 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
 	int curslot;
 	int ret;
 
-	cdinfo(CD_CHANGER, "entering cdrom_select_disc()\n"); 
+	cd_dbg(CD_CHANGER, "entering cdrom_select_disc()\n");
 	if (!CDROM_CAN(CDC_SELECT_DISC))
 		return -EDRIVE_CANT_DO_THIS;
 
@@ -1476,51 +1540,6 @@ int cdrom_media_changed(struct cdrom_device_info *cdi)
 	return media_changed(cdi, 0);
 }
 
-/* badly broken, I know. Is due for a fixup anytime. */
-static void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype* tracks)
-{
-	struct cdrom_tochdr header;
-	struct cdrom_tocentry entry;
-	int ret, i;
-	tracks->data=0;
-	tracks->audio=0;
-	tracks->cdi=0;
-	tracks->xa=0;
-	tracks->error=0;
-	cdinfo(CD_COUNT_TRACKS, "entering cdrom_count_tracks\n"); 
-	/* Grab the TOC header so we can see how many tracks there are */
-	if ((ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCHDR, &header))) {
-		if (ret == -ENOMEDIUM)
-			tracks->error = CDS_NO_DISC;
-		else
-			tracks->error = CDS_NO_INFO;
-		return;
-	}	
-	/* check what type of tracks are on this disc */
-	entry.cdte_format = CDROM_MSF;
-	for (i = header.cdth_trk0; i <= header.cdth_trk1; i++) {
-		entry.cdte_track  = i;
-		if (cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &entry)) {
-			tracks->error=CDS_NO_INFO;
-			return;
-		}	
-		if (entry.cdte_ctrl & CDROM_DATA_TRACK) {
-		    if (entry.cdte_format == 0x10)
-			tracks->cdi++;
-		    else if (entry.cdte_format == 0x20) 
-			tracks->xa++;
-		    else
-			tracks->data++;
-		} else
-		    tracks->audio++;
-		cdinfo(CD_COUNT_TRACKS, "track %d: format=%d, ctrl=%d\n",
-		       i, entry.cdte_format, entry.cdte_ctrl);
-	}	
-	cdinfo(CD_COUNT_TRACKS, "disc has %d tracks: %d=audio %d=data %d=Cd-I %d=XA\n", 
-		header.cdth_trk1, tracks->audio, tracks->data, 
-		tracks->cdi, tracks->xa);
-}	
-
 /* Requests to the low-level drivers will /always/ be done in the
    following format convention:
 
@@ -1632,7 +1651,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 	switch (ai->type) {
 	/* LU data send */
 	case DVD_LU_SEND_AGID:
-		cdinfo(CD_DVD, "entering DVD_LU_SEND_AGID\n"); 
+		cd_dbg(CD_DVD, "entering DVD_LU_SEND_AGID\n");
 		cgc.quiet = 1;
 		setup_report_key(&cgc, ai->lsa.agid, 0);
 
@@ -1644,7 +1663,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 		break;
 
 	case DVD_LU_SEND_KEY1:
-		cdinfo(CD_DVD, "entering DVD_LU_SEND_KEY1\n"); 
+		cd_dbg(CD_DVD, "entering DVD_LU_SEND_KEY1\n");
 		setup_report_key(&cgc, ai->lsk.agid, 2);
 
 		if ((ret = cdo->generic_packet(cdi, &cgc)))
@@ -1655,7 +1674,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 		break;
 
 	case DVD_LU_SEND_CHALLENGE:
-		cdinfo(CD_DVD, "entering DVD_LU_SEND_CHALLENGE\n"); 
+		cd_dbg(CD_DVD, "entering DVD_LU_SEND_CHALLENGE\n");
 		setup_report_key(&cgc, ai->lsc.agid, 1);
 
 		if ((ret = cdo->generic_packet(cdi, &cgc)))
@@ -1667,7 +1686,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 
 	/* Post-auth key */
 	case DVD_LU_SEND_TITLE_KEY:
-		cdinfo(CD_DVD, "entering DVD_LU_SEND_TITLE_KEY\n"); 
+		cd_dbg(CD_DVD, "entering DVD_LU_SEND_TITLE_KEY\n");
 		cgc.quiet = 1;
 		setup_report_key(&cgc, ai->lstk.agid, 4);
 		cgc.cmd[5] = ai->lstk.lba;
@@ -1686,7 +1705,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 		break;
 
 	case DVD_LU_SEND_ASF:
-		cdinfo(CD_DVD, "entering DVD_LU_SEND_ASF\n"); 
+		cd_dbg(CD_DVD, "entering DVD_LU_SEND_ASF\n");
 		setup_report_key(&cgc, ai->lsasf.agid, 5);
 		
 		if ((ret = cdo->generic_packet(cdi, &cgc)))
@@ -1697,7 +1716,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 
 	/* LU data receive (LU changes state) */
 	case DVD_HOST_SEND_CHALLENGE:
-		cdinfo(CD_DVD, "entering DVD_HOST_SEND_CHALLENGE\n"); 
+		cd_dbg(CD_DVD, "entering DVD_HOST_SEND_CHALLENGE\n");
 		setup_send_key(&cgc, ai->hsc.agid, 1);
 		buf[1] = 0xe;
 		copy_chal(&buf[4], ai->hsc.chal);
@@ -1709,7 +1728,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 		break;
 
 	case DVD_HOST_SEND_KEY2:
-		cdinfo(CD_DVD, "entering DVD_HOST_SEND_KEY2\n"); 
+		cd_dbg(CD_DVD, "entering DVD_HOST_SEND_KEY2\n");
 		setup_send_key(&cgc, ai->hsk.agid, 3);
 		buf[1] = 0xa;
 		copy_key(&buf[4], ai->hsk.key);
@@ -1724,7 +1743,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 	/* Misc */
 	case DVD_INVALIDATE_AGID:
 		cgc.quiet = 1;
-		cdinfo(CD_DVD, "entering DVD_INVALIDATE_AGID\n"); 
+		cd_dbg(CD_DVD, "entering DVD_INVALIDATE_AGID\n");
 		setup_report_key(&cgc, ai->lsa.agid, 0x3f);
 		if ((ret = cdo->generic_packet(cdi, &cgc)))
 			return ret;
@@ -1732,7 +1751,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 
 	/* Get region settings */
 	case DVD_LU_SEND_RPC_STATE:
-		cdinfo(CD_DVD, "entering DVD_LU_SEND_RPC_STATE\n");
+		cd_dbg(CD_DVD, "entering DVD_LU_SEND_RPC_STATE\n");
 		setup_report_key(&cgc, 0, 8);
 		memset(&rpc_state, 0, sizeof(rpc_state_t));
 		cgc.buffer = (char *) &rpc_state;
@@ -1749,7 +1768,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 
 	/* Set region settings */
 	case DVD_HOST_SEND_RPC_STATE:
-		cdinfo(CD_DVD, "entering DVD_HOST_SEND_RPC_STATE\n");
+		cd_dbg(CD_DVD, "entering DVD_HOST_SEND_RPC_STATE\n");
 		setup_send_key(&cgc, 0, 6);
 		buf[1] = 6;
 		buf[4] = ai->hrpcs.pdrc;
@@ -1759,7 +1778,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
 		break;
 
 	default:
-		cdinfo(CD_WARNING, "Invalid DVD key ioctl (%d)\n", ai->type);
+		cd_dbg(CD_WARNING, "Invalid DVD key ioctl (%d)\n", ai->type);
 		return -ENOTTY;
 	}
 
@@ -1891,7 +1910,8 @@ static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s,
 
 	s->bca.len = buf[0] << 8 | buf[1];
 	if (s->bca.len < 12 || s->bca.len > 188) {
-		cdinfo(CD_WARNING, "Received invalid BCA length (%d)\n", s->bca.len);
+		cd_dbg(CD_WARNING, "Received invalid BCA length (%d)\n",
+		       s->bca.len);
 		ret = -EIO;
 		goto out;
 	}
@@ -1927,14 +1947,13 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s,
 
 	s->manufact.len = buf[0] << 8 | buf[1];
 	if (s->manufact.len < 0) {
-		cdinfo(CD_WARNING, "Received invalid manufacture info length"
-				   " (%d)\n", s->manufact.len);
+		cd_dbg(CD_WARNING, "Received invalid manufacture info length (%d)\n",
+		       s->manufact.len);
 		ret = -EIO;
 	} else {
 		if (s->manufact.len > 2048) {
-			cdinfo(CD_WARNING, "Received invalid manufacture info "
-					"length (%d): truncating to 2048\n",
-					s->manufact.len);
+			cd_dbg(CD_WARNING, "Received invalid manufacture info length (%d): truncating to 2048\n",
+			       s->manufact.len);
 			s->manufact.len = 2048;
 		}
 		memcpy(s->manufact.value, &buf[4], s->manufact.len);
@@ -1965,8 +1984,8 @@ static int dvd_read_struct(struct cdrom_device_info *cdi, dvd_struct *s,
 		return dvd_read_manufact(cdi, s, cgc);
 		
 	default:
-		cdinfo(CD_WARNING, ": Invalid DVD structure read requested (%d)\n",
-					s->type);
+		cd_dbg(CD_WARNING, ": Invalid DVD structure read requested (%d)\n",
+		       s->type);
 		return -EINVAL;
 	}
 }
@@ -2255,7 +2274,7 @@ static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi,
 	u8 requested_format;
 	int ret;
 
-	cdinfo(CD_DO_IOCTL, "entering CDROMMULTISESSION\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMMULTISESSION\n");
 
 	if (!(cdi->ops->capability & CDC_MULTI_SESSION))
 		return -ENOSYS;
@@ -2277,13 +2296,13 @@ static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi,
 	if (copy_to_user(argp, &ms_info, sizeof(ms_info)))
 		return -EFAULT;
 
-	cdinfo(CD_DO_IOCTL, "CDROMMULTISESSION successful\n");
+	cd_dbg(CD_DO_IOCTL, "CDROMMULTISESSION successful\n");
 	return 0;
 }
 
 static int cdrom_ioctl_eject(struct cdrom_device_info *cdi)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROMEJECT\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMEJECT\n");
 
 	if (!CDROM_CAN(CDC_OPEN_TRAY))
 		return -ENOSYS;
@@ -2300,7 +2319,7 @@ static int cdrom_ioctl_eject(struct cdrom_device_info *cdi)
 
 static int cdrom_ioctl_closetray(struct cdrom_device_info *cdi)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROMCLOSETRAY\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMCLOSETRAY\n");
 
 	if (!CDROM_CAN(CDC_CLOSE_TRAY))
 		return -ENOSYS;
@@ -2310,7 +2329,7 @@ static int cdrom_ioctl_closetray(struct cdrom_device_info *cdi)
 static int cdrom_ioctl_eject_sw(struct cdrom_device_info *cdi,
 		unsigned long arg)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROMEJECT_SW\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMEJECT_SW\n");
 
 	if (!CDROM_CAN(CDC_OPEN_TRAY))
 		return -ENOSYS;
@@ -2329,7 +2348,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi,
 	struct cdrom_changer_info *info;
 	int ret;
 
-	cdinfo(CD_DO_IOCTL, "entering CDROM_MEDIA_CHANGED\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_MEDIA_CHANGED\n");
 
 	if (!CDROM_CAN(CDC_MEDIA_CHANGED))
 		return -ENOSYS;
@@ -2355,7 +2374,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi,
 static int cdrom_ioctl_set_options(struct cdrom_device_info *cdi,
 		unsigned long arg)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROM_SET_OPTIONS\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_SET_OPTIONS\n");
 
 	/*
 	 * Options need to be in sync with capability.
@@ -2383,7 +2402,7 @@ static int cdrom_ioctl_set_options(struct cdrom_device_info *cdi,
 static int cdrom_ioctl_clear_options(struct cdrom_device_info *cdi,
 		unsigned long arg)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROM_CLEAR_OPTIONS\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_CLEAR_OPTIONS\n");
 
 	cdi->options &= ~(int) arg;
 	return cdi->options;
@@ -2392,7 +2411,7 @@ static int cdrom_ioctl_clear_options(struct cdrom_device_info *cdi,
 static int cdrom_ioctl_select_speed(struct cdrom_device_info *cdi,
 		unsigned long arg)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROM_SELECT_SPEED\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_SELECT_SPEED\n");
 
 	if (!CDROM_CAN(CDC_SELECT_SPEED))
 		return -ENOSYS;
@@ -2402,7 +2421,7 @@ static int cdrom_ioctl_select_speed(struct cdrom_device_info *cdi,
 static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi,
 		unsigned long arg)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROM_SELECT_DISC\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_SELECT_DISC\n");
 
 	if (!CDROM_CAN(CDC_SELECT_DISC))
 		return -ENOSYS;
@@ -2420,14 +2439,14 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi,
 	if (cdi->ops->select_disc)
 		return cdi->ops->select_disc(cdi, arg);
 
-	cdinfo(CD_CHANGER, "Using generic cdrom_select_disc()\n");
+	cd_dbg(CD_CHANGER, "Using generic cdrom_select_disc()\n");
 	return cdrom_select_disc(cdi, arg);
 }
 
 static int cdrom_ioctl_reset(struct cdrom_device_info *cdi,
 		struct block_device *bdev)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROM_RESET\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_RESET\n");
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -2440,7 +2459,7 @@ static int cdrom_ioctl_reset(struct cdrom_device_info *cdi,
 static int cdrom_ioctl_lock_door(struct cdrom_device_info *cdi,
 		unsigned long arg)
 {
-	cdinfo(CD_DO_IOCTL, "%socking door.\n", arg ? "L" : "Unl");
+	cd_dbg(CD_DO_IOCTL, "%socking door\n", arg ? "L" : "Unl");
 
 	if (!CDROM_CAN(CDC_LOCK))
 		return -EDRIVE_CANT_DO_THIS;
@@ -2459,7 +2478,7 @@ static int cdrom_ioctl_lock_door(struct cdrom_device_info *cdi,
 static int cdrom_ioctl_debug(struct cdrom_device_info *cdi,
 		unsigned long arg)
 {
-	cdinfo(CD_DO_IOCTL, "%sabling debug.\n", arg ? "En" : "Dis");
+	cd_dbg(CD_DO_IOCTL, "%sabling debug\n", arg ? "En" : "Dis");
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -2469,7 +2488,7 @@ static int cdrom_ioctl_debug(struct cdrom_device_info *cdi,
 
 static int cdrom_ioctl_get_capability(struct cdrom_device_info *cdi)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROM_GET_CAPABILITY\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_GET_CAPABILITY\n");
 	return (cdi->ops->capability & ~cdi->mask);
 }
 
@@ -2485,7 +2504,7 @@ static int cdrom_ioctl_get_mcn(struct cdrom_device_info *cdi,
 	struct cdrom_mcn mcn;
 	int ret;
 
-	cdinfo(CD_DO_IOCTL, "entering CDROM_GET_MCN\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_GET_MCN\n");
 
 	if (!(cdi->ops->capability & CDC_MCN))
 		return -ENOSYS;
@@ -2495,14 +2514,14 @@ static int cdrom_ioctl_get_mcn(struct cdrom_device_info *cdi,
 
 	if (copy_to_user(argp, &mcn, sizeof(mcn)))
 		return -EFAULT;
-	cdinfo(CD_DO_IOCTL, "CDROM_GET_MCN successful\n");
+	cd_dbg(CD_DO_IOCTL, "CDROM_GET_MCN successful\n");
 	return 0;
 }
 
 static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi,
 		unsigned long arg)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROM_DRIVE_STATUS\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_DRIVE_STATUS\n");
 
 	if (!(cdi->ops->capability & CDC_DRIVE_STATUS))
 		return -ENOSYS;
@@ -2535,7 +2554,7 @@ static int cdrom_ioctl_disc_status(struct cdrom_device_info *cdi)
 {
 	tracktype tracks;
 
-	cdinfo(CD_DO_IOCTL, "entering CDROM_DISC_STATUS\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_DISC_STATUS\n");
 
 	cdrom_count_tracks(cdi, &tracks);
 	if (tracks.error)
@@ -2557,13 +2576,13 @@ static int cdrom_ioctl_disc_status(struct cdrom_device_info *cdi)
 		return CDS_DATA_1;
 	/* Policy mode off */
 
-	cdinfo(CD_WARNING,"This disc doesn't have any tracks I recognize!\n");
+	cd_dbg(CD_WARNING, "This disc doesn't have any tracks I recognize!\n");
 	return CDS_NO_INFO;
 }
 
 static int cdrom_ioctl_changer_nslots(struct cdrom_device_info *cdi)
 {
-	cdinfo(CD_DO_IOCTL, "entering CDROM_CHANGER_NSLOTS\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_CHANGER_NSLOTS\n");
 	return cdi->capacity;
 }
 
@@ -2574,7 +2593,7 @@ static int cdrom_ioctl_get_subchnl(struct cdrom_device_info *cdi,
 	u8 requested, back;
 	int ret;
 
-	/* cdinfo(CD_DO_IOCTL,"entering CDROMSUBCHNL\n");*/
+	/* cd_dbg(CD_DO_IOCTL,"entering CDROMSUBCHNL\n");*/
 
 	if (copy_from_user(&q, argp, sizeof(q)))
 		return -EFAULT;
@@ -2594,7 +2613,7 @@ static int cdrom_ioctl_get_subchnl(struct cdrom_device_info *cdi,
 
 	if (copy_to_user(argp, &q, sizeof(q)))
 		return -EFAULT;
-	/* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */
+	/* cd_dbg(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */
 	return 0;
 }
 
@@ -2604,7 +2623,7 @@ static int cdrom_ioctl_read_tochdr(struct cdrom_device_info *cdi,
 	struct cdrom_tochdr header;
 	int ret;
 
-	/* cdinfo(CD_DO_IOCTL, "entering CDROMREADTOCHDR\n"); */
+	/* cd_dbg(CD_DO_IOCTL, "entering CDROMREADTOCHDR\n"); */
 
 	if (copy_from_user(&header, argp, sizeof(header)))
 		return -EFAULT;
@@ -2615,7 +2634,7 @@ static int cdrom_ioctl_read_tochdr(struct cdrom_device_info *cdi,
 
 	if (copy_to_user(argp, &header, sizeof(header)))
 		return -EFAULT;
-	/* cdinfo(CD_DO_IOCTL, "CDROMREADTOCHDR successful\n"); */
+	/* cd_dbg(CD_DO_IOCTL, "CDROMREADTOCHDR successful\n"); */
 	return 0;
 }
 
@@ -2626,7 +2645,7 @@ static int cdrom_ioctl_read_tocentry(struct cdrom_device_info *cdi,
 	u8 requested_format;
 	int ret;
 
-	/* cdinfo(CD_DO_IOCTL, "entering CDROMREADTOCENTRY\n"); */
+	/* cd_dbg(CD_DO_IOCTL, "entering CDROMREADTOCENTRY\n"); */
 
 	if (copy_from_user(&entry, argp, sizeof(entry)))
 		return -EFAULT;
@@ -2643,7 +2662,7 @@ static int cdrom_ioctl_read_tocentry(struct cdrom_device_info *cdi,
 
 	if (copy_to_user(argp, &entry, sizeof(entry)))
 		return -EFAULT;
-	/* cdinfo(CD_DO_IOCTL, "CDROMREADTOCENTRY successful\n"); */
+	/* cd_dbg(CD_DO_IOCTL, "CDROMREADTOCENTRY successful\n"); */
 	return 0;
 }
 
@@ -2652,7 +2671,7 @@ static int cdrom_ioctl_play_msf(struct cdrom_device_info *cdi,
 {
 	struct cdrom_msf msf;
 
-	cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n");
 
 	if (!CDROM_CAN(CDC_PLAY_AUDIO))
 		return -ENOSYS;
@@ -2667,7 +2686,7 @@ static int cdrom_ioctl_play_trkind(struct cdrom_device_info *cdi,
 	struct cdrom_ti ti;
 	int ret;
 
-	cdinfo(CD_DO_IOCTL, "entering CDROMPLAYTRKIND\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYTRKIND\n");
 
 	if (!CDROM_CAN(CDC_PLAY_AUDIO))
 		return -ENOSYS;
@@ -2684,7 +2703,7 @@ static int cdrom_ioctl_volctrl(struct cdrom_device_info *cdi,
 {
 	struct cdrom_volctrl volume;
 
-	cdinfo(CD_DO_IOCTL, "entering CDROMVOLCTRL\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMVOLCTRL\n");
 
 	if (!CDROM_CAN(CDC_PLAY_AUDIO))
 		return -ENOSYS;
@@ -2699,7 +2718,7 @@ static int cdrom_ioctl_volread(struct cdrom_device_info *cdi,
 	struct cdrom_volctrl volume;
 	int ret;
 
-	cdinfo(CD_DO_IOCTL, "entering CDROMVOLREAD\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMVOLREAD\n");
 
 	if (!CDROM_CAN(CDC_PLAY_AUDIO))
 		return -ENOSYS;
@@ -2718,7 +2737,7 @@ static int cdrom_ioctl_audioctl(struct cdrom_device_info *cdi,
 {
 	int ret;
 
-	cdinfo(CD_DO_IOCTL, "doing audio ioctl (start/stop/pause/resume)\n");
+	cd_dbg(CD_DO_IOCTL, "doing audio ioctl (start/stop/pause/resume)\n");
 
 	if (!CDROM_CAN(CDC_PLAY_AUDIO))
 		return -ENOSYS;
@@ -2728,103 +2747,6 @@ static int cdrom_ioctl_audioctl(struct cdrom_device_info *cdi,
 	return cdi->ops->audio_ioctl(cdi, cmd, NULL);
 }
 
-/*
- * Just about every imaginable ioctl is supported in the Uniform layer
- * these days.
- * ATAPI / SCSI specific code now mainly resides in mmc_ioctl().
- */
-int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
-		fmode_t mode, unsigned int cmd, unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	int ret;
-
-	/*
-	 * Try the generic SCSI command ioctl's first.
-	 */
-	ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
-	if (ret != -ENOTTY)
-		return ret;
-
-	switch (cmd) {
-	case CDROMMULTISESSION:
-		return cdrom_ioctl_multisession(cdi, argp);
-	case CDROMEJECT:
-		return cdrom_ioctl_eject(cdi);
-	case CDROMCLOSETRAY:
-		return cdrom_ioctl_closetray(cdi);
-	case CDROMEJECT_SW:
-		return cdrom_ioctl_eject_sw(cdi, arg);
-	case CDROM_MEDIA_CHANGED:
-		return cdrom_ioctl_media_changed(cdi, arg);
-	case CDROM_SET_OPTIONS:
-		return cdrom_ioctl_set_options(cdi, arg);
-	case CDROM_CLEAR_OPTIONS:
-		return cdrom_ioctl_clear_options(cdi, arg);
-	case CDROM_SELECT_SPEED:
-		return cdrom_ioctl_select_speed(cdi, arg);
-	case CDROM_SELECT_DISC:
-		return cdrom_ioctl_select_disc(cdi, arg);
-	case CDROMRESET:
-		return cdrom_ioctl_reset(cdi, bdev);
-	case CDROM_LOCKDOOR:
-		return cdrom_ioctl_lock_door(cdi, arg);
-	case CDROM_DEBUG:
-		return cdrom_ioctl_debug(cdi, arg);
-	case CDROM_GET_CAPABILITY:
-		return cdrom_ioctl_get_capability(cdi);
-	case CDROM_GET_MCN:
-		return cdrom_ioctl_get_mcn(cdi, argp);
-	case CDROM_DRIVE_STATUS:
-		return cdrom_ioctl_drive_status(cdi, arg);
-	case CDROM_DISC_STATUS:
-		return cdrom_ioctl_disc_status(cdi);
-	case CDROM_CHANGER_NSLOTS:
-		return cdrom_ioctl_changer_nslots(cdi);
-	}
-
-	/*
-	 * Use the ioctls that are implemented through the generic_packet()
-	 * interface. this may look at bit funny, but if -ENOTTY is
-	 * returned that particular ioctl is not implemented and we
-	 * let it go through the device specific ones.
-	 */
-	if (CDROM_CAN(CDC_GENERIC_PACKET)) {
-		ret = mmc_ioctl(cdi, cmd, arg);
-		if (ret != -ENOTTY)
-			return ret;
-	}
-
-	/*
-	 * Note: most of the cdinfo() calls are commented out here,
-	 * because they fill up the sys log when CD players poll
-	 * the drive.
-	 */
-	switch (cmd) {
-	case CDROMSUBCHNL:
-		return cdrom_ioctl_get_subchnl(cdi, argp);
-	case CDROMREADTOCHDR:
-		return cdrom_ioctl_read_tochdr(cdi, argp);
-	case CDROMREADTOCENTRY:
-		return cdrom_ioctl_read_tocentry(cdi, argp);
-	case CDROMPLAYMSF:
-		return cdrom_ioctl_play_msf(cdi, argp);
-	case CDROMPLAYTRKIND:
-		return cdrom_ioctl_play_trkind(cdi, argp);
-	case CDROMVOLCTRL:
-		return cdrom_ioctl_volctrl(cdi, argp);
-	case CDROMVOLREAD:
-		return cdrom_ioctl_volread(cdi, argp);
-	case CDROMSTART:
-	case CDROMSTOP:
-	case CDROMPAUSE:
-	case CDROMRESUME:
-		return cdrom_ioctl_audioctl(cdi, cmd);
-	}
-
-	return -ENOSYS;
-}
-
 /*
  * Required when we need to use READ_10 to issue other than 2048 block
  * reads
@@ -2854,71 +2776,222 @@ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size)
 	return cdo->generic_packet(cdi, &cgc);
 }
 
-static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
-					void __user *arg,
-					struct packet_command *cgc,
-					int cmd)
+static int cdrom_get_track_info(struct cdrom_device_info *cdi,
+				__u16 track, __u8 type, track_information *ti)
 {
-	struct request_sense sense;
-	struct cdrom_msf msf;
-	int blocksize = 0, format = 0, lba;
-	int ret;
+	struct cdrom_device_ops *cdo = cdi->ops;
+	struct packet_command cgc;
+	int ret, buflen;
 
-	switch (cmd) {
-	case CDROMREADRAW:
-		blocksize = CD_FRAMESIZE_RAW;
-		break;
-	case CDROMREADMODE1:
-		blocksize = CD_FRAMESIZE;
-		format = 2;
-		break;
-	case CDROMREADMODE2:
-		blocksize = CD_FRAMESIZE_RAW0;
-		break;
-	}
-	IOCTL_IN(arg, struct cdrom_msf, msf);
-	lba = msf_to_lba(msf.cdmsf_min0, msf.cdmsf_sec0, msf.cdmsf_frame0);
-	/* FIXME: we need upper bound checking, too!! */
-	if (lba < 0)
-		return -EINVAL;
+	init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ);
+	cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO;
+	cgc.cmd[1] = type & 3;
+	cgc.cmd[4] = (track & 0xff00) >> 8;
+	cgc.cmd[5] = track & 0xff;
+	cgc.cmd[8] = 8;
+	cgc.quiet = 1;
 
-	cgc->buffer = kzalloc(blocksize, GFP_KERNEL);
-	if (cgc->buffer == NULL)
-		return -ENOMEM;
+	ret = cdo->generic_packet(cdi, &cgc);
+	if (ret)
+		return ret;
 
-	memset(&sense, 0, sizeof(sense));
-	cgc->sense = &sense;
-	cgc->data_direction = CGC_DATA_READ;
-	ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize);
-	if (ret && sense.sense_key == 0x05 &&
-		   sense.asc == 0x20 &&
-		   sense.ascq == 0x00) {
-		/*
-		 * SCSI-II devices are not required to support
-		 * READ_CD, so let's try switching block size
-		 */
-		/* FIXME: switch back again... */
-		ret = cdrom_switch_blocksize(cdi, blocksize);
-		if (ret)
-			goto out;
-		cgc->sense = NULL;
-		ret = cdrom_read_cd(cdi, cgc, lba, blocksize, 1);
-		ret |= cdrom_switch_blocksize(cdi, blocksize);
-	}
-	if (!ret && copy_to_user(arg, cgc->buffer, blocksize))
-		ret = -EFAULT;
+	buflen = be16_to_cpu(ti->track_information_length) +
+		sizeof(ti->track_information_length);
+
+	if (buflen > sizeof(track_information))
+		buflen = sizeof(track_information);
+
+	cgc.cmd[8] = cgc.buflen = buflen;
+	ret = cdo->generic_packet(cdi, &cgc);
+	if (ret)
+		return ret;
+
+	/* return actual fill size */
+	return buflen;
+}
+
+/* return the last written block on the CD-R media. this is for the udf
+   file system. */
+int cdrom_get_last_written(struct cdrom_device_info *cdi, long *last_written)
+{
+	struct cdrom_tocentry toc;
+	disc_information di;
+	track_information ti;
+	__u32 last_track;
+	int ret = -1, ti_size;
+
+	if (!CDROM_CAN(CDC_GENERIC_PACKET))
+		goto use_toc;
+
+	ret = cdrom_get_disc_info(cdi, &di);
+	if (ret < (int)(offsetof(typeof(di), last_track_lsb)
+			+ sizeof(di.last_track_lsb)))
+		goto use_toc;
+
+	/* if unit didn't return msb, it's zeroed by cdrom_get_disc_info */
+	last_track = (di.last_track_msb << 8) | di.last_track_lsb;
+	ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti);
+	if (ti_size < (int)offsetof(typeof(ti), track_start))
+		goto use_toc;
+
+	/* if this track is blank, try the previous. */
+	if (ti.blank) {
+		if (last_track == 1)
+			goto use_toc;
+		last_track--;
+		ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti);
+	}
+
+	if (ti_size < (int)(offsetof(typeof(ti), track_size)
+				+ sizeof(ti.track_size)))
+		goto use_toc;
+
+	/* if last recorded field is valid, return it. */
+	if (ti.lra_v && ti_size >= (int)(offsetof(typeof(ti), last_rec_address)
+				+ sizeof(ti.last_rec_address))) {
+		*last_written = be32_to_cpu(ti.last_rec_address);
+	} else {
+		/* make it up instead */
+		*last_written = be32_to_cpu(ti.track_start) +
+				be32_to_cpu(ti.track_size);
+		if (ti.free_blocks)
+			*last_written -= (be32_to_cpu(ti.free_blocks) + 7);
+	}
+	return 0;
+
+	/* this is where we end up if the drive either can't do a
+	   GPCMD_READ_DISC_INFO or GPCMD_READ_TRACK_RZONE_INFO or if
+	   it doesn't give enough information or fails. then we return
+	   the toc contents. */
+use_toc:
+	toc.cdte_format = CDROM_MSF;
+	toc.cdte_track = CDROM_LEADOUT;
+	if ((ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &toc)))
+		return ret;
+	sanitize_format(&toc.cdte_addr, &toc.cdte_format, CDROM_LBA);
+	*last_written = toc.cdte_addr.lba;
+	return 0;
+}
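/*
 * (Sketch, not from this patch.) A filesystem can reach the helper above
 * through the CDROM_LAST_WRITTEN ioctl; UDF does something along these
 * lines when hunting for the anchor block on open CD-R media:
 */
static unsigned long probe_last_block(struct block_device *bdev)
{
	unsigned long lblock = 0;

	/* routed via mmc_ioctl_cdrom_last_written() -> cdrom_get_last_written() */
	if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long)&lblock))
		return 0;	/* not a CD drive, or the request failed */
	return lblock;
}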
+
+/* return the next writable block. also for udf file system. */
+static int cdrom_get_next_writable(struct cdrom_device_info *cdi,
+				   long *next_writable)
+{
+	disc_information di;
+	track_information ti;
+	__u16 last_track;
+	int ret, ti_size;
+
+	if (!CDROM_CAN(CDC_GENERIC_PACKET))
+		goto use_last_written;
+
+	ret = cdrom_get_disc_info(cdi, &di);
+	if (ret < 0 || ret < offsetof(typeof(di), last_track_lsb)
+				+ sizeof(di.last_track_lsb))
+		goto use_last_written;
+
+	/* if unit didn't return msb, it's zeroed by cdrom_get_disc_info */
+	last_track = (di.last_track_msb << 8) | di.last_track_lsb;
+	ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti);
+	if (ti_size < 0 || ti_size < offsetof(typeof(ti), track_start))
+		goto use_last_written;
+
+	/* if this track is blank, try the previous. */
+	if (ti.blank) {
+		if (last_track == 1)
+			goto use_last_written;
+		last_track--;
+		ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti);
+		if (ti_size < 0)
+			goto use_last_written;
+	}
+
+	/* if next recordable address field is valid, use it. */
+	if (ti.nwa_v && ti_size >= offsetof(typeof(ti), next_writable)
+				+ sizeof(ti.next_writable)) {
+		*next_writable = be32_to_cpu(ti.next_writable);
+		return 0;
+	}
+
+use_last_written:
+	ret = cdrom_get_last_written(cdi, next_writable);
+	if (ret) {
+		*next_writable = 0;
+		return ret;
+	} else {
+		*next_writable += 7;
+		return 0;
+	}
+}
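/*
 * (Assumption, not stated in the patch.) The "+ 7" in the fallback above
 * appears to match the seven link/run-in/run-out blocks that follow each
 * packet on incrementally written CD media, so the estimate lands on the
 * first block that is actually writable:
 */
enum { CD_PACKET_LINK_BLOCKS = 7 };	/* illustrative constant */

static long next_writable_estimate(long last_written)
{
	return last_written + CD_PACKET_LINK_BLOCKS;
}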
+
+static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
+					      void __user *arg,
+					      struct packet_command *cgc,
+					      int cmd)
+{
+	struct request_sense sense;
+	struct cdrom_msf msf;
+	int blocksize = 0, format = 0, lba;
+	int ret;
+
+	switch (cmd) {
+	case CDROMREADRAW:
+		blocksize = CD_FRAMESIZE_RAW;
+		break;
+	case CDROMREADMODE1:
+		blocksize = CD_FRAMESIZE;
+		format = 2;
+		break;
+	case CDROMREADMODE2:
+		blocksize = CD_FRAMESIZE_RAW0;
+		break;
+	}
+	if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf)))
+		return -EFAULT;
+	lba = msf_to_lba(msf.cdmsf_min0, msf.cdmsf_sec0, msf.cdmsf_frame0);
+	/* FIXME: we need upper bound checking, too!! */
+	if (lba < 0)
+		return -EINVAL;
+
+	cgc->buffer = kzalloc(blocksize, GFP_KERNEL);
+	if (cgc->buffer == NULL)
+		return -ENOMEM;
+
+	memset(&sense, 0, sizeof(sense));
+	cgc->sense = &sense;
+	cgc->data_direction = CGC_DATA_READ;
+	ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize);
+	if (ret && sense.sense_key == 0x05 &&
+	    sense.asc == 0x20 &&
+	    sense.ascq == 0x00) {
+		/*
+		 * SCSI-II devices are not required to support
+		 * READ_CD, so let's try switching block size
+		 */
+		/* FIXME: switch back again... */
+		ret = cdrom_switch_blocksize(cdi, blocksize);
+		if (ret)
+			goto out;
+		cgc->sense = NULL;
+		ret = cdrom_read_cd(cdi, cgc, lba, blocksize, 1);
+		ret |= cdrom_switch_blocksize(cdi, blocksize);
+	}
+	if (!ret && copy_to_user(arg, cgc->buffer, blocksize))
+		ret = -EFAULT;
 out:
 	kfree(cgc->buffer);
 	return ret;
 }
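/*
 * For reference (reconstructed from the call sites, not part of this hunk):
 * the IOCTL_IN()/IOCTL_OUT() macros being replaced throughout this file by
 * explicit copy_from_user()/copy_to_user() calls were essentially wrappers
 * that hid a function return:
 */
#define IOCTL_IN(arg, type, in)						\
	if (copy_from_user(&(in), (type __user *)(arg), sizeof(in)))	\
		return -EFAULT;

#define IOCTL_OUT(arg, type, out)					\
	if (copy_to_user((type __user *)(arg), &(out), sizeof(out)))	\
		return -EFAULT;

/*
 * Open-coding the copies, as this patch does, keeps the -EFAULT error path
 * visible at the call site instead of burying a return inside a macro.
 */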
 
 static noinline int mmc_ioctl_cdrom_read_audio(struct cdrom_device_info *cdi,
-					void __user *arg)
+					       void __user *arg)
 {
 	struct cdrom_read_audio ra;
 	int lba;
 
-	IOCTL_IN(arg, struct cdrom_read_audio, ra);
+	if (copy_from_user(&ra, (struct cdrom_read_audio __user *)arg,
+			   sizeof(ra)))
+		return -EFAULT;
 
 	if (ra.addr_format == CDROM_MSF)
 		lba = msf_to_lba(ra.addr.msf.minute,
@@ -2937,12 +3010,13 @@ static noinline int mmc_ioctl_cdrom_read_audio(struct cdrom_device_info *cdi,
 }
 
 static noinline int mmc_ioctl_cdrom_subchannel(struct cdrom_device_info *cdi,
-					void __user *arg)
+					       void __user *arg)
 {
 	int ret;
 	struct cdrom_subchnl q;
 	u_char requested, back;
-	IOCTL_IN(arg, struct cdrom_subchnl, q);
+	if (copy_from_user(&q, (struct cdrom_subchnl __user *)arg, sizeof(q)))
+		return -EFAULT;
 	requested = q.cdsc_format;
 	if (!((requested == CDROM_MSF) ||
 	      (requested == CDROM_LBA)))
@@ -2954,19 +3028,21 @@ static noinline int mmc_ioctl_cdrom_subchannel(struct cdrom_device_info *cdi,
 	back = q.cdsc_format; /* local copy */
 	sanitize_format(&q.cdsc_absaddr, &back, requested);
 	sanitize_format(&q.cdsc_reladdr, &q.cdsc_format, requested);
-	IOCTL_OUT(arg, struct cdrom_subchnl, q);
-	/* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */
+	if (copy_to_user((struct cdrom_subchnl __user *)arg, &q, sizeof(q)))
+		return -EFAULT;
+	/* cd_dbg(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */
 	return 0;
 }
 
 static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi,
-					void __user *arg,
-					struct packet_command *cgc)
+					     void __user *arg,
+					     struct packet_command *cgc)
 {
 	struct cdrom_device_ops *cdo = cdi->ops;
 	struct cdrom_msf msf;
-	cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n");
-	IOCTL_IN(arg, struct cdrom_msf, msf);
+	cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n");
+	if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf)))
+		return -EFAULT;
 	cgc->cmd[0] = GPCMD_PLAY_AUDIO_MSF;
 	cgc->cmd[3] = msf.cdmsf_min0;
 	cgc->cmd[4] = msf.cdmsf_sec0;
@@ -2979,13 +3055,14 @@ static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi,
 }
 
 static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi,
-					void __user *arg,
-					struct packet_command *cgc)
+					     void __user *arg,
+					     struct packet_command *cgc)
 {
 	struct cdrom_device_ops *cdo = cdi->ops;
 	struct cdrom_blk blk;
-	cdinfo(CD_DO_IOCTL, "entering CDROMPLAYBLK\n");
-	IOCTL_IN(arg, struct cdrom_blk, blk);
+	cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYBLK\n");
+	if (copy_from_user(&blk, (struct cdrom_blk __user *)arg, sizeof(blk)))
+		return -EFAULT;
 	cgc->cmd[0] = GPCMD_PLAY_AUDIO_10;
 	cgc->cmd[2] = (blk.from >> 24) & 0xff;
 	cgc->cmd[3] = (blk.from >> 16) & 0xff;
@@ -2998,9 +3075,9 @@ static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi,
 }
 
 static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi,
-					void __user *arg,
-					struct packet_command *cgc,
-					unsigned int cmd)
+					   void __user *arg,
+					   struct packet_command *cgc,
+					   unsigned int cmd)
 {
 	struct cdrom_volctrl volctrl;
 	unsigned char buffer[32];
@@ -3008,9 +3085,11 @@ static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi,
 	unsigned short offset;
 	int ret;
 
-	cdinfo(CD_DO_IOCTL, "entering CDROMVOLUME\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMVOLUME\n");
 
-	IOCTL_IN(arg, struct cdrom_volctrl, volctrl);
+	if (copy_from_user(&volctrl, (struct cdrom_volctrl __user *)arg,
+			   sizeof(volctrl)))
+		return -EFAULT;
 
 	cgc->buffer = buffer;
 	cgc->buflen = 24;
@@ -3030,14 +3109,14 @@ static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi,
 	if (offset + 16 > cgc->buflen) {
 		cgc->buflen = offset + 16;
 		ret = cdrom_mode_sense(cdi, cgc,
-					GPMODE_AUDIO_CTL_PAGE, 0);
+				       GPMODE_AUDIO_CTL_PAGE, 0);
 		if (ret)
 			return ret;
 	}
 
 	/* sanity check */
 	if ((buffer[offset] & 0x3f) != GPMODE_AUDIO_CTL_PAGE ||
-			buffer[offset + 1] < 14)
+	    buffer[offset + 1] < 14)
 		return -EINVAL;
 
 	/* now we have the current volume settings. if it was only
@@ -3047,7 +3126,9 @@ static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi,
 		volctrl.channel1 = buffer[offset+11];
 		volctrl.channel2 = buffer[offset+13];
 		volctrl.channel3 = buffer[offset+15];
-		IOCTL_OUT(arg, struct cdrom_volctrl, volctrl);
+		if (copy_to_user((struct cdrom_volctrl __user *)arg, &volctrl,
+				 sizeof(volctrl)))
+			return -EFAULT;
 		return 0;
 	}
 
@@ -3069,11 +3150,11 @@ static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi,
 }
 
 static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi,
-					struct packet_command *cgc,
-					int cmd)
+					       struct packet_command *cgc,
+					       int cmd)
 {
 	struct cdrom_device_ops *cdo = cdi->ops;
-	cdinfo(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n");
 	cgc->cmd[0] = GPCMD_START_STOP_UNIT;
 	cgc->cmd[1] = 1;
 	cgc->cmd[4] = (cmd == CDROMSTART) ? 1 : 0;
@@ -3082,11 +3163,11 @@ static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi,
 }
 
 static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi,
-					struct packet_command *cgc,
-					int cmd)
+						 struct packet_command *cgc,
+						 int cmd)
 {
 	struct cdrom_device_ops *cdo = cdi->ops;
-	cdinfo(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n");
 	cgc->cmd[0] = GPCMD_PAUSE_RESUME;
 	cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0;
 	cgc->data_direction = CGC_DATA_NONE;
@@ -3094,8 +3175,8 @@ static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi,
 }
 
 static noinline int mmc_ioctl_dvd_read_struct(struct cdrom_device_info *cdi,
-						void __user *arg,
-						struct packet_command *cgc)
+					      void __user *arg,
+					      struct packet_command *cgc)
 {
 	int ret;
 	dvd_struct *s;
@@ -3108,7 +3189,7 @@ static noinline int mmc_ioctl_dvd_read_struct(struct cdrom_device_info *cdi,
 	if (!s)
 		return -ENOMEM;
 
-	cdinfo(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n");
+	cd_dbg(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n");
 	if (copy_from_user(s, arg, size)) {
 		kfree(s);
 		return -EFAULT;
@@ -3126,44 +3207,48 @@ out:
 }
 
 static noinline int mmc_ioctl_dvd_auth(struct cdrom_device_info *cdi,
-					void __user *arg)
+				       void __user *arg)
 {
 	int ret;
 	dvd_authinfo ai;
 	if (!CDROM_CAN(CDC_DVD))
 		return -ENOSYS;
-	cdinfo(CD_DO_IOCTL, "entering DVD_AUTH\n");
-	IOCTL_IN(arg, dvd_authinfo, ai);
+	cd_dbg(CD_DO_IOCTL, "entering DVD_AUTH\n");
+	if (copy_from_user(&ai, (dvd_authinfo __user *)arg, sizeof(ai)))
+		return -EFAULT;
 	ret = dvd_do_auth(cdi, &ai);
 	if (ret)
 		return ret;
-	IOCTL_OUT(arg, dvd_authinfo, ai);
+	if (copy_to_user((dvd_authinfo __user *)arg, &ai, sizeof(ai)))
+		return -EFAULT;
 	return 0;
 }
 
 static noinline int mmc_ioctl_cdrom_next_writable(struct cdrom_device_info *cdi,
-						void __user *arg)
+						  void __user *arg)
 {
 	int ret;
 	long next = 0;
-	cdinfo(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n");
 	ret = cdrom_get_next_writable(cdi, &next);
 	if (ret)
 		return ret;
-	IOCTL_OUT(arg, long, next);
+	if (copy_to_user((long __user *)arg, &next, sizeof(next)))
+		return -EFAULT;
 	return 0;
 }
 
 static noinline int mmc_ioctl_cdrom_last_written(struct cdrom_device_info *cdi,
-						void __user *arg)
+						 void __user *arg)
 {
 	int ret;
 	long last = 0;
-	cdinfo(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n");
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n");
 	ret = cdrom_get_last_written(cdi, &last);
 	if (ret)
 		return ret;
-	IOCTL_OUT(arg, long, last);
+	if (copy_to_user((long __user *)arg, &last, sizeof(last)))
+		return -EFAULT;
 	return 0;
 }
 
@@ -3212,181 +3297,101 @@ static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
 	return -ENOTTY;
 }
 
-static int cdrom_get_track_info(struct cdrom_device_info *cdi, __u16 track, __u8 type,
-			 track_information *ti)
-{
-	struct cdrom_device_ops *cdo = cdi->ops;
-	struct packet_command cgc;
-	int ret, buflen;
-
-	init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ);
-	cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO;
-	cgc.cmd[1] = type & 3;
-	cgc.cmd[4] = (track & 0xff00) >> 8;
-	cgc.cmd[5] = track & 0xff;
-	cgc.cmd[8] = 8;
-	cgc.quiet = 1;
-
-	if ((ret = cdo->generic_packet(cdi, &cgc)))
-		return ret;
-	
-	buflen = be16_to_cpu(ti->track_information_length) +
-		     sizeof(ti->track_information_length);
-
-	if (buflen > sizeof(track_information))
-		buflen = sizeof(track_information);
-
-	cgc.cmd[8] = cgc.buflen = buflen;
-	if ((ret = cdo->generic_packet(cdi, &cgc)))
-		return ret;
-
-	/* return actual fill size */
-	return buflen;
-}
-
-/* requires CD R/RW */
-static int cdrom_get_disc_info(struct cdrom_device_info *cdi, disc_information *di)
+/*
+ * Just about every imaginable ioctl is supported in the Uniform layer
+ * these days.
+ * ATAPI / SCSI specific code now mainly resides in mmc_ioctl().
+ */
+int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
+		fmode_t mode, unsigned int cmd, unsigned long arg)
 {
-	struct cdrom_device_ops *cdo = cdi->ops;
-	struct packet_command cgc;
-	int ret, buflen;
-
-	/* set up command and get the disc info */
-	init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ);
-	cgc.cmd[0] = GPCMD_READ_DISC_INFO;
-	cgc.cmd[8] = cgc.buflen = 2;
-	cgc.quiet = 1;
-
-	if ((ret = cdo->generic_packet(cdi, &cgc)))
-		return ret;
+	void __user *argp = (void __user *)arg;
+	int ret;
 
-	/* not all drives have the same disc_info length, so requeue
-	 * packet with the length the drive tells us it can supply
+	/*
+	 * Try the generic SCSI command ioctls first.
 	 */
-	buflen = be16_to_cpu(di->disc_information_length) +
-		     sizeof(di->disc_information_length);
-
-	if (buflen > sizeof(disc_information))
-		buflen = sizeof(disc_information);
-
-	cgc.cmd[8] = cgc.buflen = buflen;
-	if ((ret = cdo->generic_packet(cdi, &cgc)))
+	ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
+	if (ret != -ENOTTY)
 		return ret;
 
-	/* return actual fill size */
-	return buflen;
-}
-
-/* return the last written block on the CD-R media. this is for the udf
-   file system. */
-int cdrom_get_last_written(struct cdrom_device_info *cdi, long *last_written)
-{
-	struct cdrom_tocentry toc;
-	disc_information di;
-	track_information ti;
-	__u32 last_track;
-	int ret = -1, ti_size;
-
-	if (!CDROM_CAN(CDC_GENERIC_PACKET))
-		goto use_toc;
-
-	ret = cdrom_get_disc_info(cdi, &di);
-	if (ret < (int)(offsetof(typeof(di), last_track_lsb)
-			+ sizeof(di.last_track_lsb)))
-		goto use_toc;
-
-	/* if unit didn't return msb, it's zeroed by cdrom_get_disc_info */
-	last_track = (di.last_track_msb << 8) | di.last_track_lsb;
-	ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti);
-	if (ti_size < (int)offsetof(typeof(ti), track_start))
-		goto use_toc;
-
-	/* if this track is blank, try the previous. */
-	if (ti.blank) {
-		if (last_track==1)
-			goto use_toc;
-		last_track--;
-		ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti);
-	}
-
-	if (ti_size < (int)(offsetof(typeof(ti), track_size)
-				+ sizeof(ti.track_size)))
-		goto use_toc;
-
-	/* if last recorded field is valid, return it. */
-	if (ti.lra_v && ti_size >= (int)(offsetof(typeof(ti), last_rec_address)
-				+ sizeof(ti.last_rec_address))) {
-		*last_written = be32_to_cpu(ti.last_rec_address);
-	} else {
-		/* make it up instead */
-		*last_written = be32_to_cpu(ti.track_start) +
-				be32_to_cpu(ti.track_size);
-		if (ti.free_blocks)
-			*last_written -= (be32_to_cpu(ti.free_blocks) + 7);
+	switch (cmd) {
+	case CDROMMULTISESSION:
+		return cdrom_ioctl_multisession(cdi, argp);
+	case CDROMEJECT:
+		return cdrom_ioctl_eject(cdi);
+	case CDROMCLOSETRAY:
+		return cdrom_ioctl_closetray(cdi);
+	case CDROMEJECT_SW:
+		return cdrom_ioctl_eject_sw(cdi, arg);
+	case CDROM_MEDIA_CHANGED:
+		return cdrom_ioctl_media_changed(cdi, arg);
+	case CDROM_SET_OPTIONS:
+		return cdrom_ioctl_set_options(cdi, arg);
+	case CDROM_CLEAR_OPTIONS:
+		return cdrom_ioctl_clear_options(cdi, arg);
+	case CDROM_SELECT_SPEED:
+		return cdrom_ioctl_select_speed(cdi, arg);
+	case CDROM_SELECT_DISC:
+		return cdrom_ioctl_select_disc(cdi, arg);
+	case CDROMRESET:
+		return cdrom_ioctl_reset(cdi, bdev);
+	case CDROM_LOCKDOOR:
+		return cdrom_ioctl_lock_door(cdi, arg);
+	case CDROM_DEBUG:
+		return cdrom_ioctl_debug(cdi, arg);
+	case CDROM_GET_CAPABILITY:
+		return cdrom_ioctl_get_capability(cdi);
+	case CDROM_GET_MCN:
+		return cdrom_ioctl_get_mcn(cdi, argp);
+	case CDROM_DRIVE_STATUS:
+		return cdrom_ioctl_drive_status(cdi, arg);
+	case CDROM_DISC_STATUS:
+		return cdrom_ioctl_disc_status(cdi);
+	case CDROM_CHANGER_NSLOTS:
+		return cdrom_ioctl_changer_nslots(cdi);
 	}
 	}
-	return 0;
 
-	   GPCMD_READ_DISC_INFO or GPCMD_READ_TRACK_RZONE_INFO or if
-	   it doesn't give enough information or fails. then we return
-	   the toc contents. */
-use_toc:
-	toc.cdte_format = CDROM_MSF;
-	toc.cdte_track = CDROM_LEADOUT;
-	if ((ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &toc)))
-		return ret;
-	sanitize_format(&toc.cdte_addr, &toc.cdte_format, CDROM_LBA);
-	*last_written = toc.cdte_addr.lba;
-	return 0;
-}
-
-/* return the next writable block. also for udf file system. */
-static int cdrom_get_next_writable(struct cdrom_device_info *cdi, long *next_writable)
-{
-	disc_information di;
-	track_information ti;
-	__u16 last_track;
-	int ret, ti_size;
-
-	if (!CDROM_CAN(CDC_GENERIC_PACKET))
-		goto use_last_written;
-
-	ret = cdrom_get_disc_info(cdi, &di);
-	if (ret < 0 || ret < offsetof(typeof(di), last_track_lsb)
-				+ sizeof(di.last_track_lsb))
-		goto use_last_written;
-
-	/* if unit didn't return msb, it's zeroed by cdrom_get_disc_info */
-	last_track = (di.last_track_msb << 8) | di.last_track_lsb;
-	ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti);
-	if (ti_size < 0 || ti_size < offsetof(typeof(ti), track_start))
-		goto use_last_written;
-
-        /* if this track is blank, try the previous. */
-	if (ti.blank) {
-		if (last_track == 1)
-			goto use_last_written;
-		last_track--;
-		ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti);
-		if (ti_size < 0)
-			goto use_last_written;
+	/*
+	 * Use the ioctls that are implemented through the generic_packet()
+	 * interface. this may look a bit funny, but if -ENOTTY is
+	 * returned that particular ioctl is not implemented and we
+	 * let it go through the device specific ones.
+	 */
+	if (CDROM_CAN(CDC_GENERIC_PACKET)) {
+		ret = mmc_ioctl(cdi, cmd, arg);
+		if (ret != -ENOTTY)
+			return ret;
 	}
 
-	/* if next recordable address field is valid, use it. */
-	if (ti.nwa_v && ti_size >= offsetof(typeof(ti), next_writable)
-				+ sizeof(ti.next_writable)) {
-		*next_writable = be32_to_cpu(ti.next_writable);
-		return 0;
+	/*
+	 * Note: most of the cd_dbg() calls are commented out here,
+	 * because they fill up the sys log when CD players poll
+	 * the drive.
+	 */
+	switch (cmd) {
+	case CDROMSUBCHNL:
+		return cdrom_ioctl_get_subchnl(cdi, argp);
+	case CDROMREADTOCHDR:
+		return cdrom_ioctl_read_tochdr(cdi, argp);
+	case CDROMREADTOCENTRY:
+		return cdrom_ioctl_read_tocentry(cdi, argp);
+	case CDROMPLAYMSF:
+		return cdrom_ioctl_play_msf(cdi, argp);
+	case CDROMPLAYTRKIND:
+		return cdrom_ioctl_play_trkind(cdi, argp);
+	case CDROMVOLCTRL:
+		return cdrom_ioctl_volctrl(cdi, argp);
+	case CDROMVOLREAD:
+		return cdrom_ioctl_volread(cdi, argp);
+	case CDROMSTART:
+	case CDROMSTOP:
+	case CDROMPAUSE:
+	case CDROMRESUME:
+		return cdrom_ioctl_audioctl(cdi, cmd);
 	}
 
-use_last_written:
-	if ((ret = cdrom_get_last_written(cdi, next_writable))) {
-		*next_writable = 0;
-		return ret;
-	} else {
-		*next_writable += 7;
-		return 0;
-	}
+	return -ENOSYS;
 }
 
 EXPORT_SYMBOL(cdrom_get_last_written);
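/*
 * (Sketch, not from this patch.) Seen from userspace, every case in the
 * dispatcher above is reached with a plain ioctl(2) on the CD device node;
 * CDROM_DRIVE_STATUS, for instance, is handled by the first switch:
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/cdrom.h>

int main(void)
{
	/* device path is illustrative */
	int fd = open("/dev/sr0", O_RDONLY | O_NONBLOCK);

	if (fd < 0)
		return 1;
	if (ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT) == CDS_DISC_OK)
		printf("disc present\n");
	return 0;
}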

+ 9 - 0
include/linux/blkdev.h

@@ -620,6 +620,15 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define rq_data_dir(rq)		(((rq)->cmd_flags & 1) != 0)
 
+/*
+ * Driver can handle struct request, if it either has an old style
+ * request_fn defined, or is blk-mq based.
+ */
+static inline bool queue_is_rq_based(struct request_queue *q)
+{
+	return q->request_fn || q->mq_ops;
+}
+
 static inline unsigned int blk_queue_cluster(struct request_queue *q)
 {
 	return q->limits.cluster;
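/*
 * (Sketch, not from this patch.) With queue_is_rq_based() above, code that
 * must hand struct request down to a queue can accept both legacy request_fn
 * queues and blk-mq queues with a single test, rather than poking at
 * q->request_fn directly:
 */
static int hypothetical_attach(struct request_queue *q)
{
	if (!queue_is_rq_based(q))
		return -EINVAL;	/* bio-based (stacked) queue: no struct request */
	return 0;
}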

+ 1 - 1
include/xen/interface/io/blkif.h

@@ -86,7 +86,7 @@ typedef uint64_t blkif_sector_t;
  *     Interface%20manuals/100293068c.pdf
  * The backend can optionally provide three extra XenBus attributes to
  * further optimize the discard functionality:
- * 'discard-aligment' - Devices that support discard functionality may
+ * 'discard-alignment' - Devices that support discard functionality may
  * internally allocate space in units that are bigger than the exported
  * logical block size. The discard-alignment parameter indicates how many bytes
  * the beginning of the partition is offset from the internal allocation unit's