@@ -345,12 +345,91 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_
 	return 1;
 }
 
+static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_next == NULL)
+		connection->req_next = req;
+}
+
+static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_next != req)
+		return;
+	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+		const unsigned s = req->rq_state;
+		if (s & RQ_NET_QUEUED)
+			break;
+	}
+	if (&req->tl_requests == &connection->transfer_log)
+		req = NULL;
+	connection->req_next = req;
+}
+
+static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_ack_pending == NULL)
+		connection->req_ack_pending = req;
+}
+
+static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_ack_pending != req)
+		return;
+	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+		const unsigned s = req->rq_state;
+		if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
+			break;
+	}
+	if (&req->tl_requests == &connection->transfer_log)
+		req = NULL;
+	connection->req_ack_pending = req;
+}
+
+static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_not_net_done == NULL)
+		connection->req_not_net_done = req;
+}
+
+static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_not_net_done != req)
+		return;
+	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+		const unsigned s = req->rq_state;
+		if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
+			break;
+	}
+	if (&req->tl_requests == &connection->transfer_log)
+		req = NULL;
+	connection->req_not_net_done = req;
+}
+
 /* I'd like this to be the only place that manipulates
  * req->completion_ref and req->kref. */
 static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 		int clear, int set)
 {
 	struct drbd_device *device = req->device;
+	struct drbd_peer_device *peer_device = first_peer_device(device);
 	unsigned s = req->rq_state;
 	int c_put = 0;
 	int k_put = 0;
@@ -379,6 +458,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 
 	if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
 		atomic_inc(&req->completion_ref);
+		set_if_null_req_next(peer_device, req);
 	}
 
 	if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
@@ -386,8 +466,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 
 	if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
 		/* potentially already completed in the asender thread */
-		if (!(s & RQ_NET_DONE))
+		if (!(s & RQ_NET_DONE)) {
 			atomic_add(req->i.size >> 9, &device->ap_in_flight);
+			set_if_null_req_not_net_done(peer_device, req);
+		}
+		if (s & RQ_NET_PENDING)
+			set_if_null_req_ack_pending(peer_device, req);
 	}
 
 	if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
@@ -418,10 +502,13 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 		dec_ap_pending(device);
 		++c_put;
 		req->acked_jif = jiffies;
+		advance_conn_req_ack_pending(peer_device, req);
 	}
 
-	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED))
+	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
 		++c_put;
+		advance_conn_req_next(peer_device, req);
+	}
 
 	if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
 		if (s & RQ_NET_SENT)
@@ -429,6 +516,13 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 		if (s & RQ_EXP_BARR_ACK)
 			++k_put;
 		req->net_done_jif = jiffies;
+
+		/* in ahead/behind mode, or just in case,
+		 * before we finally destroy this request,
+		 * the caching pointers must not reference it anymore */
+		advance_conn_req_next(peer_device, req);
+		advance_conn_req_ack_pending(peer_device, req);
+		advance_conn_req_not_net_done(peer_device, req);
 	}
 
 	/* potentially complete and destroy */
@@ -1423,36 +1517,13 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
 	return limit;
 }
 
-static void find_oldest_requests(
-	struct drbd_connection *connection,
-	struct drbd_device *device,
-	struct drbd_request **oldest_req_waiting_for_peer,
-	struct drbd_request **oldest_req_waiting_for_disk)
-{
-	struct drbd_request *r;
-	*oldest_req_waiting_for_peer = NULL;
-	*oldest_req_waiting_for_disk = NULL;
-	list_for_each_entry(r, &connection->transfer_log, tl_requests) {
-		const unsigned s = r->rq_state;
-		if (!*oldest_req_waiting_for_peer
-		 && ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
-			*oldest_req_waiting_for_peer = r;
-
-		if (!*oldest_req_waiting_for_disk
-		 && (s & RQ_LOCAL_PENDING) && r->device == device)
-			*oldest_req_waiting_for_disk = r;
-
-		if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
-			break;
-	}
-}
-
 void request_timer_fn(unsigned long data)
 {
 	struct drbd_device *device = (struct drbd_device *) data;
 	struct drbd_connection *connection = first_peer_device(device)->connection;
-	struct drbd_request *req_disk, *req_peer; /* oldest request */
+	struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
 	struct net_conf *nc;
+	unsigned long oldest_submit_jif;
 	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
 	unsigned long now;
 
@@ -1473,14 +1544,31 @@ void request_timer_fn(unsigned long data)
 		return; /* Recurring timer stopped */
 
 	now = jiffies;
+	nt = now + et;
 
 	spin_lock_irq(&device->resource->req_lock);
-	find_oldest_requests(connection, device, &req_peer, &req_disk);
-	if (req_peer == NULL && req_disk == NULL) {
-		spin_unlock_irq(&device->resource->req_lock);
-		mod_timer(&device->request_timer, now + et);
-		return;
-	}
+	req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
+	req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
+	req_peer = connection->req_not_net_done;
+	/* maybe the oldest request waiting for the peer is in fact still
+	 * blocking in tcp sendmsg */
+	if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
+		req_peer = connection->req_next;
+
+	/* evaluate the oldest peer request only in one timer! */
+	if (req_peer && req_peer->device != device)
+		req_peer = NULL;
+
+	/* do we have something to evaluate? */
+	if (req_peer == NULL && req_write == NULL && req_read == NULL)
+		goto out;
+
+	oldest_submit_jif =
+		(req_write && req_read)
+		? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
+		  ? req_write->pre_submit_jif : req_read->pre_submit_jif )
+		: req_write ? req_write->pre_submit_jif
+		: req_read ? req_read->pre_submit_jif : now;
 
 	/* The request is considered timed out, if
 	 * - we have some effective timeout from the configuration,
@@ -1499,13 +1587,13 @@ void request_timer_fn(unsigned long data)
 	 * to expire twice (worst case) to become effective. Good enough.
 	 */
 	if (ent && req_peer &&
-	    time_after(now, req_peer->start_jif + ent) &&
+	    time_after(now, req_peer->pre_send_jif + ent) &&
 	    !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
 		drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
 		_drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
 	}
-	if (dt && req_disk &&
-	    time_after(now, req_disk->start_jif + dt) &&
+	if (dt && oldest_submit_jif != now &&
+	    time_after(now, oldest_submit_jif + dt) &&
 	    !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
 		drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
 		__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
@@ -1513,11 +1601,12 @@ void request_timer_fn(unsigned long data)
 
 	/* Reschedule timer for the nearest not already expired timeout.
 	 * Fallback to now + min(effective network timeout, disk timeout). */
-	ent = (ent && req_peer && time_before(now, req_peer->start_jif + ent))
-		? req_peer->start_jif + ent : now + et;
-	dt = (dt && req_disk && time_before(now, req_disk->start_jif + dt))
-		? req_disk->start_jif + dt : now + et;
+	ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
+		? req_peer->pre_send_jif + ent : now + et;
+	dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
+		? oldest_submit_jif + dt : now + et;
 	nt = time_before(ent, dt) ? ent : dt;
+out:
 	spin_unlock_irq(&connection->resource->req_lock);
 	mod_timer(&device->request_timer, nt);
 }