浏览代码

orangefs: delay freeing slot until cancel completes

Make cancels reuse the aborted read/write op, to make sure they do not
fail on lack of memory.

Don't issue a cancel unless the daemon has seen our read/write, has not
replied and isn't being shut down.

If cancel *is* issued, don't wait for it to complete; stash the slot
in there and just have it freed when cancel is finally replied to or
purged (and delay dropping the reference until then, obviously).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
Al Viro 9 年之前
父节点
当前提交
78699e29fd

+ 7 - 0
fs/orangefs/devorangefs-req.c

@@ -438,6 +438,8 @@ wakeup:
 		}
 	}
 out:
+	if (unlikely(op_is_cancel(op)))
+		put_cancel(op);
 	op_release(op);
 	return ret;
 
@@ -546,6 +548,11 @@ int is_daemon_in_service(void)
 	return in_service;
 }
 
+bool __is_daemon_in_service(void)
+{
+	return open_access_count == 1;
+}
+
 static inline long check_ioctl_command(unsigned int command)
 {
 	/* Check for valid ioctl codes */

+ 4 - 12
fs/orangefs/file.c

@@ -181,17 +181,6 @@ populate_shared_memory:
 	}
 
 	if (ret < 0) {
-		/*
-		 * XXX: needs to be optimized - we only need to cancel if it
-		 * had been seen by daemon and not completed
-		 */
-		if (!op_state_serviced(new_op)) {
-			orangefs_cancel_op_in_progress(new_op->tag);
-		} else {
-			complete(&new_op->done);
-		}
-		orangefs_bufmap_put(buffer_index);
-		buffer_index = -1;
 		/*
 		 * don't write an error to syslog on signaled operation
 		 * termination unless we've got debugging turned on, as
@@ -207,7 +196,10 @@ populate_shared_memory:
 				type == ORANGEFS_IO_READ ?
 					"read from" : "write to",
 				handle, ret);
-		goto out;
+		if (orangefs_cancel_op_in_progress(new_op))
+			return ret;
+
+		goto done_copying;
 	}
 
 	/*

+ 10 - 6
fs/orangefs/orangefs-cache.c

@@ -101,6 +101,15 @@ char *get_opname_string(struct orangefs_kernel_op_s *new_op)
 	return "OP_UNKNOWN?";
 }
 
+void orangefs_new_tag(struct orangefs_kernel_op_s *op)
+{
+	spin_lock(&next_tag_value_lock);
+	op->tag = next_tag_value++;
+	if (next_tag_value == 0)
+		next_tag_value = 100;
+	spin_unlock(&next_tag_value_lock);
+}
+
 struct orangefs_kernel_op_s *op_alloc(__s32 type)
 {
 	struct orangefs_kernel_op_s *new_op = NULL;
@@ -120,14 +129,9 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type)
 		new_op->downcall.status = -1;
 
 		new_op->op_state = OP_VFS_STATE_UNKNOWN;
-		new_op->tag = 0;
 
 		/* initialize the op specific tag and upcall credentials */
-		spin_lock(&next_tag_value_lock);
-		new_op->tag = next_tag_value++;
-		if (next_tag_value == 0)
-			next_tag_value = 100;
-		spin_unlock(&next_tag_value_lock);
+		orangefs_new_tag(new_op);
 		new_op->upcall.type = type;
 		new_op->attempts = 0;
 		gossip_debug(GOSSIP_CACHE_DEBUG,

+ 32 - 8
fs/orangefs/orangefs-kernel.h

@@ -190,9 +190,14 @@ struct orangefs_kernel_op_s {
 	/*
 	 * Set uses_shared_memory to 1 if this operation uses shared memory.
 	 * If true, then a retry on the op must also get a new shared memory
-	 * buffer and re-populate it.
+	 * buffer and re-populate it.  Cancels don't care - it only matters
+	 * for service_operation() retry logics and cancels don't go through
+	 * it anymore.
 	 */
-	int uses_shared_memory;
+	union {
+		int uses_shared_memory;
+		int slot_to_free;
+	};
 
 	struct orangefs_upcall_s upcall;
 	struct orangefs_downcall_s downcall;
@@ -219,17 +224,13 @@ static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op)
 	op->op_state = OP_VFS_STATE_SERVICED;
 	wake_up_interruptible(&op->waitq);
 }
-static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
-{
-	op->op_state |= OP_VFS_STATE_PURGED;
-	wake_up_interruptible(&op->waitq);
-}
 
 #define op_state_waiting(op)     ((op)->op_state & OP_VFS_STATE_WAITING)
 #define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR)
 #define op_state_serviced(op)    ((op)->op_state & OP_VFS_STATE_SERVICED)
 #define op_state_purged(op)      ((op)->op_state & OP_VFS_STATE_PURGED)
 #define op_state_given_up(op)    ((op)->op_state & OP_VFS_STATE_GIVEN_UP)
+#define op_is_cancel(op)         ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL)
 
 static inline void get_op(struct orangefs_kernel_op_s *op)
 {
@@ -249,6 +250,27 @@ static inline void op_release(struct orangefs_kernel_op_s *op)
 	}
 }
 
+extern void orangefs_bufmap_put(int);
+static inline void put_cancel(struct orangefs_kernel_op_s *op)
+{
+	orangefs_bufmap_put(op->slot_to_free);
+	op_release(op);
+}
+
+static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
+{
+	spin_lock(&op->lock);
+	if (unlikely(op_is_cancel(op))) {
+		list_del(&op->list);
+		spin_unlock(&op->lock);
+		put_cancel(op);
+	} else {
+		op->op_state |= OP_VFS_STATE_PURGED;
+		wake_up_interruptible(&op->waitq);
+		spin_unlock(&op->lock);
+	}
+}
+
 /* per inode private orangefs info */
 struct orangefs_inode_s {
 	struct orangefs_object_kref refn;
@@ -448,6 +470,7 @@ static inline int match_handle(struct orangefs_khandle resp_handle,
 int op_cache_initialize(void);
 int op_cache_finalize(void);
 struct orangefs_kernel_op_s *op_alloc(__s32 type);
+void orangefs_new_tag(struct orangefs_kernel_op_s *op);
 char *get_opname_string(struct orangefs_kernel_op_s *new_op);
 
 int orangefs_inode_cache_initialize(void);
@@ -528,6 +551,7 @@ ssize_t orangefs_inode_read(struct inode *inode,
 int orangefs_dev_init(void);
 void orangefs_dev_cleanup(void);
 int is_daemon_in_service(void);
+bool __is_daemon_in_service(void);
 int fs_mount_pending(__s32 fsid);
 
 /*
@@ -562,7 +586,7 @@ void orangefs_set_signals(sigset_t *);
 
 int orangefs_unmount_sb(struct super_block *sb);
 
-int orangefs_cancel_op_in_progress(__u64 tag);
+bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
 
 static inline __u64 orangefs_convert_time_field(const struct timespec *ts)
 {

+ 0 - 2
fs/orangefs/orangefs-mod.c

@@ -260,14 +260,12 @@ void purge_inprogress_ops(void)
 					 next,
 					 &htable_ops_in_progress[i],
 					 list) {
-			spin_lock(&op->lock);
 			gossip_debug(GOSSIP_INIT_DEBUG,
 				"pvfs2-client-core: purging in-progress op tag "
 				"%llu %s\n",
 				llu(op->tag),
 				get_opname_string(op));
 			set_op_state_purged(op);
-			spin_unlock(&op->lock);
 		}
 		spin_unlock(&htable_ops_in_progress_lock);
 	}

+ 0 - 32
fs/orangefs/orangefs-utils.c

@@ -688,38 +688,6 @@ int orangefs_unmount_sb(struct super_block *sb)
 	return ret;
 }
 
-/*
- * NOTE: on successful cancellation, be sure to return -EINTR, as
- * that's the return value the caller expects
- */
-int orangefs_cancel_op_in_progress(__u64 tag)
-{
-	int ret = -EINVAL;
-	struct orangefs_kernel_op_s *new_op = NULL;
-
-	gossip_debug(GOSSIP_UTILS_DEBUG,
-		     "orangefs_cancel_op_in_progress called on tag %llu\n",
-		     llu(tag));
-
-	new_op = op_alloc(ORANGEFS_VFS_OP_CANCEL);
-	if (!new_op)
-		return -ENOMEM;
-	new_op->upcall.req.cancel.op_tag = tag;
-
-	gossip_debug(GOSSIP_UTILS_DEBUG,
-		     "Attempting ORANGEFS operation cancellation of tag %llu\n",
-		     llu(new_op->upcall.req.cancel.op_tag));
-
-	ret = service_operation(new_op, "orangefs_cancel", ORANGEFS_OP_CANCELLATION);
-
-	gossip_debug(GOSSIP_UTILS_DEBUG,
-		     "orangefs_cancel_op_in_progress: got return value of %d\n",
-		     ret);
-
-	op_release(new_op);
-	return ret;
-}
-
 void orangefs_make_bad_inode(struct inode *inode)
 {
 	if (is_root_handle(inode)) {

+ 40 - 93
fs/orangefs/waitqueue.c

@@ -16,7 +16,6 @@
 #include "orangefs-kernel.h"
 #include "orangefs-bufmap.h"
 
-static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *);
 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *);
 
 /*
@@ -36,25 +35,29 @@ void purge_waiting_ops(void)
 			     "pvfs2-client-core: purging op tag %llu %s\n",
 			     llu(op->tag),
 			     get_opname_string(op));
-		spin_lock(&op->lock);
 		set_op_state_purged(op);
-		spin_unlock(&op->lock);
 	}
 	spin_unlock(&orangefs_request_list_lock);
 }
 
 static inline void
-add_op_to_request_list(struct orangefs_kernel_op_s *op)
+__add_op_to_request_list(struct orangefs_kernel_op_s *op)
 {
-	spin_lock(&orangefs_request_list_lock);
 	spin_lock(&op->lock);
 	set_op_state_waiting(op);
 	list_add_tail(&op->list, &orangefs_request_list);
-	spin_unlock(&orangefs_request_list_lock);
 	spin_unlock(&op->lock);
 	wake_up_interruptible(&orangefs_request_list_waitq);
 }
 
+static inline void
+add_op_to_request_list(struct orangefs_kernel_op_s *op)
+{
+	spin_lock(&orangefs_request_list_lock);
+	__add_op_to_request_list(op);
+	spin_unlock(&orangefs_request_list_lock);
+}
+
 static inline
 void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op)
 {
@@ -159,15 +162,7 @@ retry_servicing:
 	if (flags & ORANGEFS_OP_ASYNC)
 		return 0;
 
-	if (flags & ORANGEFS_OP_CANCELLATION) {
-		gossip_debug(GOSSIP_WAIT_DEBUG,
-			     "%s:"
-			     "About to call wait_for_cancellation_downcall.\n",
-			     __func__);
-		ret = wait_for_cancellation_downcall(op);
-	} else {
-		ret = wait_for_matching_downcall(op);
-	}
+	ret = wait_for_matching_downcall(op);
 
 	if (ret < 0) {
 		/* failed to get matching downcall */
@@ -273,6 +268,36 @@ retry_servicing:
 	return ret;
 }
 
+bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op)
+{
+	u64 tag = op->tag;
+	if (!op_state_in_progress(op))
+		return false;
+
+	op->slot_to_free = op->upcall.req.io.buf_index;
+	memset(&op->upcall, 0, sizeof(op->upcall));
+	memset(&op->downcall, 0, sizeof(op->downcall));
+	op->upcall.type = ORANGEFS_VFS_OP_CANCEL;
+	op->upcall.req.cancel.op_tag = tag;
+	op->downcall.type = ORANGEFS_VFS_OP_INVALID;
+	op->downcall.status = -1;
+	orangefs_new_tag(op);
+
+	spin_lock(&orangefs_request_list_lock);
+	/* orangefs_request_list_lock is enough of a barrier here */
+	if (!__is_daemon_in_service()) {
+		spin_unlock(&orangefs_request_list_lock);
+		return false;
+	}
+	__add_op_to_request_list(op);
+	spin_unlock(&orangefs_request_list_lock);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Attempting ORANGEFS operation cancellation of tag %llu\n",
+		     llu(tag));
+	return true;
+}
+
 static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
 {
 	/*
@@ -426,81 +451,3 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
 
 	return ret;
 }
-
-/*
- * similar to wait_for_matching_downcall(), but used in the special case
- * of I/O cancellations.
- *
- * Note we need a special wait function because if this is called we already
- *      know that a signal is pending in current and need to service the
- *      cancellation upcall anyway.  the only way to exit this is to either
- *      timeout or have the cancellation be serviced properly.
- */
-static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op)
-{
-	int ret = -EINVAL;
-	DEFINE_WAIT(wait_entry);
-
-	while (1) {
-		spin_lock(&op->lock);
-		prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
-		if (op_state_serviced(op)) {
-			gossip_debug(GOSSIP_WAIT_DEBUG,
-				     "%s:op-state is SERVICED.\n",
-				     __func__);
-			spin_unlock(&op->lock);
-			ret = 0;
-			break;
-		}
-
-		if (signal_pending(current)) {
-			gossip_debug(GOSSIP_WAIT_DEBUG,
-				     "%s:operation interrupted by a signal (tag"
-				     " %llu, op %p)\n",
-				     __func__,
-				     llu(op->tag),
-				     op);
-			orangefs_clean_up_interrupted_operation(op);
-			ret = -EINTR;
-			break;
-		}
-
-		gossip_debug(GOSSIP_WAIT_DEBUG,
-			     "%s:About to call schedule_timeout.\n",
-			     __func__);
-		spin_unlock(&op->lock);
-		ret = schedule_timeout(op_timeout_secs * HZ);
-
-		gossip_debug(GOSSIP_WAIT_DEBUG,
-			     "%s:Value returned from schedule_timeout(%d).\n",
-			     __func__,
-			     ret);
-		if (!ret) {
-			gossip_debug(GOSSIP_WAIT_DEBUG,
-				     "%s:*** operation timed out: %p\n",
-				     __func__,
-				     op);
-			spin_lock(&op->lock);
-			orangefs_clean_up_interrupted_operation(op);
-			ret = -ETIMEDOUT;
-			break;
-		}
-
-		gossip_debug(GOSSIP_WAIT_DEBUG,
-			     "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
-			     __func__);
-		ret = -ETIMEDOUT;
-		break;
-	}
-
-	spin_lock(&op->lock);
-	finish_wait(&op->waitq, &wait_entry);
-	spin_unlock(&op->lock);
-
-	gossip_debug(GOSSIP_WAIT_DEBUG,
-		     "%s:returning ret(%d)\n",
-		     __func__,
-		     ret);
-
-	return ret;
-}