|
@@ -875,12 +875,13 @@ out:
|
|
|
* rds_message is getting to be quite complicated, and we'd like to allocate
|
|
|
* it all in one go. This figures out how big it needs to be up front.
|
|
|
*/
|
|
|
-static int rds_rm_size(struct msghdr *msg, int data_len)
|
|
|
+static int rds_rm_size(struct msghdr *msg, int num_sgs)
|
|
|
{
|
|
|
struct cmsghdr *cmsg;
|
|
|
int size = 0;
|
|
|
int cmsg_groups = 0;
|
|
|
int retval;
|
|
|
+ bool zcopy_cookie = false;
|
|
|
|
|
|
for_each_cmsghdr(cmsg, msg) {
|
|
|
if (!CMSG_OK(msg, cmsg))
|
|
@@ -899,6 +900,8 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
|
|
|
|
|
|
break;
|
|
|
|
|
|
+ case RDS_CMSG_ZCOPY_COOKIE:
|
|
|
+ zcopy_cookie = true;
|
|
|
case RDS_CMSG_RDMA_DEST:
|
|
|
case RDS_CMSG_RDMA_MAP:
|
|
|
cmsg_groups |= 2;
|
|
@@ -919,7 +922,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
|
|
|
|
|
|
}
|
|
|
|
|
|
- size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
|
|
|
+ if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ size += num_sgs * sizeof(struct scatterlist);
|
|
|
|
|
|
/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
|
|
|
if (cmsg_groups == 3)
|
|
@@ -928,6 +934,32 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 	return size;
 }
 
+/*
+ * Handle an RDS_CMSG_ZCOPY_COOKIE control message by copying the caller's
+ * 32-bit cookie into the message's zerocopy notifier.
+ *
+ * Returns 0 on success, or -EINVAL when the cmsg is too short to hold a
+ * cookie or when rm has no notifier to store it in.
+ */
+static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
+			  struct cmsghdr *cmsg)
+{
+	u32 *cookie;
+
+	/*
+	 * rds_cmsg_send() routes every cookie cmsg here and does not appear
+	 * to check whether the send is actually zerocopy.  The notifier is
+	 * presumably only allocated on the zerocopy path (TODO confirm
+	 * against rds_message_copy_from_user()), so reject the cookie rather
+	 * than dereference a NULL pointer.
+	 */
+	if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) ||
+	    !rm->data.op_mmp_znotifier)
+		return -EINVAL;
+	cookie = CMSG_DATA(cmsg);
+	rm->data.op_mmp_znotifier->z_cookie = *cookie;
+	return 0;
+}
+
 static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
 			 struct msghdr *msg, int *allocated_mr)
 {
|
|
@@ -970,6 +988,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
|
|
|
ret = rds_cmsg_atomic(rs, rm, cmsg);
|
|
|
break;
|
|
|
|
|
|
+ case RDS_CMSG_ZCOPY_COOKIE:
|
|
|
+ ret = rds_cmsg_zcopy(rs, rm, cmsg);
|
|
|
+ break;
|
|
|
+
|
|
|
default:
|
|
|
return -EINVAL;
|
|
|
}
|
|
@@ -1040,10 +1062,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
|
|
long timeo = sock_sndtimeo(sk, nonblock);
|
|
|
struct rds_conn_path *cpath;
|
|
|
size_t total_payload_len = payload_len, rdma_payload_len = 0;
|
|
|
+ bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
|
|
|
+ sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
|
|
|
+ int num_sgs = ceil(payload_len, PAGE_SIZE);
|
|
|
|
|
|
/* Mirror Linux UDP mirror of BSD error message compatibility */
|
|
|
/* XXX: Perhaps MSG_MORE someday */
|
|
|
- if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
|
|
|
+ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
|
|
|
ret = -EOPNOTSUPP;
|
|
|
goto out;
|
|
|
}
|
|
@@ -1087,8 +1112,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
|
|
goto out;
|
|
|
}
|
|
|
|
|
|
+ if (zcopy) {
|
|
|
+ if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
|
|
|
+ ret = -EOPNOTSUPP;
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+ num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
|
|
|
+ }
|
|
|
/* size of rm including all sgs */
|
|
|
- ret = rds_rm_size(msg, payload_len);
|
|
|
+ ret = rds_rm_size(msg, num_sgs);
|
|
|
if (ret < 0)
|
|
|
goto out;
|
|
|
|
|
@@ -1100,12 +1132,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
|
|
|
|
|
/* Attach data to the rm */
|
|
|
if (payload_len) {
|
|
|
- rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
|
|
|
+ rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
|
|
|
if (!rm->data.op_sg) {
|
|
|
ret = -ENOMEM;
|
|
|
goto out;
|
|
|
}
|
|
|
- ret = rds_message_copy_from_user(rm, &msg->msg_iter);
|
|
|
+ ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
|
|
|
if (ret)
|
|
|
goto out;
|
|
|
}
|