Re: [PATCH v3 net-next 5/7] rds: zerocopy Tx support.

2018-02-15 Thread Willem de Bruijn
On Thu, Feb 15, 2018 at 2:47 PM, Santosh Shilimkar
 wrote:
> On 2/15/2018 10:49 AM, Sowmini Varadhan wrote:
>>
>> If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and,
>> if the SO_ZEROCOPY socket option has been set on the PF_RDS socket,
>> application pages sent down with rds_sendmsg() are pinned.
>>
>> The pinning uses the accounting infrastructure added by
>> Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages")
>>
>> The payload bytes in the message may not be modified for the
>> duration that the message has been pinned. A multi-threaded
>> application using this infrastructure may thus need to be notified
>> about send-completion so that it can free/reuse the buffers
>> passed to rds_sendmsg(). Notification of send-completion will
>> identify each message-buffer by a cookie that the application
>> must specify as ancillary data to rds_sendmsg().
>> The ancillary data in this case has cmsg_level == SOL_RDS
>> and cmsg_type == RDS_CMSG_ZCOPY_COOKIE.
>>
>> Signed-off-by: Sowmini Varadhan 
>> ---
>
>
> Acked-by: Santosh Shilimkar 

Acked-by: Willem de Bruijn 


Re: [PATCH v3 net-next 5/7] rds: zerocopy Tx support.

2018-02-15 Thread Santosh Shilimkar

On 2/15/2018 10:49 AM, Sowmini Varadhan wrote:

If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and,
if the SO_ZEROCOPY socket option has been set on the PF_RDS socket,
application pages sent down with rds_sendmsg() are pinned.

The pinning uses the accounting infrastructure added by
Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages")

The payload bytes in the message may not be modified for the
duration that the message has been pinned. A multi-threaded
application using this infrastructure may thus need to be notified
about send-completion so that it can free/reuse the buffers
passed to rds_sendmsg(). Notification of send-completion will
identify each message-buffer by a cookie that the application
must specify as ancillary data to rds_sendmsg().
The ancillary data in this case has cmsg_level == SOL_RDS
and cmsg_type == RDS_CMSG_ZCOPY_COOKIE.

Signed-off-by: Sowmini Varadhan 
---


Acked-by: Santosh Shilimkar 



[PATCH v3 net-next 5/7] rds: zerocopy Tx support.

2018-02-15 Thread Sowmini Varadhan
If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and,
if the SO_ZEROCOPY socket option has been set on the PF_RDS socket,
application pages sent down with rds_sendmsg() are pinned.

The pinning uses the accounting infrastructure added by
Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages")

The payload bytes in the message may not be modified for the
duration that the message has been pinned. A multi-threaded
application using this infrastructure may thus need to be notified
about send-completion so that it can free/reuse the buffers
passed to rds_sendmsg(). Notification of send-completion will
identify each message-buffer by a cookie that the application
must specify as ancillary data to rds_sendmsg().
The ancillary data in this case has cmsg_level == SOL_RDS
and cmsg_type == RDS_CMSG_ZCOPY_COOKIE.

Signed-off-by: Sowmini Varadhan 
---
v2:
  - remove unused data_len argument to rds_rm_size;
  - unmap as necessary if we fail in the middle of zerocopy setup
v3: remove needless bzero of skb->cb[], consolidate err cleanup
 include/uapi/linux/rds.h |  1 +
 net/rds/message.c        | 51 +-
 net/rds/rds.h            |  3 +-
 net/rds/send.c           | 44 ++-
 4 files changed, 91 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index e71d449..12e3bca 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -103,6 +103,7 @@
 #define RDS_CMSG_MASKED_ATOMIC_FADD     8
 #define RDS_CMSG_MASKED_ATOMIC_CSWP     9
 #define RDS_CMSG_RXPATH_LATENCY         11
+#define RDS_CMSG_ZCOPY_COOKIE           12
 
 #define RDS_INFO_FIRST 1
 #define RDS_INFO_COUNTERS  1
diff --git a/net/rds/message.c b/net/rds/message.c
index bf1a656..6518345 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -341,12 +341,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
return rm;
 }
 
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+  bool zcopy)
 {
unsigned long to_copy, nbytes;
unsigned long sg_off;
struct scatterlist *sg;
int ret = 0;
+   int length = iov_iter_count(from);
 
rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
 
@@ -356,6 +358,53 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
sg = rm->data.op_sg;
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
 
+   if (zcopy) {
+   int total_copied = 0;
+   struct sk_buff *skb;
+
+   skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
+   GFP_KERNEL);
+   if (!skb)
+   return -ENOMEM;
+   rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
+   if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+   length)) {
+   ret = -ENOMEM;
+   goto err;
+   }
+   while (iov_iter_count(from)) {
+   struct page *pages;
+   size_t start;
+   ssize_t copied;
+
+   copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+   1, &start);
+   if (copied < 0) {
+   struct mmpin *mmp;
+   int i;
+
+   for (i = 0; i < rm->data.op_nents; i++)
+   put_page(sg_page(&rm->data.op_sg[i]));
+   mmp = &rm->data.op_mmp_znotifier->z_mmp;
+   mm_unaccount_pinned_pages(mmp);
+   ret = -EFAULT;
+   goto err;
+   }
+   total_copied += copied;
+   iov_iter_advance(from, copied);
+   length -= copied;
+   sg_set_page(sg, pages, copied, start);
+   rm->data.op_nents++;
+   sg++;
+   }
+   WARN_ON_ONCE(length != 0);
+   return ret;
+err:
+   consume_skb(skb);
+   rm->data.op_mmp_znotifier = NULL;
+   return ret;
+   } /* zcopy */
+
while (iov_iter_count(from)) {
if (!sg_page(sg)) {
ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 24576bc..31cd388 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -785,7 +785,8 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
 /* message.c */