Re: [PATCH v3 net-next 4/7] rds: support for zcopy completion notification

2018-02-15 Thread Willem de Bruijn
On Thu, Feb 15, 2018 at 2:46 PM, Santosh Shilimkar
 wrote:
> On 2/15/2018 10:49 AM, Sowmini Varadhan wrote:
>>
>> RDS removes a datagram (rds_message) from the retransmit queue when
>> an ACK is received. The ACK indicates that the receiver has queued
>> the RDS datagram, so that the sender can safely forget the datagram.
>> When all references to the rds_message are quiesced, rds_message_purge
>> is called to release resources used by the rds_message.
>>
>> If the datagram to be removed had pinned pages set up, add
>> an entry to the rs->rs_znotify_queue so that the notification
>> will be sent up via rds_rm_zerocopy_callback() when the
>> rds_message is eventually freed by rds_message_purge.
>>
>> rds_rm_zerocopy_callback() attempts to batch the number of cookies
>> sent with each notification to a max of SO_EE_ORIGIN_MAX_ZCOOKIES.
>> This is achieved by checking the tail skb in the sk_error_queue:
>> if this has room for one more cookie, the cookie from the
>> current notification is added; else a new skb is added to the
>> sk_error_queue. Every invocation of rds_rm_zerocopy_callback() will
>> trigger a ->sk_error_report to notify the application.
>>
>> Signed-off-by: Sowmini Varadhan 
>> ---
>
> Acked-by: Santosh Shilimkar 

Acked-by: Willem de Bruijn 


Re: [PATCH v3 net-next 4/7] rds: support for zcopy completion notification

2018-02-15 Thread Santosh Shilimkar

On 2/15/2018 10:49 AM, Sowmini Varadhan wrote:

RDS removes a datagram (rds_message) from the retransmit queue when
an ACK is received. The ACK indicates that the receiver has queued
the RDS datagram, so that the sender can safely forget the datagram.
When all references to the rds_message are quiesced, rds_message_purge
is called to release resources used by the rds_message.

If the datagram to be removed had pinned pages set up, add
an entry to the rs->rs_znotify_queue so that the notification
will be sent up via rds_rm_zerocopy_callback() when the
rds_message is eventually freed by rds_message_purge.

rds_rm_zerocopy_callback() attempts to batch the number of cookies
sent with each notification to a max of SO_EE_ORIGIN_MAX_ZCOOKIES.
This is achieved by checking the tail skb in the sk_error_queue:
if this has room for one more cookie, the cookie from the
current notification is added; else a new skb is added to the
sk_error_queue. Every invocation of rds_rm_zerocopy_callback() will
trigger a ->sk_error_report to notify the application.

Signed-off-by: Sowmini Varadhan 
---

Acked-by: Santosh Shilimkar 


[PATCH v3 net-next 4/7] rds: support for zcopy completion notification

2018-02-15 Thread Sowmini Varadhan
RDS removes a datagram (rds_message) from the retransmit queue when
an ACK is received. The ACK indicates that the receiver has queued
the RDS datagram, so that the sender can safely forget the datagram.
When all references to the rds_message are quiesced, rds_message_purge
is called to release resources used by the rds_message.

If the datagram to be removed had pinned pages set up, add
an entry to the rs->rs_znotify_queue so that the notification
will be sent up via rds_rm_zerocopy_callback() when the
rds_message is eventually freed by rds_message_purge.

rds_rm_zerocopy_callback() attempts to batch the number of cookies
sent with each notification to a max of SO_EE_ORIGIN_MAX_ZCOOKIES.
This is achieved by checking the tail skb in the sk_error_queue:
if this has room for one more cookie, the cookie from the
current notification is added; else a new skb is added to the
sk_error_queue. Every invocation of rds_rm_zerocopy_callback() will
trigger a ->sk_error_report to notify the application.

Signed-off-by: Sowmini Varadhan 
---
v2:
  - make sure to always sock_put m_rs even if there is no znotifier.
  - major rewrite of notification, resulting in much simplification.
v3:
  - fix fragile use of skb->cb[], do not set ee_code incorrectly.
 include/uapi/linux/errqueue.h |2 +
 net/rds/af_rds.c  |2 +
 net/rds/message.c |   83 +---
 net/rds/rds.h |   14 +++
 net/rds/recv.c|2 +
 5 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index dc64cfa..28812ed 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -20,11 +20,13 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP6 3
 #define SO_EE_ORIGIN_TXSTATUS  4
 #define SO_EE_ORIGIN_ZEROCOPY  5
+#define SO_EE_ORIGIN_ZCOOKIE   6
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1))
 
 #define SO_EE_CODE_ZEROCOPY_COPIED 1
+#define SO_EE_ORIGIN_MAX_ZCOOKIES  8
 
 /**
  * struct scm_timestamping - timestamps exposed through cmsg
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 0a8eefd..a937f18 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -182,6 +182,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
mask |= (EPOLLIN | EPOLLRDNORM);
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
mask |= (EPOLLOUT | EPOLLWRNORM);
+   if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+   mask |= POLLERR;
read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 
/* clear state any time we wake a seen-congested socket */
diff --git a/net/rds/message.c b/net/rds/message.c
index ef3daaf..bf1a656 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -33,6 +33,9 @@
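 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/errqueue.h>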
 
 #include "rds.h"
 
@@ -53,29 +56,95 @@ void rds_message_addref(struct rds_message *rm)
 }
 EXPORT_SYMBOL_GPL(rds_message_addref);
 
+static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
+{
+   struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+   int ncookies;
+   u32 *ptr;
+
+   if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
+   return false;
+   ncookies = serr->ee.ee_data;
+   if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES)
+   return false;
+   ptr = skb_put(skb, sizeof(u32));
+   *ptr = cookie;
+   serr->ee.ee_data = ++ncookies;
+   return true;
+}
+
+static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+struct rds_znotifier *znotif)
+{
+   struct sock *sk = rds_rs_to_sk(rs);
+   struct sk_buff *skb, *tail;
+   struct sock_exterr_skb *serr;
+   unsigned long flags;
+   struct sk_buff_head *q;
+   u32 cookie = znotif->z_cookie;
+
+   q = &sk->sk_error_queue;
+   spin_lock_irqsave(&q->lock, flags);
+   tail = skb_peek_tail(q);
+
+   if (tail && skb_zcookie_add(tail, cookie)) {
+   spin_unlock_irqrestore(&q->lock, flags);
+   mm_unaccount_pinned_pages(&znotif->z_mmp);
+   consume_skb(rds_skb_from_znotifier(znotif));
+   sk->sk_error_report(sk);
+   return;
+   }
+
+   skb = rds_skb_from_znotifier(znotif);
+   serr = SKB_EXT_ERR(skb);
+   memset(&serr->ee, 0, sizeof(serr->ee));
+   serr->ee.ee_errno = 0;
+   serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE;
+   serr->ee.ee_info = 0;
+   WARN_ON(!skb_zcookie_add(skb, cookie));
+
+   __skb_queue_tail(q, skb);
+
+   spin_unlock_irqrestore(&q->lock, flags);
+   sk->sk_error_report(sk);
+
+   mm_unaccount_pinned_pages(&znotif->z_mmp);
+}
+
 /*
  * This relies on dma_map_sg() not touching sg[].page during merging.
  */
 static void rds_message_purge(struct rds_message *rm)
 {