On Sun, 5 Feb 2012, Daniel Stenberg wrote:

I think I've identified a pattern to the failures and a potential cause. If you find a moment to chat on IRC I could use some help with a couple of questions.

If there are others interested in this work, me and Alexander had a go at his problem yesterday and together we managed to identify the problem and come up with a fix for it. The attached 0001*patch is that fix.

I identified the same basic flaw in the sftp_write() code path and wrote up a patch for that as well, as can be seen in the attached 0002*patch.

I'm interested in feedback on both how they work in reality but also on the actual technical subtleties.

I am especially aware that this logic needs better comments/documentation to make us less likely to re-introduce similar problems again in the future!

--

 / daniel.haxx.se
From 446c6d3fdfccb6cf310f0314b10f03569c519da3 Mon Sep 17 00:00:00 2001
From: Daniel Stenberg <dan...@haxx.se>
Date: Mon, 6 Feb 2012 23:27:12 +0100
Subject: [PATCH 1/2] sftp_read: avoid data *and* EAGAIN

Whenever we have data and is about to call a function that *MAY* return
EAGAIN we must return the data now and wait to get called again. Our API
only allows data *or* EAGAIN and we must never try to get both.
---
 src/sftp.c |   48 +++++++++++++++++++++++++++---------------------
 1 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/sftp.c b/src/sftp.c
index 48da9f2..0a1cf61 100644
--- a/src/sftp.c
+++ b/src/sftp.c
@@ -1085,7 +1085,6 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
     struct sftp_pipeline_chunk *chunk;
     struct sftp_pipeline_chunk *next;
     ssize_t rc;
-    size_t eagain = 0;
     size_t total_read = 0;
     struct _libssh2_sftp_handle_file_data *filep =
         &handle->u.file;
@@ -1111,11 +1110,12 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
         filep->data = NULL;
     }
 
-    /* if we previously aborted a channel_write due to EAGAIN, we must
-       continue that writing so that we don't risk trying to send another
-       channel_write here to enlarge the receive window */
+    /* if we previously aborted a sftp_read due to EAGAIN, we must continue at
+       the same spot to continue the previously aborted operation */
     if(sftp->read_state == libssh2_NB_state_sent)
         goto send_read_requests;
+    else if(sftp->read_state == libssh2_NB_state_sent2)
+        goto read_acks;
 
     /* We allow a number of bytes being requested at any given time without
        having been acked - until we reach EOF. */
@@ -1218,16 +1218,17 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
 
     while(chunk) {
         if(chunk->lefttosend) {
+            if(total_read)
+                /* since we risk getting EAGAIN below, we return here if there
+                   is data available */
+                return total_read;
+
             rc = _libssh2_channel_write(channel, 0,
                                         &chunk->packet[chunk->sent],
                                         chunk->lefttosend);
             if(rc < 0) {
-                if(rc != LIBSSH2_ERROR_EAGAIN)
-                    /* error */
-                    return rc;
-                eagain++;
                 sftp->read_state = libssh2_NB_state_sent;
-                break;
+                return rc;
             }
 
             /* remember where to continue sending the next time */
@@ -1243,6 +1244,10 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
         chunk = _libssh2_list_next(&chunk->node);
     }
 
+  read_acks:
+
+    sftp->read_state = libssh2_NB_state_idle;
+
     /*
      * Count all ACKed packets and act on the contents of them.
      */
@@ -1261,15 +1266,17 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
                an ACK for it just yet */
             break;
 
+        if(total_read)
+            /* since we risk getting EAGAIN below, we return here if there
+               is data available */
+            return total_read;
+
         rc = sftp_packet_requirev(sftp, 2, read_responses,
                                   chunk->request_id, &data, &data_len);
-        if (rc == LIBSSH2_ERROR_EAGAIN) {
-            eagain++;
-            break;
+        if (rc < 0) {
+            sftp->read_state = libssh2_NB_state_sent2;
+            return rc;
         }
-        else if (rc)
-            return _libssh2_error(session, rc,
-                                  "Error waiting for FXP_READ ACK");
 
         /*
          * We get DATA or STATUS back. STATUS can be error, or it is FX_EOF
@@ -1353,12 +1360,11 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
         chunk = next;
     }
 
-    if(total_read)
-        return total_read;
-    else if(eagain)
-        return _libssh2_error(session, LIBSSH2_ERROR_EAGAIN,
-                              "Would block sftp_read");
-    return 0;
+    if(! total_read) {
+        fprintf(stderr, "MOO\n");
+    }
+
+    return total_read;
 }
 
 /* libssh2_sftp_read
-- 
1.7.9

From b89dcf4d92d988fe9d8484f0eeb6c1fe3bfd5a2c Mon Sep 17 00:00:00 2001
From: Daniel Stenberg <dan...@haxx.se>
Date: Tue, 7 Feb 2012 00:35:51 +0100
Subject: [PATCH 2/2] sftp_write: cannot return acked data *and* EAGAIN

Whenever we have acked data and is about to call a function that *MAY*
return EAGAIN we must return the number now and wait to get called
again. Our API only allows data *or* EAGAIN and we must never try to get
both.
---
 src/sftp.c |   37 ++++++++++++++++++++-----------------
 1 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/src/sftp.c b/src/sftp.c
index 0a1cf61..4300d2e 100644
--- a/src/sftp.c
+++ b/src/sftp.c
@@ -1617,7 +1617,6 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
     struct sftp_pipeline_chunk *next;
     size_t acked = 0;
     size_t org_count = count;
-    size_t eagain = 0;
 
     /* Number of bytes sent off that haven't been acked and therefor we will
        get passed in here again.
@@ -1638,6 +1637,10 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
         /* there is more data already fine than what we got in this call */
         count = 0;
 
+    if(sftp->write_state == libssh2_NB_state_sent)
+        goto sftp_write_fxp_status;
+
+    sftp->write_state = libssh2_NB_state_idle;
     while(count) {
         /* TODO: Possibly this should have some logic to prevent a very very
            small fraction to be left but lets ignore that for now */
@@ -1687,13 +1690,9 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
             rc = _libssh2_channel_write(channel, 0,
                                         &chunk->packet[chunk->sent],
                                         chunk->lefttosend);
-            if(rc < 0) {
-                if(rc != LIBSSH2_ERROR_EAGAIN)
-                    /* error */
-                    return rc;
-                eagain++;
-                break;
-            }
+            if(rc < 0)
+                /* remain in idle state */
+                return rc;
 
             /* remember where to continue sending the next time */
             chunk->lefttosend -= rc;
@@ -1708,6 +1707,9 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
         chunk = _libssh2_list_next(&chunk->node);
     }
 
+  sftp_write_fxp_status:
+
+    sftp->write_state = libssh2_NB_state_idle;
     /*
      * Count all ACKed packets
      */
@@ -1719,16 +1721,19 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
                an ACK for it just yet */
             break;
 
+        else if(acked)
+            /* if we have sent data that is acked, we must return that info
+               before we call a function that might return EAGAIN */
+            break;
+
         /* we check the packets in order */
         rc = sftp_packet_require(sftp, SSH_FXP_STATUS,
                                  chunk->request_id, &data, &data_len);
-        if (rc == LIBSSH2_ERROR_EAGAIN) {
-            eagain++;
-            break;
-        }
-        else if (rc) {
-            return _libssh2_error(session, rc, "Waiting for SFTP status");
+        if (rc < 0) {
+            sftp->write_state = libssh2_NB_state_sent;
+            return rc;
         }
+
         retcode = _libssh2_ntohu32(data + 5);
         LIBSSH2_FREE(session, data);
 
@@ -1787,9 +1792,7 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
 
         return ret;
     }
-    else if(eagain)
-        return _libssh2_error(session, LIBSSH2_ERROR_EAGAIN,
-                              "Would block sftp_write");
+
     else
         return 0; /* nothing was acked, and no EAGAIN was received! */
 }
-- 
1.7.9

_______________________________________________
libssh2-devel http://cool.haxx.se/cgi-bin/mailman/listinfo/libssh2-devel

Reply via email to