On Sun, 5 Feb 2012, Daniel Stenberg wrote:
I think I've identified a pattern to the failures and a potential cause. If
you find a moment to chat on IRC I could use some help with a couple of
questions.
If there are others interested in this work, me and Alexander had a go at his
problem yesterday and together we managed to identify the problem and come up
with a fix for it. The attached 0001*patch is that fix.
I identified the same basic flaw in the sftp_write() code path and wrote up a
patch for that as well, as can be seen in the attached 0002*patch.
I'm interested in feedback on both how they work in reality but also on the
actual technical subtleties.
I am especially aware that this logic needs better comments/documentation to
make us less likely to re-introduce similar problems again in the future!
--
/ daniel.haxx.se
From 446c6d3fdfccb6cf310f0314b10f03569c519da3 Mon Sep 17 00:00:00 2001
From: Daniel Stenberg <dan...@haxx.se>
Date: Mon, 6 Feb 2012 23:27:12 +0100
Subject: [PATCH 1/2] sftp_read: avoid data *and* EAGAIN
Whenever we have data and is about to call a function that *MAY* return
EAGAIN we must return the data now and wait to get called again. Our API
only allows data *or* EAGAIN and we must never try to get both.
---
src/sftp.c | 48 +++++++++++++++++++++++++++---------------------
1 files changed, 27 insertions(+), 21 deletions(-)
diff --git a/src/sftp.c b/src/sftp.c
index 48da9f2..0a1cf61 100644
--- a/src/sftp.c
+++ b/src/sftp.c
@@ -1085,7 +1085,6 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
struct sftp_pipeline_chunk *chunk;
struct sftp_pipeline_chunk *next;
ssize_t rc;
- size_t eagain = 0;
size_t total_read = 0;
struct _libssh2_sftp_handle_file_data *filep =
&handle->u.file;
@@ -1111,11 +1110,12 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
filep->data = NULL;
}
- /* if we previously aborted a channel_write due to EAGAIN, we must
- continue that writing so that we don't risk trying to send another
- channel_write here to enlarge the receive window */
+ /* if we previously aborted a sftp_read due to EAGAIN, we must continue at
+ the same spot to continue the previously aborted operation */
if(sftp->read_state == libssh2_NB_state_sent)
goto send_read_requests;
+ else if(sftp->read_state == libssh2_NB_state_sent2)
+ goto read_acks;
/* We allow a number of bytes being requested at any given time without
having been acked - until we reach EOF. */
@@ -1218,16 +1218,17 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
while(chunk) {
if(chunk->lefttosend) {
+ if(total_read)
+ /* since we risk getting EAGAIN below, we return here if there
+ is data available */
+ return total_read;
+
rc = _libssh2_channel_write(channel, 0,
&chunk->packet[chunk->sent],
chunk->lefttosend);
if(rc < 0) {
- if(rc != LIBSSH2_ERROR_EAGAIN)
- /* error */
- return rc;
- eagain++;
sftp->read_state = libssh2_NB_state_sent;
- break;
+ return rc;
}
/* remember where to continue sending the next time */
@@ -1243,6 +1244,10 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
chunk = _libssh2_list_next(&chunk->node);
}
+ read_acks:
+
+ sftp->read_state = libssh2_NB_state_idle;
+
/*
* Count all ACKed packets and act on the contents of them.
*/
@@ -1261,15 +1266,17 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
an ACK for it just yet */
break;
+ if(total_read)
+ /* since we risk getting EAGAIN below, we return here if there
+ is data available */
+ return total_read;
+
rc = sftp_packet_requirev(sftp, 2, read_responses,
chunk->request_id, &data, &data_len);
- if (rc == LIBSSH2_ERROR_EAGAIN) {
- eagain++;
- break;
+ if (rc < 0) {
+ sftp->read_state = libssh2_NB_state_sent2;
+ return rc;
}
- else if (rc)
- return _libssh2_error(session, rc,
- "Error waiting for FXP_READ ACK");
/*
* We get DATA or STATUS back. STATUS can be error, or it is FX_EOF
@@ -1353,12 +1360,11 @@ static ssize_t sftp_read(LIBSSH2_SFTP_HANDLE * handle, char *buffer,
chunk = next;
}
- if(total_read)
- return total_read;
- else if(eagain)
- return _libssh2_error(session, LIBSSH2_ERROR_EAGAIN,
- "Would block sftp_read");
- return 0;
+ if(! total_read) {
+ fprintf(stderr, "MOO\n");
+ }
+
+ return total_read;
}
/* libssh2_sftp_read
--
1.7.9
From b89dcf4d92d988fe9d8484f0eeb6c1fe3bfd5a2c Mon Sep 17 00:00:00 2001
From: Daniel Stenberg <dan...@haxx.se>
Date: Tue, 7 Feb 2012 00:35:51 +0100
Subject: [PATCH 2/2] sftp_write: cannot return acked data *and* EAGAIN
Whenever we have acked data and is about to call a function that *MAY*
return EAGAIN we must return the number now and wait to get called
again. Our API only allows data *or* EAGAIN and we must never try to get
both.
---
src/sftp.c | 37 ++++++++++++++++++++-----------------
1 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/src/sftp.c b/src/sftp.c
index 0a1cf61..4300d2e 100644
--- a/src/sftp.c
+++ b/src/sftp.c
@@ -1617,7 +1617,6 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
struct sftp_pipeline_chunk *next;
size_t acked = 0;
size_t org_count = count;
- size_t eagain = 0;
/* Number of bytes sent off that haven't been acked and therefor we will
get passed in here again.
@@ -1638,6 +1637,10 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
/* there is more data already fine than what we got in this call */
count = 0;
+ if(sftp->write_state == libssh2_NB_state_sent)
+ goto sftp_write_fxp_status;
+
+ sftp->write_state = libssh2_NB_state_idle;
while(count) {
/* TODO: Possibly this should have some logic to prevent a very very
small fraction to be left but lets ignore that for now */
@@ -1687,13 +1690,9 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
rc = _libssh2_channel_write(channel, 0,
&chunk->packet[chunk->sent],
chunk->lefttosend);
- if(rc < 0) {
- if(rc != LIBSSH2_ERROR_EAGAIN)
- /* error */
- return rc;
- eagain++;
- break;
- }
+ if(rc < 0)
+ /* remain in idle state */
+ return rc;
/* remember where to continue sending the next time */
chunk->lefttosend -= rc;
@@ -1708,6 +1707,9 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
chunk = _libssh2_list_next(&chunk->node);
}
+ sftp_write_fxp_status:
+
+ sftp->write_state = libssh2_NB_state_idle;
/*
* Count all ACKed packets
*/
@@ -1719,16 +1721,19 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
an ACK for it just yet */
break;
+ else if(acked)
+ /* if we have sent data that is acked, we must return that info
+ before we call a function that might return EAGAIN */
+ break;
+
/* we check the packets in order */
rc = sftp_packet_require(sftp, SSH_FXP_STATUS,
chunk->request_id, &data, &data_len);
- if (rc == LIBSSH2_ERROR_EAGAIN) {
- eagain++;
- break;
- }
- else if (rc) {
- return _libssh2_error(session, rc, "Waiting for SFTP status");
+ if (rc < 0) {
+ sftp->write_state = libssh2_NB_state_sent;
+ return rc;
}
+
retcode = _libssh2_ntohu32(data + 5);
LIBSSH2_FREE(session, data);
@@ -1787,9 +1792,7 @@ static ssize_t sftp_write(LIBSSH2_SFTP_HANDLE *handle, const char *buffer,
return ret;
}
- else if(eagain)
- return _libssh2_error(session, LIBSSH2_ERROR_EAGAIN,
- "Would block sftp_write");
+
else
return 0; /* nothing was acked, and no EAGAIN was received! */
}
--
1.7.9
_______________________________________________
libssh2-devel http://cool.haxx.se/cgi-bin/mailman/listinfo/libssh2-devel