At certain network and disk speeds, tabled can blow its stack by
filling it with (essentially) endless recursion:

#2  0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp=
    0x7bb910, done=<value optimized out>) at server.c:397
#3  0x000000000040ca55 in cli_writable (cli=0x686e90) at server.c:525
#4  0x000000000040da65 in cli_write_start (cli=0x686e90) at server.c:561
#5  0x0000000000408ad5 in object_get_poke (cli=0x686e90) at object.c:1039
#6  0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp=
    0x7bb8d0, done=<value optimized out>) at server.c:397
#7  0x000000000040ca55 in cli_writable (cli=0x686e90) at server.c:525
#8  0x000000000040da65 in cli_write_start (cli=0x686e90) at server.c:561
#9  0x0000000000408ad5 in object_get_poke (cli=0x686e90) at object.c:1039
#10 0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp=
    0x7bb890, done=<value optimized out>) at server.c:397

The fix is to deliver callbacks only from the top level.

Callbacks must be delivered every time a send is completed,
which amounts to every call to is_writeable(). Since there
is a large number of callers to it, we found it advantageous
to run callbacks from every source of events. In other words,
every function that is passed to event_set must invoke
cli_write_run_compl. Mind that storage.c contains calls to
event_set.

Signed-off-by: Pete Zaitcev <[email protected]>

---
 server/object.c |    4 +++
 server/server.c |   52 +++++++++++++++++++++++++++++++++++-----------
 server/tabled.h |    6 +++++
 3 files changed, 50 insertions(+), 12 deletions(-)

commit 88959fb8c4cc8232f92805618c5bcaeee9099390
Author: Pete Zaitcev <[email protected]>
Date:   Thu Apr 1 19:04:13 2010 -0600

    Fix the endless recursion when reading long objects.

diff --git a/server/object.c b/server/object.c
index d61a446..6d32316 100644
--- a/server/object.c
+++ b/server/object.c
@@ -994,6 +994,9 @@ static bool object_get_poke(struct client *cli)
        char *buf;
        ssize_t bytes;
 
+       /* XXX flow control - what treshold? */
+       /*  if (cli_wqueued(cli) >= 1000000000) return false; */
+
        buf = malloc(CLI_DATA_BUF_SZ);
        if (!buf)
                return false;
@@ -1052,6 +1055,7 @@ static bool object_get_more(struct client *cli, void 
*cb_data, bool done)
 static void object_get_event(struct open_chunk *ochunk)
 {
        object_get_poke(ochunk->cli);
+       cli_write_run_compl(ochunk->cli);
 }
 
 bool object_get_body(struct client *cli, const char *user, const char *bucket,
diff --git a/server/server.c b/server/server.c
index 72db151..600c8de 100644
--- a/server/server.c
+++ b/server/server.c
@@ -380,6 +380,8 @@ static void stats_dump(void)
               tabled_srv.stats.event,
               tabled_srv.stats.tcp_accept,
               tabled_srv.stats.opt_write);
+       applog(LOG_INFO, "DEBUG: max_write_buf %lu",
+              tabled_srv.stats.max_write_buf);
        stor_stats();
        rep_stats();
 }
@@ -405,16 +407,24 @@ bool stat_status(struct client *cli, GList *content)
        content = g_list_append(content, str);
        if (asprintf(&str,
                     "<p>Stats: "
-                    "poll %lu event %lu tcp_accept %lu opt_write %lu</p>\r\n",
+                    "poll %lu event %lu tcp_accept %lu opt_write %lu</p>\r\n"
+                    "<p>Debug: max_write_buf %lu</p>\r\n",
                     tabled_srv.stats.poll,
                     tabled_srv.stats.event,
                     tabled_srv.stats.tcp_accept,
-                    tabled_srv.stats.opt_write) < 0)
+                    tabled_srv.stats.opt_write,
+                    tabled_srv.stats.max_write_buf) < 0)
                return false;
        content = g_list_append(content, str);
        return true;
 }
 
+static void cli_write_complete(struct client *cli, struct client_write *tmp)
+{
+       list_del(&tmp->node);
+       list_add_tail(&tmp->node, &cli->write_compl_q);
+}
+
 static bool cli_write_free(struct client *cli, struct client_write *tmp,
                           bool done)
 {
@@ -433,11 +443,28 @@ static void cli_write_free_all(struct client *cli)
 {
        struct client_write *wr, *tmp;
 
+       list_for_each_entry_safe(wr, tmp, &cli->write_compl_q, node) {
+               cli_write_free(cli, wr, true);
+       }
        list_for_each_entry_safe(wr, tmp, &cli->write_q, node) {
                cli_write_free(cli, wr, false);
        }
 }
 
+bool cli_write_run_compl(struct client *cli)
+{
+       struct client_write *wr;
+       bool do_loop;
+
+       do_loop = false;
+       while (!list_empty(&cli->write_compl_q)) {
+               wr = list_entry(cli->write_compl_q.next, struct client_write,
+                               node);
+               do_loop |= cli_write_free(cli, wr, true);
+       }
+       return do_loop;
+}
+
 static void cli_free(struct client *cli)
 {
        cli_write_free_all(cli);
@@ -455,6 +482,9 @@ static void cli_free(struct client *cli)
 
        req_free(&cli->req);
 
+       if (cli->write_cnt_max > tabled_srv.stats.max_write_buf)
+               tabled_srv.stats.max_write_buf = cli->write_cnt_max;
+
        if (debugging)
                applog(LOG_INFO, "client %s ended", cli->addr_host);
 
@@ -505,10 +535,6 @@ static void cli_writable(struct client *cli)
        struct client_write *tmp;
        ssize_t rc;
        struct iovec iov[CLI_MAX_WR_IOV];
-       bool more_work;
-
-restart:
-       more_work = false;
 
        /* accumulate pending writes into iovec */
        n_iov = 0;
@@ -548,16 +574,13 @@ do_write:
                rc -= sz;
 
                /* if tmp->len reaches zero, write is complete,
-                * call callback and clean up
+                * so schedule it for clean up (cannot call callback
+                * right away or an endless recursion will result)
                 */
                if (tmp->togo == 0)
-                       if (cli_write_free(cli, tmp, true))
-                               more_work = true;
+                       cli_write_complete(cli, tmp);
        }
 
-       if (more_work)
-               goto restart;
-
        /* if we emptied the queue, clear write notification */
        if (list_empty(&cli->write_q)) {
                cli->writing = false;
@@ -622,6 +645,8 @@ int cli_writeq(struct client *cli, const void *buf, 
unsigned int buflen,
        wr->cb_data = cb_data;
        list_add_tail(&wr->node, &cli->write_q);
        cli->write_cnt += buflen;
+       if (cli->write_cnt > cli->write_cnt_max)
+               cli->write_cnt_max = cli->write_cnt;
 
        return 0;
 }
@@ -1275,6 +1300,7 @@ static struct client *cli_alloc(bool is_status)
        cli->state = evt_read_req;
        cli->evt_table = is_status? evt_funcs_status: evt_funcs_server;
        INIT_LIST_HEAD(&cli->write_q);
+       INIT_LIST_HEAD(&cli->write_compl_q);
        INIT_LIST_HEAD(&cli->out_ch);
        cli->req_ptr = cli->req_buf;
        memset(&cli->req, 0, sizeof(cli->req) - sizeof(cli->req.hdr));
@@ -1287,6 +1313,7 @@ static void tcp_cli_wr_event(int fd, short events, void 
*userdata)
        struct client *cli = userdata;
 
        cli_writable(cli);
+       cli_write_run_compl(cli);
 }
 
 static void tcp_cli_event(int fd, short events, void *userdata)
@@ -1296,6 +1323,7 @@ static void tcp_cli_event(int fd, short events, void 
*userdata)
 
        do {
                loop = cli->evt_table[cli->state](cli, events);
+               loop |= cli_write_run_compl(cli);
        } while (loop);
 }
 
diff --git a/server/tabled.h b/server/tabled.h
index b4f51ed..3ef4a49 100644
--- a/server/tabled.h
+++ b/server/tabled.h
@@ -164,8 +164,11 @@ struct client {
        struct event            write_ev;
 
        struct list_head        write_q;        /* list of async writes */
+       struct list_head        write_compl_q;  /* list of done writes */
        size_t                  write_cnt;      /* water level */
        bool                    writing;
+       /* some debugging stats */
+       size_t                  write_cnt_max;
 
        unsigned int            req_used;       /* amount of req_buf in use */
        char                    *req_ptr;       /* start of unexamined data */
@@ -212,6 +215,8 @@ struct server_stats {
        unsigned long           event;          /* events dispatched */
        unsigned long           tcp_accept;     /* TCP accepted cxns */
        unsigned long           opt_write;      /* optimistic writes */
+
+       unsigned long           max_write_buf;
 };
 
 struct listen_cfg {
@@ -325,6 +330,7 @@ extern int cli_writeq(struct client *cli, const void *buf, 
unsigned int buflen,
 extern size_t cli_wqueued(struct client *cli);
 extern bool cli_cb_free(struct client *cli, void *cb_data, bool done);
 extern bool cli_write_start(struct client *cli);
+extern bool cli_write_run_compl(struct client *cli);
 extern int cli_req_avail(struct client *cli);
 extern void applog(int prio, const char *fmt, ...);
 extern int stor_update_cb(void);
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to