Hello,

Attached is a patch which stages all writes to the WAL writer until
the end of the current event loop iteration.


In my benchmarks it generally produces worse results with up to
500 connections; beyond that, the results are on par with the
unpatched tree.

From this I conclude that write performance is generally
not bound by the memory barrier/mutex lock, but by something
else.

I'll keep digging.

diff --git a/src/log_io.m b/src/log_io.m
index 2b13d6c..3857b30 100644
--- a/src/log_io.m
+++ b/src/log_io.m
@@ -1262,11 +1262,12 @@ struct wal_write_request {
 
 struct wal_writer
 {
-       STAILQ_HEAD(wal_fifo, wal_write_request) input, output;
+       STAILQ_HEAD(wal_fifo, wal_write_request) staging, input, output;
        pthread_t thread;
        pthread_mutex_t mutex;
        pthread_cond_t cond;
        ev_async async;
+       ev_prepare stage;
        bool is_shutdown;
 };
 
@@ -1338,6 +1339,25 @@ wal_writer_schedule(ev_watcher *watcher, int event __attribute__((unused)))
        }
 }
 
+static void
+wal_writer_add_staged(ev_watcher *watcher, int event __attribute__((unused)))
+{
+       struct wal_writer *writer = watcher->data;
+       struct wal_fifo staging = writer->staging;
+       STAILQ_INIT(&writer->staging);
+
+       tt_pthread_mutex_lock(&writer->mutex);
+
+       bool was_empty = STAILQ_EMPTY(&writer->input);
+
+       STAILQ_CONCAT(&writer->input, &staging);
+
+       if (was_empty)
+               tt_pthread_cond_signal(&writer->cond);
+
+       tt_pthread_mutex_unlock(&writer->mutex);
+}
+
 /**
  * Initialize WAL writer context. Even though it's a singleton,
  * encapsulate the details just in case we may use
@@ -1360,11 +1380,13 @@ wal_writer_init(struct wal_writer *writer)
 
        tt_pthread_cond_init(&writer->cond, NULL);
 
+       STAILQ_INIT(&writer->staging);
        STAILQ_INIT(&writer->input);
        STAILQ_INIT(&writer->output);
 
        ev_async_init(&writer->async, (void *) wal_writer_schedule);
-       writer->async.data = writer;
+       ev_prepare_init(&writer->stage, (void *) wal_writer_add_staged);
+       writer->stage.data = writer->async.data = writer;
 
        tt_pthread_once(&wal_writer_once, wal_writer_init_once);
 }
@@ -1406,6 +1428,7 @@ wal_writer_start(struct recovery_state *r)
        r->writer = &wal_writer;
 
        ev_async_start(&wal_writer.async);
+       ev_prepare_start(&wal_writer.stage);
 
        /* II. Start the thread. */
 
@@ -1436,6 +1459,7 @@ wal_writer_stop(struct recovery_state *r)
        }
 
        ev_async_stop(&writer->async);
+       ev_prepare_stop(&writer->stage);
        wal_writer_destroy(writer);
 
        r->writer = NULL;
@@ -1609,16 +1633,7 @@ wal_write(struct recovery_state *r, i64 lsn, u64 cookie,
        row_v11_fill(&req->row, lsn, XLOG, cookie, &op, sizeof(op),
                     row->data, row->size);
 
-       tt_pthread_mutex_lock(&writer->mutex);
-
-       bool was_empty = STAILQ_EMPTY(&writer->input);
-
-       STAILQ_INSERT_TAIL(&writer->input, req, wal_fifo_entry);
-
-       if (was_empty)
-               tt_pthread_cond_signal(&writer->cond);
-
-       tt_pthread_mutex_unlock(&writer->mutex);
+       STAILQ_INSERT_TAIL(&writer->staging, req, wal_fifo_entry);
 
        fiber_yield();
 

-- 
http://tarantool.org - an efficient, extensible in-memory data store

_______________________________________________
Mailing list: https://launchpad.net/~tarantool-developers
Post to     : [email protected]
Unsubscribe : https://launchpad.net/~tarantool-developers
More help   : https://help.launchpad.net/ListHelp

Reply via email to