Elukey has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/311965

Change subject: Improve resilience during varnish (re)starts
......................................................................

Improve resilience during varnish (re)starts

This patch introduces a trick already used in Varnish's VUT library,
namely forcing a reconnect to the shm log if it is detected as
abandoned or not opened. This can happen if varnishkafka starts before varnish,
or if varnish restarts/stops while varnishkafka is running. Varnishkafka is now
going to retry every 100ms endlessly until a log is aquired.

Background reading:
- 
https://github.com/varnishcache/varnish-cache/blob/4.1/lib/libvarnishtools/vut.c#L366
- 
https://github.com/varnishcache/varnish-cache/blob/master/bin/varnishlog/varnishlog.c

To keep things consistent with the current behavior, the sequence number is 
restarted to its
starting value when a log overrun/abandon is detected.

Bug: T138747
Change-Id: I6969fcce80140bd9c24da4511ff7bbdde02a5ff8
---
M config.c
M varnishkafka.c
M varnishkafka.h
3 files changed, 78 insertions(+), 33 deletions(-)


  git pull 
ssh://gerrit.wikimedia.org:29418/operations/software/varnish/varnishkafka 
refs/changes/65/311965/1

diff --git a/config.c b/config.c
index a9c48f4..33a8ed4 100644
--- a/config.c
+++ b/config.c
@@ -168,6 +168,7 @@
                        conf.sequence_number = (uint64_t)time(NULL)*1000000llu;
                else
                        conf.sequence_number = strtoull(val, NULL, 0);
+               conf.sequence_number_start = conf.sequence_number;
        } else if (!strcmp(name, "output")) {
                if (!strcmp(val, "kafka"))
                        outfunc = out_kafka;
diff --git a/varnishkafka.c b/varnishkafka.c
index c52594f..f37bdab 100644
--- a/varnishkafka.c
+++ b/varnishkafka.c
@@ -1866,6 +1866,40 @@
 
 }
 
+/**
+ * Open and configure VSM/VSL/VSLQ settings. The vslq_query parameter will be
+ * used to know if a VSLQ query/filter needs to be set or not.
+ * Returns 0 in case of success, -1 otherwise.
+ */
+static int varnish_api_open_handles(struct VSM_data **vsm, struct VSL_data 
**vsl,
+                                   struct VSL_cursor **vsl_cursor,
+                                   unsigned int vsl_cursor_options,
+                                   struct VSLQ **vslq, char* vslq_query) {
+       if (VSM_Open(*vsm) < 0) {
+               vk_log("VSM_OPEN", LOG_DEBUG, "Failed to open Varnish VSL: 
%s\n", VSM_Error(*vsm));
+               return -1;
+       }
+       *vsl_cursor = VSL_CursorVSM(*vsl, *vsm, vsl_cursor_options);
+       if (*vsl_cursor == NULL) {
+               vk_log("VSL_CursorVSM", LOG_DEBUG, "Failed to obtain a cursor 
for the SHM log: %s\n",
+                       VSL_Error(*vsl));
+               return -1;
+       }
+       /* Setting VSLQ query */
+       if (vslq_query) {
+               *vslq = VSLQ_New(*vsl, vsl_cursor, VSL_g_request, vslq_query);
+       } else {
+               *vslq = VSLQ_New(*vsl, vsl_cursor, VSL_g_request, NULL);
+       }
+       if (*vslq == NULL) {
+               vk_log("VSLQ_NEW", LOG_DEBUG, "Failed to instantiate the VSL 
query: %s\n",
+                       VSL_Error(*vsl));
+               return -1;
+       }
+       return 0;
+}
+
+
 int main (int argc, char **argv) {
        char errstr[4096];
        char hostname[1024];
@@ -2063,7 +2097,7 @@
         * in the header file because used in both config.c and varnishkafka.c
         */
        conf.vsl = VSL_New();
-       struct VSL_cursor *vsl_cursor;
+       struct VSL_cursor *vsl_cursor = NULL;
        conf.vsm = VSM_New();
 
        if (conf.T_flag) {
@@ -2103,40 +2137,19 @@
                }
        }
 
-       if (VSM_Open(conf.vsm) < 0) {
-               vk_log("VSM_OPEN", LOG_ERR, "Failed to open Varnish VSL: %s\n", 
VSM_Error(conf.vsm));
-               varnish_api_cleaning();
-               exit(1);
-       }
-       vsl_cursor = VSL_CursorVSM(conf.vsl, conf.vsm, VSL_COPT_TAIL | 
VSL_COPT_BATCH);
-       if (vsl_cursor == NULL) {
-               vk_log("VSL_CursorVSM", LOG_ERR, "Failed to obtain a cursor for 
the SHM log: %s\n",
-                               VSL_Error(conf.vsl));
-               varnish_api_cleaning();
-               exit(1);
-       }
-
-       /* Setting VSLQ query */
-       if (conf.q_flag) {
-               conf.vslq = VSLQ_New(conf.vsl, &vsl_cursor, VSL_g_request, 
conf.q_flag_query);
-       } else {
-               conf.vslq = VSLQ_New(conf.vsl, &vsl_cursor, VSL_g_request, 
NULL);
-       }
-       if (conf.vslq == NULL) {
-               vk_log("VSLQ_NEW", LOG_ERR, "Failed to instantiate the VSL 
query: %s\n",
-                               VSL_Error(conf.vsl));
-               varnish_api_cleaning();
-               exit(1);
-       }
-
        /* Main dispatcher loop depending on outputter */
        conf.run = 1;
        conf.pret = 0;
 
        /* time struct to sleep for 10ms */
-       struct timespec wait_for;
-       wait_for.tv_sec = 0;
-       wait_for.tv_nsec = 10000000L;
+       struct timespec duration_10ms;
+       duration_10ms.tv_sec = 0;
+       duration_10ms.tv_nsec = 10000000L;
+
+       /* time struct to sleep for 100ms */
+       struct timespec duration_100ms;
+       duration_100ms.tv_sec = 0;
+       duration_100ms.tv_nsec = 100000000L;
 
        /* Creating a new logline (will be re-used across log transactions) */
        struct logline *lp = NULL;
@@ -2144,17 +2157,44 @@
                return -1;
 
        while (conf.run) {
+               /* Attempt to connect to the shm log handle if not open. 
Varnishkafka
+                * will try to connect periodically to the shm log until it 
gets one,
+                * or it will keep trying endlessly.
+                * Two use cases:
+                * - varnishkafka is started but varnish is not. This behavior 
is
+                *   useful if there is no strict start ordering for varnish 
and varnishkafka.
+                * - varnish gets restarted and the shm log handle is not valid 
anymore.
+                */
+               if (conf.vsm != NULL && !VSM_IsOpen(conf.vsm)) {
+                       if (varnish_api_open_handles(&conf.vsm, &conf.vsl, 
&vsl_cursor,
+                                                    VSL_COPT_TAIL | 
VSL_COPT_BATCH, &conf.vslq,
+                                                    conf.q_flag_query) == -1) {
+                               nanosleep(&duration_100ms, NULL);
+                               continue;
+                       } else {
+                               vk_log("VSLQ_Dispatch", LOG_ERR, "Log 
acquired!");
+                               /* Setting the sequence number back to zero to 
track
+                                * the fact that Varnish abandoned the log, 
probably due to
+                                * a restart (or simply that varnishkafka 
started before varnish).
+                                */
+                               conf.sequence_number = 
conf.sequence_number_start;
+                       }
+               }
+
                int dispatch_status = VSLQ_Dispatch(conf.vslq, 
transaction_scribe, lp);
 
                /* Nothing to read from the shm handle, sleeping */
                if (dispatch_status == 0)
-                       nanosleep(&wait_for, NULL);
+                       nanosleep(&duration_10ms, NULL);
 
-               /* Varnish log abandoned or overrun, closing gracefully */
+               /* In case of Varnish log abandoned or overrun, the handle needs
+                * to be closed to allow a reconnect during the next while cycle
+                */
                else if (dispatch_status <= -2) {
                        vk_log("VSLQ_Dispatch", LOG_ERR, "Varnish Log abandoned 
or overrun.");
-                       break;
+                       VSM_Close(conf.vsm);
                }
+
                /* EOF from the Varnish Log, closing gracefully */
                else if (dispatch_status == -1) {
                        vk_log("VSLQ_Dispatch", LOG_ERR, "Varnish Log EOF.");
diff --git a/varnishkafka.h b/varnishkafka.h
index 2994f2f..ef99b2c 100644
--- a/varnishkafka.h
+++ b/varnishkafka.h
@@ -163,6 +163,10 @@
        struct tag **tag;
 
        uint64_t    sequence_number;
+       /* Useful to reset seq from the right starting point,
+        * defined in the configuration file, when needed.
+        */
+       uint64_t    sequence_number_start;
 
        size_t      scratch_size;    /* Size of scratch buffer */
        fmt_enc_t   fmt_enc;

-- 
To view, visit https://gerrit.wikimedia.org/r/311965
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I6969fcce80140bd9c24da4511ff7bbdde02a5ff8
Gerrit-PatchSet: 1
Gerrit-Project: operations/software/varnish/varnishkafka
Gerrit-Branch: master
Gerrit-Owner: Elukey <ltosc...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to