Elukey has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/311415

Change subject: Improve resilience during varnish restarts
......................................................................

Improve resilience during varnish restarts

This patch introduces a trick already used in Varnish's VUT library,
namely forcing a reconnect to the shm log if it is detected as
abandoned or overrun. This can happen during Varnish restarts and
at the moment this is a problem, since varnishkafka exits with code 0
and init systems like systemd can't restart it properly.

Background reading:
- 
https://github.com/varnishcache/varnish-cache/blob/4.1/lib/libvarnishtools/vut.c#L366
- 
https://github.com/varnishcache/varnish-cache/blob/master/bin/varnishlog/varnishlog.c

To keep things consistent with the current behavior, the sequence number is
reset to its starting value when a log overrun/abandon is detected.

Bug: T138747
Change-Id: I87048786b36de325aee663a873b12874588664fb
---
M config.c
M varnishkafka.c
M varnishkafka.h
3 files changed, 75 insertions(+), 26 deletions(-)


  git pull 
ssh://gerrit.wikimedia.org:29418/operations/software/varnish/varnishkafka 
refs/changes/15/311415/1

diff --git a/config.c b/config.c
index a9c48f4..33a8ed4 100644
--- a/config.c
+++ b/config.c
@@ -168,6 +168,7 @@
                        conf.sequence_number = (uint64_t)time(NULL)*1000000llu;
                else
                        conf.sequence_number = strtoull(val, NULL, 0);
+               conf.sequence_number_start = conf.sequence_number;
        } else if (!strcmp(name, "output")) {
                if (!strcmp(val, "kafka"))
                        outfunc = out_kafka;
diff --git a/varnishkafka.c b/varnishkafka.c
index c52594f..5a06cb1 100644
--- a/varnishkafka.c
+++ b/varnishkafka.c
@@ -1866,6 +1866,40 @@
 
 }
 
+/**
+ * Open and configure VSM/VSL/VSLQ settings. The vslq_query parameter will be
+ * used to know if a VSLQ query/filter needs to be set or not.
+ * Returns 0 in case of success, -1 otherwise.
+ */
+static int varnish_api_open_handles(struct VSM_data **vsm, struct VSL_data 
**vsl,
+                                                                       struct 
VSL_cursor **vsl_cursor,
+                                                                       
unsigned int vsl_cursor_options,
+                                                                       struct 
VSLQ **vslq, char* vslq_query) {
+       if (VSM_Open(*vsm) < 0) {
+               vk_log("VSM_OPEN", LOG_ERR, "Failed to open Varnish VSL: %s\n", 
VSM_Error(*vsm));
+               return -1;
+       }
+       *vsl_cursor = VSL_CursorVSM(*vsl, *vsm, vsl_cursor_options);
+       if (*vsl_cursor == NULL) {
+               vk_log("VSL_CursorVSM", LOG_ERR, "Failed to obtain a cursor for 
the SHM log: %s\n",
+                               VSL_Error(*vsl));
+               return -1;
+       }
+       /* Setting VSLQ query */
+       if (vslq_query) {
+               *vslq = VSLQ_New(*vsl, vsl_cursor, VSL_g_request, vslq_query);
+       } else {
+               *vslq = VSLQ_New(*vsl, vsl_cursor, VSL_g_request, NULL);
+       }
+       if (*vslq == NULL) {
+               vk_log("VSLQ_NEW", LOG_ERR, "Failed to instantiate the VSL 
query: %s\n",
+                               VSL_Error(*vsl));
+               return -1;
+       }
+       return 0;
+}
+
+
 int main (int argc, char **argv) {
        char errstr[4096];
        char hostname[1024];
@@ -2063,7 +2097,7 @@
         * in the header file because used in both config.c and varnishkafka.c
         */
        conf.vsl = VSL_New();
-       struct VSL_cursor *vsl_cursor;
+       struct VSL_cursor *vsl_cursor = NULL;
        conf.vsm = VSM_New();
 
        if (conf.T_flag) {
@@ -2103,28 +2137,9 @@
                }
        }
 
-       if (VSM_Open(conf.vsm) < 0) {
-               vk_log("VSM_OPEN", LOG_ERR, "Failed to open Varnish VSL: %s\n", 
VSM_Error(conf.vsm));
-               varnish_api_cleaning();
-               exit(1);
-       }
-       vsl_cursor = VSL_CursorVSM(conf.vsl, conf.vsm, VSL_COPT_TAIL | 
VSL_COPT_BATCH);
-       if (vsl_cursor == NULL) {
-               vk_log("VSL_CursorVSM", LOG_ERR, "Failed to obtain a cursor for 
the SHM log: %s\n",
-                               VSL_Error(conf.vsl));
-               varnish_api_cleaning();
-               exit(1);
-       }
-
-       /* Setting VSLQ query */
-       if (conf.q_flag) {
-               conf.vslq = VSLQ_New(conf.vsl, &vsl_cursor, VSL_g_request, 
conf.q_flag_query);
-       } else {
-               conf.vslq = VSLQ_New(conf.vsl, &vsl_cursor, VSL_g_request, 
NULL);
-       }
-       if (conf.vslq == NULL) {
-               vk_log("VSLQ_NEW", LOG_ERR, "Failed to instantiate the VSL 
query: %s\n",
-                               VSL_Error(conf.vsl));
+       if (varnish_api_open_handles(&conf.vsm, &conf.vsl, &vsl_cursor,
+                                                                VSL_COPT_TAIL 
| VSL_COPT_BATCH, &conf.vslq,
+                                                                
conf.q_flag_query) == -1) {
                varnish_api_cleaning();
                exit(1);
        }
@@ -2138,6 +2153,11 @@
        wait_for.tv_sec = 0;
        wait_for.tv_nsec = 10000000L;
 
+       /* In case the shm log is abandoned or overrun don't exit immediately
+        * since there might be data waiting to be dispatched to Kafka.
+        */
+       int exit_code = 0;
+
        /* Creating a new logline (will be re-used across log transactions) */
        struct logline *lp = NULL;
        if (unlikely(!(lp = logline_get())))
@@ -2150,11 +2170,35 @@
                if (dispatch_status == 0)
                        nanosleep(&wait_for, NULL);
 
-               /* Varnish log abandoned or overrun, closing gracefully */
+               /* Varnish log abandoned or overrun:
+                * 1) Close the current handle.
+                * 2) Attempt to reaquire the log if a new one is open or close
+                *    gracefully otherwise.
+                */
                else if (dispatch_status <= -2) {
                        vk_log("VSLQ_Dispatch", LOG_ERR, "Varnish Log abandoned 
or overrun.");
-                       break;
+                       VSM_Close(conf.vsm);
+                       /* Attempt to reconnect */
+                       sleep(3);
+                       if (conf.vsm != NULL && !VSM_IsOpen(conf.vsm)) {
+                               vk_log("VSLQ_Dispatch", LOG_ERR, "Attempt to 
reconnect to the Varnish log..");
+                               if (varnish_api_open_handles(&conf.vsm, 
&conf.vsl, &vsl_cursor,
+                                                                               
         VSL_COPT_TAIL | VSL_COPT_BATCH, &conf.vslq,
+                                                                               
         conf.q_flag_query) == -1) {
+                                       vk_log("VSLQ_Dispatch", LOG_ERR, 
"Attempt failed!");
+                                       exit_code = 1;
+                                       break;
+                               } else {
+                                       vk_log("VSLQ_Dispatch", LOG_ERR, "Log 
reaquired!");
+                                       /* Setting the sequence number back to 
zero to track
+                                        * the fact that Varnish abandoned the 
log, probably due to
+                                        * a restart.
+                                        */
+                                       conf.sequence_number = 
conf.sequence_number_start;
+                               }
+                       }
                }
+
                /* EOF from the Varnish Log, closing gracefully */
                else if (dispatch_status == -1) {
                        vk_log("VSLQ_Dispatch", LOG_ERR, "Varnish Log EOF.");
@@ -2196,5 +2240,5 @@
 
        varnish_api_cleaning();
 
-       exit(0);
+       exit(exit_code);
 }
diff --git a/varnishkafka.h b/varnishkafka.h
index 2994f2f..ef99b2c 100644
--- a/varnishkafka.h
+++ b/varnishkafka.h
@@ -163,6 +163,10 @@
        struct tag **tag;
 
        uint64_t    sequence_number;
+       /* Useful to reset seq from the right starting point,
+        * defined in the configuration file, when needed.
+        */
+       uint64_t    sequence_number_start;
 
        size_t      scratch_size;    /* Size of scratch buffer */
        fmt_enc_t   fmt_enc;

-- 
To view, visit https://gerrit.wikimedia.org/r/311415
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I87048786b36de325aee663a873b12874588664fb
Gerrit-PatchSet: 1
Gerrit-Project: operations/software/varnish/varnishkafka
Gerrit-Branch: master
Gerrit-Owner: Elukey <ltosc...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to