Elukey has uploaded a new change for review. https://gerrit.wikimedia.org/r/311415
Change subject: Improve resilience during varnish restarts ...................................................................... Improve resilience during varnish restarts This patch introduces a trick already used in Varnish's VUT library, namely forcing a reconnect to the shm log if it is detected as abandoned or overrun. This can happen during Varnish restarts and at the moment it represents a problem since it returns the 0 exit code and init systems like systemd can't restart it properly. Background reading: - https://github.com/varnishcache/varnish-cache/blob/4.1/lib/libvarnishtools/vut.c#L366 - https://github.com/varnishcache/varnish-cache/blob/master/bin/varnishlog/varnishlog.c To keep things consistent with the current behavior, the sequence number is restarted to its starting value when a log overrun/abandon is detected. Bug: T138747 Change-Id: I87048786b36de325aee663a873b12874588664fb --- M config.c M varnishkafka.c M varnishkafka.h 3 files changed, 75 insertions(+), 26 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/software/varnish/varnishkafka refs/changes/15/311415/1 diff --git a/config.c b/config.c index a9c48f4..33a8ed4 100644 --- a/config.c +++ b/config.c @@ -168,6 +168,7 @@ conf.sequence_number = (uint64_t)time(NULL)*1000000llu; else conf.sequence_number = strtoull(val, NULL, 0); + conf.sequence_number_start = conf.sequence_number; } else if (!strcmp(name, "output")) { if (!strcmp(val, "kafka")) outfunc = out_kafka; diff --git a/varnishkafka.c b/varnishkafka.c index c52594f..5a06cb1 100644 --- a/varnishkafka.c +++ b/varnishkafka.c @@ -1866,6 +1866,40 @@ } +/** + * Open and configure VSM/VSL/VSLQ settings. The vslq_query parameter will be + * used to know if a VSLQ query/filter needs to be set or not. + * Returns 0 in case of success, -1 otherwise. 
+ */ +static int varnish_api_open_handles(struct VSM_data **vsm, struct VSL_data **vsl, + struct VSL_cursor **vsl_cursor, + unsigned int vsl_cursor_options, + struct VSLQ **vslq, char* vslq_query) { + if (VSM_Open(*vsm) < 0) { + vk_log("VSM_OPEN", LOG_ERR, "Failed to open Varnish VSL: %s\n", VSM_Error(*vsm)); + return -1; + } + *vsl_cursor = VSL_CursorVSM(*vsl, *vsm, vsl_cursor_options); + if (*vsl_cursor == NULL) { + vk_log("VSL_CursorVSM", LOG_ERR, "Failed to obtain a cursor for the SHM log: %s\n", + VSL_Error(*vsl)); + return -1; + } + /* Setting VSLQ query */ + if (vslq_query) { + *vslq = VSLQ_New(*vsl, vsl_cursor, VSL_g_request, vslq_query); + } else { + *vslq = VSLQ_New(*vsl, vsl_cursor, VSL_g_request, NULL); + } + if (*vslq == NULL) { + vk_log("VSLQ_NEW", LOG_ERR, "Failed to instantiate the VSL query: %s\n", + VSL_Error(*vsl)); + return -1; + } + return 0; +} + + int main (int argc, char **argv) { char errstr[4096]; char hostname[1024]; @@ -2063,7 +2097,7 @@ * in the header file because used in both config.c and varnishkafka.c */ conf.vsl = VSL_New(); - struct VSL_cursor *vsl_cursor; + struct VSL_cursor *vsl_cursor = NULL; conf.vsm = VSM_New(); if (conf.T_flag) { @@ -2103,28 +2137,9 @@ } } - if (VSM_Open(conf.vsm) < 0) { - vk_log("VSM_OPEN", LOG_ERR, "Failed to open Varnish VSL: %s\n", VSM_Error(conf.vsm)); - varnish_api_cleaning(); - exit(1); - } - vsl_cursor = VSL_CursorVSM(conf.vsl, conf.vsm, VSL_COPT_TAIL | VSL_COPT_BATCH); - if (vsl_cursor == NULL) { - vk_log("VSL_CursorVSM", LOG_ERR, "Failed to obtain a cursor for the SHM log: %s\n", - VSL_Error(conf.vsl)); - varnish_api_cleaning(); - exit(1); - } - - /* Setting VSLQ query */ - if (conf.q_flag) { - conf.vslq = VSLQ_New(conf.vsl, &vsl_cursor, VSL_g_request, conf.q_flag_query); - } else { - conf.vslq = VSLQ_New(conf.vsl, &vsl_cursor, VSL_g_request, NULL); - } - if (conf.vslq == NULL) { - vk_log("VSLQ_NEW", LOG_ERR, "Failed to instantiate the VSL query: %s\n", - VSL_Error(conf.vsl)); + if 
(varnish_api_open_handles(&conf.vsm, &conf.vsl, &vsl_cursor, + VSL_COPT_TAIL | VSL_COPT_BATCH, &conf.vslq, + conf.q_flag_query) == -1) { varnish_api_cleaning(); exit(1); } @@ -2138,6 +2153,11 @@ wait_for.tv_sec = 0; wait_for.tv_nsec = 10000000L; + /* In case the shm log is abandoned or overrun don't exit immediately + * since there might be data waiting to be dispatched to Kafka. + */ + int exit_code = 0; + /* Creating a new logline (will be re-used across log transactions) */ struct logline *lp = NULL; if (unlikely(!(lp = logline_get()))) @@ -2150,11 +2170,35 @@ if (dispatch_status == 0) nanosleep(&wait_for, NULL); - /* Varnish log abandoned or overrun, closing gracefully */ + /* Varnish log abandoned or overrun: + * 1) Close the current handle. + * 2) Attempt to reaquire the log if a new one is open or close + * gracefully otherwise. + */ else if (dispatch_status <= -2) { vk_log("VSLQ_Dispatch", LOG_ERR, "Varnish Log abandoned or overrun."); - break; + VSM_Close(conf.vsm); + /* Attempt to reconnect */ + sleep(3); + if (conf.vsm != NULL && !VSM_IsOpen(conf.vsm)) { + vk_log("VSLQ_Dispatch", LOG_ERR, "Attempt to reconnect to the Varnish log.."); + if (varnish_api_open_handles(&conf.vsm, &conf.vsl, &vsl_cursor, + VSL_COPT_TAIL | VSL_COPT_BATCH, &conf.vslq, + conf.q_flag_query) == -1) { + vk_log("VSLQ_Dispatch", LOG_ERR, "Attempt failed!"); + exit_code = 1; + break; + } else { + vk_log("VSLQ_Dispatch", LOG_ERR, "Log reaquired!"); + /* Setting the sequence number back to zero to track + * the fact that Varnish abandoned the log, probably due to + * a restart. 
+ */ + conf.sequence_number = conf.sequence_number_start; + } + } } + /* EOF from the Varnish Log, closing gracefully */ else if (dispatch_status == -1) { vk_log("VSLQ_Dispatch", LOG_ERR, "Varnish Log EOF."); @@ -2196,5 +2240,5 @@ varnish_api_cleaning(); - exit(0); + exit(exit_code); } diff --git a/varnishkafka.h b/varnishkafka.h index 2994f2f..ef99b2c 100644 --- a/varnishkafka.h +++ b/varnishkafka.h @@ -163,6 +163,10 @@ struct tag **tag; uint64_t sequence_number; + /* Useful to reset seq from the right starting point, + * defined in the configuration file, when needed. + */ + uint64_t sequence_number_start; size_t scratch_size; /* Size of scratch buffer */ fmt_enc_t fmt_enc; -- To view, visit https://gerrit.wikimedia.org/r/311415 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I87048786b36de325aee663a873b12874588664fb Gerrit-PatchSet: 1 Gerrit-Project: operations/software/varnish/varnishkafka Gerrit-Branch: master Gerrit-Owner: Elukey <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
