Elukey has submitted this change and it was merged. Change subject: Improve resilience during varnish (re)starts ......................................................................
Improve resilience during varnish (re)starts This patch introduces a trick already used in Varnish's VUT library, namely forcing a reconnect to the shm log if it is detected as abandoned or not opened. This can happen if varnishkafka starts before varnish, or if varnish restarts/stops while varnishkafka is running. Varnishkafka is now going to retry every 100ms endlessly until a log is aquired. Background reading: - https://github.com/varnishcache/varnish-cache/blob/4.1/lib/libvarnishtools/vut.c#L366 - https://github.com/varnishcache/varnish-cache/blob/master/bin/varnishlog/varnishlog.c To keep things consistent with the current behavior, the sequence number is restarted to its starting value when a log overrun/abandon is detected. Bug: T138747 Change-Id: I87048786b36de325aee663a873b12874588664fb --- M config.c M varnishkafka.c M varnishkafka.h 3 files changed, 78 insertions(+), 33 deletions(-) Approvals: Elukey: Verified; Looks good to me, approved diff --git a/config.c b/config.c index a9c48f4..33a8ed4 100644 --- a/config.c +++ b/config.c @@ -168,6 +168,7 @@ conf.sequence_number = (uint64_t)time(NULL)*1000000llu; else conf.sequence_number = strtoull(val, NULL, 0); + conf.sequence_number_start = conf.sequence_number; } else if (!strcmp(name, "output")) { if (!strcmp(val, "kafka")) outfunc = out_kafka; diff --git a/varnishkafka.c b/varnishkafka.c index c52594f..f37bdab 100644 --- a/varnishkafka.c +++ b/varnishkafka.c @@ -1866,6 +1866,40 @@ } +/** + * Open and configure VSM/VSL/VSLQ settings. The vslq_query parameter will be + * used to know if a VSLQ query/filter needs to be set or not. + * Returns 0 in case of success, -1 otherwise. + */ +static int varnish_api_open_handles(struct VSM_data **vsm, struct VSL_data **vsl, + struct VSL_cursor **vsl_cursor, + unsigned int vsl_cursor_options, + struct VSLQ **vslq, char* vslq_query) { + if (VSM_Open(*vsm) < 0) { + vk_log("VSM_OPEN", LOG_DEBUG, "Failed to open Varnish VSL: %s\n", VSM_Error(*vsm)); + return -1; + } + *vsl_cursor = VSL_CursorVSM(*vsl, *vsm, vsl_cursor_options); + if (*vsl_cursor == NULL) { + vk_log("VSL_CursorVSM", LOG_DEBUG, "Failed to obtain a cursor for the SHM log: %s\n", + VSL_Error(*vsl)); + return -1; + } + /* Setting VSLQ query */ + if (vslq_query) { + *vslq = VSLQ_New(*vsl, vsl_cursor, VSL_g_request, vslq_query); + } else { + *vslq = VSLQ_New(*vsl, vsl_cursor, VSL_g_request, NULL); + } + if (*vslq == NULL) { + vk_log("VSLQ_NEW", LOG_DEBUG, "Failed to instantiate the VSL query: %s\n", + VSL_Error(*vsl)); + return -1; + } + return 0; +} + + int main (int argc, char **argv) { char errstr[4096]; char hostname[1024]; @@ -2063,7 +2097,7 @@ * in the header file because used in both config.c and varnishkafka.c */ conf.vsl = VSL_New(); - struct VSL_cursor *vsl_cursor; + struct VSL_cursor *vsl_cursor = NULL; conf.vsm = VSM_New(); if (conf.T_flag) { @@ -2103,40 +2137,19 @@ } } - if (VSM_Open(conf.vsm) < 0) { - vk_log("VSM_OPEN", LOG_ERR, "Failed to open Varnish VSL: %s\n", VSM_Error(conf.vsm)); - varnish_api_cleaning(); - exit(1); - } - vsl_cursor = VSL_CursorVSM(conf.vsl, conf.vsm, VSL_COPT_TAIL | VSL_COPT_BATCH); - if (vsl_cursor == NULL) { - vk_log("VSL_CursorVSM", LOG_ERR, "Failed to obtain a cursor for the SHM log: %s\n", - VSL_Error(conf.vsl)); - varnish_api_cleaning(); - exit(1); - } - - /* Setting VSLQ query */ - if (conf.q_flag) { - conf.vslq = VSLQ_New(conf.vsl, &vsl_cursor, VSL_g_request, conf.q_flag_query); - } else { - conf.vslq = VSLQ_New(conf.vsl, &vsl_cursor, VSL_g_request, NULL); - } - if (conf.vslq == NULL) { - vk_log("VSLQ_NEW", LOG_ERR, "Failed to instantiate the VSL query: %s\n", - VSL_Error(conf.vsl)); - varnish_api_cleaning(); - exit(1); - } - /* Main dispatcher loop depending on outputter */ conf.run = 1; conf.pret = 0; /* time struct to sleep for 10ms */ - struct timespec wait_for; - wait_for.tv_sec = 0; - wait_for.tv_nsec = 10000000L; + struct timespec duration_10ms; + duration_10ms.tv_sec = 0; + duration_10ms.tv_nsec = 10000000L; + + /* time struct to sleep for 100ms */ + struct timespec duration_100ms; + duration_100ms.tv_sec = 0; + duration_100ms.tv_nsec = 100000000L; /* Creating a new logline (will be re-used across log transactions) */ struct logline *lp = NULL; @@ -2144,17 +2157,44 @@ return -1; while (conf.run) { + /* Attempt to connect to the shm log handle if not open. Varnishkafka + * will try to connect periodically to the shm log until it gets one, + * or it will keep trying endlessly. + * Two use cases: + * - varnishkafka is started but varnish is not. This behavior is + * useful if there is no strict start ordering for varnish and varnishkafka. + * - varnish gets restarted and the shm log handle is not valid anymore. + */ + if (conf.vsm != NULL && !VSM_IsOpen(conf.vsm)) { + if (varnish_api_open_handles(&conf.vsm, &conf.vsl, &vsl_cursor, + VSL_COPT_TAIL | VSL_COPT_BATCH, &conf.vslq, + conf.q_flag_query) == -1) { + nanosleep(&duration_100ms, NULL); + continue; + } else { + vk_log("VSLQ_Dispatch", LOG_ERR, "Log acquired!"); + /* Setting the sequence number back to zero to track + * the fact that Varnish abandoned the log, probably due to + * a restart (or simply that varnishkafka started before varnish). + */ + conf.sequence_number = conf.sequence_number_start; + } + } + int dispatch_status = VSLQ_Dispatch(conf.vslq, transaction_scribe, lp); /* Nothing to read from the shm handle, sleeping */ if (dispatch_status == 0) - nanosleep(&wait_for, NULL); + nanosleep(&duration_10ms, NULL); - /* Varnish log abandoned or overrun, closing gracefully */ + /* In case of Varnish log abandoned or overrun, the handle needs + * to be closed to allow a reconnect during the next while cycle + */ else if (dispatch_status <= -2) { vk_log("VSLQ_Dispatch", LOG_ERR, "Varnish Log abandoned or overrun."); - break; + VSM_Close(conf.vsm); } + /* EOF from the Varnish Log, closing gracefully */ else if (dispatch_status == -1) { vk_log("VSLQ_Dispatch", LOG_ERR, "Varnish Log EOF."); diff --git a/varnishkafka.h b/varnishkafka.h index 2994f2f..ef99b2c 100644 --- a/varnishkafka.h +++ b/varnishkafka.h @@ -163,6 +163,10 @@ struct tag **tag; uint64_t sequence_number; + /* Useful to reset seq from the right starting point, + * defined in the configuration file, when needed. + */ + uint64_t sequence_number_start; size_t scratch_size; /* Size of scratch buffer */ fmt_enc_t fmt_enc; -- To view, visit https://gerrit.wikimedia.org/r/311415 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I87048786b36de325aee663a873b12874588664fb Gerrit-PatchSet: 7 Gerrit-Project: operations/software/varnish/varnishkafka Gerrit-Branch: master Gerrit-Owner: Elukey <[email protected]> Gerrit-Reviewer: BBlack <[email protected]> Gerrit-Reviewer: Elukey <[email protected]> Gerrit-Reviewer: Ema <[email protected]> Gerrit-Reviewer: Ottomata <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
