This is an automated email from the ASF dual-hosted git repository. nic443 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push: new 33b9632cb fix(etcd): load full data from etcd while worker restart (#12523) 33b9632cb is described below commit 33b9632cb893d150b54c5c52efe02bc0adbe7dc3 Author: Nic <qiany...@api7.ai> AuthorDate: Wed Aug 20 09:40:18 2025 +0800 fix(etcd): load full data from etcd while worker restart (#12523) Signed-off-by: Nic <qiany...@api7.ai> --- apisix/cli/config.lua | 1 + apisix/core/config_etcd.lua | 41 ++++++++++++++++++------ conf/config.yaml.example | 3 ++ t/cli/test_load_full_data_init_worker.sh | 55 ++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 10 deletions(-) diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua index e1baea904..191a40a8d 100644 --- a/apisix/cli/config.lua +++ b/apisix/cli/config.lua @@ -72,6 +72,7 @@ local _M = { }, enable_control = true, disable_sync_configuration_during_start = false, + worker_startup_time_threshold = 60, data_encryption = { enable_encrypt_fields = true, keyring = { "qeddd145sfvddff3", "edd1c9f0985e76a2" } diff --git a/apisix/core/config_etcd.lua b/apisix/core/config_etcd.lua index 995e047c0..eca62bdd2 100644 --- a/apisix/core/config_etcd.lua +++ b/apisix/core/config_etcd.lua @@ -74,6 +74,7 @@ if not is_http then end local created_obj = {} local loaded_configuration = {} +local configuration_loaded_time local watch_ctx @@ -1158,6 +1159,22 @@ local function create_formatter(prefix) end +local function init_loaded_configuration() + loaded_configuration = {} + local etcd_cli, prefix, err = etcd_apisix.new_without_proxy() + if not etcd_cli then + return "failed to start a etcd instance: " .. err + end + + local res, err = readdir(etcd_cli, prefix, create_formatter(prefix)) + if not res then + return err + end + + configuration_loaded_time = ngx_time() +end + + function _M.init() local local_conf, err = config_local.local_conf() if not local_conf then @@ -1168,14 +1185,8 @@ function _M.init() return true end - -- don't go through proxy during start because the proxy is not available - local etcd_cli, prefix, err = etcd_apisix.new_without_proxy() - if not etcd_cli then - return nil, "failed to start a etcd instance: " .. err - end - - local res, err = readdir(etcd_cli, prefix, create_formatter(prefix)) - if not res then + local err = init_loaded_configuration() + if err then return nil, err end @@ -1190,8 +1201,18 @@ function _M.init_worker() return nil, err end - if table.try_read_attr(local_conf, "apisix", "disable_sync_configuration_during_start") then - return true + local threshold = table.try_read_attr(local_conf, "apisix", + "worker_startup_time_threshold") or 60 + -- if the startup time of a worker differs significantly from that of the master process, + -- we consider it to have restarted, and at this point, + -- it is necessary to reload the full configuration from etcd. + if configuration_loaded_time and ngx_time() - configuration_loaded_time > threshold then + log.warn("master process has been running for a long time, ", + "reloading the full configuration from etcd for this new worker") + local err = init_loaded_configuration() + if err then + return nil, err + end end return true diff --git a/conf/config.yaml.example b/conf/config.yaml.example index 1950571aa..408fb5138 100644 --- a/conf/config.yaml.example +++ b/conf/config.yaml.example @@ -116,6 +116,9 @@ apisix: disable_sync_configuration_during_start: false # Safe exit. TO BE REMOVED. + # This time will be used to distinguish whether the worker is started first time or restarted due to a crash, unit: second. + worker_startup_time_threshold: 60 + data_encryption: # Data encryption settings. enable_encrypt_fields: true # Whether enable encrypt fields specified in `encrypt_fields` in plugin schema. keyring: # This field is used to encrypt the private key of SSL and the `encrypt_fields` diff --git a/t/cli/test_load_full_data_init_worker.sh b/t/cli/test_load_full_data_init_worker.sh new file mode 100755 index 000000000..35e7fe2ab --- /dev/null +++ b/t/cli/test_load_full_data_init_worker.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +. ./t/cli/common.sh + +git checkout conf/config.yaml + +echo ' +apisix: + worker_startup_time_threshold: 3 +' > conf/config.yaml + +make run + +sleep 5 + +MASTER_PID=$(cat logs/nginx.pid) + +worker_pids=$(pgrep -P "$MASTER_PID" -f "nginx: worker process" || true) + +if [ -n "$worker_pids" ]; then + pid=$(echo "$worker_pids" | shuf -n 1) + echo "killing worker $pid (master $MASTER_PID)" + kill "$pid" +else + echo "failed: no worker process found for master $MASTER_PID" + exit 1 +fi + +sleep 2 + +if ! grep 'master process has been running for a long time, reloading the full configuration from etcd for this new worker' logs/error.log; then + echo "failed: could not detect new worker be started" + exit 1 +fi + +echo "passed: load full configuration for new worker" + +make stop