This is an automated email from the ASF dual-hosted git repository.

nic443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git


The following commit(s) were added to refs/heads/master by this push:
     new 33b9632cb fix(etcd): load full data from etcd while worker restart (#12523)
33b9632cb is described below

commit 33b9632cb893d150b54c5c52efe02bc0adbe7dc3
Author: Nic <qiany...@api7.ai>
AuthorDate: Wed Aug 20 09:40:18 2025 +0800

    fix(etcd): load full data from etcd while worker restart (#12523)
    
    Signed-off-by: Nic <qiany...@api7.ai>
---
 apisix/cli/config.lua                    |  1 +
 apisix/core/config_etcd.lua              | 41 ++++++++++++++++++------
 conf/config.yaml.example                 |  3 ++
 t/cli/test_load_full_data_init_worker.sh | 55 ++++++++++++++++++++++++++++++++
 4 files changed, 90 insertions(+), 10 deletions(-)

diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua
index e1baea904..191a40a8d 100644
--- a/apisix/cli/config.lua
+++ b/apisix/cli/config.lua
@@ -72,6 +72,7 @@ local _M = {
     },
     enable_control = true,
     disable_sync_configuration_during_start = false,
+    worker_startup_time_threshold = 60,
     data_encryption = {
       enable_encrypt_fields = true,
       keyring = { "qeddd145sfvddff3", "edd1c9f0985e76a2" }
diff --git a/apisix/core/config_etcd.lua b/apisix/core/config_etcd.lua
index 995e047c0..eca62bdd2 100644
--- a/apisix/core/config_etcd.lua
+++ b/apisix/core/config_etcd.lua
@@ -74,6 +74,7 @@ if not is_http then
 end
 local created_obj  = {}
 local loaded_configuration = {}
+local configuration_loaded_time
 local watch_ctx
 
 
@@ -1158,6 +1159,22 @@ local function create_formatter(prefix)
 end
 
 
+local function init_loaded_configuration()
+    loaded_configuration = {}
+    local etcd_cli, prefix, err = etcd_apisix.new_without_proxy()
+    if not etcd_cli then
+        return "failed to start a etcd instance: " .. err
+    end
+
+    local res, err = readdir(etcd_cli, prefix, create_formatter(prefix))
+    if not res then
+        return err
+    end
+
+    configuration_loaded_time = ngx_time()
+end
+
+
 function _M.init()
     local local_conf, err = config_local.local_conf()
     if not local_conf then
@@ -1168,14 +1185,8 @@ function _M.init()
         return true
     end
 
-    -- don't go through proxy during start because the proxy is not available
-    local etcd_cli, prefix, err = etcd_apisix.new_without_proxy()
-    if not etcd_cli then
-        return nil, "failed to start a etcd instance: " .. err
-    end
-
-    local res, err = readdir(etcd_cli, prefix, create_formatter(prefix))
-    if not res then
+    local err = init_loaded_configuration()
+    if err then
         return nil, err
     end
 
@@ -1190,8 +1201,18 @@ function _M.init_worker()
         return nil, err
     end
 
-    if table.try_read_attr(local_conf, "apisix", "disable_sync_configuration_during_start") then
-        return true
+    local threshold = table.try_read_attr(local_conf, "apisix",
+                                    "worker_startup_time_threshold") or 60
+    -- if the startup time of a worker differs significantly from that of the master process,
+    -- we consider it to have restarted, and at this point,
+    -- it is necessary to reload the full configuration from etcd.
+    if configuration_loaded_time and ngx_time() - configuration_loaded_time > threshold then
+        log.warn("master process has been running for a long time, ",
+                     "reloading the full configuration from etcd for this new worker")
+        local err = init_loaded_configuration()
+        if err then
+            return nil, err
+        end
     end
 
     return true
diff --git a/conf/config.yaml.example b/conf/config.yaml.example
index 1950571aa..408fb5138 100644
--- a/conf/config.yaml.example
+++ b/conf/config.yaml.example
@@ -116,6 +116,9 @@ apisix:
 
   disable_sync_configuration_during_start: false  # Safe exit. TO BE REMOVED.
 
+  # This time will be used to distinguish whether the worker is started first time or restarted due to a crash, unit: second.
+  worker_startup_time_threshold: 60
+
   data_encryption:                # Data encryption settings.
     enable_encrypt_fields: true   # Whether enable encrypt fields specified in `encrypt_fields` in plugin schema.
     keyring:                      # This field is used to encrypt the private key of SSL and the `encrypt_fields`
diff --git a/t/cli/test_load_full_data_init_worker.sh b/t/cli/test_load_full_data_init_worker.sh
new file mode 100755
index 000000000..35e7fe2ab
--- /dev/null
+++ b/t/cli/test_load_full_data_init_worker.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+. ./t/cli/common.sh
+
+git checkout conf/config.yaml
+
+echo '
+apisix:
+  worker_startup_time_threshold: 3
+' > conf/config.yaml
+
+make run
+
+sleep 5
+
+MASTER_PID=$(cat logs/nginx.pid)
+
+worker_pids=$(pgrep -P "$MASTER_PID" -f "nginx: worker process" || true)
+
+if [ -n "$worker_pids" ]; then
+    pid=$(echo "$worker_pids" | shuf -n 1)
+    echo "killing worker $pid (master $MASTER_PID)"
+    kill "$pid"
+else
+    echo "failed: no worker process found for master $MASTER_PID"
+    exit 1
+fi
+
+sleep 2
+
+if ! grep 'master process has been running for a long time, reloading the full configuration from etcd for this new worker' logs/error.log; then
+    echo "failed: could not detect new worker be started"
+    exit 1
+fi
+
+echo "passed: load full configuration for new worker"
+
+make stop

Reply via email to