This is an automated email from the ASF dual-hosted git repository.

nic443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git


The following commit(s) were added to refs/heads/master by this push:
     new 0b959f583 feat: kubernetes discovery readiness check (#12852)
0b959f583 is described below

commit 0b959f5835a8b304bc175bddfaa912878c54a879
Author: aie <[email protected]>
AuthorDate: Wed Jan 28 13:34:47 2026 +0800

    feat: kubernetes discovery readiness check (#12852)
---
 apisix/discovery/kubernetes/init.lua |  86 +++++++++++++++++++++++----
 apisix/init.lua                      | 105 ++++++++++++++++++++-------------
 t/kubernetes/discovery/kubernetes3.t | 110 ++++++++++++++++++++++++++++++-----
 3 files changed, 233 insertions(+), 68 deletions(-)
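
For context, the readiness contract this commit introduces can be condensed into a short Lua sketch. The flag name, dict name, and log messages below are taken from the diff that follows; the surrounding module scaffolding (mark_ready as a separate exported function, the hard-coded single-mode dict name) is illustrative only and does not mirror the repository layout:

    local core = require("apisix.core")

    local _M = {}

    -- producer side: after the first full LIST completes, the watcher records a
    -- flag in the discovery shared dict so every worker can observe readiness
    function _M.mark_ready(endpoint_dict)
        local ok, err = endpoint_dict:safe_set("discovery_ready", true)
        if not ok then
            core.log.error("set discovery_ready flag into discovery DICT failed, ", err)
        end
    end

    -- consumer side: /status/ready calls this to decide between 200 and 503
    function _M.check_discovery_ready()
        local dict = ngx.shared["kubernetes"]    -- single-mode dict name
        if not dict then
            return false, "failed to get lua_shared_dict: kubernetes"
        end
        if not dict:get("discovery_ready") then
            return false, "kubernetes discovery not ready"
        end
        return true
    end

    return _M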

diff --git a/apisix/discovery/kubernetes/init.lua b/apisix/discovery/kubernetes/init.lua
index b6092d8ee..1b85181b6 100644
--- a/apisix/discovery/kubernetes/init.lua
+++ b/apisix/discovery/kubernetes/init.lua
@@ -211,17 +211,21 @@ end
 
 
 local function post_list(handle)
-    if not handle.existing_keys or not handle.current_keys_hash then
-        return
-    end
-    for _, key in ipairs(handle.existing_keys) do
-        if not handle.current_keys_hash[key] then
-            core.log.info("kubernetes discovery module find dirty data in shared dict, key:", key)
-            handle.endpoint_dict:delete(key)
+    if handle.existing_keys and handle.current_keys_hash then
+        for _, key in ipairs(handle.existing_keys) do
+            if not handle.current_keys_hash[key] then
+                core.log.info("kubernetes discovery module found dirty data in shared dict, key: ",
+                              key)
+                handle.endpoint_dict:delete(key)
+            end
         end
+        handle.existing_keys = nil
+        handle.current_keys_hash = nil
+    end
+    local _, err = handle.endpoint_dict:safe_set("discovery_ready", true)
+    if err then
+        core.log.error("set discovery_ready flag into discovery DICT failed, ", err)
     end
-    handle.existing_keys = nil
-    handle.current_keys_hash = nil
 end
 
 
@@ -436,18 +440,24 @@ local function start_fetch(handle)
     ngx.timer.at(0, timer_runner)
 end
 
-local function get_endpoint_dict(id)
+
+local function get_endpoint_dict_name(id)
     local shm = "kubernetes"
 
-    if id and #id > 0 then
+    if id and type(id) == "string" and #id > 0 then
         shm = shm .. "-" .. id
     end
 
     if not is_http then
         shm = shm .. "-stream"
     end
+    return shm
+end
+
 
-    return ngx.shared[shm]
+local function get_endpoint_dict(id)
+    local dict_name = get_endpoint_dict_name(id)
+    return ngx.shared[dict_name]
 end
 
 
@@ -684,6 +694,7 @@ local function dump_endpoints_from_dict(endpoint_dict)
     return endpoints
 end
 
+
 function _M.dump_data()
     local discovery_conf = local_conf.discovery.kubernetes
     local eps = {}
@@ -715,4 +726,55 @@ function _M.dump_data()
 end
 
 
+local function check_ready(id)
+    local endpoint_dict = get_endpoint_dict(id)
+    if not endpoint_dict then
+        core.log.error("failed to get lua_shared_dict:", get_endpoint_dict_name(id),
+                       ", please check your APISIX version")
+        return false, "failed to get lua_shared_dict: " .. get_endpoint_dict_name(id)
+            .. ", please check your APISIX version"
+    end
+    -- check flag
+    local ready = endpoint_dict:get("discovery_ready")
+    if not ready then
+        core.log.warn("kubernetes discovery not ready")
+        return false, "kubernetes discovery not ready"
+    end
+    return true
+end
+
+
+local function single_mode_check_discovery_ready()
+    local _, err = check_ready()
+    if err then
+        return false, err
+    end
+    return true
+end
+
+
+local function multiple_mode_check_discovery_ready(confs)
+    for _, conf in ipairs(confs) do
+        local _, err = check_ready(conf.id)
+        if err then
+            return false, err
+        end
+    end
+    return true
+end
+
+
+function _M.check_discovery_ready()
+    local discovery_conf = local_conf.discovery and local_conf.discovery.kubernetes
+    if not discovery_conf then
+        return true
+    end
+    if #discovery_conf == 0 then
+        return single_mode_check_discovery_ready()
+    else
+        return multiple_mode_check_discovery_ready(discovery_conf)
+    end
+end
+
+
 return _M
diff --git a/apisix/init.lua b/apisix/init.lua
index e1aedabd2..1fb090025 100644
--- a/apisix/init.lua
+++ b/apisix/init.lua
@@ -47,6 +47,7 @@ local debug           = require("apisix.debug")
 local pubsub_kafka    = require("apisix.pubsub.kafka")
 local resource        = require("apisix.resource")
 local trusted_addresses_util = require("apisix.utils.trusted-addresses")
+local discovery = require("apisix.discovery.init").discovery
 local ngx             = ngx
 local get_method      = ngx.req.get_method
 local ngx_exit        = ngx.exit
@@ -121,7 +122,6 @@ function _M.http_init_worker()
 
     core.lrucache.init_worker()
 
-    local discovery = require("apisix.discovery.init").discovery
     if discovery and discovery.init_worker then
         discovery.init_worker()
     end
@@ -976,54 +976,78 @@ function _M.status()
     core.response.exit(200, core.json.encode({ status = "ok" }))
 end
 
-function _M.status_ready()
-    local local_conf = core.config.local_conf()
-    local role = core.table.try_read_attr(local_conf, "deployment", "role")
-    local provider = core.table.try_read_attr(local_conf, "deployment", "role_" ..
-                                              role, "config_provider")
-    if provider == "yaml" or provider == "etcd" then
-        local status_shdict = ngx.shared["status-report"]
-        local ids = status_shdict:get_keys()
-        local error
-        local worker_count = ngx.worker.count()
-       if #ids ~= worker_count then
-            core.log.warn("worker count: ", worker_count, " but status report count: ", #ids)
-            error = "worker count: " .. ngx.worker.count() ..
-            " but status report count: " .. #ids
-        end
-        if error then
-            core.response.exit(503, core.json.encode({
-                status = "error",
-                error = error
-            }))
-            return
-        end
-        for _, id in ipairs(ids) do
-            local ready = status_shdict:get(id)
+
+local function discovery_ready_check()
+    local discovery_type = local_conf.discovery
+    if not discovery_type then
+        return true
+    end
+    for discovery_name, _ in pairs(discovery_type) do
+        local dis_module = discovery[discovery_name]
+        if dis_module.check_discovery_ready then
+            local ready, message = dis_module.check_discovery_ready()
             if not ready then
-                core.log.warn("worker id: ", id, " has not received configuration")
-                error = "worker id: " .. id ..
-                                  " has not received configuration"
-                break
+                return false, message
             end
         end
+    end
+    return true
+end
 
-        if error then
-            core.response.exit(503, core.json.encode({
-                status = "error",
-                error = error
-            }))
-            return
+local function config_ready_check()
+    local role = core.table.try_read_attr(local_conf, "deployment", "role")
+    local provider = core.table.try_read_attr(local_conf, "deployment",
+                                              "role_" .. role, "config_provider")
+    if provider ~= "yaml" and provider ~= "etcd" then
+        return false, "unknown config provider: " .. tostring(provider)
+    end
+
+    local status_shdict = ngx.shared["status-report"]
+    if not status_shdict then
+        core.log.error("failed to get ngx.shared dict status-report")
+        return false, "failed to get ngx.shared dict status-report"
+    end
+    local ids = status_shdict:get_keys()
+
+    local worker_count = ngx.worker.count()
+    if #ids ~= worker_count then
+        local error = "worker count: " .. worker_count .. " but status report count: " .. #ids
+        core.log.error(error)
+        return false, error
+    end
+    for _, id in ipairs(ids) do
+        local ready = status_shdict:get(id)
+        if not ready then
+            local error = "worker id: " .. id .. " has not received configuration"
+            core.log.error(error)
+            return false, error
         end
+    end
+
+    return true
+end
 
-        core.response.exit(200, core.json.encode({ status = "ok" }))
+function _M.status_ready()
+    local ready, message = config_ready_check()
+    if not ready then
+        core.response.exit(503, core.json.encode({
+            status = "error",
+            error = message
+        }))
         return
     end
 
-    core.response.exit(503, core.json.encode({
-        status = "error",
-        message = "unknown config provider: " .. tostring(provider)
-    }), { ["Content-Type"] = "application/json" })
+    ready, message = discovery_ready_check()
+    if not ready then
+        core.response.exit(503, core.json.encode({
+            status = "error",
+            error = message
+        }))
+        return
+    end
+
+    core.response.exit(200, core.json.encode({ status = "ok" }))
+    return
 end
 
 
@@ -1182,7 +1206,6 @@ function _M.stream_init_worker()
     -- for admin api of standalone mode, we need to startup background timer and patch schema etc.
     require("apisix.admin.init").init_worker()
 
-    local discovery = require("apisix.discovery.init").discovery
     if discovery and discovery.init_worker then
         discovery.init_worker()
     end
diff --git a/t/kubernetes/discovery/kubernetes3.t b/t/kubernetes/discovery/kubernetes3.t
index 441f55d66..99b3dff37 100644
--- a/t/kubernetes/discovery/kubernetes3.t
+++ b/t/kubernetes/discovery/kubernetes3.t
@@ -242,10 +242,22 @@ _EOC_
             }
         }
 
-        location /t {
+        location /ready_check {
             content_by_lua_block {
-                ngx.sleep(2)
-                ngx.exit(200)
+                local http = require("resty.http")
+                local healthcheck_uri = "http://127.0.0.1:7085" .. "/status/ready"
+                for i = 1, 4 do
+                    local httpc = http.new()
+                    local res, _ = httpc:request_uri(healthcheck_uri, {method = "GET", keepalive = false})
+                    if res.status == 200 then
+                        ngx.status = res.status
+                        return
+                    end
+                    ngx.sleep(1)
+                end
+                local httpc = http.new()
+                local res, _ = httpc:request_uri(healthcheck_uri, {method = "GET", keepalive = false})
+                ngx.status = res.status
             }
         }
 
@@ -516,13 +528,13 @@ GET /dump
         core.log.error("set dirty_key to dict fail, err: ", err)
     end
 --- request
-GET /t
+GET /ready_check
 --- no_error_log
 [error]
 --- grep_error_log eval
-qr/kubernetes discovery module find dirty data in shared dict/
+qr/kubernetes discovery module found dirty data in shared dict, key: dirty_key/
 --- grep_error_log_out
-kubernetes discovery module find dirty data in shared dict
+kubernetes discovery module found dirty data in shared dict, key: dirty_key
 
 
 
@@ -539,13 +551,13 @@ kubernetes discovery module find dirty data in shared dict
         core.log.error("set dirty_key to dict fail, err: ", err)
     end
 --- request
-GET /t
+GET /ready_check
 --- no_error_log
 [error]
 --- grep_error_log eval
-qr/kubernetes discovery module find dirty data in shared dict/
+qr/kubernetes discovery module found dirty data in shared dict, key: dirty_key/
 --- grep_error_log_out
-kubernetes discovery module find dirty data in shared dict
+kubernetes discovery module found dirty data in shared dict, key: dirty_key
 
 
 
@@ -576,13 +588,13 @@ discovery:
         core.log.error("set dirty_key to dict fail, err: ", err)
     end
 --- request
-GET /t
+GET /ready_check
 --- no_error_log
 [error]
 --- grep_error_log eval
-qr/kubernetes discovery module find dirty data in shared dict/
+qr/kubernetes discovery module found dirty data in shared dict, key: dirty_key/
 --- grep_error_log_out
-kubernetes discovery module find dirty data in shared dict
+kubernetes discovery module found dirty data in shared dict, key: dirty_key
 
 
 
@@ -622,10 +634,78 @@ discovery:
         core.log.error("set dirty_key to dict fail, err: ", err)
     end
 --- request
-GET /t
+GET /ready_check
 --- no_error_log
 [error]
 --- grep_error_log eval
-qr/kubernetes discovery module find dirty data in shared dict/
+qr/kubernetes discovery module found dirty data in shared dict, key: dirty_key/
 --- grep_error_log_out
-kubernetes discovery module find dirty data in shared dict
+kubernetes discovery module found dirty data in shared dict, key: dirty_key
+
+
+
+=== TEST 11: test healthcheck unready
+--- log_level: warn
+--- yaml_config
+apisix:
+  node_listen: 1984
+deployment:
+  role: data_plane
+  role_data_plane:
+    config_provider: yaml
+discovery:
+  kubernetes:
+    - id: first
+      service:
+        host: "127.0.0.1"
+        port: "6443"
+      client:
+        token_file: "/tmp/var/run/secrets/kubernetes.io/serviceaccount/token"
+      watch_endpoint_slices: false
+    - id: second
+      service:
+        schema: "http"
+        host: "127.0.0.1"
+        port: "6446"
+      client:
+        token_file: "/tmp/var/run/secrets/kubernetes.io/serviceaccount/token"
+      watch_endpoint_slices: false
+--- request
+GET /ready_check
+--- error_code: 503
+--- grep_error_log eval
+qr/connect apiserver failed/
+--- grep_error_log_out
+connect apiserver failed
+
+
+
+=== TEST 12: test healthcheck ready
+--- log_level: warn
+--- yaml_config
+apisix:
+  node_listen: 1984
+deployment:
+  role: data_plane
+  role_data_plane:
+    config_provider: yaml
+discovery:
+  kubernetes:
+    - id: first
+      service:
+        host: "127.0.0.1"
+        port: "6443"
+      client:
+        token_file: "/tmp/var/run/secrets/kubernetes.io/serviceaccount/token"
+      watch_endpoint_slices: false
+    - id: second
+      service:
+        schema: "http"
+        host: "127.0.0.1"
+        port: "6445"
+      client:
+        token_file: "/tmp/var/run/secrets/kubernetes.io/serviceaccount/token"
+      watch_endpoint_slices: false
+--- request
+GET /ready_check
+--- error_code: 200
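
For reference, the retry pattern exercised by the /ready_check test location above can be written as a standalone helper. This is only a sketch: lua-resty-http and the status API address 127.0.0.1:7085 come from the test harness, and wait_until_ready is an illustrative name, not part of the commit:

    local http = require("resty.http")

    -- poll /status/ready until it returns 200 or the retry budget runs out,
    -- e.g. local ok, err = wait_until_ready(4)
    local function wait_until_ready(retries)
        local uri = "http://127.0.0.1:7085/status/ready"
        for _ = 1, retries do
            local httpc = http.new()
            local res, err = httpc:request_uri(uri, {method = "GET", keepalive = false})
            if res and res.status == 200 then
                return true
            end
            ngx.log(ngx.WARN, "status API not ready yet: ", err or res.status)
            ngx.sleep(1)    -- requires a phase where ngx.sleep is allowed
        end
        return false, "status API still not ready after " .. retries .. " attempts"
    end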
