This is an automated email from the ASF dual-hosted git repository.
wenjin272 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/flink-agents.git
The following commit(s) were added to refs/heads/main by this push:
new 8d8d5d77 [ci] Refactor CI of test cases in integration module (#702)
8d8d5d77 is described below
commit 8d8d5d7702bd7a0c99d0fff183d8daa94e4d14f0
Author: Weiqing Yang <[email protected]>
AuthorDate: Tue Jun 9 00:08:34 2026 -0700
[ci] Refactor CI of test cases in integration module (#702)
Tag the 10 test modules under
python/flink_agents/integrations/**/tests/ that require live external
services (Ollama, DashScope, OpenAI, Azure, Anthropic, Chroma) or
optional Python deps (mem0) with a new @pytest.mark.integration
marker. Register the marker in python/pyproject.toml with
strict_markers enabled. Update tools/ut.sh to split the unit and
integration arms via the new marker:
- ut-python (run_e2e=false): adds -m "not integration" to the
existing -k "not e2e_tests" filter. This removes 70 tests from the
unit-test job (was 518, now 448); those tests were previously
skipped silently in 5 of 6 matrix cells.
- it-python (run_e2e=true): runs two sequential pytest invocations
-- the existing -k "e2e_tests_integration" (27 tests, unchanged) and
the new -m "integration" (70 tests) -- and aggregates their exit
codes. On the integration arm, pytest exit code 5 (no tests
collected) is explicitly converted to a failure, to defend against
-m selector typos that --strict-markers cannot catch.
No change to .github/workflows/ci.yml is required.
test_mcp.py is intentionally not tagged: it self-hosts an MCP server
via multiprocessing and runs deterministically without external deps.
Closes #161
---
.../anthropic/tests/test_anthropic_chat_model.py | 2 ++
.../azure/tests/test_azure_openai_chat_model.py | 2 ++
.../openai/tests/test_openai_chat_model.py | 2 ++
.../chat_models/tests/test_ollama_chat_model.py | 2 ++
.../chat_models/tests/test_tongyi_chat_model.py | 2 ++
.../local/tests/test_ollama_embedding_model.py | 2 ++
.../tests/test_openai_embedding_model.py | 2 ++
.../tests/test_tongyi_embedding_model.py | 2 ++
.../chroma/tests/test_chroma_vector_store.py | 2 ++
.../mem0/tests/test_mem0_vector_store.py | 9 ++++--
python/pyproject.toml | 6 ++++
tools/ut.sh | 36 +++++++++++++++++++---
12 files changed, 62 insertions(+), 7 deletions(-)
diff --git
a/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py
b/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py
index 9741e054..1aeb0ce5 100644
---
a/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py
+++
b/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py
@@ -30,6 +30,8 @@ from
flink_agents.integrations.chat_models.anthropic.anthropic_chat_model import
AnthropicChatModelSetup,
)
+pytestmark = pytest.mark.integration
+
test_model = os.environ.get("TEST_MODEL")
api_key = os.environ.get("TEST_API_KEY")
diff --git
a/python/flink_agents/integrations/chat_models/azure/tests/test_azure_openai_chat_model.py
b/python/flink_agents/integrations/chat_models/azure/tests/test_azure_openai_chat_model.py
index ce69d42e..983bbb47 100644
---
a/python/flink_agents/integrations/chat_models/azure/tests/test_azure_openai_chat_model.py
+++
b/python/flink_agents/integrations/chat_models/azure/tests/test_azure_openai_chat_model.py
@@ -30,6 +30,8 @@ from
flink_agents.integrations.chat_models.azure.azure_openai_chat_model import
from flink_agents.plan.function import PythonFunction
from flink_agents.plan.tools.function_tool import FunctionTool
+pytestmark = pytest.mark.integration
+
test_deployment = os.environ.get("TEST_AZURE_DEPLOYMENT")
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
diff --git
a/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py
b/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py
index 7ccb6c22..ff4d2bb3 100644
---
a/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py
+++
b/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py
@@ -31,6 +31,8 @@ from
flink_agents.integrations.chat_models.openai.openai_chat_model import (
from flink_agents.plan.function import PythonFunction
from flink_agents.plan.tools.function_tool import FunctionTool
+pytestmark = pytest.mark.integration
+
test_model = os.environ.get("TEST_MODEL")
api_key = os.environ.get("TEST_API_KEY")
api_base_url = os.environ.get("TEST_API_BASE_URL")
diff --git
a/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py
b/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py
index 6a2a4711..27f07f34 100644
---
a/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py
+++
b/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py
@@ -34,6 +34,8 @@ from flink_agents.integrations.chat_models.ollama_chat_model
import (
from flink_agents.plan.function import PythonFunction
from flink_agents.plan.tools.function_tool import FunctionTool
+pytestmark = pytest.mark.integration
+
test_model = os.environ.get("OLLAMA_CHAT_MODEL", "qwen3:1.7b")
current_dir = Path(__file__).parent
diff --git
a/python/flink_agents/integrations/chat_models/tests/test_tongyi_chat_model.py
b/python/flink_agents/integrations/chat_models/tests/test_tongyi_chat_model.py
index c33a792c..99742251 100644
---
a/python/flink_agents/integrations/chat_models/tests/test_tongyi_chat_model.py
+++
b/python/flink_agents/integrations/chat_models/tests/test_tongyi_chat_model.py
@@ -32,6 +32,8 @@ from flink_agents.integrations.chat_models.tongyi_chat_model
import (
from flink_agents.plan.function import PythonFunction
from flink_agents.plan.tools.function_tool import FunctionTool
+pytestmark = pytest.mark.integration
+
test_model = os.environ.get("TONGYI_CHAT_MODEL", "qwen-plus")
api_key_available = "DASHSCOPE_API_KEY" in os.environ
diff --git
a/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py
b/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py
index b770eea3..dd71c14c 100644
---
a/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py
+++
b/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py
@@ -31,6 +31,8 @@ from
flink_agents.integrations.embedding_models.local.ollama_embedding_model imp
OllamaEmbeddingModelSetup,
)
+pytestmark = pytest.mark.integration
+
test_model = os.environ.get("OLLAMA_EMBEDDING_MODEL", "all-minilm:22m")
current_dir = Path(__file__).parent
diff --git
a/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py
b/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py
index e76cc3fa..49907340 100644
---
a/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py
+++
b/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py
@@ -27,6 +27,8 @@ from
flink_agents.integrations.embedding_models.openai_embedding_model import (
OpenAIEmbeddingModelSetup,
)
+pytestmark = pytest.mark.integration
+
test_model = os.environ.get("TEST_EMBEDDING_MODEL", "text-embedding-3-small")
api_key = os.environ.get("TEST_API_KEY")
diff --git
a/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py
b/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py
index 75e47706..b60c7559 100644
---
a/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py
+++
b/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py
@@ -29,6 +29,8 @@ from
flink_agents.integrations.embedding_models.tongyi_embedding_model import (
TongyiEmbeddingModelSetup,
)
+pytestmark = pytest.mark.integration
+
test_model = os.environ.get("TONGYI_EMBEDDING_MODEL", "text-embedding-v4")
api_key_available = "DASHSCOPE_API_KEY" in os.environ
diff --git
a/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py
b/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py
index 754798cc..135ded8b 100644
---
a/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py
+++
b/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py
@@ -42,6 +42,8 @@ from
flink_agents.integrations.vector_stores.chroma.chroma_vector_store import (
_translate_filters_to_chroma_where,
)
+pytestmark = pytest.mark.integration
+
api_key = os.environ.get("TEST_API_KEY")
tenant = os.environ.get("TEST_TENANT")
database = os.environ.get("TEST_DATABASE")
diff --git
a/python/flink_agents/integrations/vector_stores/mem0/tests/test_mem0_vector_store.py
b/python/flink_agents/integrations/vector_stores/mem0/tests/test_mem0_vector_store.py
index 357d0352..27253b96 100644
---
a/python/flink_agents/integrations/vector_stores/mem0/tests/test_mem0_vector_store.py
+++
b/python/flink_agents/integrations/vector_stores/mem0/tests/test_mem0_vector_store.py
@@ -62,9 +62,12 @@ if _backend_available:
Mem0VectorStore,
)
-pytestmark = pytest.mark.skipif(
- not _backend_available, reason="mem0 / chromadb is not available"
-)
+pytestmark = [
+ pytest.mark.skipif(
+ not _backend_available, reason="mem0 / chromadb is not available"
+ ),
+ pytest.mark.integration,
+]
# ---------------------------------------------------------------------------
diff --git a/python/pyproject.toml b/python/pyproject.toml
index cbd0d5b6..2d2d8d41 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -202,3 +202,9 @@ strict = true
[tool.ruff.format]
docstring-code-format = true
+
+[tool.pytest.ini_options]
+markers = [
+ "integration: tests that require live external services (Ollama,
DashScope, OpenAI, Azure, Anthropic, Chroma, mem0). Deselect with -m 'not
integration'.",
+]
+strict_markers = true
diff --git a/tools/ut.sh b/tools/ut.sh
index 8b9de711..a40303e4 100755
--- a/tools/ut.sh
+++ b/tools/ut.sh
@@ -229,6 +229,7 @@ python_tests() {
if $run_e2e; then
# There will be an individual build step before run e2e test
for including java dist
uv pip install apache-flink~=${version}.0
+ # Arm 1: existing e2e tests (directory-based selector).
uv run --no-sync pytest flink_agents \
-s \
-k "e2e_tests_integration" \
@@ -236,15 +237,32 @@ python_tests() {
--reruns-delay 5 \
-o log_cli=true \
-o log_cli_level=${LOG_LEVEL:-CRITICAL}
+ rc1=$?
+ # Arm 2: integration-marked tests (registered in
pyproject.toml).
+ # Trap exit code 5 (no tests collected) as failure to defend
+ # against -m selector typos that --strict-markers does not
catch.
+ uv run --no-sync pytest flink_agents \
+ -s \
+ -m "integration" \
+ -o log_cli=true \
+ -o log_cli_level=${LOG_LEVEL:-CRITICAL}
+ rc2=$?
+ if [ $rc2 -eq 5 ]; then rc2=1; fi
+ # Logical-OR aggregation: any nonzero exit on either arm
yields testcode=1.
+ # Side effect: pytest exit 5 (no tests collected) becomes
failure on BOTH
+ # arms, not just arm 2 — which is the correct semantics (zero
collection
+ # on either arm indicates a selector regression).
+ testcode=$((rc1 || rc2))
else
uv sync --extra test
uv pip install apache-flink~=${version}.0
uv run --no-sync pytest flink_agents \
-k "not e2e_tests" \
+ -m "not integration" \
-o log_cli=true \
- -o log_cli_level=${LOG_LEVEL:-CRITICAL}
+ -o log_cli_level=${LOG_LEVEL:-CRITICAL}
+ testcode=$?
fi
- testcode=$?
else
if $verbose; then
echo "uv not found, falling back to pip"
@@ -262,10 +280,20 @@ python_tests() {
fi
if $run_e2e; then
pytest flink_agents -k "e2e_tests_integration" --reruns 2
--reruns-delay 5 -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
+ rc1=$?
+ # Arm 2: integration-marked tests; trap exit code 5 as failure.
+ pytest flink_agents -m "integration" -o log_cli=true -o
log_cli_level=${LOG_LEVEL:-OFF}
+ rc2=$?
+ if [ $rc2 -eq 5 ]; then rc2=1; fi
+ # Logical-OR aggregation: any nonzero exit on either arm
yields testcode=1.
+ # Side effect: pytest exit 5 (no tests collected) becomes
failure on BOTH
+ # arms, not just arm 2 — which is the correct semantics (zero
collection
+ # on either arm indicates a selector regression).
+ testcode=$((rc1 || rc2))
else
- pytest flink_agents -k "not e2e_tests" -o log_cli=true -o
log_cli_level=${LOG_LEVEL:-OFF}
+ pytest flink_agents -k "not e2e_tests" -m "not integration" -o
log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
+ testcode=$?
fi
- testcode=$?
fi
# Handle pytest exit codes